NVPTXIntrinsics.td 289 KB


  1. //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. def immFloat0 : PatLeaf<(fpimm), [{
  9. float f = (float)N->getValueAPF().convertToFloat();
  10. return (f==0.0f);
  11. }]>;
  12. def immFloat1 : PatLeaf<(fpimm), [{
  13. float f = (float)N->getValueAPF().convertToFloat();
  14. return (f==1.0f);
  15. }]>;
  16. def immDouble0 : PatLeaf<(fpimm), [{
  17. double d = (double)N->getValueAPF().convertToDouble();
  18. return (d==0.0);
  19. }]>;
  20. def immDouble1 : PatLeaf<(fpimm), [{
  21. double d = (double)N->getValueAPF().convertToDouble();
  22. return (d==1.0);
  23. }]>;
  24. def AS_match {
  25. code generic = [{
  26. return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  27. }];
  28. code shared = [{
  29. return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  30. }];
  31. code global = [{
  32. return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  33. }];
  34. }
  35. // A node that will be replaced with the current PTX version.
  36. class PTX {
  37. SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
  38. return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  39. }]>;
  40. // (i32 0) will be XForm'ed to the currently used PTX version.
  41. dag version = (PTXVerXform (i32 0));
  42. }
  43. def ptx : PTX;
  44. // Generates list of n sequential register names.
  45. // E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2" ]
  46. class RegSeq<int n, string prefix> {
  47. list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
  48. [prefix # !sub(n, 1)]),
  49. []);
  50. }
  51. class THREADMASK_INFO<bit sync> {
  52. list<bit> ret = !if(sync, [0, 1], [0]);
  53. }
  54. //-----------------------------------
  55. // Synchronization and shuffle functions
  56. //-----------------------------------
  57. let isConvergent = true in {
  58. def INT_BARRIER0 : NVPTXInst<(outs), (ins),
  59. "bar.sync \t0;",
  60. [(int_nvvm_barrier0)]>;
  61. def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
  62. "bar.sync \t$src1;",
  63. [(int_nvvm_barrier_n Int32Regs:$src1)]>;
  64. def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
  65. "bar.sync \t$src1, $src2;",
  66. [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
  67. def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  68. !strconcat("{{ \n\t",
  69. ".reg .pred \t%p1; \n\t",
  70. "setp.ne.u32 \t%p1, $pred, 0; \n\t",
  71. "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
  72. "}}"),
  73. [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
  74. def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  75. !strconcat("{{ \n\t",
  76. ".reg .pred \t%p1; \n\t",
  77. ".reg .pred \t%p2; \n\t",
  78. "setp.ne.u32 \t%p1, $pred, 0; \n\t",
  79. "bar.red.and.pred \t%p2, 0, %p1; \n\t",
  80. "selp.u32 \t$dst, 1, 0, %p2; \n\t",
  81. "}}"),
  82. [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
  83. def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  84. !strconcat("{{ \n\t",
  85. ".reg .pred \t%p1; \n\t",
  86. ".reg .pred \t%p2; \n\t",
  87. "setp.ne.u32 \t%p1, $pred, 0; \n\t",
  88. "bar.red.or.pred \t%p2, 0, %p1; \n\t",
  89. "selp.u32 \t$dst, 1, 0, %p2; \n\t",
  90. "}}"),
  91. [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
  92. def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
  93. [(int_nvvm_bar_sync imm:$i)]>;
  94. def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
  95. [(int_nvvm_bar_warp_sync imm:$i)]>,
  96. Requires<[hasPTX60, hasSM30]>;
  97. def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
  98. [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
  99. Requires<[hasPTX60, hasSM30]>;
  100. def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
  101. [(int_nvvm_barrier_sync imm:$i)]>,
  102. Requires<[hasPTX60, hasSM30]>;
  103. def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
  104. [(int_nvvm_barrier_sync Int32Regs:$i)]>,
  105. Requires<[hasPTX60, hasSM30]>;
  106. def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
  107. "barrier.sync \t$id, $cnt;",
  108. [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
  109. Requires<[hasPTX60, hasSM30]>;
  110. def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
  111. "barrier.sync \t$id, $cnt;",
  112. [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
  113. Requires<[hasPTX60, hasSM30]>;
  114. def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
  115. "barrier.sync \t$id, $cnt;",
  116. [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
  117. Requires<[hasPTX60, hasSM30]>;
  118. def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
  119. "barrier.sync \t$id, $cnt;",
  120. [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
  121. Requires<[hasPTX60, hasSM30]>;
  122. class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
  123. bit offset_imm, bit mask_imm, bit threadmask_imm>
  124. : NVPTXInst<(outs), (ins), "?", []> {
  125. NVPTXRegClass rc = !cond(
  126. !eq(reg, "i32"): Int32Regs,
  127. !eq(reg, "f32"): Float32Regs);
  128. string IntrName = "int_nvvm_shfl_"
  129. # !if(sync, "sync_", "")
  130. # mode
  131. # "_" # reg
  132. # !if(return_pred, "p", "");
  133. Intrinsic Intr = !cast<Intrinsic>(IntrName);
  134. let InOperandList = !con(
  135. !if(sync,
  136. !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
  137. (ins)),
  138. (ins rc:$src),
  139. !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
  140. !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
  141. );
  142. let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  143. let AsmString = "shfl."
  144. # !if(sync, "sync.", "")
  145. # mode # ".b32\t"
  146. # "$dst"
  147. # !if(return_pred, "|$pred", "") # ", "
  148. # "$src, $offset, $mask"
  149. # !if(sync, ", $threadmask", "")
  150. # ";"
  151. ;
  152. let Pattern = [!con(
  153. !foreach(tmp, OutOperandList,
  154. !subst(outs, set,
  155. !subst(i32imm, imm, tmp))),
  156. (set !foreach(tmp, InOperandList,
  157. !subst(ins, Intr,
  158. !subst(i32imm, imm, tmp))))
  159. )];
  160. }
  161. foreach sync = [false, true] in {
  162. foreach mode = ["up", "down", "bfly", "idx"] in {
  163. foreach regclass = ["i32", "f32"] in {
  164. foreach return_pred = [false, true] in {
  165. foreach offset_imm = [false, true] in {
  166. foreach mask_imm = [false, true] in {
  167. foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
  168. def : SHFL_INSTR<sync, mode, regclass, return_pred,
  169. offset_imm, mask_imm, threadmask_imm>,
  170. Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>;
  171. }
  172. }
  173. }
  174. }
  175. }
  176. }
  177. }
  178. // vote.{all,any,uni,ballot}
  179. multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  180. def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
  181. "vote." # mode # " \t$dest, $pred;",
  182. [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
  183. Requires<[hasPTX60, hasSM30]>;
  184. }
  185. defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
  186. defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
  187. defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
  188. defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
  189. // vote.sync.{all,any,uni,ballot}
  190. multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  191. def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
  192. "vote.sync." # mode # " \t$dest, $pred, $mask;",
  193. [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
  194. Requires<[hasPTX60, hasSM30]>;
  195. def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
  196. "vote.sync." # mode #" \t$dest, $pred, $mask;",
  197. [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
  198. Requires<[hasPTX60, hasSM30]>;
  199. }
  200. defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
  201. defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
  202. defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
  203. defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
  204. multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
  205. Operand ImmOp> {
  206. def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
  207. "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
  208. [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
  209. Requires<[hasPTX60, hasSM70]>;
  210. def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
  211. "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
  212. [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
  213. Requires<[hasPTX60, hasSM70]>;
  214. def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
  215. "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
  216. [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
  217. Requires<[hasPTX60, hasSM70]>;
  218. def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
  219. "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
  220. [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
  221. Requires<[hasPTX60, hasSM70]>;
  222. }
  223. defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
  224. i32imm>;
  225. defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
  226. i64imm>;
  227. multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
  228. Operand ImmOp> {
  229. def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
  230. (ins i32imm:$mask, ImmOp:$value),
  231. "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
  232. [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
  233. Requires<[hasPTX60, hasSM70]>;
  234. def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
  235. (ins Int32Regs:$mask, ImmOp:$value),
  236. "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
  237. [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
  238. Requires<[hasPTX60, hasSM70]>;
  239. def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
  240. (ins i32imm:$mask, regclass:$value),
  241. "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
  242. [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
  243. Requires<[hasPTX60, hasSM70]>;
  244. def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
  245. (ins Int32Regs:$mask, regclass:$value),
  246. "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
  247. [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
  248. Requires<[hasPTX60, hasSM70]>;
  249. }
  250. defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
  251. i32imm>;
  252. defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
  253. i64imm>;
  254. multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  255. def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
  256. "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
  257. [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
  258. Requires<[hasPTX70, hasSM80]>;
  259. }
  260. defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
  261. defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
  262. defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
  263. defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
  264. defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
  265. defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
  266. defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
  267. defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
  268. } // isConvergent = true
  269. //-----------------------------------
  270. // Explicit Memory Fence Functions
  271. //-----------------------------------
  272. class MEMBAR<string StrOp, Intrinsic IntOP> :
  273. NVPTXInst<(outs), (ins),
  274. StrOp, [(IntOP)]>;
  275. def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
  276. def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
  277. def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
  278. //-----------------------------------
  279. // Async Copy Functions
  280. //-----------------------------------
  281. multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  282. def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
  283. !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
  284. [(Intrin Int32Regs:$addr)]>,
  285. Requires<[hasPTX70, hasSM80]>;
  286. def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
  287. !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
  288. [(Intrin Int64Regs:$addr)]>,
  289. Requires<[hasPTX70, hasSM80]>;
  290. }
  291. defm CP_ASYNC_MBARRIER_ARRIVE :
  292. CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
  293. defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  294. CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
  295. defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  296. CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
  297. defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  298. CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
  299. multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  300. def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
  301. !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
  302. [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
  303. Requires<[hasPTX70, hasSM80]>;
  304. def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
  305. !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
  306. [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
  307. Requires<[hasPTX70, hasSM80]>;
  308. }
  309. defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  310. CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;
  311. defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  312. CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;
  313. defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  314. CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
  315. multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  316. def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
  317. !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
  318. [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
  319. Requires<[hasPTX70, hasSM80]>;
  320. def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
  321. !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
  322. [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
  323. Requires<[hasPTX70, hasSM80]>;
  324. }
  325. defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  326. CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
  327. def CP_ASYNC_COMMIT_GROUP :
  328. NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
  329. Requires<[hasPTX70, hasSM80]>;
  330. def CP_ASYNC_WAIT_GROUP :
  331. NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
  332. [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  333. Requires<[hasPTX70, hasSM80]>;
  334. def CP_ASYNC_WAIT_ALL :
  335. NVPTXInst<(outs), (ins), "cp.async.wait_all;",
  336. [(int_nvvm_cp_async_wait_all)]>,
  337. Requires<[hasPTX70, hasSM80]>;
  338. //-----------------------------------
  339. // MBarrier Functions
  340. //-----------------------------------
  341. multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  342. def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
  343. !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
  344. [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
  345. Requires<[hasPTX70, hasSM80]>;
  346. def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
  347. !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
  348. [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
  349. Requires<[hasPTX70, hasSM80]>;
  350. }
  351. defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
  352. defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
  353. int_nvvm_mbarrier_init_shared>;
  354. multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  355. def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
  356. !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
  357. [(Intrin Int32Regs:$addr)]>,
  358. Requires<[hasPTX70, hasSM80]>;
  359. def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
  360. !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
  361. [(Intrin Int64Regs:$addr)]>,
  362. Requires<[hasPTX70, hasSM80]>;
  363. }
  364. defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
  365. defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
  366. int_nvvm_mbarrier_inval_shared>;
  367. multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  368. def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
  369. !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
  370. [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
  371. Requires<[hasPTX70, hasSM80]>;
  372. def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
  373. !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
  374. [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
  375. Requires<[hasPTX70, hasSM80]>;
  376. }
  377. defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
  378. defm MBARRIER_ARRIVE_SHARED :
  379. MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
  380. multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  381. def _32 : NVPTXInst<(outs Int64Regs:$state),
  382. (ins Int32Regs:$addr, Int32Regs:$count),
  383. !strconcat("mbarrier.arrive.noComplete", AddrSpace,
  384. ".b64 $state, [$addr], $count;"),
  385. [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
  386. Requires<[hasPTX70, hasSM80]>;
  387. def _64 : NVPTXInst<(outs Int64Regs:$state),
  388. (ins Int64Regs:$addr, Int32Regs:$count),
  389. !strconcat("mbarrier.arrive.noComplete", AddrSpace,
  390. ".b64 $state, [$addr], $count;"),
  391. [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
  392. Requires<[hasPTX70, hasSM80]>;
  393. }
  394. defm MBARRIER_ARRIVE_NOCOMPLETE :
  395. MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
  396. defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  397. MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;
  398. multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  399. def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
  400. !strconcat("mbarrier.arrive_drop", AddrSpace,
  401. ".b64 $state, [$addr];"),
  402. [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
  403. Requires<[hasPTX70, hasSM80]>;
  404. def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
  405. !strconcat("mbarrier.arrive_drop", AddrSpace,
  406. ".b64 $state, [$addr];"),
  407. [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
  408. Requires<[hasPTX70, hasSM80]>;
  409. }
  410. defm MBARRIER_ARRIVE_DROP :
  411. MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
  412. defm MBARRIER_ARRIVE_DROP_SHARED :
  413. MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
  414. multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  415. def _32 : NVPTXInst<(outs Int64Regs:$state),
  416. (ins Int32Regs:$addr, Int32Regs:$count),
  417. !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
  418. ".b64 $state, [$addr], $count;"),
  419. [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
  420. Requires<[hasPTX70, hasSM80]>;
  421. def _64 : NVPTXInst<(outs Int64Regs:$state),
  422. (ins Int64Regs:$addr, Int32Regs:$count),
  423. !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
  424. ".b64 $state, [$addr], $count;"),
  425. [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
  426. Requires<[hasPTX70, hasSM80]>;
  427. }
  428. defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  429. MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
  430. defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  431. MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
  432. int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
  433. multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  434. def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
  435. !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
  436. [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
  437. Requires<[hasPTX70, hasSM80]>;
  438. def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
  439. !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
  440. [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
  441. Requires<[hasPTX70, hasSM80]>;
  442. }
  443. defm MBARRIER_TEST_WAIT :
  444. MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
  445. defm MBARRIER_TEST_WAIT_SHARED :
  446. MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
  447. class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
  448. NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
  449. "mbarrier.pending_count.b64 $res, $state;",
  450. [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
  451. Requires<[hasPTX70, hasSM80]>;
  452. def MBARRIER_PENDING_COUNT :
  453. MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
  454. //-----------------------------------
  455. // Math Functions
  456. //-----------------------------------
  457. // Map min(1.0, max(0.0, x)) to sat(x)
  458. // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
  459. // NaN
  460. // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
  461. // Same story for fmax, fmin.
  462. def : Pat<(int_nvvm_fmin_f immFloat1,
  463. (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
  464. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  465. def : Pat<(int_nvvm_fmin_f immFloat1,
  466. (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
  467. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  468. def : Pat<(int_nvvm_fmin_f
  469. (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
  470. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  471. def : Pat<(int_nvvm_fmin_f
  472. (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
  473. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  474. def : Pat<(int_nvvm_fmin_d immDouble1,
  475. (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
  476. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  477. def : Pat<(int_nvvm_fmin_d immDouble1,
  478. (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
  479. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  480. def : Pat<(int_nvvm_fmin_d
  481. (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
  482. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  483. def : Pat<(int_nvvm_fmin_d
  484. (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
  485. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  486. // We need a full string for OpcStr here because we need to deal with case like
  487. // INT_PTX_RECIP.
  488. class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
  489. NVPTXRegClass src_regclass, Intrinsic IntOP>
  490. : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
  491. OpcStr,
  492. [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
  493. // We need a full string for OpcStr here because we need to deal with the case
  494. // like INT_PTX_NATIVE_POWR_F.
  495. class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
  496. NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
  497. : NVPTXInst<(outs t_regclass:$dst),
  498. (ins s0_regclass:$src0, s1_regclass:$src1),
  499. OpcStr,
  500. [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
  501. class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
  502. NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
  503. NVPTXRegClass s2_regclass, Intrinsic IntOP>
  504. : NVPTXInst<(outs t_regclass:$dst),
  505. (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
  506. OpcStr,
  507. [(set t_regclass:$dst,
  508. (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
  509. //
  510. // MISC
  511. //
  512. def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
  513. Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
  514. //
  515. // Min Max
  516. //
  517. def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
  518. Float32Regs, Float32Regs, int_nvvm_fmin_f>;
  519. def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
  520. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
  521. def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
  522. Float32Regs, Float32Regs, int_nvvm_fmax_f>;
  523. def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
  524. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
  525. def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
  526. Float64Regs, Float64Regs, int_nvvm_fmin_d>;
  527. def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
  528. Float64Regs, Float64Regs, int_nvvm_fmax_d>;
  529. //
  530. // Multiplication
  531. //
  532. def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
  533. Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
  534. def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
  535. Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
  536. def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
  537. Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
  538. def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
  539. Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
  540. def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
  541. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
  542. def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
  543. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
  544. def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
  545. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
  546. def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
  547. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
  548. def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
  549. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
  550. def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
  551. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
  552. def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
  553. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
  554. def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
  555. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
  556. def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
  557. Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
  558. def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
  559. Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
  560. def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
  561. Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
  562. def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
  563. Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
  564. def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
  565. Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
  566. def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
  567. Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
  568. //
  569. // Div
  570. //
  571. def INT_NVVM_DIV_APPROX_FTZ_F
  572. : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
  573. Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
  574. def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
  575. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
  576. def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
  577. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
  578. def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
  579. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
  580. def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
  581. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
  582. def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
  583. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
  584. def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
  585. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
  586. def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
  587. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
  588. def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
  589. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
  590. def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
  591. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
  592. def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
  593. Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
  594. def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
  595. Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
  596. def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
  597. Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
  598. def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
  599. Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
  600. //
  601. // Sad
  602. //
  603. def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
  604. Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
  605. def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
  606. Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
  607. //
  608. // Floor Ceil
  609. //
  610. def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
  611. (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  612. def : Pat<(int_nvvm_floor_f Float32Regs:$a),
  613. (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
  614. def : Pat<(int_nvvm_floor_d Float64Regs:$a),
  615. (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
  616. def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
  617. (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  618. def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
  619. (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
  620. def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
  621. (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
  622. //
  623. // Abs
  624. //
  625. def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
  626. Float32Regs, int_nvvm_fabs_ftz_f>;
  627. def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
  628. Float32Regs, int_nvvm_fabs_f>;
  629. def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
  630. Float64Regs, int_nvvm_fabs_d>;
  631. //
  632. // Round
  633. //
  634. def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
  635. (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  636. def : Pat<(int_nvvm_round_f Float32Regs:$a),
  637. (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
  638. def : Pat<(int_nvvm_round_d Float64Regs:$a),
  639. (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
  640. //
  641. // Trunc
  642. //
  643. def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
  644. (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  645. def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
  646. (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
  647. def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
  648. (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
  649. //
  650. // Saturate
  651. //
  652. def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
  653. (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
  654. def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
  655. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  656. def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
  657. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  658. //
  659. // Exp2 Log2
  660. //
  661. def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
  662. Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
  663. def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
  664. Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
  665. def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
  666. Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
  667. def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
  668. Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
  669. def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
  670. Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
  671. def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
  672. Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
  673. //
  674. // Sin Cos
  675. //
  676. def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
  677. Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
  678. def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
  679. Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
  680. def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
  681. Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
  682. def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
  683. Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
  684. //
  685. // Fma
  686. //
  687. def INT_NVVM_FMA_RN_FTZ_F
  688. : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
  689. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
  690. def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
  691. Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
  692. def INT_NVVM_FMA_RZ_FTZ_F
  693. : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
  694. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
  695. def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
  696. Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
  697. def INT_NVVM_FMA_RM_FTZ_F
  698. : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
  699. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
  700. def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
  701. Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
  702. def INT_NVVM_FMA_RP_FTZ_F
  703. : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
  704. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
  705. def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
  706. Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
  707. def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
  708. Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
  709. def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
  710. Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
  711. def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
  712. Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
  713. def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
  714. Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
  715. //
  716. // Rcp
  717. //
  718. def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
  719. Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
  720. def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
  721. Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
  722. def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
  723. Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
  724. def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
  725. Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
  726. def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
  727. Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
  728. def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
  729. Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
  730. def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
  731. Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
  732. def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
  733. Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
  734. def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
  735. Float64Regs, int_nvvm_rcp_rn_d>;
  736. def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
  737. Float64Regs, int_nvvm_rcp_rz_d>;
  738. def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
  739. Float64Regs, int_nvvm_rcp_rm_d>;
  740. def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
  741. Float64Regs, int_nvvm_rcp_rp_d>;
  742. def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
  743. Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
  744. //
  745. // Sqrt
  746. //
  747. def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
  748. Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
  749. def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
  750. Float32Regs, int_nvvm_sqrt_rn_f>;
  751. def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
  752. Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
  753. def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
  754. Float32Regs, int_nvvm_sqrt_rz_f>;
  755. def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
  756. Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
  757. def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
  758. Float32Regs, int_nvvm_sqrt_rm_f>;
  759. def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
  760. Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
  761. def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
  762. Float32Regs, int_nvvm_sqrt_rp_f>;
  763. def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
  764. Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
  765. def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
  766. Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
  767. def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
  768. Float64Regs, int_nvvm_sqrt_rn_d>;
  769. def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
  770. Float64Regs, int_nvvm_sqrt_rz_d>;
  771. def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
  772. Float64Regs, int_nvvm_sqrt_rm_d>;
  773. def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
  774. Float64Regs, int_nvvm_sqrt_rp_d>;
  775. // nvvm_sqrt intrinsic
  776. def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  777. (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
  778. def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  779. (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
  780. def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  781. (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
  782. def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  783. (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
  784. //
  785. // Rsqrt
  786. //
  787. def INT_NVVM_RSQRT_APPROX_FTZ_F
  788. : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
  789. int_nvvm_rsqrt_approx_ftz_f>;
  790. def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
  791. Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
  792. def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
  793. Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
  794. //
  795. // Add
  796. //
  797. def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
  798. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
  799. def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
  800. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
  801. def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
  802. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
  803. def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
  804. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
  805. def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
  806. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
  807. def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
  808. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
  809. def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
  810. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
  811. def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
  812. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
  813. def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
  814. Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
  815. def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
  816. Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
  817. def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
  818. Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
  819. def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
  820. Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
  821. //
  822. // Convert
  823. //
  824. def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
  825. (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
  826. def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
  827. (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
  828. def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
  829. (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
  830. def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
  831. (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
  832. def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
  833. (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
  834. def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
  835. (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
  836. def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
  837. (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
  838. def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
  839. (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
  840. def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
  841. (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
  842. def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
  843. (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
  844. def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
  845. (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
  846. def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
  847. (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
  848. def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
  849. (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
  850. def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
  851. (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
  852. def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
  853. (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
  854. def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
  855. (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
  856. def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
  857. (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
  858. def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
  859. (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
  860. def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
  861. (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
  862. def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
  863. (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
  864. def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
  865. (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
  866. def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
  867. (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
  868. def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
  869. (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
  870. def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
  871. (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
  872. def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
  873. (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  874. def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
  875. (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
  876. def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
  877. (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  878. def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
  879. (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
  880. def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
  881. (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  882. def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
  883. (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
  884. def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
  885. (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  886. def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
  887. (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
  888. def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
  889. (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  890. def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
  891. (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
  892. def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
  893. (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  894. def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
  895. (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
  896. def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
  897. (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  898. def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
  899. (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
  900. def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
  901. (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  902. def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
  903. (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
  904. def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
  905. (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
  906. def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
  907. (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
  908. def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
  909. (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
  910. def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
  911. (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
  912. def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
  913. (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
  914. def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
  915. (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
  916. def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
  917. (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
  918. def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
  919. (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
  920. def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
  921. (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
  922. def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
  923. (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
  924. def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
  925. (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
  926. def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
  927. (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
  928. def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
  929. (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
  930. def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
  931. (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
  932. def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
  933. (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
  934. def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
  935. (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
  936. def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
  937. (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
  938. def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
  939. (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
  940. def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
  941. (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
  942. def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
  943. (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
  944. def CVT_tf32_f32 :
  945. NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
  946. "cvt.rna.tf32.f32 \t$dest, $a;",
  947. [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
  948. def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
  949. Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
  950. def INT_NVVM_D2I_LO : F_MATH_1<
  951. !strconcat("{{\n\t",
  952. ".reg .b32 %temp; \n\t",
  953. "mov.b64 \t{$dst, %temp}, $src0;\n\t",
  954. "}}"),
  955. Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
  956. def INT_NVVM_D2I_HI : F_MATH_1<
  957. !strconcat("{{\n\t",
  958. ".reg .b32 %temp; \n\t",
  959. "mov.b64 \t{%temp, $dst}, $src0;\n\t",
  960. "}}"),
  961. Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
  962. def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
  963. (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  964. def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
  965. (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
  966. def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
  967. (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  968. def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
  969. (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
  970. def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
  971. (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  972. def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
  973. (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
  974. def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
  975. (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  976. def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
  977. (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
  978. def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
  979. (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  980. def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
  981. (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
  982. def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
  983. (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  984. def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
  985. (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
  986. def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
  987. (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  988. def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
  989. (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
  990. def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
  991. (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  992. def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
  993. (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
  994. def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
  995. (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
  996. def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
  997. (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
  998. def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
  999. (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
  1000. def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
  1001. (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
  1002. def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
  1003. (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
  1004. def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
  1005. (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
  1006. def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
  1007. (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
  1008. def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
  1009. (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
  1010. def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
  1011. (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
  1012. def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
  1013. (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
  1014. def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
  1015. (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
  1016. def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
  1017. (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
  1018. def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
  1019. (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
  1020. def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
  1021. (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
  1022. def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
  1023. (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
  1024. def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
  1025. (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
  1026. def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
  1027. (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
  1028. def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
  1029. (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
  1030. def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
  1031. (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
  1032. def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
  1033. (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
  1034. def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
  1035. (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
  1036. def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
  1037. (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
  1038. def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
  1039. (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
  1040. def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
  1041. (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
  1042. def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
  1043. (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
  1044. def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
  1045. (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
  1046. //
  1047. // Bitcast
  1048. //
  1049. def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
  1050. Float32Regs, int_nvvm_bitcast_f2i>;
  1051. def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
  1052. Int32Regs, int_nvvm_bitcast_i2f>;
  1053. def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
  1054. Int64Regs, int_nvvm_bitcast_ll2d>;
  1055. def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
  1056. Float64Regs, int_nvvm_bitcast_d2ll>;
  1057. //
  1058. // FNS
  1059. //
  1060. class INT_FNS_MBO<dag ins, dag Operands>
  1061. : NVPTXInst<(outs Int32Regs:$dst), ins,
  1062. "fns.b32 \t$dst, $mask, $base, $offset;",
  1063. [(set Int32Regs:$dst, Operands )]>,
  1064. Requires<[hasPTX60, hasSM30]>;
  1065. def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
  1066. (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
  1067. def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
  1068. (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
  1069. def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
  1070. (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
  1071. def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
  1072. (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
  1073. def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
  1074. (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
  1075. def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
  1076. (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
  1077. def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
  1078. (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
  1079. def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
  1080. (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
  1081. //-----------------------------------
  1082. // Atomic Functions
  1083. //-----------------------------------
  1084. class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
  1085. : PatFrag<ops, frag, AS_match.global>;
  1086. class ATOMIC_SHARED_CHK <dag ops, dag frag>
  1087. : PatFrag<ops, frag, AS_match.shared>;
  1088. class ATOMIC_GENERIC_CHK <dag ops, dag frag>
  1089. : PatFrag<ops, frag, AS_match.generic>;
  1090. multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  1091. string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  1092. Operand IMMType, SDNode IMM, list<Predicate> Pred> {
  1093. def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
  1094. !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
  1095. [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  1096. Requires<Pred>;
  1097. def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
  1098. !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
  1099. [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
  1100. Requires<Pred>;
  1101. }
  1102. multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  1103. string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
  1104. list<Predicate> Pred = []> {
  1105. defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
  1106. IntOp, IMMType, IMM, Pred>;
  1107. defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
  1108. IntOp, IMMType, IMM, Pred>;
  1109. }
  1110. // has 2 operands, neg the second one
  1111. multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  1112. string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  1113. list<Predicate> Pred> {
  1114. def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
  1115. !strconcat(
  1116. "{{ \n\t",
  1117. ".reg \t.s", TypeStr, " temp; \n\t",
  1118. "neg.s", TypeStr, " \ttemp, $b; \n\t",
  1119. "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
  1120. "}}"),
  1121. [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  1122. Requires<Pred>;
  1123. }
  1124. multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
  1125. string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
  1126. defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
  1127. IntOp, Pred> ;
  1128. defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
  1129. IntOp, Pred> ;
  1130. }
  1131. // has 3 operands
  1132. multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  1133. string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  1134. Operand IMMType, list<Predicate> Pred> {
  1135. def reg : NVPTXInst<(outs regclass:$dst),
  1136. (ins ptrclass:$addr, regclass:$b, regclass:$c),
  1137. !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
  1138. [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
  1139. Requires<Pred>;
  1140. def imm1 : NVPTXInst<(outs regclass:$dst),
  1141. (ins ptrclass:$addr, IMMType:$b, regclass:$c),
  1142. !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
  1143. [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
  1144. Requires<Pred>;
  1145. def imm2 : NVPTXInst<(outs regclass:$dst),
  1146. (ins ptrclass:$addr, regclass:$b, IMMType:$c),
  1147. !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
  1148. [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
  1149. Requires<Pred>;
  1150. def imm3 : NVPTXInst<(outs regclass:$dst),
  1151. (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
  1152. !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
  1153. [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
  1154. Requires<Pred>;
  1155. }
  1156. multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  1157. string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
  1158. defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
  1159. IntOp, IMMType, Pred>;
  1160. defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
  1161. IntOp, IMMType, Pred>;
  1162. }
  1163. // atom_add
  1164. def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1165. (atomic_load_add_32 node:$a, node:$b)>;
  1166. def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1167. (atomic_load_add_32 node:$a, node:$b)>;
  1168. def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1169. (atomic_load_add_32 node:$a, node:$b)>;
  1170. def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1171. (atomic_load_add_64 node:$a, node:$b)>;
  1172. def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1173. (atomic_load_add_64 node:$a, node:$b)>;
  1174. def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1175. (atomic_load_add_64 node:$a, node:$b)>;
  1176. def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1177. (atomic_load_fadd node:$a, node:$b)>;
  1178. def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1179. (atomic_load_fadd node:$a, node:$b)>;
  1180. def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1181. (atomic_load_fadd node:$a, node:$b)>;
  1182. defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
  1183. atomic_load_add_32_g, i32imm, imm>;
  1184. defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
  1185. atomic_load_add_32_s, i32imm, imm>;
  1186. defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
  1187. atomic_load_add_32_gen, i32imm, imm>;
  1188. defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  1189. ".add", atomic_load_add_32_gen, i32imm, imm>;
  1190. defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
  1191. atomic_load_add_64_g, i64imm, imm>;
  1192. defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
  1193. atomic_load_add_64_s, i64imm, imm>;
  1194. defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
  1195. atomic_load_add_64_gen, i64imm, imm>;
  1196. defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  1197. ".add", atomic_load_add_64_gen, i64imm, imm>;
  1198. defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
  1199. atomic_load_add_g, f32imm, fpimm>;
  1200. defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
  1201. atomic_load_add_s, f32imm, fpimm>;
  1202. defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
  1203. atomic_load_add_gen, f32imm, fpimm>;
  1204. defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
  1205. atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
  1206. defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
  1207. atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
  1208. defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
  1209. atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
  1210. // atom_sub
  1211. def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1212. (atomic_load_sub_32 node:$a, node:$b)>;
  1213. def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1214. (atomic_load_sub_32 node:$a, node:$b)>;
  1215. def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1216. (atomic_load_sub_32 node:$a, node:$b)>;
  1217. def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1218. (atomic_load_sub_64 node:$a, node:$b)>;
  1219. def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1220. (atomic_load_sub_64 node:$a, node:$b)>;
  1221. def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1222. (atomic_load_sub_64 node:$a, node:$b)>;
  1223. defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
  1224. atomic_load_sub_32_g>;
  1225. defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
  1226. atomic_load_sub_64_g>;
  1227. defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
  1228. atomic_load_sub_32_gen>;
  1229. defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
  1230. ".add", atomic_load_sub_32_gen>;
  1231. defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
  1232. atomic_load_sub_32_s>;
  1233. defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
  1234. atomic_load_sub_64_s>;
  1235. defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
  1236. atomic_load_sub_64_gen>;
  1237. defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
  1238. ".add", atomic_load_sub_64_gen>;
  1239. // atom_swap
  1240. def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1241. (atomic_swap_32 node:$a, node:$b)>;
  1242. def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1243. (atomic_swap_32 node:$a, node:$b)>;
  1244. def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1245. (atomic_swap_32 node:$a, node:$b)>;
  1246. def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1247. (atomic_swap_64 node:$a, node:$b)>;
  1248. def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1249. (atomic_swap_64 node:$a, node:$b)>;
  1250. def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1251. (atomic_swap_64 node:$a, node:$b)>;
  1252. defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
  1253. atomic_swap_32_g, i32imm, imm>;
  1254. defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
  1255. atomic_swap_32_s, i32imm, imm>;
  1256. defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
  1257. atomic_swap_32_gen, i32imm, imm>;
  1258. defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  1259. ".exch", atomic_swap_32_gen, i32imm, imm>;
  1260. defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
  1261. atomic_swap_64_g, i64imm, imm>;
  1262. defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
  1263. atomic_swap_64_s, i64imm, imm>;
  1264. defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
  1265. atomic_swap_64_gen, i64imm, imm>;
  1266. defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  1267. ".exch", atomic_swap_64_gen, i64imm, imm>;
  1268. // atom_max
  1269. def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
  1270. , (atomic_load_max_32 node:$a, node:$b)>;
  1271. def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1272. (atomic_load_max_32 node:$a, node:$b)>;
  1273. def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1274. (atomic_load_max_32 node:$a, node:$b)>;
  1275. def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
  1276. , (atomic_load_max_64 node:$a, node:$b)>;
  1277. def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1278. (atomic_load_max_64 node:$a, node:$b)>;
  1279. def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1280. (atomic_load_max_64 node:$a, node:$b)>;
  1281. def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1282. (atomic_load_umax_32 node:$a, node:$b)>;
  1283. def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1284. (atomic_load_umax_32 node:$a, node:$b)>;
  1285. def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1286. (atomic_load_umax_32 node:$a, node:$b)>;
  1287. def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1288. (atomic_load_umax_64 node:$a, node:$b)>;
  1289. def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1290. (atomic_load_umax_64 node:$a, node:$b)>;
  1291. def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1292. (atomic_load_umax_64 node:$a, node:$b)>;
  1293. defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  1294. ".max", atomic_load_max_32_g, i32imm, imm>;
  1295. defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  1296. ".max", atomic_load_max_32_s, i32imm, imm>;
  1297. defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
  1298. atomic_load_max_32_gen, i32imm, imm>;
  1299. defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  1300. ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
  1301. defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  1302. ".max", atomic_load_max_64_g, i64imm, imm>;
  1303. defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  1304. ".max", atomic_load_max_64_s, i64imm, imm>;
  1305. defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
  1306. atomic_load_max_64_gen, i64imm, imm>;
  1307. defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  1308. ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
  1309. defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  1310. ".max", atomic_load_umax_32_g, i32imm, imm>;
  1311. defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  1312. ".max", atomic_load_umax_32_s, i32imm, imm>;
  1313. defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
  1314. atomic_load_umax_32_gen, i32imm, imm>;
  1315. defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  1316. ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
  1317. defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  1318. ".max", atomic_load_umax_64_g, i64imm, imm>;
  1319. defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  1320. ".max", atomic_load_umax_64_s, i64imm, imm>;
  1321. defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
  1322. atomic_load_umax_64_gen, i64imm, imm>;
  1323. defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  1324. ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
  1325. // atom_min
  1326. def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1327. (atomic_load_min_32 node:$a, node:$b)>;
  1328. def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1329. (atomic_load_min_32 node:$a, node:$b)>;
  1330. def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1331. (atomic_load_min_32 node:$a, node:$b)>;
  1332. def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1333. (atomic_load_min_64 node:$a, node:$b)>;
  1334. def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1335. (atomic_load_min_64 node:$a, node:$b)>;
  1336. def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1337. (atomic_load_min_64 node:$a, node:$b)>;
  1338. def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1339. (atomic_load_umin_32 node:$a, node:$b)>;
  1340. def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1341. (atomic_load_umin_32 node:$a, node:$b)>;
  1342. def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1343. (atomic_load_umin_32 node:$a, node:$b)>;
  1344. def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1345. (atomic_load_umin_64 node:$a, node:$b)>;
  1346. def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1347. (atomic_load_umin_64 node:$a, node:$b)>;
  1348. def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1349. (atomic_load_umin_64 node:$a, node:$b)>;
  1350. defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  1351. ".min", atomic_load_min_32_g, i32imm, imm>;
  1352. defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  1353. ".min", atomic_load_min_32_s, i32imm, imm>;
  1354. defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
  1355. atomic_load_min_32_gen, i32imm, imm>;
  1356. defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  1357. ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
  1358. defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  1359. ".min", atomic_load_min_64_g, i64imm, imm>;
  1360. defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  1361. ".min", atomic_load_min_64_s, i64imm, imm>;
  1362. defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
  1363. atomic_load_min_64_gen, i64imm, imm>;
  1364. defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  1365. ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
  1366. defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  1367. ".min", atomic_load_umin_32_g, i32imm, imm>;
  1368. defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  1369. ".min", atomic_load_umin_32_s, i32imm, imm>;
  1370. defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
  1371. atomic_load_umin_32_gen, i32imm, imm>;
  1372. defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  1373. ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
  1374. defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  1375. ".min", atomic_load_umin_64_g, i64imm, imm>;
  1376. defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  1377. ".min", atomic_load_umin_64_s, i64imm, imm>;
  1378. defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
  1379. atomic_load_umin_64_gen, i64imm, imm>;
  1380. defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  1381. ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
  1382. // atom_inc atom_dec
  1383. def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1384. (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
  1385. def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1386. (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
  1387. def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1388. (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
  1389. def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1390. (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
  1391. def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1392. (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
  1393. def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1394. (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
  1395. defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
  1396. atomic_load_inc_32_g, i32imm, imm>;
  1397. defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
  1398. atomic_load_inc_32_s, i32imm, imm>;
  1399. defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
  1400. atomic_load_inc_32_gen, i32imm, imm>;
  1401. defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  1402. ".inc", atomic_load_inc_32_gen, i32imm, imm>;
  1403. defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
  1404. atomic_load_dec_32_g, i32imm, imm>;
  1405. defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
  1406. atomic_load_dec_32_s, i32imm, imm>;
  1407. defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
  1408. atomic_load_dec_32_gen, i32imm, imm>;
  1409. defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  1410. ".dec", atomic_load_dec_32_gen, i32imm, imm>;
  1411. // atom_and
  1412. def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1413. (atomic_load_and_32 node:$a, node:$b)>;
  1414. def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1415. (atomic_load_and_32 node:$a, node:$b)>;
  1416. def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1417. (atomic_load_and_32 node:$a, node:$b)>;
  1418. def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1419. (atomic_load_and_64 node:$a, node:$b)>;
  1420. def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1421. (atomic_load_and_64 node:$a, node:$b)>;
  1422. def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1423. (atomic_load_and_64 node:$a, node:$b)>;
  1424. defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
  1425. atomic_load_and_32_g, i32imm, imm>;
  1426. defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
  1427. atomic_load_and_32_s, i32imm, imm>;
  1428. defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
  1429. atomic_load_and_32_gen, i32imm, imm>;
  1430. defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  1431. ".and", atomic_load_and_32_gen, i32imm, imm>;
  1432. defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
  1433. atomic_load_and_64_g, i64imm, imm>;
  1434. defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
  1435. atomic_load_and_64_s, i64imm, imm>;
  1436. defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
  1437. atomic_load_and_64_gen, i64imm, imm>;
  1438. defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  1439. ".and", atomic_load_and_64_gen, i64imm, imm>;
  1440. // atom_or
  1441. def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1442. (atomic_load_or_32 node:$a, node:$b)>;
  1443. def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1444. (atomic_load_or_32 node:$a, node:$b)>;
  1445. def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1446. (atomic_load_or_32 node:$a, node:$b)>;
  1447. def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1448. (atomic_load_or_64 node:$a, node:$b)>;
  1449. def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1450. (atomic_load_or_64 node:$a, node:$b)>;
  1451. def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1452. (atomic_load_or_64 node:$a, node:$b)>;
  1453. defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
  1454. atomic_load_or_32_g, i32imm, imm>;
  1455. defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
  1456. atomic_load_or_32_gen, i32imm, imm>;
  1457. defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  1458. ".or", atomic_load_or_32_gen, i32imm, imm>;
  1459. defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
  1460. atomic_load_or_32_s, i32imm, imm>;
  1461. defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
  1462. atomic_load_or_64_g, i64imm, imm>;
  1463. defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
  1464. atomic_load_or_64_gen, i64imm, imm>;
  1465. defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  1466. ".or", atomic_load_or_64_gen, i64imm, imm>;
  1467. defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
  1468. atomic_load_or_64_s, i64imm, imm>;
  1469. // atom_xor
  1470. def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1471. (atomic_load_xor_32 node:$a, node:$b)>;
  1472. def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1473. (atomic_load_xor_32 node:$a, node:$b)>;
  1474. def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1475. (atomic_load_xor_32 node:$a, node:$b)>;
  1476. def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  1477. (atomic_load_xor_64 node:$a, node:$b)>;
  1478. def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  1479. (atomic_load_xor_64 node:$a, node:$b)>;
  1480. def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  1481. (atomic_load_xor_64 node:$a, node:$b)>;
  1482. defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
  1483. atomic_load_xor_32_g, i32imm, imm>;
  1484. defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
  1485. atomic_load_xor_32_s, i32imm, imm>;
  1486. defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
  1487. atomic_load_xor_32_gen, i32imm, imm>;
  1488. defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  1489. ".xor", atomic_load_xor_32_gen, i32imm, imm>;
  1490. defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
  1491. atomic_load_xor_64_g, i64imm, imm>;
  1492. defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
  1493. atomic_load_xor_64_s, i64imm, imm>;
  1494. defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
  1495. atomic_load_xor_64_gen, i64imm, imm>;
  1496. defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  1497. ".xor", atomic_load_xor_64_gen, i64imm, imm>;
  1498. // atom_cas
  1499. def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  1500. (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
  1501. def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  1502. (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
  1503. def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  1504. (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
  1505. def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  1506. (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
  1507. def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  1508. (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
  1509. def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  1510. (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
  1511. defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
  1512. atomic_cmp_swap_32_g, i32imm>;
  1513. defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
  1514. atomic_cmp_swap_32_s, i32imm>;
  1515. defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
  1516. atomic_cmp_swap_32_gen, i32imm>;
  1517. defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
  1518. ".cas", atomic_cmp_swap_32_gen, i32imm>;
  1519. defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
  1520. atomic_cmp_swap_64_g, i64imm>;
  1521. defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
  1522. atomic_cmp_swap_64_s, i64imm>;
  1523. defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
  1524. atomic_cmp_swap_64_gen, i64imm>;
  1525. defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
  1526. ".cas", atomic_cmp_swap_64_gen, i64imm>;
  1527. // Support for scoped atomic operations. Matches
  1528. // int_nvvm_atomic_{op}_{space}_{type}_{scope}
  1529. // and converts it into the appropriate instruction.
  1530. // NOTE: not all possible combinations are implemented
  1531. // 'space' is limited to generic as it's the only one needed to support CUDA.
  1532. // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
  1533. class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
  1534. dag ins, dag Operands>
  1535. : NVPTXInst<(outs regclass:$result), ins,
  1536. AsmStr,
  1537. [(set regclass:$result, Operands)]>,
  1538. Requires<Preds>;
  1539. // Define instruction variants for all addressing modes.
  1540. multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
  1541. NVPTXRegClass regclass, Operand ImmType,
  1542. SDNode Imm, ValueType ImmTy,
  1543. list<Predicate> Preds> {
  1544. let AddedComplexity = 1 in {
  1545. def : ATOM23_impl<AsmStr, regclass, Preds,
  1546. (ins Int32Regs:$src, regclass:$b),
  1547. (Intr Int32Regs:$src, regclass:$b)>;
  1548. def : ATOM23_impl<AsmStr, regclass, Preds,
  1549. (ins Int64Regs:$src, regclass:$b),
  1550. (Intr Int64Regs:$src, regclass:$b)>;
  1551. }
  1552. // tablegen can't infer argument types from Intrinsic (though it can
  1553. // from Instruction) so we have to enforce specific type on
  1554. // immediates via explicit cast to ImmTy.
  1555. def : ATOM23_impl<AsmStr, regclass, Preds,
  1556. (ins Int32Regs:$src, ImmType:$b),
  1557. (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  1558. def : ATOM23_impl<AsmStr, regclass, Preds,
  1559. (ins Int64Regs:$src, ImmType:$b),
  1560. (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
  1561. }
  1562. multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
  1563. NVPTXRegClass regclass, Operand ImmType,
  1564. SDNode Imm, ValueType ImmTy,
  1565. list<Predicate> Preds> {
  1566. // Variants for register/immediate permutations of $b and $c
  1567. let AddedComplexity = 2 in {
  1568. def : ATOM23_impl<AsmStr, regclass, Preds,
  1569. (ins Int32Regs:$src, regclass:$b, regclass:$c),
  1570. (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
  1571. def : ATOM23_impl<AsmStr, regclass, Preds,
  1572. (ins Int64Regs:$src, regclass:$b, regclass:$c),
  1573. (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  1574. }
  1575. let AddedComplexity = 1 in {
  1576. def : ATOM23_impl<AsmStr, regclass, Preds,
  1577. (ins Int32Regs:$src, ImmType:$b, regclass:$c),
  1578. (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
  1579. def : ATOM23_impl<AsmStr, regclass, Preds,
  1580. (ins Int64Regs:$src, ImmType:$b, regclass:$c),
  1581. (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
  1582. def : ATOM23_impl<AsmStr, regclass, Preds,
  1583. (ins Int32Regs:$src, regclass:$b, ImmType:$c),
  1584. (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  1585. def : ATOM23_impl<AsmStr, regclass, Preds,
  1586. (ins Int64Regs:$src, regclass:$b, ImmType:$c),
  1587. (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  1588. }
  1589. def : ATOM23_impl<AsmStr, regclass, Preds,
  1590. (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
  1591. (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  1592. def : ATOM23_impl<AsmStr, regclass, Preds,
  1593. (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
  1594. (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  1595. }
  1596. // Constructs instrinsic name and instruction asm strings.
  1597. multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
  1598. string ScopeStr, string SpaceStr,
  1599. NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
  1600. ValueType ImmTy, list<Predicate> Preds> {
  1601. defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
  1602. # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
  1603. # "." # OpStr # "." # TypeStr
  1604. # " \t$result, [$src], $b;",
  1605. !cast<Intrinsic>(
  1606. "int_nvvm_atomic_" # OpStr
  1607. # "_" # SpaceStr # "_" # IntTypeStr
  1608. # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
  1609. regclass, ImmType, Imm, ImmTy, Preds>;
  1610. }
  1611. multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
  1612. string ScopeStr, string SpaceStr,
  1613. NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
  1614. ValueType ImmTy, list<Predicate> Preds> {
  1615. defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
  1616. # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
  1617. # "." # OpStr # "." # TypeStr
  1618. # " \t$result, [$src], $b, $c;",
  1619. !cast<Intrinsic>(
  1620. "int_nvvm_atomic_" # OpStr
  1621. # "_" # SpaceStr # "_" # IntTypeStr
  1622. # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
  1623. regclass, ImmType, Imm, ImmTy, Preds>;
  1624. }
  1625. // Constructs variants for different address spaces.
  1626. // For now we only need variants for generic space pointers.
  1627. multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
  1628. string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
  1629. SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  1630. defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
  1631. regclass, ImmType, Imm, ImmTy, Preds>;
  1632. }
  1633. multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
  1634. string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
  1635. SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  1636. defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
  1637. regclass, ImmType, Imm, ImmTy, Preds>;
  1638. }
  1639. // Constructs variants for different scopes of atomic op.
  1640. multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
  1641. NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
  1642. ValueType ImmTy, list<Predicate> Preds> {
  1643. // .gpu scope is default and is currently covered by existing
  1644. // atomics w/o explicitly specified scope.
  1645. defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
  1646. regclass, ImmType, Imm, ImmTy,
  1647. !listconcat(Preds,[hasAtomScope])>;
  1648. defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
  1649. regclass, ImmType, Imm, ImmTy,
  1650. !listconcat(Preds,[hasAtomScope])>;
  1651. }
  1652. multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
  1653. NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
  1654. list<Predicate> Preds> {
  1655. // No need to define ".gpu"-scoped atomics. They do the same thing
  1656. // as the regular, non-scoped atomics defined elsewhere.
  1657. defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
  1658. regclass, ImmType, Imm, ImmTy,
  1659. !listconcat(Preds,[hasAtomScope])>;
  1660. defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
  1661. regclass, ImmType, Imm, ImmTy,
  1662. !listconcat(Preds,[hasAtomScope])>;
  1663. }
  1664. // atom.add
  1665. multiclass ATOM2_add_impl<string OpStr> {
  1666. defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  1667. defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  1668. defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
  1669. defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
  1670. []>;
  1671. defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
  1672. [hasAtomAddF64]>;
  1673. }
  1674. // atom.{and,or,xor}
  1675. multiclass ATOM2_bitwise_impl<string OpStr> {
  1676. defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  1677. defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
  1678. [hasAtomBitwise64]>;
  1679. }
  1680. // atom.exch
  1681. multiclass ATOM2_exch_impl<string OpStr> {
  1682. defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  1683. defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
  1684. }
  1685. // atom.{min,max}
  1686. multiclass ATOM2_minmax_impl<string OpStr> {
  1687. defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  1688. defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  1689. defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
  1690. [hasAtomMinMax64]>;
  1691. defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
  1692. [hasAtomMinMax64]>;
  1693. }
  1694. // atom.{inc,dec}
  1695. multiclass ATOM2_incdec_impl<string OpStr> {
  1696. defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  1697. }
  1698. // atom.cas
  1699. multiclass ATOM3_cas_impl<string OpStr> {
  1700. defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  1701. defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
  1702. }
  1703. defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
  1704. defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
  1705. defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
  1706. defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
  1707. defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
  1708. defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
  1709. defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
  1710. defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
  1711. defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
  1712. defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
  1713. //-----------------------------------
  1714. // Support for ldu on sm_20 or later
  1715. //-----------------------------------
  1716. // Don't annotate ldu instructions as mayLoad, as they load from memory that is
  1717. // read-only in a kernel.
  1718. // Scalar
  1719. multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  1720. def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
  1721. !strconcat("ldu.global.", TyStr),
  1722. []>, Requires<[hasLDU]>;
  1723. def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
  1724. !strconcat("ldu.global.", TyStr),
  1725. []>, Requires<[hasLDU]>;
  1726. def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
  1727. !strconcat("ldu.global.", TyStr),
  1728. []>, Requires<[hasLDU]>;
  1729. def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
  1730. !strconcat("ldu.global.", TyStr),
  1731. []>, Requires<[hasLDU]>;
  1732. def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
  1733. !strconcat("ldu.global.", TyStr),
  1734. []>, Requires<[hasLDU]>;
  1735. }
  1736. defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
  1737. defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
  1738. defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
  1739. defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
  1740. defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
  1741. defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
  1742. defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
  1743. defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
  1744. defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
  1745. defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
  1746. // vector
  1747. // Elementized vector ldu
  1748. multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  1749. def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1750. (ins Int32Regs:$src),
  1751. !strconcat("ldu.global.", TyStr), []>;
  1752. def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1753. (ins Int64Regs:$src),
  1754. !strconcat("ldu.global.", TyStr), []>;
  1755. def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1756. (ins MEMri:$src),
  1757. !strconcat("ldu.global.", TyStr), []>;
  1758. def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1759. (ins MEMri64:$src),
  1760. !strconcat("ldu.global.", TyStr), []>;
  1761. def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1762. (ins imemAny:$src),
  1763. !strconcat("ldu.global.", TyStr), []>;
  1764. }
  1765. multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  1766. def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1767. regclass:$dst4), (ins Int32Regs:$src),
  1768. !strconcat("ldu.global.", TyStr), []>;
  1769. def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1770. regclass:$dst4), (ins Int64Regs:$src),
  1771. !strconcat("ldu.global.", TyStr), []>;
  1772. def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1773. regclass:$dst4), (ins MEMri:$src),
  1774. !strconcat("ldu.global.", TyStr), []>;
  1775. def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1776. regclass:$dst4), (ins MEMri64:$src),
  1777. !strconcat("ldu.global.", TyStr), []>;
  1778. def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1779. regclass:$dst4), (ins imemAny:$src),
  1780. !strconcat("ldu.global.", TyStr), []>;
  1781. }
  1782. defm INT_PTX_LDU_G_v2i8_ELE
  1783. : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
  1784. defm INT_PTX_LDU_G_v2i16_ELE
  1785. : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
  1786. defm INT_PTX_LDU_G_v2i32_ELE
  1787. : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
  1788. defm INT_PTX_LDU_G_v2f16_ELE
  1789. : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
  1790. defm INT_PTX_LDU_G_v2f16x2_ELE
  1791. : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
  1792. defm INT_PTX_LDU_G_v2f32_ELE
  1793. : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
  1794. defm INT_PTX_LDU_G_v2i64_ELE
  1795. : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
  1796. defm INT_PTX_LDU_G_v2f64_ELE
  1797. : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
  1798. defm INT_PTX_LDU_G_v4i8_ELE
  1799. : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
  1800. defm INT_PTX_LDU_G_v4i16_ELE
  1801. : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1802. Int16Regs>;
  1803. defm INT_PTX_LDU_G_v4i32_ELE
  1804. : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1805. Int32Regs>;
  1806. defm INT_PTX_LDU_G_v4f16_ELE
  1807. : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1808. Float16Regs>;
  1809. defm INT_PTX_LDU_G_v4f16x2_ELE
  1810. : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1811. Float16x2Regs>;
  1812. defm INT_PTX_LDU_G_v4f32_ELE
  1813. : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1814. Float32Regs>;
  1815. //-----------------------------------
  1816. // Support for ldg on sm_35 or later
  1817. //-----------------------------------
  1818. // Don't annotate ld.global.nc as mayLoad, because these loads go through the
  1819. // non-coherent texture cache, and therefore the values read must be read-only
  1820. // during the lifetime of the kernel.
  1821. multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  1822. def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
  1823. !strconcat("ld.global.nc.", TyStr),
  1824. []>, Requires<[hasLDG]>;
  1825. def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
  1826. !strconcat("ld.global.nc.", TyStr),
  1827. []>, Requires<[hasLDG]>;
  1828. def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
  1829. !strconcat("ld.global.nc.", TyStr),
  1830. []>, Requires<[hasLDG]>;
  1831. def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
  1832. !strconcat("ld.global.nc.", TyStr),
  1833. []>, Requires<[hasLDG]>;
  1834. def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
  1835. !strconcat("ld.global.nc.", TyStr),
  1836. []>, Requires<[hasLDG]>;
  1837. }
  1838. defm INT_PTX_LDG_GLOBAL_i8
  1839. : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
  1840. defm INT_PTX_LDG_GLOBAL_i16
  1841. : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
  1842. defm INT_PTX_LDG_GLOBAL_i32
  1843. : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
  1844. defm INT_PTX_LDG_GLOBAL_i64
  1845. : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
  1846. defm INT_PTX_LDG_GLOBAL_f16
  1847. : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
  1848. defm INT_PTX_LDG_GLOBAL_f16x2
  1849. : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
  1850. defm INT_PTX_LDG_GLOBAL_f32
  1851. : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
  1852. defm INT_PTX_LDG_GLOBAL_f64
  1853. : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
  1854. defm INT_PTX_LDG_GLOBAL_p32
  1855. : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
  1856. defm INT_PTX_LDG_GLOBAL_p64
  1857. : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
  1858. // vector
  1859. // Elementized vector ldg
  1860. multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  1861. def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1862. (ins Int32Regs:$src),
  1863. !strconcat("ld.global.nc.", TyStr), []>;
  1864. def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1865. (ins Int64Regs:$src),
  1866. !strconcat("ld.global.nc.", TyStr), []>;
  1867. def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1868. (ins MEMri:$src),
  1869. !strconcat("ld.global.nc.", TyStr), []>;
  1870. def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1871. (ins MEMri64:$src),
  1872. !strconcat("ld.global.nc.", TyStr), []>;
  1873. def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1874. (ins imemAny:$src),
  1875. !strconcat("ld.global.nc.", TyStr), []>;
  1876. }
  1877. multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  1878. def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1879. regclass:$dst4), (ins Int32Regs:$src),
  1880. !strconcat("ld.global.nc.", TyStr), []>;
  1881. def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1882. regclass:$dst4), (ins Int64Regs:$src),
  1883. !strconcat("ld.global.nc.", TyStr), []>;
  1884. def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1885. regclass:$dst4), (ins MEMri:$src),
  1886. !strconcat("ld.global.nc.", TyStr), []>;
  1887. def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1888. regclass:$dst4), (ins MEMri64:$src),
  1889. !strconcat("ld.global.nc.", TyStr), []>;
  1890. def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1891. regclass:$dst4), (ins imemAny:$src),
  1892. !strconcat("ld.global.nc.", TyStr), []>;
  1893. }
  1894. // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
  1895. defm INT_PTX_LDG_G_v2i8_ELE
  1896. : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
  1897. defm INT_PTX_LDG_G_v2i16_ELE
  1898. : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
  1899. defm INT_PTX_LDG_G_v2i32_ELE
  1900. : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
  1901. defm INT_PTX_LDG_G_v2f16_ELE
  1902. : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
  1903. defm INT_PTX_LDG_G_v2f16x2_ELE
  1904. : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
  1905. defm INT_PTX_LDG_G_v2f32_ELE
  1906. : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
  1907. defm INT_PTX_LDG_G_v2i64_ELE
  1908. : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
  1909. defm INT_PTX_LDG_G_v2f64_ELE
  1910. : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
  1911. defm INT_PTX_LDG_G_v4i8_ELE
  1912. : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
  1913. defm INT_PTX_LDG_G_v4i16_ELE
  1914. : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
  1915. defm INT_PTX_LDG_G_v4i32_ELE
  1916. : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
  1917. defm INT_PTX_LDG_G_v4f16_ELE
  1918. : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
  1919. defm INT_PTX_LDG_G_v4f16x2_ELE
  1920. : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
  1921. defm INT_PTX_LDG_G_v4f32_ELE
  1922. : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
  1923. multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  1924. def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
  1925. !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
  1926. [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  1927. def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
  1928. !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
  1929. [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  1930. def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
  1931. "{{ .reg .b64 %tmp;\n\t"
  1932. #" cvt.u64.u32 \t%tmp, $src;\n\t"
  1933. #" cvta." # Str # ".u64 \t$result, %tmp; }}",
  1934. [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
  1935. Requires<[useShortPtr]>;
  1936. }
  1937. multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  1938. def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
  1939. !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
  1940. [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  1941. def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
  1942. !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
  1943. [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  1944. def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
  1945. "{{ .reg .b64 %tmp;\n\t"
  1946. #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
  1947. #" cvt.u32.u64 \t$result, %tmp; }}",
  1948. [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
  1949. Requires<[useShortPtr]>;
  1950. }
  1951. defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
  1952. defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
  1953. defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
  1954. defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
  1955. defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
  1956. defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
  1957. defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
  1958. defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
  1959. // nvvm.ptr.gen.to.param
  1960. def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
  1961. (ins Int32Regs:$src),
  1962. "mov.u32 \t$result, $src;",
  1963. [(set Int32Regs:$result,
  1964. (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
  1965. def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
  1966. (ins Int64Regs:$src),
  1967. "mov.u64 \t$result, $src;",
  1968. [(set Int64Regs:$result,
  1969. (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
  1970. // nvvm.move intrinsics
  1971. def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
  1972. "mov.b16 \t$r, $s;",
  1973. [(set Int16Regs:$r,
  1974. (int_nvvm_move_i16 Int16Regs:$s))]>;
  1975. def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
  1976. "mov.b32 \t$r, $s;",
  1977. [(set Int32Regs:$r,
  1978. (int_nvvm_move_i32 Int32Regs:$s))]>;
  1979. def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
  1980. "mov.b64 \t$r, $s;",
  1981. [(set Int64Regs:$r,
  1982. (int_nvvm_move_i64 Int64Regs:$s))]>;
  1983. def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
  1984. "mov.f32 \t$r, $s;",
  1985. [(set Float32Regs:$r,
  1986. (int_nvvm_move_float Float32Regs:$s))]>;
  1987. def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
  1988. "mov.f64 \t$r, $s;",
  1989. [(set Float64Regs:$r,
  1990. (int_nvvm_move_double Float64Regs:$s))]>;
  1991. def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
  1992. "mov.u32 \t$r, $s;",
  1993. [(set Int32Regs:$r,
  1994. (int_nvvm_move_ptr Int32Regs:$s))]>;
  1995. def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
  1996. "mov.u64 \t$r, $s;",
  1997. [(set Int64Regs:$r,
  1998. (int_nvvm_move_ptr Int64Regs:$s))]>;
  1999. // @TODO: Are these actually needed, or will we always just see symbols
  2000. // copied to registers first?
  2001. /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
  2002. "mov.u32 \t$r, $s;",
  2003. [(set Int32Regs:$r,
  2004. (int_nvvm_move_ptr texternalsym:$s))]>;
  2005. def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
  2006. "mov.u64 \t$r, $s;",
  2007. [(set Int64Regs:$r,
  2008. (int_nvvm_move_ptr texternalsym:$s))]>;*/
  2009. // MoveParam %r1, param
  2010. // ptr_local_to_gen %r2, %r1
  2011. // ptr_gen_to_local %r3, %r2
  2012. // ->
  2013. // mov %r1, param
  2014. // @TODO: Revisit this. There is a type
  2015. // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
  2016. // instructions are not currently defined. However, we can use the ptr
  2017. // variants and the asm printer will do the right thing.
  2018. def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
  2019. (MoveParam texternalsym:$src)))),
  2020. (nvvm_move_ptr64 texternalsym:$src)>;
  2021. def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
  2022. (MoveParam texternalsym:$src)))),
  2023. (nvvm_move_ptr32 texternalsym:$src)>;
  2024. def texsurf_handles
  2025. : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
  2026. "mov.u64 \t$result, $src;", []>;
  2027. //-----------------------------------
  2028. // Compiler Error Warn
  2029. // - Just ignore them in codegen
  2030. //-----------------------------------
  2031. def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
  2032. "// llvm.nvvm.compiler.warn()",
  2033. [(int_nvvm_compiler_warn Int32Regs:$a)]>;
  2034. def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
  2035. "// llvm.nvvm.compiler.warn()",
  2036. [(int_nvvm_compiler_warn Int64Regs:$a)]>;
  2037. def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
  2038. "// llvm.nvvm.compiler.error()",
  2039. [(int_nvvm_compiler_error Int32Regs:$a)]>;
  2040. def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
  2041. "// llvm.nvvm.compiler.error()",
  2042. [(int_nvvm_compiler_error Int64Regs:$a)]>;
  2043. // isspacep
  2044. def ISSPACEP_CONST_32
  2045. : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
  2046. "isspacep.const \t$d, $a;",
  2047. [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
  2048. Requires<[hasPTX31]>;
  2049. def ISSPACEP_CONST_64
  2050. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  2051. "isspacep.const \t$d, $a;",
  2052. [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
  2053. Requires<[hasPTX31]>;
  2054. def ISSPACEP_GLOBAL_32
  2055. : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
  2056. "isspacep.global \t$d, $a;",
  2057. [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
  2058. def ISSPACEP_GLOBAL_64
  2059. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  2060. "isspacep.global \t$d, $a;",
  2061. [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
  2062. def ISSPACEP_LOCAL_32
  2063. : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
  2064. "isspacep.local \t$d, $a;",
  2065. [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
  2066. def ISSPACEP_LOCAL_64
  2067. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  2068. "isspacep.local \t$d, $a;",
  2069. [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
  2070. def ISSPACEP_SHARED_32
  2071. : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
  2072. "isspacep.shared \t$d, $a;",
  2073. [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
  2074. def ISSPACEP_SHARED_64
  2075. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  2076. "isspacep.shared \t$d, $a;",
  2077. [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
  2078. // Special register reads
  2079. def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
  2080. (ins SpecialRegs:$r),
  2081. "mov.b32 \t$d, $r;", []>;
  2082. def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
  2083. def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
  2084. def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
  2085. def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
  2086. def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
  2087. def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
  2088. def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
  2089. def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
  2090. def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
  2091. def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
  2092. def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
  2093. def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
  2094. def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
  2095. def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
  2096. def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
  2097. def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
  2098. def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
  2099. def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
  2100. def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
  2101. def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
  2102. def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
  2103. def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
  2104. def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
  2105. def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
  2106. def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
  2107. def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
  2108. def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
  2109. def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
  2110. def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
  2111. def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
  2112. def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
  2113. def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
  2114. // rotate builtin support
  2115. def ROTATE_B32_HW_IMM
  2116. : NVPTXInst<(outs Int32Regs:$dst),
  2117. (ins Int32Regs:$src, i32imm:$amt),
  2118. "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
  2119. [(set Int32Regs:$dst,
  2120. (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
  2121. Requires<[hasHWROT32]> ;
  2122. def ROTATE_B32_HW_REG
  2123. : NVPTXInst<(outs Int32Regs:$dst),
  2124. (ins Int32Regs:$src, Int32Regs:$amt),
  2125. "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
  2126. [(set Int32Regs:$dst,
  2127. (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
  2128. Requires<[hasHWROT32]> ;
  2129. def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
  2130. (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
  2131. Requires<[noHWROT32]> ;
  2132. def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
  2133. (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
  2134. Requires<[noHWROT32]> ;
  2135. let hasSideEffects = false in {
  2136. def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
  2137. !strconcat("{{\n\t",
  2138. ".reg .b32 %dummy;\n\t",
  2139. "mov.b64 \t{$dst,%dummy}, $src;\n\t",
  2140. "}}"),
  2141. []> ;
  2142. def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
  2143. !strconcat("{{\n\t",
  2144. ".reg .b32 %dummy;\n\t",
  2145. "mov.b64 \t{%dummy,$dst}, $src;\n\t",
  2146. "}}"),
  2147. []> ;
  2148. }
  2149. let hasSideEffects = false in {
  2150. def PACK_TWO_INT32
  2151. : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
  2152. "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
  2153. }
  2154. def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
  2155. (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
  2156. (GET_LO_INT64 Int64Regs:$src))> ;
  2157. // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
  2158. // no side effects.
  2159. let hasSideEffects = false in {
  2160. def SHF_L_WRAP_B32_IMM
  2161. : NVPTXInst<(outs Int32Regs:$dst),
  2162. (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
  2163. "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
  2164. Requires<[hasHWROT32]>;
  2165. def SHF_L_WRAP_B32_REG
  2166. : NVPTXInst<(outs Int32Regs:$dst),
  2167. (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
  2168. "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
  2169. Requires<[hasHWROT32]>;
  2170. def SHF_R_WRAP_B32_IMM
  2171. : NVPTXInst<(outs Int32Regs:$dst),
  2172. (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
  2173. "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
  2174. Requires<[hasHWROT32]>;
  2175. def SHF_R_WRAP_B32_REG
  2176. : NVPTXInst<(outs Int32Regs:$dst),
  2177. (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
  2178. "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
  2179. Requires<[hasHWROT32]>;
  2180. }
  2181. // HW version of rotate 64
  2182. def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
  2183. (PACK_TWO_INT32
  2184. (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
  2185. (GET_LO_INT64 Int64Regs:$src), imm:$amt),
  2186. (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
  2187. (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
  2188. Requires<[hasHWROT32]>;
  2189. def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
  2190. (PACK_TWO_INT32
  2191. (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
  2192. (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
  2193. (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
  2194. (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
  2195. Requires<[hasHWROT32]>;
  2196. def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
  2197. (PACK_TWO_INT32
  2198. (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
  2199. (GET_HI_INT64 Int64Regs:$src), imm:$amt),
  2200. (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
  2201. (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
  2202. Requires<[hasHWROT32]>;
  2203. def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
  2204. (PACK_TWO_INT32
  2205. (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
  2206. (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
  2207. (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
  2208. (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
  2209. Requires<[hasHWROT32]>;
  2210. // SW version of rotate 64
  2211. def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
  2212. (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
  2213. Requires<[noHWROT32]>;
  2214. def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
  2215. (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
  2216. Requires<[noHWROT32]>;
  2217. def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
  2218. (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
  2219. Requires<[noHWROT32]>;
  2220. def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
  2221. (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
  2222. Requires<[noHWROT32]>;
  2223. //-----------------------------------
  2224. // Texture Intrinsics
  2225. //-----------------------------------
  2226. // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
  2227. // also defined in NVPTXReplaceImageHandles.cpp
  2228. // texmode_independent
  2229. let IsTex = true, IsTexModeUnified = false in {
  2230. // Texture fetch instructions using handles
  2231. class TEX_1D_base<string inst, NVPTXRegClass outtype,
  2232. NVPTXRegClass intype, dag texsamp>
  2233. : NVPTXInst<(outs outtype:$r, outtype:$g,
  2234. outtype:$b, outtype:$a),
  2235. !con(texsamp, (ins intype:$x)),
  2236. inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
  2237. []>;
  2238. multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  2239. def _RR : TEX_1D_base<inst, outtype, intype,
  2240. (ins Int64Regs:$t, Int64Regs:$s)>;
  2241. def _RI : TEX_1D_base<inst, outtype, intype,
  2242. (ins Int64Regs:$t, i64imm:$s)>;
  2243. def _IR : TEX_1D_base<inst, outtype, intype,
  2244. (ins i64imm:$t, Int64Regs:$s)>;
  2245. def _II : TEX_1D_base<inst, outtype, intype,
  2246. (ins i64imm:$t, i64imm:$s)>;
  2247. }
  2248. defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
  2249. defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
  2250. defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
  2251. defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
  2252. defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
  2253. defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
  2254. class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
  2255. NVPTXRegClass intype, dag texsamp>
  2256. : NVPTXInst<(outs outtype:$r, outtype:$g,
  2257. outtype:$b, outtype:$a),
  2258. !con(texsamp, (ins intype:$x, intype:$lod)),
  2259. inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
  2260. []>;
  2261. multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
  2262. NVPTXRegClass intype> {
  2263. def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
  2264. (ins Int64Regs:$t, Int64Regs:$s)>;
  2265. def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
  2266. (ins Int64Regs:$t, i64imm:$s)>;
  2267. def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
  2268. (ins i64imm:$t, Int64Regs:$s)>;
  2269. def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
  2270. (ins i64imm:$t, i64imm:$s)>;
  2271. }
  2272. defm TEX_1D_F32_F32_LEVEL :
  2273. TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
  2274. defm TEX_1D_S32_F32_LEVEL :
  2275. TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
  2276. defm TEX_1D_U32_F32_LEVEL :
  2277. TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
  2278. class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
  2279. NVPTXRegClass intype, dag texsamp>
  2280. : NVPTXInst<(outs outtype:$r, outtype:$g,
  2281. outtype:$b, outtype:$a),
  2282. !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
  2283. inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
  2284. " \\{$gradx\\}, \\{$grady\\};",
  2285. []>;
  2286. multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
  2287. NVPTXRegClass intype> {
  2288. def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
  2289. (ins Int64Regs:$t, Int64Regs:$s)>;
  2290. def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
  2291. (ins Int64Regs:$t, i64imm:$s)>;
  2292. def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
  2293. (ins i64imm:$t, Int64Regs:$s)>;
  2294. def _II : TEX_1D_GRAD_base<inst, outtype, intype,
  2295. (ins i64imm:$t, i64imm:$s)>;
  2296. }
  2297. defm TEX_1D_F32_F32_GRAD
  2298. : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
  2299. defm TEX_1D_S32_F32_GRAD
  2300. : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
  2301. defm TEX_1D_U32_F32_GRAD
  2302. : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
  2303. class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
  2304. NVPTXRegClass intype, dag texsamp>
  2305. : NVPTXInst<(outs outtype:$r, outtype:$g,
  2306. outtype:$b, outtype:$a),
  2307. !con(texsamp, (ins Int32Regs:$l, intype:$x)),
  2308. inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
  2309. []>;
  2310. multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
  2311. NVPTXRegClass intype> {
  2312. def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
  2313. (ins Int64Regs:$t, Int64Regs:$s)>;
  2314. def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
  2315. (ins Int64Regs:$t, i64imm:$s)>;
  2316. def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
  2317. (ins i64imm:$t, Int64Regs:$s)>;
  2318. def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
  2319. (ins i64imm:$t, i64imm:$s)>;
  2320. }
  2321. defm TEX_1D_ARRAY_F32_F32
  2322. : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  2323. defm TEX_1D_ARRAY_F32_S32
  2324. : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
  2325. defm TEX_1D_ARRAY_S32_S32
  2326. : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
  2327. defm TEX_1D_ARRAY_S32_F32
  2328. : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  2329. defm TEX_1D_ARRAY_U32_S32
  2330. : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
  2331. defm TEX_1D_ARRAY_U32_F32
  2332. : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  2333. class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
  2334. NVPTXRegClass intype, dag texsamp>
  2335. : NVPTXInst<(outs outtype:$r, outtype:$g,
  2336. outtype:$b, outtype:$a),
  2337. !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
  2338. inst # " \t\\{$r, $g, $b, $a\\},"
  2339. " [$t, $s, \\{$l, $x\\}], $lod;",
  2340. []>;
  2341. multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
  2342. NVPTXRegClass intype> {
  2343. def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
  2344. (ins Int64Regs:$t, Int64Regs:$s)>;
  2345. def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
  2346. (ins Int64Regs:$t, i64imm:$s)>;
  2347. def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
  2348. (ins i64imm:$t, Int64Regs:$s)>;
  2349. def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
  2350. (ins i64imm:$t, i64imm:$s)>;
  2351. }
  2352. defm TEX_1D_ARRAY_F32_F32_LEVEL
  2353. : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  2354. defm TEX_1D_ARRAY_S32_F32_LEVEL
  2355. : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  2356. defm TEX_1D_ARRAY_U32_F32_LEVEL
  2357. : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_1D_ARRAY_GRAD variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $gradx and $grady.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // $l is always Int32Regs. The selection pattern list is empty.
  class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp,
                 (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$l, $x\\}], ",
                       "\\{$gradx\\}, \\{$grady\\};"),
            []>;
  // Emits four TEX_1D_ARRAY_GRAD_base records. The suffix names the operand
  // kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_1D_ARRAY_GRAD_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_1D_ARRAY_GRAD_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_1D_ARRAY_GRAD_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_1D_ARRAY_GRAD_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_1D_ARRAY_GRAD instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_1D_ARRAY_F32_F32_GRAD :
      TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_1D_ARRAY_S32_F32_GRAD :
      TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_1D_ARRAY_U32_F32_GRAD :
      TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_2D variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x and $y.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // The selection pattern list is empty.
  class TEX_2D_base<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                    dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp, (ins intype:$x, intype:$y)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$x, $y\\}];"),
            []>;
  // Emits four TEX_2D_base records. The suffix names the operand kind of $t
  // then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_2D_base<inst, outtype, intype,
                          (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_2D_base<inst, outtype, intype,
                          (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_2D_base<inst, outtype, intype,
                          (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_2D_base<inst, outtype, intype,
                          (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_2D instantiations: (mnemonic, outtype, intype).
  // Both the s32 and the u32 mnemonics are given Int32Regs.
  defm TEX_2D_F32_F32
      : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_2D_F32_S32
      : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_2D_S32_S32
      : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_2D_S32_F32
      : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_2D_U32_S32
      : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_2D_U32_F32
      : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_2D_LEVEL variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and $lod.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // The selection pattern list is empty.
  class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$x, $y\\}], $lod;"),
            []>;
  // Emits four TEX_2D_LEVEL_base records. The suffix names the operand kind
  // of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_2D_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_2D_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_2D_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_2D_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_2D_LEVEL instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_2D_F32_F32_LEVEL
      : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_2D_S32_F32_LEVEL
      : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_2D_U32_F32_LEVEL
      : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_2D_GRAD variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and the four gradient operands.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // The selection pattern list is empty.
  class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp,
                 (ins intype:$x, intype:$y,
                      intype:$gradx0, intype:$gradx1,
                      intype:$grady0, intype:$grady1)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$x, $y\\}], ",
                       "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
            []>;
  // Emits four TEX_2D_GRAD_base records. The suffix names the operand kind
  // of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_2D_GRAD_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_2D_GRAD_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_2D_GRAD_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_2D_GRAD_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_2D_GRAD instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_2D_F32_F32_GRAD
      : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_2D_S32_F32_GRAD
      : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_2D_U32_F32_GRAD
      : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_2D_ARRAY variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x and $y.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // $l is always Int32Regs. The selection pattern list is empty.
  // NOTE(review): $y is printed twice so the bracketed operand list has four
  // entries; presumably padding required by the PTX syntax - confirm.
  class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$l, $x, $y, $y\\}];"),
            []>;
  // Emits four TEX_2D_ARRAY_base records. The suffix names the operand kind
  // of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_2D_ARRAY_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_2D_ARRAY_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_2D_ARRAY_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_2D_ARRAY_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_2D_ARRAY instantiations: (mnemonic, outtype, intype).
  // Both the s32 and the u32 mnemonics are given Int32Regs.
  defm TEX_2D_ARRAY_F32_F32 :
      TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_F32_S32 :
      TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_2D_ARRAY_S32_S32 :
      TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_2D_ARRAY_S32_F32 :
      TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_U32_S32 :
      TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_2D_ARRAY_U32_F32 :
      TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_2D_ARRAY_LEVEL variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and $lod.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // $l is always Int32Regs. The selection pattern list is empty.
  // NOTE(review): $y is printed twice so the bracketed operand list has four
  // entries; presumably padding required by the PTX syntax - confirm.
  class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp,
                 (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;"),
            []>;
  // Emits four TEX_2D_ARRAY_LEVEL_base records. The suffix names the operand
  // kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_2D_ARRAY_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_2D_ARRAY_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_2D_ARRAY_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_2D_ARRAY_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_2D_ARRAY_LEVEL instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_2D_ARRAY_F32_F32_LEVEL :
      TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
                         Float32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_S32_F32_LEVEL :
      TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
                         Int32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_U32_F32_LEVEL :
      TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
                         Int32Regs, Float32Regs>;
  // Base record for the TEX_2D_ARRAY_GRAD variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and the four gradient operands.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // $l is always Int32Regs. The selection pattern list is empty.
  // NOTE(review): $y is printed twice so the bracketed operand list has four
  // entries; presumably padding required by the PTX syntax - confirm.
  class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp,
                 (ins Int32Regs:$l, intype:$x, intype:$y,
                      intype:$gradx0, intype:$gradx1,
                      intype:$grady0, intype:$grady1)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$l, $x, $y, $y\\}], ",
                       "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
            []>;
  // Emits four TEX_2D_ARRAY_GRAD_base records. The suffix names the operand
  // kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_2D_ARRAY_GRAD_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_2D_ARRAY_GRAD_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_2D_ARRAY_GRAD_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_2D_ARRAY_GRAD_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_2D_ARRAY_GRAD instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_2D_ARRAY_F32_F32_GRAD :
      TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_S32_F32_GRAD :
      TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_U32_F32_GRAD :
      TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_3D variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and $z.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // The selection pattern list is empty.
  // NOTE(review): $z is printed twice so the bracketed operand list has four
  // entries; presumably padding required by the PTX syntax - confirm.
  class TEX_3D_base<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                    dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$x, $y, $z, $z\\}];"),
            []>;
  // Emits four TEX_3D_base records. The suffix names the operand kind of $t
  // then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_3D<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_3D_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_3D_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_3D_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_3D_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_3D instantiations: (mnemonic, outtype, intype).
  // Both the s32 and the u32 mnemonics are given Int32Regs.
  defm TEX_3D_F32_F32
      : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_3D_F32_S32
      : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_3D_S32_S32
      : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_3D_S32_F32
      : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_3D_U32_S32
      : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_3D_U32_F32
      : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_3D_LEVEL variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y, $z and $lod.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // The selection pattern list is empty.
  // NOTE(review): $z is printed twice so the bracketed operand list has four
  // entries; presumably padding required by the PTX syntax - confirm.
  class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp,
                 (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;"),
            []>;
  // Emits four TEX_3D_LEVEL_base records. The suffix names the operand kind
  // of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_3D_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_3D_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_3D_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_3D_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_3D_LEVEL instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_3D_F32_F32_LEVEL :
      TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_3D_S32_F32_LEVEL :
      TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_3D_U32_F32_LEVEL :
      TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_3D_GRAD variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y, $z and the six gradient operands.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // The selection pattern list is empty.
  // NOTE(review): $z, $gradx2 and $grady2 are each printed twice so every
  // braced list has four entries; presumably padding required by the PTX
  // syntax - confirm.
  class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp,
                 (ins intype:$x, intype:$y, intype:$z,
                      intype:$gradx0, intype:$gradx1, intype:$gradx2,
                      intype:$grady0, intype:$grady1, intype:$grady2)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$x, $y, $z, $z\\}], ",
                       "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, ",
                       "\\{$grady0, $grady1, $grady2, $grady2\\};"),
            []>;
  // Emits four TEX_3D_GRAD_base records. The suffix names the operand kind
  // of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_3D_GRAD_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_3D_GRAD_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_3D_GRAD_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_3D_GRAD_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_3D_GRAD instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_3D_F32_F32_GRAD :
      TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_3D_S32_F32_GRAD :
      TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_3D_U32_F32_GRAD :
      TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_CUBE variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and $z.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // The selection pattern list is empty.
  // NOTE(review): $z is printed twice so the bracketed operand list has four
  // entries; presumably padding required by the PTX syntax - confirm.
  class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
                      NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$x, $y, $z, $z\\}];"),
            []>;
  // Emits four TEX_CUBE_base records. The suffix names the operand kind of
  // $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_CUBE<string inst, NVPTXRegClass outtype,
                      NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_CUBE_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_CUBE_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_CUBE_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_CUBE_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_CUBE instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_CUBE_F32_F32 :
      TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_CUBE_S32_F32 :
      TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_CUBE_U32_F32 :
      TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_CUBE_LEVEL variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y, $z and $lod.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // The selection pattern list is empty.
  // NOTE(review): $z is printed twice so the bracketed operand list has four
  // entries; presumably padding required by the PTX syntax - confirm.
  class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp,
                 (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;"),
            []>;
  // Emits four TEX_CUBE_LEVEL_base records. The suffix names the operand
  // kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_CUBE_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_CUBE_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_CUBE_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_CUBE_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_CUBE_LEVEL instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_CUBE_F32_F32_LEVEL :
      TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_CUBE_S32_F32_LEVEL :
      TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_CUBE_U32_F32_LEVEL :
      TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_CUBE_ARRAY variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and $z.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // $l is always Int32Regs. The selection pattern list is empty.
  class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp,
                 (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$l, $x, $y, $z\\}];"),
            []>;
  // Emits four TEX_CUBE_ARRAY_base records. The suffix names the operand
  // kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_CUBE_ARRAY_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_CUBE_ARRAY_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_CUBE_ARRAY_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_CUBE_ARRAY_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_CUBE_ARRAY instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_CUBE_ARRAY_F32_F32 :
      TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_CUBE_ARRAY_S32_F32 :
      TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_CUBE_ARRAY_U32_F32 :
      TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_CUBE_ARRAY_LEVEL variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y, $z and $lod.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // $l is always Int32Regs. The selection pattern list is empty.
  class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(texsamp,
                 (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                      intype:$lod)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;"),
            []>;
  // Emits four TEX_CUBE_ARRAY_LEVEL_base records. The suffix names the
  // operand kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TEX_CUBE_ARRAY_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TEX_CUBE_ARRAY_LEVEL_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TEX_CUBE_ARRAY_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TEX_CUBE_ARRAY_LEVEL_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TEX_CUBE_ARRAY_LEVEL instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_CUBE_ARRAY_F32_F32_LEVEL :
      TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                           Float32Regs, Float32Regs>;
  defm TEX_CUBE_ARRAY_S32_F32_LEVEL :
      TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                           Int32Regs, Float32Regs>;
  defm TEX_CUBE_ARRAY_U32_F32_LEVEL :
      TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                           Int32Regs, Float32Regs>;
  // Base record for the TLD4_2D variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $v0, $v1, $v2, $v3.
  //   intype   - register class of $x and $y.
  //   texsamp  - leading `ins` operands; the asm string expects $t and $s.
  // The selection pattern list is empty.
  class TLD4_2D_base<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                     dag texsamp>
      : NVPTXInst<
            (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
            !con(texsamp, (ins intype:$x, intype:$y)),
            !strconcat(inst, " \t\\{$v0, $v1, $v2, $v3\\}, ",
                       "[$t, $s, \\{$x, $y\\}];"),
            []>;
  // Emits four TLD4_2D_base records. The suffix names the operand kind of $t
  // then $s: R = Int64Regs register, I = i64imm immediate.
  multiclass TLD4_2D<string inst, NVPTXRegClass outtype,
                     NVPTXRegClass intype> {
    // $t register, $s register.
    def _RR : TLD4_2D_base<
                  inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    // $t register, $s immediate.
    def _RI : TLD4_2D_base<
                  inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    // $t immediate, $s register.
    def _IR : TLD4_2D_base<
                  inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    // $t immediate, $s immediate.
    def _II : TLD4_2D_base<
                  inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }
  // TLD4_2D instantiations: (mnemonic, outtype, intype). One defm per
  // r/g/b/a mnemonic component and per f32/s32/u32 result type; the s32 and
  // u32 mnemonics are both given Int32Regs, and intype is always Float32Regs.

  // f32 results.
  defm TLD4_R_2D_F32_F32 :
      TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_G_2D_F32_F32 :
      TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_B_2D_F32_F32 :
      TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_A_2D_F32_F32 :
      TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

  // s32 results.
  defm TLD4_R_2D_S32_F32 :
      TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_G_2D_S32_F32 :
      TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_B_2D_S32_F32 :
      TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_A_2D_S32_F32 :
      TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

  // u32 results.
  defm TLD4_R_2D_U32_F32 :
      TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_G_2D_U32_F32 :
      TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_B_2D_U32_F32 :
      TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_A_2D_U32_F32 :
      TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  2767. }
  2768. // texmode_unified
  2769. let IsTex = true, IsTexModeUnified = true in {
  2770. // Texture fetch instructions using handles
  // Base record for the TEX_UNIFIED_1D variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // The selection pattern list is empty.
  class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex, (ins intype:$x)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$x\\}];"),
            []>;
  // Emits two TEX_UNIFIED_1D_base records that differ only in the kind of $t:
  // _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_1D_base<inst, outtype, intype,
                                 (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_1D_base<inst, outtype, intype,
                                 (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_1D instantiations: (mnemonic, outtype, intype).
  // Both the s32 and the u32 mnemonics are given Int32Regs.
  defm TEX_UNIFIED_1D_F32_S32 :
      TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_F32_F32 :
      TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_S32_S32 :
      TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_S32_F32 :
      TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_U32_S32 :
      TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_U32_F32 :
      TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_1D_LEVEL variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x and $lod.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // The selection pattern list is empty.
  class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex, (ins intype:$x, intype:$lod)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$x\\}], $lod;"),
            []>;
  // Emits two TEX_UNIFIED_1D_LEVEL_base records that differ only in the kind
  // of $t: _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_1D_LEVEL instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_UNIFIED_1D_F32_F32_LEVEL :
      TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32",
                           Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_S32_F32_LEVEL :
      TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32",
                           Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_U32_F32_LEVEL :
      TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32",
                           Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_1D_GRAD variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $gradx and $grady.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // The selection pattern list is empty.
  class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$x\\}], ",
                       "\\{$gradx\\}, \\{$grady\\};"),
            []>;
  // Emits two TEX_UNIFIED_1D_GRAD_base records that differ only in the kind
  // of $t: _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_1D_GRAD instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_UNIFIED_1D_F32_F32_GRAD :
      TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32",
                          Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_S32_F32_GRAD :
      TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32",
                          Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_U32_F32_GRAD :
      TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32",
                          Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_1D_ARRAY variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // $l is always Int32Regs. The selection pattern list is empty.
  class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex, (ins Int32Regs:$l, intype:$x)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$l, $x\\}];"),
            []>;
  // Emits two TEX_UNIFIED_1D_ARRAY_base records that differ only in the kind
  // of $t: _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_1D_ARRAY instantiations: (mnemonic, outtype, intype).
  // Both the s32 and the u32 mnemonics are given Int32Regs.
  defm TEX_UNIFIED_1D_ARRAY_F32_S32 :
      TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_F32_F32 :
      TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_S32_S32 :
      TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_S32_F32 :
      TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_U32_S32 :
      TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_U32_F32 :
      TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_1D_ARRAY_LEVEL variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x and $lod.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // $l is always Int32Regs. The selection pattern list is empty.
  class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$l, $x\\}], $lod;"),
            []>;
  // Emits two TEX_UNIFIED_1D_ARRAY_LEVEL_base records that differ only in the
  // kind of $t: _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
                 inst, outtype, intype, (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
                 inst, outtype, intype, (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_1D_ARRAY_LEVEL instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL :
      TEX_UNIFIED_1D_ARRAY_LEVEL<
          "tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL :
      TEX_UNIFIED_1D_ARRAY_LEVEL<
          "tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL :
      TEX_UNIFIED_1D_ARRAY_LEVEL<
          "tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_1D_ARRAY_GRAD variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $gradx and $grady.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // $l is always Int32Regs. The selection pattern list is empty.
  class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex,
                 (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$l, $x\\}], ",
                       "\\{$gradx\\}, \\{$grady\\};"),
            []>;
  // Emits two TEX_UNIFIED_1D_ARRAY_GRAD_base records that differ only in the
  // kind of $t: _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<
                 inst, outtype, intype, (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<
                 inst, outtype, intype, (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_1D_ARRAY_GRAD instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD :
      TEX_UNIFIED_1D_ARRAY_GRAD<
          "tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD :
      TEX_UNIFIED_1D_ARRAY_GRAD<
          "tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD :
      TEX_UNIFIED_1D_ARRAY_GRAD<
          "tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_2D variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x and $y.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // The selection pattern list is empty.
  class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex, (ins intype:$x, intype:$y)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$x, $y\\}];"),
            []>;
  // Emits two TEX_UNIFIED_2D_base records that differ only in the kind of $t:
  // _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_2D_base<inst, outtype, intype,
                                 (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_2D_base<inst, outtype, intype,
                                 (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_2D instantiations: (mnemonic, outtype, intype).
  // Both the s32 and the u32 mnemonics are given Int32Regs.
  defm TEX_UNIFIED_2D_F32_S32 :
      TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_F32_F32 :
      TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_S32_S32 :
      TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_S32_F32 :
      TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_U32_S32 :
      TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_U32_F32 :
      TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_2D_LEVEL variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and $lod.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // The selection pattern list is empty.
  class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$x, $y\\}], $lod;"),
            []>;
  // Emits two TEX_UNIFIED_2D_LEVEL_base records that differ only in the kind
  // of $t: _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_2D_LEVEL instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_UNIFIED_2D_F32_F32_LEVEL :
      TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32",
                           Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_S32_F32_LEVEL :
      TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32",
                           Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_U32_F32_LEVEL :
      TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32",
                           Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_2D_GRAD variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and the four gradient operands.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // The selection pattern list is empty.
  class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex,
                 (ins intype:$x, intype:$y,
                      intype:$gradx0, intype:$gradx1,
                      intype:$grady0, intype:$grady1)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$x, $y\\}], ",
                       "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
            []>;
  // Emits two TEX_UNIFIED_2D_GRAD_base records that differ only in the kind
  // of $t: _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_2D_GRAD instantiations: (mnemonic, outtype, intype).
  // Only Float32Regs is used as intype here.
  defm TEX_UNIFIED_2D_F32_F32_GRAD :
      TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32",
                          Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_S32_F32_GRAD :
      TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32",
                          Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_U32_F32_GRAD :
      TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32",
                          Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_2D_ARRAY variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x and $y.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // $l is always Int32Regs. The selection pattern list is empty.
  // NOTE(review): $y is printed twice so the bracketed operand list has four
  // entries; presumably padding required by the PTX syntax - confirm.
  class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$l, $x, $y, $y\\}];"),
            []>;
  // Emits two TEX_UNIFIED_2D_ARRAY_base records that differ only in the kind
  // of $t: _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }
  // TEX_UNIFIED_2D_ARRAY instantiations: (mnemonic, outtype, intype).
  // Both the s32 and the u32 mnemonics are given Int32Regs.
  defm TEX_UNIFIED_2D_ARRAY_F32_S32 :
      TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_F32_F32 :
      TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_S32_S32 :
      TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_S32_F32 :
      TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_U32_S32 :
      TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_U32_F32 :
      TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Base record for the TEX_UNIFIED_2D_ARRAY_LEVEL variants.
  //   inst     - mnemonic placed at the start of the assembly string.
  //   outtype  - register class of the four results $r, $g, $b, $a.
  //   intype   - register class of $x, $y and $lod.
  //   tex      - leading `ins` operand; the asm string expects $t.
  // $l is always Int32Regs. The selection pattern list is empty.
  // NOTE(review): $y is printed twice so the bracketed operand list has four
  // entries; presumably padding required by the PTX syntax - confirm.
  class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
      : NVPTXInst<
            (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
            !con(tex,
                 (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
            !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, ",
                       "[$t, \\{$l, $x, $y, $y\\}], $lod;"),
            []>;
  // Emits two TEX_UNIFIED_2D_ARRAY_LEVEL_base records that differ only in the
  // kind of $t: _R takes an Int64Regs register, _I an i64imm immediate.
  multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
    // $t in a register.
    def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<
                 inst, outtype, intype, (ins Int64Regs:$t)>;
    // $t as an immediate.
    def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<
                 inst, outtype, intype, (ins i64imm:$t)>;
  }
  3007. defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
  3008. : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
  3009. Float32Regs, Float32Regs>;
  3010. defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
  3011. : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
  3012. Int32Regs, Float32Regs>;
  3013. defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
  3014. : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
  3015. Int32Regs, Float32Regs>;
  // TEX_UNIFIED 2D-array fetch with two explicit gradient pairs
  // ($gradx0/$gradx1 and $grady0/$grady1), each printed as its own brace
  // group after the address bracket.
  // NOTE(review): $y appears twice in the coordinate vector, presumably as
  // padding to four elements -- confirm against the PTX ISA.
  class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                                 intype:$gradx0, intype:$gradx1,
                                 intype:$grady0, intype:$grady1)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],",
                             " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                            (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                            (ins i64imm:$t)>;
  }

  defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
      : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
                                  Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
      : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
                                  Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
      : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
                                  Int32Regs, Float32Regs>;
  // TEX_UNIFIED 3D fetch: handle $t (from `tex`) plus three `intype`
  // coordinates; four `outtype` results.
  // NOTE(review): $z is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
  }

  // Result type x coordinate type combinations.
  defm TEX_UNIFIED_3D_F32_S32
      : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_3D_F32_F32
      : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_S32_S32
      : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_3D_S32_F32
      : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_U32_S32
      : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_3D_U32_F32
      : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // TEX_UNIFIED 3D fetch with a trailing `intype` operand $lod.
  // NOTE(review): $z is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins intype:$x, intype:$y, intype:$z,
                                 intype:$lod)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, \\{$x, $y, $z, $z\\}], $lod;"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }

  defm TEX_UNIFIED_3D_F32_F32_LEVEL
      : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32",
                             Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_S32_F32_LEVEL
      : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32",
                             Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_U32_F32_LEVEL
      : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32",
                             Int32Regs, Float32Regs>;
  // TEX_UNIFIED 3D fetch with two explicit three-component gradients
  // ($gradx0..2 and $grady0..2).
  // NOTE(review): the last element of the coordinate vector and of each
  // gradient vector is printed twice, presumably as padding to four
  // elements -- confirm against the PTX ISA.
  class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins intype:$x, intype:$y, intype:$z,
                                 intype:$gradx0, intype:$gradx1,
                                 intype:$gradx2, intype:$grady0,
                                 intype:$grady1, intype:$grady2)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],",
                             " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                             " \\{$grady0, $grady1, $grady2, $grady2\\};"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
  }

  defm TEX_UNIFIED_3D_F32_F32_GRAD
      : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32",
                            Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_S32_F32_GRAD
      : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32",
                            Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_U32_F32_GRAD
      : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32",
                            Int32Regs, Float32Regs>;
  // TEX_UNIFIED cube fetch: handle $t (from `tex`) plus three `intype`
  // coordinates; four `outtype` results.
  // NOTE(review): $z is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
  }

  defm TEX_UNIFIED_CUBE_F32_F32
      : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_S32_F32
      : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_U32_F32
      : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
  // TEX_UNIFIED cube fetch with a trailing `intype` operand $lod.
  // NOTE(review): $z is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                                    NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins intype:$x, intype:$y, intype:$z,
                                 intype:$lod)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, \\{$x, $y, $z, $z\\}], $lod;"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                                    NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                         (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                         (ins i64imm:$t)>;
  }

  defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
      : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
                               Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
      : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
                               Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
      : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
                               Int32Regs, Float32Regs>;
  // TEX_UNIFIED cube-array fetch: handle $t (from `tex`), an Int32Regs
  // operand $l and three `intype` coordinates. All four slots of the
  // coordinate vector are distinct operands here ($l, $x, $y, $z).
  class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                                    NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                                 intype:$z)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                                    NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                         (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                         (ins i64imm:$t)>;
  }

  defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
      : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32",
                               Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
      : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32",
                               Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
      : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32",
                               Int32Regs, Float32Regs>;
  // TEX_UNIFIED cube-array fetch with a trailing `intype` operand $lod.
  // The coordinate vector is ($l, $x, $y, $z) with no repeated element.
  class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                          NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                                 intype:$z, intype:$lod)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, \\{$l, $x, $y, $z\\}], $lod;"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                          NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                               (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                               (ins i64imm:$t)>;
  }

  defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
      : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                                     Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
      : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                                     Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
      : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                                     Int32Regs, Float32Regs>;
  // TLD4_UNIFIED 2D: handle $t (from `tex`) plus two `intype` coordinates;
  // four `outtype` results named $v0..$v3.
  class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
                  !con(tex, (ins intype:$x, intype:$y)),
                  !strconcat(inst,
                             " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
    def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
    def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
  }

  // f32 results, one definition per .r/.g/.b/.a mnemonic.
  defm TLD4_UNIFIED_R_2D_F32_F32
      : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_UNIFIED_G_2D_F32_F32
      : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_UNIFIED_B_2D_F32_F32
      : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_UNIFIED_A_2D_F32_F32
      : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

  // s32 results.
  defm TLD4_UNIFIED_R_2D_S32_F32
      : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_G_2D_S32_F32
      : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_B_2D_S32_F32
      : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_A_2D_S32_F32
      : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

  // u32 results.
  defm TLD4_UNIFIED_R_2D_U32_F32
      : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_G_2D_U32_F32
      : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_B_2D_U32_F32
      : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_A_2D_U32_F32
      : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  3231. }
  3232. //=== Surface load instructions
  3233. let IsSuld = true in {
  // Single-result 1D surface load: handle $s (from `surf`) plus one
  // Int32Regs coordinate $x; one `outtype` result $r.
  class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r),
                  !con(surf, (ins Int32Regs:$x)),
                  !strconcat(inst, " \\{$r\\}, [$s, \\{$x\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_1D_I8_CLAMP  : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
  defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
  defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
  defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_1D_I8_TRAP  : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
  defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
  defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
  defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_1D_I8_ZERO  : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
  defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
  defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
  defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
  // Single-result 1D-array surface load: handle $s (from `surf`) plus
  // Int32Regs operands $l and $x; one `outtype` result $r.
  class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r),
                  !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                  !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_1D_ARRAY_I8_CLAMP
      : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_I16_CLAMP
      : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_I32_CLAMP
      : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
  defm SULD_1D_ARRAY_I64_CLAMP
      : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_1D_ARRAY_I8_TRAP
      : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
  defm SULD_1D_ARRAY_I16_TRAP
      : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
  defm SULD_1D_ARRAY_I32_TRAP
      : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
  defm SULD_1D_ARRAY_I64_TRAP
      : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_1D_ARRAY_I8_ZERO
      : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
  defm SULD_1D_ARRAY_I16_ZERO
      : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
  defm SULD_1D_ARRAY_I32_ZERO
      : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
  defm SULD_1D_ARRAY_I64_ZERO
      : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
  // Single-result 2D surface load: handle $s (from `surf`) plus Int32Regs
  // coordinates $x, $y; one `outtype` result $r.
  class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                  !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
    def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_2D_I8_CLAMP  : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
  defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
  defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
  defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_2D_I8_TRAP  : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
  defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
  defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
  defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_2D_I8_ZERO  : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
  defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
  defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
  defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
  // Single-result 2D-array surface load: handle $s (from `surf`) plus
  // Int32Regs operands $l, $x, $y; one `outtype` result $r.
  // NOTE(review): $y is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r),
                  !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                  !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
    def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_2D_ARRAY_I8_CLAMP
      : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
  defm SULD_2D_ARRAY_I16_CLAMP
      : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
  defm SULD_2D_ARRAY_I32_CLAMP
      : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
  defm SULD_2D_ARRAY_I64_CLAMP
      : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_2D_ARRAY_I8_TRAP
      : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
  defm SULD_2D_ARRAY_I16_TRAP
      : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
  defm SULD_2D_ARRAY_I32_TRAP
      : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
  defm SULD_2D_ARRAY_I64_TRAP
      : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_2D_ARRAY_I8_ZERO
      : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
  defm SULD_2D_ARRAY_I16_ZERO
      : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
  defm SULD_2D_ARRAY_I32_ZERO
      : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
  defm SULD_2D_ARRAY_I64_ZERO
      : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
  // Single-result 3D surface load: handle $s (from `surf`) plus Int32Regs
  // coordinates $x, $y, $z; one `outtype` result $r.
  // NOTE(review): $z is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                  !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
    def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_3D_I8_CLAMP  : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
  defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
  defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
  defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_3D_I8_TRAP  : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
  defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
  defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
  defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_3D_I8_ZERO  : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
  defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
  defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
  defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
  3351. }
  3352. let IsSuld = 2 in {
  // Two-result 1D surface load: handle $s (from `surf`) plus one Int32Regs
  // coordinate $x; results $r and $g of class `outtype`.
  class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g),
                  !con(surf, (ins Int32Regs:$x)),
                  !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_1D_V2I8_CLAMP  : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
  defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
  defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
  defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_1D_V2I8_TRAP  : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
  defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
  defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
  defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_1D_V2I8_ZERO  : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
  defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
  defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
  defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
  // Two-result 1D-array surface load: handle $s (from `surf`) plus
  // Int32Regs operands $l and $x; results $r and $g of class `outtype`.
  class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g),
                  !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                  !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$l, $x\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_1D_ARRAY_V2I8_CLAMP
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_V2I16_CLAMP
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_V2I32_CLAMP
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
  defm SULD_1D_ARRAY_V2I64_CLAMP
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_1D_ARRAY_V2I8_TRAP
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
  defm SULD_1D_ARRAY_V2I16_TRAP
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
  defm SULD_1D_ARRAY_V2I32_TRAP
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
  defm SULD_1D_ARRAY_V2I64_TRAP
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_1D_ARRAY_V2I8_ZERO
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
  defm SULD_1D_ARRAY_V2I16_ZERO
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
  defm SULD_1D_ARRAY_V2I32_ZERO
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
  defm SULD_1D_ARRAY_V2I64_ZERO
      : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
  // Two-result 2D surface load: handle $s (from `surf`) plus Int32Regs
  // coordinates $x, $y; results $r and $g of class `outtype`.
  class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                  !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
    def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_2D_V2I8_CLAMP
      : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
  defm SULD_2D_V2I16_CLAMP
      : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
  defm SULD_2D_V2I32_CLAMP
      : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
  defm SULD_2D_V2I64_CLAMP
      : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_2D_V2I8_TRAP
      : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
  defm SULD_2D_V2I16_TRAP
      : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
  defm SULD_2D_V2I32_TRAP
      : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
  defm SULD_2D_V2I64_TRAP
      : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_2D_V2I8_ZERO
      : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
  defm SULD_2D_V2I16_ZERO
      : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
  defm SULD_2D_V2I32_ZERO
      : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
  defm SULD_2D_V2I64_ZERO
      : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
  // Two-result 2D-array surface load: handle $s (from `surf`) plus
  // Int32Regs operands $l, $x, $y; results $r and $g of class `outtype`.
  // NOTE(review): $y is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g),
                  !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                  !strconcat(inst,
                             " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
    def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_2D_ARRAY_V2I8_CLAMP
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
  defm SULD_2D_ARRAY_V2I16_CLAMP
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
  defm SULD_2D_ARRAY_V2I32_CLAMP
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
  defm SULD_2D_ARRAY_V2I64_CLAMP
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_2D_ARRAY_V2I8_TRAP
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
  defm SULD_2D_ARRAY_V2I16_TRAP
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
  defm SULD_2D_ARRAY_V2I32_TRAP
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
  defm SULD_2D_ARRAY_V2I64_TRAP
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_2D_ARRAY_V2I8_ZERO
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
  defm SULD_2D_ARRAY_V2I16_ZERO
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
  defm SULD_2D_ARRAY_V2I32_ZERO
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
  defm SULD_2D_ARRAY_V2I64_ZERO
      : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
  // Two-result 3D surface load: handle $s (from `surf`) plus Int32Regs
  // coordinates $x, $y, $z; results $r and $g of class `outtype`.
  // NOTE(review): $z is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                  !strconcat(inst,
                             " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
    def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants (b8 results use Int16Regs, like b16).
  defm SULD_3D_V2I8_CLAMP  : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
  defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
  defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
  defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_3D_V2I8_TRAP  : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
  defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
  defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
  defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_3D_V2I8_ZERO  : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
  defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
  defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
  defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
  3494. }
  3495. let IsSuld = 3 in {
  // Four-result 1D surface load: handle $s (from `surf`) plus one Int32Regs
  // coordinate $x; results $r, $g, $b, $a of class `outtype`.
  class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(surf, (ins Int32Regs:$x)),
                  !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
  }

  // Only b8/b16/b32 element widths are defined for the four-result form.
  defm SULD_1D_V4I8_CLAMP  : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
  defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
  defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

  defm SULD_1D_V4I8_TRAP  : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
  defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
  defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

  defm SULD_1D_V4I8_ZERO  : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
  defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
  defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
  // Four-result 1D-array surface load: handle $s (from `surf`) plus
  // Int32Regs operands $l and $x; results $r, $g, $b, $a of class `outtype`.
  class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                  !strconcat(inst,
                             " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
  }

  // Only b8/b16/b32 element widths are defined for the four-result form.
  defm SULD_1D_ARRAY_V4I8_CLAMP
      : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_V4I16_CLAMP
      : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_V4I32_CLAMP
      : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

  defm SULD_1D_ARRAY_V4I8_TRAP
      : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
  defm SULD_1D_ARRAY_V4I16_TRAP
      : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
  defm SULD_1D_ARRAY_V4I32_TRAP
      : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

  defm SULD_1D_ARRAY_V4I8_ZERO
      : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
  defm SULD_1D_ARRAY_V4I16_ZERO
      : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
  defm SULD_1D_ARRAY_V4I32_ZERO
      : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
  // Four-result 2D surface load: handle $s (from `surf`) plus Int32Regs
  // coordinates $x, $y; results $r, $g, $b, $a of class `outtype`.
  class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                  !strconcat(inst,
                             " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
    def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
  }

  // Only b8/b16/b32 element widths are defined for the four-result form.
  defm SULD_2D_V4I8_CLAMP  : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
  defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
  defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

  defm SULD_2D_V4I8_TRAP  : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
  defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
  defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

  defm SULD_2D_V4I8_ZERO  : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
  defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
  defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
  // Four-result 2D-array surface load: handle $s (from `surf`) plus
  // Int32Regs operands $l, $x, $y; results $r, $g, $b, $a of class `outtype`.
  // NOTE(review): $y is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                  !strconcat(inst,
                             " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
    def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
  }

  // Only b8/b16/b32 element widths are defined for the four-result form.
  defm SULD_2D_ARRAY_V4I8_CLAMP
      : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
  defm SULD_2D_ARRAY_V4I16_CLAMP
      : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
  defm SULD_2D_ARRAY_V4I32_CLAMP
      : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

  defm SULD_2D_ARRAY_V4I8_TRAP
      : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
  defm SULD_2D_ARRAY_V4I16_TRAP
      : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
  defm SULD_2D_ARRAY_V4I32_TRAP
      : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

  defm SULD_2D_ARRAY_V4I8_ZERO
      : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
  defm SULD_2D_ARRAY_V4I16_ZERO
      : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
  defm SULD_2D_ARRAY_V4I32_ZERO
      : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
  // Four-result 3D surface load: handle $s (from `surf`) plus Int32Regs
  // coordinates $x, $y, $z; results $r, $g, $b, $a of class `outtype`.
  // NOTE(review): $z is printed twice, presumably to pad the coordinate
  // vector to four elements -- confirm against the PTX ISA.
  class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                  !strconcat(inst,
                             " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];"),
                  []>;

  // _R: handle is an Int64Regs operand; _I: handle is an i64imm operand.
  multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
    def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
  }

  // Only b8/b16/b32 element widths are defined for the four-result form.
  defm SULD_3D_V4I8_CLAMP  : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
  defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
  defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

  defm SULD_3D_V4I8_TRAP  : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
  defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
  defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

  defm SULD_3D_V4I8_ZERO  : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
  defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
  defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
  3604. }
  3605. //-----------------------------------
  3606. // Texture Query Intrinsics
  3607. //-----------------------------------
  // txq.* query instructions, all flagged with IsSurfTexQuery.
  // Every query writes a 32-bit result to $d and comes in two operand forms:
  //   *_R : operand $a is a 64-bit register.
  //   *_I : operand $a is a 64-bit immediate.
  // The pattern lists are empty, so none of these records carries its own
  // selection pattern.
  let IsSurfTexQuery = true in {
    // txq.channel_order
    def TXQ_CHANNEL_ORDER_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "txq.channel_order.b32 \t$d, [$a];",
                    []>;
    def TXQ_CHANNEL_ORDER_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "txq.channel_order.b32 \t$d, [$a];",
                    []>;

    // txq.channel_data_type
    def TXQ_CHANNEL_DATA_TYPE_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "txq.channel_data_type.b32 \t$d, [$a];",
                    []>;
    def TXQ_CHANNEL_DATA_TYPE_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "txq.channel_data_type.b32 \t$d, [$a];",
                    []>;

    // txq.width
    def TXQ_WIDTH_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "txq.width.b32 \t$d, [$a];",
                    []>;
    def TXQ_WIDTH_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "txq.width.b32 \t$d, [$a];",
                    []>;

    // txq.height
    def TXQ_HEIGHT_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "txq.height.b32 \t$d, [$a];",
                    []>;
    def TXQ_HEIGHT_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "txq.height.b32 \t$d, [$a];",
                    []>;

    // txq.depth
    def TXQ_DEPTH_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "txq.depth.b32 \t$d, [$a];",
                    []>;
    def TXQ_DEPTH_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "txq.depth.b32 \t$d, [$a];",
                    []>;

    // txq.array_size
    def TXQ_ARRAY_SIZE_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "txq.array_size.b32 \t$d, [$a];",
                    []>;
    def TXQ_ARRAY_SIZE_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "txq.array_size.b32 \t$d, [$a];",
                    []>;

    // txq.num_samples
    def TXQ_NUM_SAMPLES_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "txq.num_samples.b32 \t$d, [$a];",
                    []>;
    def TXQ_NUM_SAMPLES_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "txq.num_samples.b32 \t$d, [$a];",
                    []>;

    // txq.num_mipmap_levels
    def TXQ_NUM_MIPMAP_LEVELS_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "txq.num_mipmap_levels.b32 \t$d, [$a];",
                    []>;
    def TXQ_NUM_MIPMAP_LEVELS_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "txq.num_mipmap_levels.b32 \t$d, [$a];",
                    []>;
  }
  // Selection patterns for the int_nvvm_txq_* intrinsics. Each intrinsic
  // taking a 64-bit register operand is mapped onto the matching register
  // (_R) form of the query instruction.
  def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
            (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
            (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_txq_width Int64Regs:$a),
            (TXQ_WIDTH_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_txq_height Int64Regs:$a),
            (TXQ_HEIGHT_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
            (TXQ_DEPTH_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
            (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
            (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
            (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
  3690. //-----------------------------------
  3691. // Surface Query Intrinsics
  3692. //-----------------------------------
  // suq.* query instructions, all flagged with IsSurfTexQuery.
  // Every query writes a 32-bit result to $d and comes in two operand forms:
  //   *_R : operand $a is a 64-bit register.
  //   *_I : operand $a is a 64-bit immediate.
  // The pattern lists are empty, so none of these records carries its own
  // selection pattern.
  let IsSurfTexQuery = true in {
    // suq.channel_order
    def SUQ_CHANNEL_ORDER_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "suq.channel_order.b32 \t$d, [$a];",
                    []>;
    def SUQ_CHANNEL_ORDER_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "suq.channel_order.b32 \t$d, [$a];",
                    []>;

    // suq.channel_data_type
    def SUQ_CHANNEL_DATA_TYPE_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "suq.channel_data_type.b32 \t$d, [$a];",
                    []>;
    def SUQ_CHANNEL_DATA_TYPE_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "suq.channel_data_type.b32 \t$d, [$a];",
                    []>;

    // suq.width
    def SUQ_WIDTH_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "suq.width.b32 \t$d, [$a];",
                    []>;
    def SUQ_WIDTH_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "suq.width.b32 \t$d, [$a];",
                    []>;

    // suq.height
    def SUQ_HEIGHT_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "suq.height.b32 \t$d, [$a];",
                    []>;
    def SUQ_HEIGHT_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "suq.height.b32 \t$d, [$a];",
                    []>;

    // suq.depth
    def SUQ_DEPTH_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "suq.depth.b32 \t$d, [$a];",
                    []>;
    def SUQ_DEPTH_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "suq.depth.b32 \t$d, [$a];",
                    []>;

    // suq.array_size
    def SUQ_ARRAY_SIZE_R
        : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                    "suq.array_size.b32 \t$d, [$a];",
                    []>;
    def SUQ_ARRAY_SIZE_I
        : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                    "suq.array_size.b32 \t$d, [$a];",
                    []>;
  }
  // Selection patterns for the int_nvvm_suq_* intrinsics. Each intrinsic
  // taking a 64-bit register operand is mapped onto the matching register
  // (_R) form of the query instruction.
  def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
            (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
            (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_suq_width Int64Regs:$a),
            (SUQ_WIDTH_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_suq_height Int64Regs:$a),
            (SUQ_HEIGHT_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
            (SUQ_DEPTH_R Int64Regs:$a)>;
  def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
            (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
  3755. //===- Handle Query -------------------------------------------------------===//
  3756. // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
  // istypep.* instructions. Each takes a 64-bit register operand $a and
  // writes a 1-bit result to $d. Unlike the query instructions, the selection
  // pattern is attached inline: the result of the corresponding
  // int_nvvm_istypep_* intrinsic is set into $d.

  // istypep.samplerref
  def ISTYPEP_SAMPLER
      : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                  "istypep.samplerref \t$d, $a;",
                  [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;

  // istypep.surfref
  def ISTYPEP_SURFACE
      : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                  "istypep.surfref \t$d, $a;",
                  [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;

  // istypep.texref
  def ISTYPEP_TEXTURE
      : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                  "istypep.texref \t$d, $a;",
                  [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
  3769. //===- Surface Stores -----------------------------------------------------===//
  3770. let IsSust = true in {
  // Scalar store to a 1D surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         coordinate register $x, and the value $r of class `intype`.
  class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$x, intype:$r)),
                  inst # " \t[$s, \\{$x\\}], \\{$r\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_1D<string inst, NVPTXRegClass intype> {
    def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_1D_B8_CLAMP  : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
  defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
  defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
  defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_1D_B8_TRAP  : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
  defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
  defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
  defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_1D_B8_ZERO  : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
  defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
  defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
  defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_1D_B8_TRAP  : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
  defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
  defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
  // Two-element vector store to a 1D surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         coordinate register $x, and the values $r, $g of class `intype`.
  class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
                  inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
    def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
  defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
  defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
  defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_1D_V2B8_TRAP  : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
  defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
  defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
  defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_1D_V2B8_ZERO  : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
  defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
  defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
  defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_1D_V2B8_TRAP  : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
  defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
  defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
  // Four-element vector store to a 1D surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         coordinate register $x, and the values $r, $g, $b, $a of class
  //         `intype`.
  class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$x,
                                  intype:$r, intype:$g, intype:$b, intype:$a)),
                  inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
    def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
  defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
  defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_1D_V4B8_TRAP  : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
  defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
  defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_1D_V4B8_ZERO  : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
  defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
  defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;

  // sust.p variants; only ".trap" forms are defined here.
  defm SUST_P_1D_V4B8_TRAP  : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
  defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
  defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
  // Scalar store to a 1D array surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         registers $idx and $x, and the value $r of class `intype`.
  // The bracketed coordinate list is printed as {$idx, $x}.
  class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
                  inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
    def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_1D_ARRAY_B8_CLAMP
      : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
  defm SUST_B_1D_ARRAY_B16_CLAMP
      : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
  defm SUST_B_1D_ARRAY_B32_CLAMP
      : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
  defm SUST_B_1D_ARRAY_B64_CLAMP
      : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_1D_ARRAY_B8_TRAP
      : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
  defm SUST_B_1D_ARRAY_B16_TRAP
      : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
  defm SUST_B_1D_ARRAY_B32_TRAP
      : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
  defm SUST_B_1D_ARRAY_B64_TRAP
      : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_1D_ARRAY_B8_ZERO
      : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
  defm SUST_B_1D_ARRAY_B16_ZERO
      : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
  defm SUST_B_1D_ARRAY_B32_ZERO
      : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
  defm SUST_B_1D_ARRAY_B64_ZERO
      : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_1D_ARRAY_B8_TRAP
      : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
  defm SUST_P_1D_ARRAY_B16_TRAP
      : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
  defm SUST_P_1D_ARRAY_B32_TRAP
      : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
  // Two-element vector store to a 1D array surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         registers $idx and $x, and the values $r, $g of class `intype`.
  // The bracketed coordinate list is printed as {$idx, $x}.
  class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
                                  intype:$r, intype:$g)),
                  inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
    def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_1D_ARRAY_V2B8_CLAMP
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
  defm SUST_B_1D_ARRAY_V2B16_CLAMP
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
  defm SUST_B_1D_ARRAY_V2B32_CLAMP
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
  defm SUST_B_1D_ARRAY_V2B64_CLAMP
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_1D_ARRAY_V2B8_TRAP
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
  defm SUST_B_1D_ARRAY_V2B16_TRAP
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
  defm SUST_B_1D_ARRAY_V2B32_TRAP
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
  defm SUST_B_1D_ARRAY_V2B64_TRAP
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_1D_ARRAY_V2B8_ZERO
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
  defm SUST_B_1D_ARRAY_V2B16_ZERO
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
  defm SUST_B_1D_ARRAY_V2B32_ZERO
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
  defm SUST_B_1D_ARRAY_V2B64_ZERO
      : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_1D_ARRAY_V2B8_TRAP
      : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
  defm SUST_P_1D_ARRAY_V2B16_TRAP
      : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
  defm SUST_P_1D_ARRAY_V2B32_TRAP
      : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
  // Four-element vector store to a 1D array surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         registers $idx and $x, and the values $r, $g, $b, $a of class
  //         `intype`.
  // The bracketed coordinate list is printed as {$idx, $x}.
  class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
                                  intype:$r, intype:$g, intype:$b, intype:$a)),
                  inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
    def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_1D_ARRAY_V4B8_CLAMP
      : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
  defm SUST_B_1D_ARRAY_V4B16_CLAMP
      : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
  defm SUST_B_1D_ARRAY_V4B32_CLAMP
      : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_1D_ARRAY_V4B8_TRAP
      : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
  defm SUST_B_1D_ARRAY_V4B16_TRAP
      : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
  defm SUST_B_1D_ARRAY_V4B32_TRAP
      : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_1D_ARRAY_V4B8_ZERO
      : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
  defm SUST_B_1D_ARRAY_V4B16_ZERO
      : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
  defm SUST_B_1D_ARRAY_V4B32_ZERO
      : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;

  // sust.p variants; only ".trap" forms are defined here.
  defm SUST_P_1D_ARRAY_V4B8_TRAP
      : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
  defm SUST_P_1D_ARRAY_V4B16_TRAP
      : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
  defm SUST_P_1D_ARRAY_V4B32_TRAP
      : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
  // Scalar store to a 2D surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         coordinate registers $x and $y, and the value $r of class
  //         `intype`.
  class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
                  inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_2D<string inst, NVPTXRegClass intype> {
    def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_2D_B8_CLAMP  : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
  defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
  defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
  defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_2D_B8_TRAP  : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
  defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
  defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
  defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_2D_B8_ZERO  : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
  defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
  defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
  defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_2D_B8_TRAP  : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
  defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
  defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
  // Two-element vector store to a 2D surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         coordinate registers $x and $y, and the values $r, $g of class
  //         `intype`.
  class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
                                  intype:$r, intype:$g)),
                  inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
    def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
  defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
  defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
  defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_2D_V2B8_TRAP  : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
  defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
  defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
  defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_2D_V2B8_ZERO  : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
  defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
  defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
  defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_2D_V2B8_TRAP  : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
  defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
  defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
  // Four-element vector store to a 2D surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         coordinate registers $x and $y, and the values $r, $g, $b, $a of
  //         class `intype`.
  class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
                                  intype:$r, intype:$g, intype:$b, intype:$a)),
                  inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
    def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
  defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
  defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_2D_V4B8_TRAP  : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
  defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
  defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_2D_V4B8_ZERO  : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
  defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
  defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;

  // sust.p variants; only ".trap" forms are defined here.
  defm SUST_P_2D_V4B8_TRAP  : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
  defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
  defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
  // Scalar store to a 2D array surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         registers $idx, $x and $y, and the value $r of class `intype`.
  // The asm string prints $y twice, so the bracketed coordinate list always
  // has four entries: {$idx, $x, $y, $y}.
  class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                                  intype:$r)),
                  inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
    def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_2D_ARRAY_B8_CLAMP
      : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_B16_CLAMP
      : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_B32_CLAMP
      : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
  defm SUST_B_2D_ARRAY_B64_CLAMP
      : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_2D_ARRAY_B8_TRAP
      : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_B16_TRAP
      : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_B32_TRAP
      : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
  defm SUST_B_2D_ARRAY_B64_TRAP
      : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_2D_ARRAY_B8_ZERO
      : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_B16_ZERO
      : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_B32_ZERO
      : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
  defm SUST_B_2D_ARRAY_B64_ZERO
      : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_2D_ARRAY_B8_TRAP
      : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_B16_TRAP
      : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_B32_TRAP
      : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
  // Two-element vector store to a 2D array surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         registers $idx, $x and $y, and the values $r, $g of class
  //         `intype`.
  // The asm string prints $y twice, so the bracketed coordinate list always
  // has four entries: {$idx, $x, $y, $y}.
  class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                                  intype:$r, intype:$g)),
                  inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
    def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_2D_ARRAY_V2B8_CLAMP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B16_CLAMP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B32_CLAMP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
  defm SUST_B_2D_ARRAY_V2B64_CLAMP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_2D_ARRAY_V2B8_TRAP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B16_TRAP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B32_TRAP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
  defm SUST_B_2D_ARRAY_V2B64_TRAP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_2D_ARRAY_V2B8_ZERO
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B16_ZERO
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B32_ZERO
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
  defm SUST_B_2D_ARRAY_V2B64_ZERO
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_2D_ARRAY_V2B8_TRAP
      : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_V2B16_TRAP
      : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_V2B32_TRAP
      : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
  // Four-element vector store to a 2D array surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         registers $idx, $x and $y, and the values $r, $g, $b, $a of class
  //         `intype`.
  // The asm string prints $y twice, so the bracketed coordinate list always
  // has four entries: {$idx, $x, $y, $y}.
  class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                                  intype:$r, intype:$g, intype:$b, intype:$a)),
                  inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
    def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_2D_ARRAY_V4B8_CLAMP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B16_CLAMP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B32_CLAMP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_2D_ARRAY_V4B8_TRAP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B16_TRAP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B32_TRAP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_2D_ARRAY_V4B8_ZERO
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B16_ZERO
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B32_ZERO
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

  // sust.p variants; only ".trap" forms are defined here.
  defm SUST_P_2D_ARRAY_V4B8_TRAP
      : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_V4B16_TRAP
      : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_V4B32_TRAP
      : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
  // Scalar store to a 3D surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         coordinate registers $x, $y and $z, and the value $r of class
  //         `intype`.
  // The asm string prints $z twice, so the bracketed coordinate list always
  // has four entries: {$x, $y, $z, $z}.
  class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                                  intype:$r)),
                  inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_3D<string inst, NVPTXRegClass intype> {
    def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_3D_B8_CLAMP  : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
  defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
  defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
  defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_3D_B8_TRAP  : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
  defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
  defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
  defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_3D_B8_ZERO  : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
  defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
  defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
  defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_3D_B8_TRAP  : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
  defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
  defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
  // Two-element vector store to a 3D surface.
  //   outs: none.
  //   ins : whatever `surf` contributes (it provides $s), the 32-bit
  //         coordinate registers $x, $y and $z, and the values $r, $g of
  //         class `intype`.
  // The asm string prints $z twice, so the bracketed coordinate list always
  // has four entries: {$x, $y, $z, $z}.
  class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
      : NVPTXInst<(outs),
                  !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                                  intype:$r, intype:$g)),
                  inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
                  []>;

  // Produces two records per instantiation:
  //   <name>_R : $s is a 64-bit register operand.
  //   <name>_I : $s is a 64-bit immediate operand.
  multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
    def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
  }

  // sust.b, ".clamp" variants. b8 and b16 values are passed in Int16Regs.
  defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
  defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
  defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
  defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

  // sust.b, ".trap" variants.
  defm SUST_B_3D_V2B8_TRAP  : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
  defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
  defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
  defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

  // sust.b, ".zero" variants.
  defm SUST_B_3D_V2B8_ZERO  : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
  defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
  defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
  defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

  // sust.p variants; only ".trap" forms up to b32 are defined here.
  defm SUST_P_3D_V2B8_TRAP  : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
  defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
  defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
  // Base record for the four-element (v4) 3D surface-store instructions.
  //   inst   - PTX mnemonic text placed at the start of the asm string.
  //   intype - register class of the four stored values $r, $g, $b and $a.
  //   surf   - (ins ...) dag carrying the surface operand $s; it is prepended
  //            to the coordinate and data operands with !con.
  // The instruction has no outputs and an empty pattern list.
  // The coordinate vector in the asm string has four slots and prints $z in
  // both the third and the fourth slot.
  // NOTE(review): presumably PTX expects a 4-element coordinate vector for the
  // .3d geometry and ignores the last element -- confirm against the PTX ISA.
  4189. class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  4190. : NVPTXInst<(outs),
  4191. !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4192. intype:$r, intype:$g, intype:$b, intype:$a)),
  4193. inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
  4194. []>;
  // Defines the two surface-operand forms of a v4 3D surface store:
  //   <name>_R - surface operand $s is an Int64Regs register.
  //   <name>_I - surface operand $s is an i64imm immediate.
  // `inst` and `intype` are forwarded unchanged to SUST_3D_V4_base.
  4195. multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  4196. def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  4197. def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
  4198. }
  // Four-element (v4) 3D surface stores. sust.b is instantiated for
  // b8/b16/b32 in each of the .clamp, .trap and .zero variants; sust.p is
  // instantiated for b8/b16/b32 with .trap only. No b64 variant is
  // instantiated for the v4 form.
  // Each defm yields an _R and an _I record (see SUST_3D_V4).
  // The b8 variants use Int16Regs, the same register class as b16.
  4199. defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
  4200. defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
  4201. defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
  4202. defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
  4203. defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
  4204. defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
  4205. defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
  4206. defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
  4207. defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
  4208. defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
  4209. defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
  4210. defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
  4211. }
  4212. // Surface store instruction patterns
  4213. // NOTE: these patterns are kept separate from the instruction definitions
  4214. // because attaching them there makes TableGen report type errors (cause unknown).
  4215. // .clamp variant
  // 1D geometry, .clamp: each int_nvvm_sust_b_1d_*_clamp intrinsic is selected
  // to the register-surface (_R) form of the matching SUST_B_1D_*_CLAMP
  // instruction, with operands passed through in the same order
  // ($s, $x, then the data values). i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4216. def : Pat<(int_nvvm_sust_b_1d_i8_clamp
  4217. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  4218. (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  4219. def : Pat<(int_nvvm_sust_b_1d_i16_clamp
  4220. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  4221. (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  4222. def : Pat<(int_nvvm_sust_b_1d_i32_clamp
  4223. Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  4224. (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
  4225. def : Pat<(int_nvvm_sust_b_1d_i64_clamp
  4226. Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  4227. (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4228. def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
  4229. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4230. (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
  4231. Int16Regs:$r, Int16Regs:$g)>;
  4232. def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
  4233. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4234. (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
  4235. Int16Regs:$r, Int16Regs:$g)>;
  4236. def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
  4237. Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  4238. (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
  4239. Int32Regs:$r, Int32Regs:$g)>;
  4240. def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
  4241. Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  4242. (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
  4243. Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4244. def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
  4245. Int64Regs:$s, Int32Regs:$x,
  4246. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4247. (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
  4248. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4249. def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
  4250. Int64Regs:$s, Int32Regs:$x,
  4251. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4252. (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
  4253. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4254. def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
  4255. Int64Regs:$s, Int32Regs:$x,
  4256. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4257. (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
  4258. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // 1D array geometry, .clamp: each int_nvvm_sust_b_1d_array_*_clamp intrinsic
  // is selected to the register-surface (_R) form of the matching
  // SUST_B_1D_ARRAY_*_CLAMP instruction. Operand order is preserved:
  // $s, $l, $x, then the data values. i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4259. def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
  4260. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  4261. (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4262. Int16Regs:$r)>;
  4263. def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
  4264. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  4265. (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4266. Int16Regs:$r)>;
  4267. def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
  4268. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  4269. (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4270. Int32Regs:$r)>;
  4271. def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
  4272. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
  4273. (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4274. Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4275. def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
  4276. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4277. (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4278. Int16Regs:$r, Int16Regs:$g)>;
  4279. def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
  4280. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4281. (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4282. Int16Regs:$r, Int16Regs:$g)>;
  4283. def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
  4284. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  4285. (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4286. Int32Regs:$r, Int32Regs:$g)>;
  4287. def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
  4288. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  4289. (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4290. Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4291. def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
  4292. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4293. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4294. (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4295. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4296. def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
  4297. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4298. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4299. (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4300. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4301. def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
  4302. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4303. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4304. (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4305. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // 2D geometry, .clamp: each int_nvvm_sust_b_2d_*_clamp intrinsic is selected
  // to the register-surface (_R) form of the matching SUST_B_2D_*_CLAMP
  // instruction. Operand order is preserved: $s, $x, $y, then the data values.
  // i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4306. def : Pat<(int_nvvm_sust_b_2d_i8_clamp
  4307. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  4308. (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4309. Int16Regs:$r)>;
  4310. def : Pat<(int_nvvm_sust_b_2d_i16_clamp
  4311. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  4312. (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4313. Int16Regs:$r)>;
  4314. def : Pat<(int_nvvm_sust_b_2d_i32_clamp
  4315. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  4316. (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4317. Int32Regs:$r)>;
  4318. def : Pat<(int_nvvm_sust_b_2d_i64_clamp
  4319. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  4320. (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4321. Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4322. def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
  4323. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
  4324. (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4325. Int16Regs:$r, Int16Regs:$g)>;
  4326. def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
  4327. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
  4328. (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4329. Int16Regs:$r, Int16Regs:$g)>;
  4330. def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
  4331. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
  4332. (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4333. Int32Regs:$r, Int32Regs:$g)>;
  4334. def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
  4335. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
  4336. (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4337. Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4338. def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
  4339. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4340. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4341. (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4342. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4343. def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
  4344. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4345. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4346. (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4347. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4348. def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
  4349. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4350. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4351. (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4352. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // 2D array geometry, .clamp: each int_nvvm_sust_b_2d_array_*_clamp intrinsic
  // is selected to the register-surface (_R) form of the matching
  // SUST_B_2D_ARRAY_*_CLAMP instruction. Operand order is preserved:
  // $s, $l, $x, $y, then the data values. i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4353. def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
  4354. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  4355. (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
  4356. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4357. Int16Regs:$r)>;
  4358. def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
  4359. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  4360. (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
  4361. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4362. Int16Regs:$r)>;
  4363. def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
  4364. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  4365. (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
  4366. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4367. Int32Regs:$r)>;
  4368. def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
  4369. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  4370. (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
  4371. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4372. Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4373. def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
  4374. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4375. Int16Regs:$r, Int16Regs:$g),
  4376. (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
  4377. Int32Regs:$x, Int32Regs:$y,
  4378. Int16Regs:$r, Int16Regs:$g)>;
  4379. def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
  4380. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4381. Int16Regs:$r, Int16Regs:$g),
  4382. (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
  4383. Int32Regs:$x, Int32Regs:$y,
  4384. Int16Regs:$r, Int16Regs:$g)>;
  4385. def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
  4386. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
  4387. Int32Regs:$g),
  4388. (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
  4389. Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
  4390. def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
  4391. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
  4392. Int64Regs:$g),
  4393. (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
  4394. Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4395. def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
  4396. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4397. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4398. (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
  4399. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4400. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4401. def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
  4402. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4403. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4404. (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
  4405. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4406. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4407. def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
  4408. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4409. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4410. (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
  4411. Int32Regs:$x, Int32Regs:$y,
  4412. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // 3D geometry, .clamp: each int_nvvm_sust_b_3d_*_clamp intrinsic is selected
  // to the register-surface (_R) form of the matching SUST_B_3D_*_CLAMP
  // instruction. Operand order is preserved: $s, $x, $y, $z, then the data
  // values. i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4413. def : Pat<(int_nvvm_sust_b_3d_i8_clamp
  4414. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4415. Int16Regs:$r),
  4416. (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
  4417. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4418. Int16Regs:$r)>;
  4419. def : Pat<(int_nvvm_sust_b_3d_i16_clamp
  4420. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4421. Int16Regs:$r),
  4422. (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
  4423. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4424. Int16Regs:$r)>;
  4425. def : Pat<(int_nvvm_sust_b_3d_i32_clamp
  4426. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4427. Int32Regs:$r),
  4428. (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
  4429. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4430. Int32Regs:$r)>;
  4431. def : Pat<(int_nvvm_sust_b_3d_i64_clamp
  4432. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4433. Int64Regs:$r),
  4434. (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
  4435. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4436. Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4437. def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
  4438. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4439. Int16Regs:$r, Int16Regs:$g),
  4440. (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
  4441. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4442. Int16Regs:$r, Int16Regs:$g)>;
  4443. def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
  4444. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4445. Int16Regs:$r, Int16Regs:$g),
  4446. (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
  4447. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4448. Int16Regs:$r, Int16Regs:$g)>;
  4449. def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
  4450. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4451. Int32Regs:$r, Int32Regs:$g),
  4452. (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
  4453. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4454. Int32Regs:$r, Int32Regs:$g)>;
  4455. def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
  4456. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4457. Int64Regs:$r, Int64Regs:$g),
  4458. (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
  4459. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4460. Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4461. def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
  4462. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4463. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4464. (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
  4465. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4466. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4467. def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
  4468. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4469. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4470. (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
  4471. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4472. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4473. def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
  4474. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4475. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4476. (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
  4477. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4478. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  4479. // .trap variant
  // 1D geometry, .trap: each int_nvvm_sust_b_1d_*_trap intrinsic is selected
  // to the register-surface (_R) form of the matching SUST_B_1D_*_TRAP
  // instruction, with operands passed through in the same order
  // ($s, $x, then the data values). i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4480. def : Pat<(int_nvvm_sust_b_1d_i8_trap
  4481. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  4482. (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  4483. def : Pat<(int_nvvm_sust_b_1d_i16_trap
  4484. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  4485. (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  4486. def : Pat<(int_nvvm_sust_b_1d_i32_trap
  4487. Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  4488. (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
  4489. def : Pat<(int_nvvm_sust_b_1d_i64_trap
  4490. Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  4491. (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4492. def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
  4493. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4494. (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
  4495. Int16Regs:$r, Int16Regs:$g)>;
  4496. def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
  4497. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4498. (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
  4499. Int16Regs:$r, Int16Regs:$g)>;
  4500. def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
  4501. Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  4502. (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
  4503. Int32Regs:$r, Int32Regs:$g)>;
  4504. def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
  4505. Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  4506. (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
  4507. Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4508. def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
  4509. Int64Regs:$s, Int32Regs:$x,
  4510. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4511. (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
  4512. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4513. def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
  4514. Int64Regs:$s, Int32Regs:$x,
  4515. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4516. (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
  4517. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4518. def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
  4519. Int64Regs:$s, Int32Regs:$x,
  4520. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4521. (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
  4522. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // 1D array geometry, .trap: each int_nvvm_sust_b_1d_array_*_trap intrinsic
  // is selected to the register-surface (_R) form of the matching
  // SUST_B_1D_ARRAY_*_TRAP instruction. Operand order is preserved:
  // $s, $l, $x, then the data values. i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4523. def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
  4524. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  4525. (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4526. Int16Regs:$r)>;
  4527. def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
  4528. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  4529. (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4530. Int16Regs:$r)>;
  4531. def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
  4532. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  4533. (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4534. Int32Regs:$r)>;
  4535. def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
  4536. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
  4537. (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4538. Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4539. def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
  4540. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4541. (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4542. Int16Regs:$r, Int16Regs:$g)>;
  4543. def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
  4544. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4545. (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4546. Int16Regs:$r, Int16Regs:$g)>;
  4547. def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
  4548. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  4549. (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4550. Int32Regs:$r, Int32Regs:$g)>;
  4551. def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
  4552. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  4553. (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4554. Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4555. def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
  4556. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4557. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4558. (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4559. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4560. def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
  4561. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4562. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4563. (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4564. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4565. def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
  4566. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4567. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4568. (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
  4569. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // 2D geometry, .trap: each int_nvvm_sust_b_2d_*_trap intrinsic is selected
  // to the register-surface (_R) form of the matching SUST_B_2D_*_TRAP
  // instruction. Operand order is preserved: $s, $x, $y, then the data values.
  // i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4570. def : Pat<(int_nvvm_sust_b_2d_i8_trap
  4571. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  4572. (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4573. Int16Regs:$r)>;
  4574. def : Pat<(int_nvvm_sust_b_2d_i16_trap
  4575. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  4576. (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4577. Int16Regs:$r)>;
  4578. def : Pat<(int_nvvm_sust_b_2d_i32_trap
  4579. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  4580. (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4581. Int32Regs:$r)>;
  4582. def : Pat<(int_nvvm_sust_b_2d_i64_trap
  4583. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  4584. (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4585. Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4586. def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
  4587. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
  4588. (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4589. Int16Regs:$r, Int16Regs:$g)>;
  4590. def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
  4591. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
  4592. (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4593. Int16Regs:$r, Int16Regs:$g)>;
  4594. def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
  4595. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
  4596. (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4597. Int32Regs:$r, Int32Regs:$g)>;
  4598. def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
  4599. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
  4600. (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4601. Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4602. def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
  4603. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4604. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4605. (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4606. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4607. def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
  4608. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4609. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4610. (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4611. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4612. def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
  4613. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4614. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4615. (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4616. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // 2D array geometry, .trap: each int_nvvm_sust_b_2d_array_*_trap intrinsic
  // is selected to the register-surface (_R) form of the matching
  // SUST_B_2D_ARRAY_*_TRAP instruction. Operand order is preserved:
  // $s, $l, $x, $y, then the data values. i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4617. def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
  4618. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  4619. (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
  4620. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4621. Int16Regs:$r)>;
  4622. def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
  4623. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  4624. (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
  4625. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4626. Int16Regs:$r)>;
  4627. def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
  4628. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  4629. (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
  4630. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4631. Int32Regs:$r)>;
  4632. def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
  4633. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  4634. (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
  4635. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4636. Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4637. def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
  4638. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4639. Int16Regs:$r, Int16Regs:$g),
  4640. (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
  4641. Int32Regs:$x, Int32Regs:$y,
  4642. Int16Regs:$r, Int16Regs:$g)>;
  4643. def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
  4644. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4645. Int16Regs:$r, Int16Regs:$g),
  4646. (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
  4647. Int32Regs:$x, Int32Regs:$y,
  4648. Int16Regs:$r, Int16Regs:$g)>;
  4649. def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
  4650. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
  4651. Int32Regs:$g),
  4652. (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
  4653. Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
  4654. def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
  4655. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
  4656. Int64Regs:$g),
  4657. (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
  4658. Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4659. def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
  4660. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4661. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4662. (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
  4663. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4664. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4665. def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
  4666. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4667. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4668. (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
  4669. Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4670. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4671. def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
  4672. Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
  4673. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4674. (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
  4675. Int32Regs:$x, Int32Regs:$y,
  4676. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // 3D geometry, .trap: each int_nvvm_sust_b_3d_*_trap intrinsic is selected
  // to the register-surface (_R) form of the matching SUST_B_3D_*_TRAP
  // instruction. Operand order is preserved: $s, $x, $y, $z, then the data
  // values. i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4677. def : Pat<(int_nvvm_sust_b_3d_i8_trap
  4678. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4679. Int16Regs:$r),
  4680. (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
  4681. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4682. Int16Regs:$r)>;
  4683. def : Pat<(int_nvvm_sust_b_3d_i16_trap
  4684. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4685. Int16Regs:$r),
  4686. (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
  4687. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4688. Int16Regs:$r)>;
  4689. def : Pat<(int_nvvm_sust_b_3d_i32_trap
  4690. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4691. Int32Regs:$r),
  4692. (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
  4693. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4694. Int32Regs:$r)>;
  4695. def : Pat<(int_nvvm_sust_b_3d_i64_trap
  4696. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4697. Int64Regs:$r),
  4698. (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
  4699. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4700. Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4701. def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
  4702. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4703. Int16Regs:$r, Int16Regs:$g),
  4704. (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
  4705. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4706. Int16Regs:$r, Int16Regs:$g)>;
  4707. def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
  4708. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4709. Int16Regs:$r, Int16Regs:$g),
  4710. (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
  4711. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4712. Int16Regs:$r, Int16Regs:$g)>;
  4713. def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
  4714. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4715. Int32Regs:$r, Int32Regs:$g),
  4716. (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
  4717. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4718. Int32Regs:$r, Int32Regs:$g)>;
  4719. def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
  4720. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4721. Int64Regs:$r, Int64Regs:$g),
  4722. (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
  4723. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4724. Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4725. def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
  4726. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4727. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4728. (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
  4729. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4730. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4731. def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
  4732. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4733. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4734. (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
  4735. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4736. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4737. def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
  4738. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4739. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4740. (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
  4741. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  4742. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  4743. // .zero variant
  // 1D geometry, .zero: each int_nvvm_sust_b_1d_*_zero intrinsic is selected
  // to the register-surface (_R) form of the matching SUST_B_1D_*_ZERO
  // instruction, with operands passed through in the same order
  // ($s, $x, then the data values). i8 and i16 data both use Int16Regs.
  // Scalar stores (one data value $r).
  4744. def : Pat<(int_nvvm_sust_b_1d_i8_zero
  4745. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  4746. (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  4747. def : Pat<(int_nvvm_sust_b_1d_i16_zero
  4748. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  4749. (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  4750. def : Pat<(int_nvvm_sust_b_1d_i32_zero
  4751. Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  4752. (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
  4753. def : Pat<(int_nvvm_sust_b_1d_i64_zero
  4754. Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  4755. (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
  // Two-element stores ($r, $g).
  4756. def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
  4757. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4758. (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
  4759. Int16Regs:$r, Int16Regs:$g)>;
  4760. def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
  4761. Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  4762. (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
  4763. Int16Regs:$r, Int16Regs:$g)>;
  4764. def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
  4765. Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  4766. (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
  4767. Int32Regs:$r, Int32Regs:$g)>;
  4768. def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
  4769. Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  4770. (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
  4771. Int64Regs:$r, Int64Regs:$g)>;
  // Four-element stores ($r, $g, $b, $a); no i64 variant.
  4772. def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
  4773. Int64Regs:$s, Int32Regs:$x,
  4774. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4775. (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
  4776. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4777. def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
  4778. Int64Regs:$s, Int32Regs:$x,
  4779. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  4780. (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
  4781. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4782. def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
  4783. Int64Regs:$s, Int32Regs:$x,
  4784. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4785. (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
  4786. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // Selection patterns for the int_nvvm_sust_b_1d_array_*_zero intrinsics.
  // Every intrinsic selects the SUST_B_1D_ARRAY_*_ZERO_R instruction of the
  // same shape and forwards its operands ($s, $l, $x, then the stored values)
  // unchanged. 8- and 16-bit elements are both carried in Int16Regs.
  //
  // Scalar and two-element forms: 8/16/32/64-bit elements.
  foreach width = ["8", "16", "32", "64"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs,
                      !eq(width, "64") : Int64Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, RC:$r),
              (!cast<Instruction>("SUST_B_1D_ARRAY_B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, RC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v2i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, RC:$r, RC:$g),
              (!cast<Instruction>("SUST_B_1D_ARRAY_V2B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, RC:$r, RC:$g)>;
  }
  // Four-element forms: 8/16/32-bit elements only.
  foreach width = ["8", "16", "32"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v4i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 RC:$r, RC:$g, RC:$b, RC:$a),
              (!cast<Instruction>("SUST_B_1D_ARRAY_V4B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 RC:$r, RC:$g, RC:$b, RC:$a)>;
  }
  // Selection patterns for the int_nvvm_sust_b_2d_*_zero intrinsics. Every
  // intrinsic selects the SUST_B_2D_*_ZERO_R instruction of the same shape and
  // forwards its operands ($s, $x, $y, then the stored values) unchanged.
  // 8- and 16-bit elements are both carried in Int16Regs.
  //
  // Scalar and two-element forms: 8/16/32/64-bit elements.
  foreach width = ["8", "16", "32", "64"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs,
                      !eq(width, "64") : Int64Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, RC:$r),
              (!cast<Instruction>("SUST_B_2D_B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, RC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v2i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, RC:$r, RC:$g),
              (!cast<Instruction>("SUST_B_2D_V2B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, RC:$r, RC:$g)>;
  }
  // Four-element forms: 8/16/32-bit elements only.
  foreach width = ["8", "16", "32"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v4i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g, RC:$b, RC:$a),
              (!cast<Instruction>("SUST_B_2D_V4B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g, RC:$b, RC:$a)>;
  }
  // Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics.
  // Every intrinsic selects the SUST_B_2D_ARRAY_*_ZERO_R instruction of the
  // same shape and forwards its operands ($s, $l, $x, $y, then the stored
  // values) unchanged. 8- and 16-bit elements are both carried in Int16Regs.
  //
  // Scalar and two-element forms: 8/16/32/64-bit elements.
  foreach width = ["8", "16", "32", "64"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs,
                      !eq(width, "64") : Int64Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r),
              (!cast<Instruction>("SUST_B_2D_ARRAY_B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v2i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g),
              (!cast<Instruction>("SUST_B_2D_ARRAY_V2B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g)>;
  }
  // Four-element forms: 8/16/32-bit elements only.
  foreach width = ["8", "16", "32"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v4i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g, RC:$b, RC:$a),
              (!cast<Instruction>("SUST_B_2D_ARRAY_V4B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g, RC:$b, RC:$a)>;
  }
  // Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics. Every
  // intrinsic selects the SUST_B_3D_*_ZERO_R instruction of the same shape and
  // forwards its operands ($s, $x, $y, $z, then the stored values) unchanged.
  // 8- and 16-bit elements are both carried in Int16Regs.
  //
  // Scalar and two-element forms: 8/16/32/64-bit elements.
  foreach width = ["8", "16", "32", "64"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs,
                      !eq(width, "64") : Int64Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r),
              (!cast<Instruction>("SUST_B_3D_B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v2i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r, RC:$g),
              (!cast<Instruction>("SUST_B_3D_V2B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r, RC:$g)>;
  }
  // Four-element forms: 8/16/32-bit elements only.
  foreach width = ["8", "16", "32"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v4i" # width # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r, RC:$g, RC:$b, RC:$a),
              (!cast<Instruction>("SUST_B_3D_V4B" # width # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r, RC:$g, RC:$b, RC:$a)>;
  }
  // Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics. Every
  // intrinsic selects the SUST_P_1D_*_TRAP_R instruction of the same shape and
  // forwards its operands unchanged. Scalar, two- and four-element forms all
  // exist for 8/16/32-bit elements; 8- and 16-bit elements are both carried in
  // Int16Regs.
  foreach width = ["8", "16", "32"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$x, RC:$r),
              (!cast<Instruction>("SUST_P_1D_B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, RC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v2i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$x, RC:$r, RC:$g),
              (!cast<Instruction>("SUST_P_1D_V2B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, RC:$r, RC:$g)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v4i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$x,
                 RC:$r, RC:$g, RC:$b, RC:$a),
              (!cast<Instruction>("SUST_P_1D_V4B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x,
                 RC:$r, RC:$g, RC:$b, RC:$a)>;
  }
  // Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics.
  // Every intrinsic selects the SUST_P_1D_ARRAY_*_TRAP_R instruction of the
  // same shape and forwards its operands ($s, $l, $x, then the stored values)
  // unchanged. Scalar, two- and four-element forms all exist for 8/16/32-bit
  // elements; 8- and 16-bit elements are both carried in Int16Regs.
  foreach width = ["8", "16", "32"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, RC:$r),
              (!cast<Instruction>("SUST_P_1D_ARRAY_B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, RC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v2i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, RC:$r, RC:$g),
              (!cast<Instruction>("SUST_P_1D_ARRAY_V2B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, RC:$r, RC:$g)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v4i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 RC:$r, RC:$g, RC:$b, RC:$a),
              (!cast<Instruction>("SUST_P_1D_ARRAY_V4B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 RC:$r, RC:$g, RC:$b, RC:$a)>;
  }
  // Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics. Every
  // intrinsic selects the SUST_P_2D_*_TRAP_R instruction of the same shape and
  // forwards its operands ($s, $x, $y, then the stored values) unchanged.
  // Scalar, two- and four-element forms all exist for 8/16/32-bit elements;
  // 8- and 16-bit elements are both carried in Int16Regs.
  foreach width = ["8", "16", "32"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, RC:$r),
              (!cast<Instruction>("SUST_P_2D_B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, RC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_v2i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, RC:$r, RC:$g),
              (!cast<Instruction>("SUST_P_2D_V2B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, RC:$r, RC:$g)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_v4i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g, RC:$b, RC:$a),
              (!cast<Instruction>("SUST_P_2D_V4B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g, RC:$b, RC:$a)>;
  }
  // Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics.
  // Every intrinsic selects the SUST_P_2D_ARRAY_*_TRAP_R instruction of the
  // same shape and forwards its operands ($s, $l, $x, $y, then the stored
  // values) unchanged. Scalar, two- and four-element forms all exist for
  // 8/16/32-bit elements; 8- and 16-bit elements are both carried in
  // Int16Regs.
  foreach width = ["8", "16", "32"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_array_i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r),
              (!cast<Instruction>("SUST_P_2D_ARRAY_B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_array_v2i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g),
              (!cast<Instruction>("SUST_P_2D_ARRAY_V2B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_array_v4i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g, RC:$b, RC:$a),
              (!cast<Instruction>("SUST_P_2D_ARRAY_V4B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 RC:$r, RC:$g, RC:$b, RC:$a)>;
  }
  // Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics. Every
  // intrinsic selects the SUST_P_3D_*_TRAP_R instruction of the same shape and
  // forwards its operands ($s, $x, $y, $z, then the stored values) unchanged.
  // Scalar, two- and four-element forms all exist for 8/16/32-bit elements;
  // 8- and 16-bit elements are both carried in Int16Regs.
  foreach width = ["8", "16", "32"] in {
    defvar RC = !cond(!eq(width, "8")  : Int16Regs,
                      !eq(width, "16") : Int16Regs,
                      !eq(width, "32") : Int32Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_3d_i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r),
              (!cast<Instruction>("SUST_P_3D_B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_3d_v2i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r, RC:$g),
              (!cast<Instruction>("SUST_P_3D_V2B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r, RC:$g)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_3d_v4i" # width # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r, RC:$g, RC:$b, RC:$a),
              (!cast<Instruction>("SUST_P_3D_V4B" # width # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 RC:$r, RC:$g, RC:$b, RC:$a)>;
  }
  5225. //-----------------------------------
  5226. // Read Special Registers
  5227. //-----------------------------------
  // Instruction template that copies the special register `%<regname>` into
  // the Int64Regs result $d using `mov.u64`. It takes no inputs and is
  // selected for a call to the operand-less intrinsic `intop`.
  class PTX_READ_SREG_R64<string regname, Intrinsic intop>
      : NVPTXInst<(outs Int64Regs:$d), (ins),
                  "mov.u64 \t$d, %" # regname # ";",
                  [(set Int64Regs:$d, (intop))]>;
  // Instruction template that copies the special register `%<regname>` into
  // the Int32Regs result $d using `mov.u32`. It takes no inputs and is
  // selected for a call to the operand-less intrinsic `intop`.
  class PTX_READ_SREG_R32<string regname, Intrinsic intop>
      : NVPTXInst<(outs Int32Regs:$d), (ins),
                  "mov.u32 \t$d, %" # regname # ";",
                  [(set Int32Regs:$d, (intop))]>;
  // TODO: Add vector versions of the special-register reads.
  // 32-bit reads of %tid.{x,y,z,w}.
  def INT_PTX_SREG_TID_X
      : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
  def INT_PTX_SREG_TID_Y
      : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
  def INT_PTX_SREG_TID_Z
      : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
  def INT_PTX_SREG_TID_W
      : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;

  // 32-bit reads of %ntid.{x,y,z,w}.
  def INT_PTX_SREG_NTID_X
      : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
  def INT_PTX_SREG_NTID_Y
      : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
  def INT_PTX_SREG_NTID_Z
      : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
  def INT_PTX_SREG_NTID_W
      : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
  // 32-bit reads of %laneid, %warpid and %nwarpid.
  def INT_PTX_SREG_LANEID
      : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
  def INT_PTX_SREG_WARPID
      : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
  def INT_PTX_SREG_NWARPID
      : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
  // 32-bit reads of %ctaid.{x,y,z,w}.
  def INT_PTX_SREG_CTAID_X
      : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
  def INT_PTX_SREG_CTAID_Y
      : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
  def INT_PTX_SREG_CTAID_Z
      : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
  def INT_PTX_SREG_CTAID_W
      : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;

  // 32-bit reads of %nctaid.{x,y,z,w}.
  def INT_PTX_SREG_NCTAID_X
      : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
  def INT_PTX_SREG_NCTAID_Y
      : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
  def INT_PTX_SREG_NCTAID_Z
      : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
  def INT_PTX_SREG_NCTAID_W
      : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
  // 32-bit reads of %smid, %nsmid and %gridid.
  def INT_PTX_SREG_SMID
      : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
  def INT_PTX_SREG_NSMID
      : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
  def INT_PTX_SREG_GRIDID
      : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
  // 32-bit reads of %lanemask_{eq,le,lt,ge,gt}.
  def INT_PTX_SREG_LANEMASK_EQ
      : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
  def INT_PTX_SREG_LANEMASK_LE
      : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
  def INT_PTX_SREG_LANEMASK_LT
      : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
  def INT_PTX_SREG_LANEMASK_GE
      : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
  def INT_PTX_SREG_LANEMASK_GT
      : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
  // %clock is read as 32 bits, %clock64 as 64 bits.
  def INT_PTX_SREG_CLOCK
      : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
  def INT_PTX_SREG_CLOCK64
      : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;

  // 32-bit reads of %pm0 .. %pm3.
  def INT_PTX_SREG_PM0
      : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
  def INT_PTX_SREG_PM1
      : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
  def INT_PTX_SREG_PM2
      : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
  def INT_PTX_SREG_PM3
      : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
  // WARP_SZ is emitted as a bare constant name, whereas the
  // PTX_READ_SREG_R32/R64 templates always put '%' in front of the name they
  // are given, so this read is written out by hand.
  // TODO: It would be nice to reuse the PTX_READ_SREG_* templates here once
  // they can handle the constant.
  def INT_PTX_SREG_WARPSIZE
      : NVPTXInst<(outs Int32Regs:$dst), (ins),
                  "mov.u32 \t$dst, WARP_SZ;",
                  [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
  // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
  // In addition to target-independent fields provided by WMMA_REGS, it adds
  // the fields commonly used to implement specific PTX instruction -- register
  // types and names, constraints, parts of assembly, etc.
  //
  // Template arguments:
  //   r  - the fragment; its geom, frag and ptx_elt_type are forwarded to the
  //        WMMA_REGS base class.
  //   op - name of the operation the fragment is used with. The predicate
  //        selection below compares it against "mma" and "ldmatrix".
  class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
    // NVPTX register types used to carry fragment data. There is no default
    // case, so an element type missing from this list leaves the !cond
    // without a match.
    NVPTXRegClass regclass = !cond(
      !eq(ptx_elt_type, "f16")  : Float16x2Regs,
      !eq(ptx_elt_type, "f32")  : Float32Regs,
      !eq(ptx_elt_type, "f64")  : Float64Regs,
      !eq(ptx_elt_type, "bf16") : Int32Regs,
      !eq(ptx_elt_type, "tf32") : Int32Regs,
      !eq(ptx_elt_type, "s32")  : Int32Regs,
      !eq(ptx_elt_type, "b16")  : Int32Regs,
      !eq(ptx_elt_type, "s8")   : Int32Regs,
      !eq(ptx_elt_type, "u8")   : Int32Regs,
      !eq(ptx_elt_type, "s4")   : Int32Regs,
      !eq(ptx_elt_type, "u4")   : Int32Regs,
      !eq(ptx_elt_type, "b1")   : Int32Regs);

    // Instruction input/output arguments for the fragment: one `regclass`
    // entry per element of `regs` (`regs` is not declared in this class --
    // presumably inherited from WMMA_REGS).
    list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

    // List of register names for the fragment -- ["ra0", "ra1",...]
    list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

    // Generates "{{$r<frag>0, $r<frag>1,.... $r<frag>N-1}}" (e.g.
    // "{{$ra0, $ra1}}") for use in asm string construction.
    string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

    // Predicates for particular fragment variant. Technically those are
    // per-instruction predicates, but currently all fragments that can be used
    // in a given instruction are subject to the same constraints, so an
    // instruction can use predicates from any of its fragments. If/when this
    // is no longer the case, we can concat all per-fragment predicates to
    // enforce that all fragments of the instruction are viable.
    //
    // !cond yields the value of the FIRST clause whose condition holds, so the
    // order of the clauses below is significant.
    list<Predicate> Predicates = !cond(
      // fp16 -> fp16/fp32 @ m16n16k16
      !and(!eq(geom, "m16n16k16"),
           !or(!eq(ptx_elt_type, "f16"),
               !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

      // f64 @ m8n8k4
      !and(!eq(geom, "m8n8k4"),
           !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

      // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
      !and(!or(!eq(geom, "m8n32k16"),
               !eq(geom, "m32n8k16")),
           !or(!eq(ptx_elt_type, "f16"),
               !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

      // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
      !and(!or(!eq(geom, "m16n16k16"),
               !eq(geom, "m8n32k16"),
               !eq(geom, "m32n8k16")),
           !or(!eq(ptx_elt_type, "u8"),
               !eq(ptx_elt_type, "s8"),
               !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

      // bf16 @ m16n16k16/m8n32k16/m32n8k16
      !and(!or(!eq(geom, "m16n16k16"),
               !eq(geom, "m8n32k16"),
               !eq(geom, "m32n8k16")),
           !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

      // tf32/f32 @ m16n16k8
      !and(!eq(geom, "m16n16k8"),
           !or(!eq(ptx_elt_type, "tf32"),
               !eq(ptx_elt_type, "f32"))) : [hasSM80, hasPTX70],

      // b1 -> s32 @ m8n8k128(b1)
      !and(!ne(op, "mma"),
           !eq(geom, "m8n8k128")) : [hasSM75, hasPTX63],

      // u4/s4 -> s32 @ m8n8k32 (u4/s4)
      !and(!ne(op, "mma"),
           !eq(geom, "m8n8k32")) : [hasSM75, hasPTX63],

      !or(!eq(geom, "m16n8k8"),
          !eq(geom, "m8n8k16")) : [hasSM75, hasPTX65],

      // Any non-f64 element type @ m8n8k4. The f64 case was already taken by
      // the m8n8k4/f64 clause near the top of this list.
      !and(!ne(ptx_elt_type, "f64"),
           !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

      // mma m8n8k32 requires higher PTX version
      !and(!eq(op, "mma"),
           !eq(geom, "m8n8k32")) : [hasSM75, hasPTX65],

      !and(!eq(op, "mma"),
           !or(!eq(geom, "m16n8k16"),
               !eq(geom, "m16n8k4"),
               !eq(geom, "m16n8k32"),
               !eq(geom, "m16n8k64"),
               !eq(geom, "m8n8k128"),
               !eq(geom, "m16n8k128"),
               !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],

      !and(!eq(op, "ldmatrix"),
           !eq(ptx_elt_type, "b16"),
           !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);

    // template DAGs for instruction inputs/output.
    dag Outs = !dag(outs, ptx_regs, reg_names);
    dag Ins = !dag(ins, ptx_regs, reg_names);
  }
  // Turns an instruction input dag into the dag pattern that matches a call to
  // the intrinsic `Intr`.
  //   Intr - intrinsic record that becomes the operator of the pattern.
  //   Ins  - (ins ...) dag describing the instruction's inputs.
  class BuildPatternI<Intrinsic Intr, dag Ins> {
    // Each element of Ins goes through the same chain of substitutions,
    // innermost first: `ins` -> Intr, MEMri -> ADDRri, MEMri64 -> ADDRri64,
    // imem -> ADDRvar. Everything else is passed through untouched.
    dag ret = !foreach(elt, Ins,
                       !subst(imem, ADDRvar,
                              !subst(MEMri64, ADDRri64,
                                     !subst(MEMri, ADDRri,
                                            !subst(ins, Intr, elt)))));
  }
  // PatFrag flavour of the intrinsic pattern builder: identical substitutions,
  // but the operator placed into the pattern is a PatFrag rather than an
  // Intrinsic.
  //   Intr - PatFrag record that becomes the operator of the pattern.
  //   Ins  - (ins ...) dag describing the instruction's inputs.
  class BuildPatternPF<PatFrag Intr, dag Ins> {
    // Substitutions applied to every element, innermost first:
    // `ins` -> Intr, MEMri -> ADDRri, MEMri64 -> ADDRri64, imem -> ADDRvar.
    dag ret = !foreach(elt, Ins,
                       !subst(imem, ADDRvar,
                              !subst(MEMri64, ADDRri64,
                                     !subst(MEMri, ADDRri,
                                            !subst(ins, Intr, elt)))));
  }
  // Shared base for the MMA-family instructions defined in this file
  // (wmma load/store, wmma.mma, mma and ldmatrix). It carries the fields
  // needed to build the intrinsic -> instruction selection pattern.
  //   _Intr - name of the intrinsic record the instruction implements.
  //   _Args - input dags; they are concatenated in list order.
  // Operand lists and the asm string are placeholders here; derived classes
  // override them with `let`.
  class WMMA_INSTR<string _Intr, list<dag> _Args>
      : NVPTXInst<(outs), (ins), "?", []> {
    // Intrinsic record resolved from its name.
    Intrinsic Intr = !cast<Intrinsic>(_Intr);
    // Fold every argument dag into a single (ins ...) dag.
    dag Args = !foldl((ins), _Args, acc, piece, !con(acc, piece));
    // Default pattern: (Intr arg0, arg1, ...). Derived classes may replace it.
    dag IntrinsicPattern = BuildPatternI<Intr, Args>.ret;
  }
  //
  // wmma.load.<frag>.sync${ptx:aligned}.<layout>.<geom>[|.global|.shared].<type>
  //
  // Load of a single WMMA fragment.
  //   Frag       - register/geometry/type description of the loaded fragment.
  //   Layout     - layout string placed into the mnemonic.
  //   Space      - "", ".global" or ".shared"; appended to the mnemonic and used
  //                to pick the address-space check of the matching PatFrag.
  //   WithStride - when set, an extra Int32Regs:$ldm operand is added and
  //                printed after the address.
  //   SrcOp      - operand kind used for the source address.
  class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                  DAGOperand SrcOp>
      : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
                   [!con((ins SrcOp:$src),
                         !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
        Requires<Frag.Predicates> {
    // The load/store intrinsics are overloaded on the pointer's address space,
    // so the intrinsic is wrapped into a PatFrag that checks the address space.
    // PFOperands mirrors the shape of Args using (ops node:$name, ...).
    dag PFOperands = !con((ops node:$src),
                          !if(WithStride, (ops node:$ldm), (ops)));
    // Same operands, with the intrinsic as the operator instead of `ops`.
    dag PFOperandsIntr = !foreach(elt, PFOperands, !subst(ops, Intr, elt));

    // PatFrag that matches the intrinsic only for the requested address space;
    // any Space other than ".shared"/".global" selects the generic check.
    PatFrag IntrFrag = PatFrag<PFOperands,
                               PFOperandsIntr,
                               !cond(!eq(Space, ".shared"): AS_match.shared,
                                     !eq(Space, ".global"): AS_match.global,
                                     true: AS_match.generic)>;

    // Replace the default pattern with the address-space-constrained one.
    let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
    let OutOperandList = Frag.Outs;
    // $ptx is an extra operand referenced by the asm string through
    // ${ptx:aligned}.
    // NOTE(review): how the `aligned` modifier is printed is defined by the
    // MmaCode operand printer, which is not visible here - confirm.
    let InOperandList = !con(Args, (ins MmaCode:$ptx));
    let AsmString = "wmma.load." # Frag.frag
                    # ".sync${ptx:aligned}." # Layout
                    # "." # Frag.geom
                    # Space
                    # "." # Frag.ptx_elt_type # " \t"
                    # Frag.regstring
                    # ", [$src]"
                    # !if(WithStride, ", $ldm", "")
                    # ";";
  }
  //
  // wmma.store.d.sync${ptx:aligned}.<layout>.<geom>[|.global|.shared].<type>
  //
  // Store of a WMMA fragment.
  //   Frag       - register/geometry/type description of the stored fragment.
  //   Layout     - layout string placed into the mnemonic.
  //   Space      - "", ".global" or ".shared"; appended to the mnemonic and used
  //                to pick the address-space check of the matching PatFrag.
  //   WithStride - when set, an extra Int32Regs:$ldm operand is added and
  //                printed after the fragment registers.
  //   DstOp      - operand kind used for the destination address.
  class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                     bit WithStride, DAGOperand DstOp>
      : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
                   [!con((ins DstOp:$dst),
                         Frag.Ins,
                         !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
        Requires<Frag.Predicates> {
    // The load/store intrinsics are overloaded on the pointer's address space,
    // so the intrinsic is wrapped into a PatFrag that checks the address space.
    // PFOperands mirrors the shape of Args using (ops node:$name, ...):
    // destination, one node per fragment register, then the optional stride.
    dag PFOperands = !con((ops node:$dst),
                          !dag(ops,
                               !listsplat(node, !size(Frag.regs)),
                               Frag.reg_names),
                          !if(WithStride, (ops node:$ldm), (ops)));

    // PatFrag that matches the intrinsic only for the requested address space;
    // any Space other than ".shared"/".global" selects the generic check.
    PatFrag IntrFrag = PatFrag<PFOperands,
                               !foreach(elt, PFOperands, !subst(ops, Intr, elt)),
                               !cond(!eq(Space, ".shared"): AS_match.shared,
                                     !eq(Space, ".global"): AS_match.global,
                                     true: AS_match.generic)>;

    // Replace the default pattern with the address-space-constrained one.
    let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
    // A store produces no results.
    let OutOperandList = (outs);
    // $ptx is an extra operand referenced by the asm string through
    // ${ptx:aligned}.
    let InOperandList = !con(Args, (ins MmaCode:$ptx));
    let AsmString = "wmma.store.d.sync${ptx:aligned}." # Layout
                    # "." # Frag.geom
                    # Space
                    # "." # Frag.ptx_elt_type
                    # " \t[$dst],"
                    # Frag.regstring
                    # !if(WithStride, ", $ldm", "")
                    # ";";
  }
  // Instantiate every supported WMMA load/store: all combinations of layout,
  // stride, address space and address operand kind, for each fragment accepted
  // by NVVM_WMMA_LDST_SUPPORTED. The records are collected into MMA_LDSTs.
  defset list<WMMA_INSTR> MMA_LDSTs = {
    foreach layout = ["row", "col"] in
    foreach stride = [false, true] in
    foreach space = [".global", ".shared", ""] in
    foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
      // Loads.
      foreach frag = NVVM_MMA_OPS.all_ld_ops in {
        if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then {
          def : WMMA_LOAD<WMMA_REGINFO<frag, "load">,
                          layout, space, stride, addr>;
        }
      } // frag (load)
      // Stores.
      foreach frag = NVVM_MMA_OPS.all_st_ops in {
        if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then {
          def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">,
                             layout, space, stride, addr>;
        }
      } // frag (store)
    } // addr, space, stride, layout
  } // defset
  // Predicate list for an MMA operation. B1 variants need extra constraints on
  // top of the fragment's own predicates.
  //   FragA - fragment whose Predicates form the base of the list.
  //   b1op  - b1 operation suffix; ".and.popc" additionally requires
  //           hasSM80 and hasPTX71.
  class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
    // Inputs kept as fields on the record.
    string Op = b1op;
    WMMA_REGINFO Frag = FragA;
    // Fragment predicates followed by the b1op-specific ones (if any).
    list<Predicate> ret =
        !listconcat(FragA.Predicates,
                    !if(!eq(b1op, ".and.popc"), [hasSM80, hasPTX71], []));
  }
  // wmma.mma instruction.
  //   FragA/FragB/FragC - input fragments; their Ins dags form the inputs.
  //   FragD             - result fragment; its Outs dag forms the outputs.
  //   ALayout, BLayout  - layout strings placed into the mnemonic.
  //   Satfinite         - non-zero appends ".satfinite".
  //   rnd               - rounding mode; printed as ".<rnd>" unless empty.
  //   b1op              - b1 operation suffix, printed right after "wmma.mma".
  class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
                 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
                 string ALayout, string BLayout, int Satfinite, string rnd,
                 string b1op>
      : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op,
                             FragA, FragB, FragC, FragD>.record,
                   [FragA.Ins, FragB.Ins, FragC.Ins]>,
        // Requires does not seem to have effect on Instruction w/o Patterns.
        // It is set here anyway and propagated to the Pat<> built from this
        // record.
        Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
    let OutOperandList = FragD.Outs;
    // $ptx is an extra operand referenced by the asm string through
    // ${ptx:aligned}.
    let InOperandList = !con(Args, (ins MmaCode:$ptx));

    // Type suffix: f16 inputs print only the D and C types; every other input
    // type prints D, A, B and C types.
    string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                          "." # FragD.ptx_elt_type
                          # "." # FragC.ptx_elt_type,
                          "." # FragD.ptx_elt_type
                          # "." # FragA.ptx_elt_type
                          # "." # FragB.ptx_elt_type
                          # "." # FragC.ptx_elt_type);

    let AsmString = "wmma.mma" # b1op
                    # ".sync${ptx:aligned}." # ALayout
                    # "." # BLayout
                    # "." # FragA.geom
                    # !if(!eq(rnd, ""), "", "." # rnd)
                    # TypeList
                    # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                    # FragD.regstring # ",\n\t\t"
                    # FragA.regstring # ",\n\t\t"
                    # FragB.regstring # ",\n\t\t"
                    # FragC.regstring # ";";
  }
  // Instantiate every wmma.mma variant accepted by NVVM_WMMA_SUPPORTED, over
  // all combinations of A/B layout, satfinite, rounding mode, op and b1 op.
  // The records are collected into WMMAs.
  defset list<WMMA_INSTR> WMMAs = {
    foreach layout_a = ["row", "col"] in
    foreach layout_b = ["row", "col"] in
    foreach satf = [0, 1] in
    foreach rnd = ["", "rn", "rz", "rm", "rp"] in
    foreach op = NVVM_MMA_OPS.all_wmma_ops in
    foreach b1op = NVVM_MMA_B1OPS<op>.ret in
    if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then
      // op[0..3] are the A, B, C and D fragments, in that order.
      def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                     WMMA_REGINFO<op[1], "wmma.mma">,
                     WMMA_REGINFO<op[2], "wmma.mma">,
                     WMMA_REGINFO<op[3], "wmma.mma">,
                     layout_a, layout_b, satf, rnd, b1op>;
  } // defset
  // mma.sync.aligned instruction.
  //   FragA/FragB/FragC - input fragments; their Ins dags form the inputs.
  //   FragD             - result fragment; its Outs dag forms the outputs.
  //   ALayout, BLayout  - layout strings placed into the mnemonic.
  //   Satfinite         - non-zero appends ".satfinite".
  //   b1op              - b1 operation suffix, printed after the type list.
  class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
            WMMA_REGINFO FragC, WMMA_REGINFO FragD,
            string ALayout, string BLayout, int Satfinite, string b1op>
      : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op,
                            FragA, FragB, FragC, FragD>.record,
                   [FragA.Ins, FragB.Ins, FragC.Ins]>,
        // Requires does not seem to have effect on Instruction w/o Patterns.
        // It is set here anyway and propagated to the Pat<> built from this
        // record.
        Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
    let OutOperandList = FragD.Outs;
    // $ptx is an extra trailing input operand; this asm string does not
    // reference it.
    let InOperandList = !con(Args, (ins MmaCode:$ptx));

    // Type suffix, always in D, A, B, C order.
    string TypeList = "." # FragD.ptx_elt_type
                      # "." # FragA.ptx_elt_type
                      # "." # FragB.ptx_elt_type
                      # "." # FragC.ptx_elt_type;

    let AsmString = "mma.sync.aligned." # FragA.geom
                    # "." # ALayout
                    # "." # BLayout
                    # !if(Satfinite, ".satfinite", "")
                    # TypeList
                    # b1op # "\n\t\t"
                    # FragD.regstring # ",\n\t\t"
                    # FragA.regstring # ",\n\t\t"
                    # FragB.regstring # ",\n\t\t"
                    # FragC.regstring # ";";
  }
  // Instantiate every mma variant accepted by NVVM_MMA_SUPPORTED, over all
  // combinations of A/B layout, satfinite, op and b1 op. The records are
  // collected into MMAs.
  defset list<WMMA_INSTR> MMAs = {
    foreach layout_a = ["row", "col"] in
    foreach layout_b = ["row", "col"] in
    foreach satf = [0, 1] in
    foreach op = NVVM_MMA_OPS.all_mma_ops in
    foreach b1op = NVVM_MMA_B1OPS<op>.ret in
    if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then
      // op[0..3] are the A, B, C and D fragments, in that order.
      def : MMA<WMMA_REGINFO<op[0], "mma">,
                WMMA_REGINFO<op[1], "mma">,
                WMMA_REGINFO<op[2], "mma">,
                WMMA_REGINFO<op[3], "mma">,
                layout_a, layout_b, satf, b1op>;
  } // defset
  //
  // ldmatrix.sync.aligned.<geom>.<frag>[|.trans][|.shared].<type>
  //
  // ldmatrix instruction.
  //   Frag       - register/geometry/type description of the loaded fragment.
  //   Transposed - when set, ".trans" is added to the mnemonic.
  //   Space      - "" or ".shared"; appended to the mnemonic and used to pick
  //                the address-space check of the matching PatFrag.
  //   SrcOp      - operand kind used for the source address.
  class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
                 DAGOperand SrcOp>
      : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record,
                   [(ins SrcOp:$src)]>,
        Requires<Frag.Predicates> {
    // PatFrag that matches the intrinsic only for the requested address space;
    // anything other than ".shared" selects the generic check.
    PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                               !if(!eq(Space, ".shared"),
                                   AS_match.shared,
                                   AS_match.generic)>;

    // Replace the default pattern with the address-space-constrained one.
    let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
    let OutOperandList = Frag.Outs;
    // $ptx is an extra trailing input operand; this asm string does not
    // reference it.
    let InOperandList = !con(Args, (ins MmaCode:$ptx));
    let AsmString = "ldmatrix.sync.aligned." # Frag.geom
                    # "." # Frag.frag
                    # !if(Transposed, ".trans", "")
                    # Space
                    # "." # Frag.ptx_elt_type
                    # " " # Frag.regstring # ", [$src];";
  }
  // Instantiate every ldmatrix variant accepted by NVVM_LDMATRIX_SUPPORTED,
  // over all combinations of transposition, address space and address operand
  // kind. The records are collected into LDMATRIXs.
  defset list<WMMA_INSTR> LDMATRIXs = {
    foreach transposed = [false, true] in
    foreach space = [".shared", ""] in
    foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in
    foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in {
      if NVVM_LDMATRIX_SUPPORTED<frag>.ret then {
        def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">,
                       transposed, space, addr>;
      }
    } // frag, addr, space, transposed
  } // defset
  // Selection pattern mapping wi.IntrinsicPattern onto the instruction wi.
  //
  // Constructing non-flat DAGs is still a pain: a dag node cannot be !subst'ed
  // with a dag, so the result is built in two steps. First !foreach swaps the
  // `ins` operator of wi.Args for the instruction record, then ptx.version is
  // appended with !con as the trailing operand.
  // The instruction's predicates are propagated to the pattern.
  class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(elt, wi.Args, !subst(ins, wi, elt)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;
  // Emit one intrinsic -> instruction pattern per MMA-family instruction
  // collected in the defsets (mma, wmma.mma, wmma load/store, ldmatrix).
  foreach inst = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in {
    def : MMA_PAT<inst>;
  }