NVPTXIntrinsics.td 298 KB


  1. //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. def immFloat0 : PatLeaf<(fpimm), [{
  9. float f = (float)N->getValueAPF().convertToFloat();
  10. return (f==0.0f);
  11. }]>;
  12. def immFloat1 : PatLeaf<(fpimm), [{
  13. float f = (float)N->getValueAPF().convertToFloat();
  14. return (f==1.0f);
  15. }]>;
  16. def immDouble0 : PatLeaf<(fpimm), [{
  17. double d = (double)N->getValueAPF().convertToDouble();
  18. return (d==0.0);
  19. }]>;
  20. def immDouble1 : PatLeaf<(fpimm), [{
  21. double d = (double)N->getValueAPF().convertToDouble();
  22. return (d==1.0);
  23. }]>;
  24. def AS_match {
  25. code generic = [{
  26. return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  27. }];
  28. code shared = [{
  29. return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  30. }];
  31. code global = [{
  32. return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  33. }];
  34. }
  35. // A node that will be replaced with the current PTX version.
  36. class PTX {
  37. SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
  38. return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  39. }]>;
  40. // (i32 0) will be XForm'ed to the currently used PTX version.
  41. dag version = (PTXVerXform (i32 0));
  42. }
  43. def ptx : PTX;
// Generates a list of n sequential register names.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
  46. class RegSeq<int n, string prefix> {
  47. list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
  48. [prefix # !sub(n, 1)]),
  49. []);
  50. }
  51. class THREADMASK_INFO<bit sync> {
  52. list<bit> ret = !if(sync, [0, 1], [0]);
  53. }
  54. //-----------------------------------
  55. // Synchronization and shuffle functions
  56. //-----------------------------------
  57. let isConvergent = true in {
  58. def INT_BARRIER0 : NVPTXInst<(outs), (ins),
  59. "bar.sync \t0;",
  60. [(int_nvvm_barrier0)]>;
  61. def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
  62. "bar.sync \t$src1;",
  63. [(int_nvvm_barrier_n Int32Regs:$src1)]>;
  64. def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
  65. "bar.sync \t$src1, $src2;",
  66. [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
  67. def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  68. !strconcat("{{ \n\t",
  69. ".reg .pred \t%p1; \n\t",
  70. "setp.ne.u32 \t%p1, $pred, 0; \n\t",
  71. "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
  72. "}}"),
  73. [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
  74. def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  75. !strconcat("{{ \n\t",
  76. ".reg .pred \t%p1; \n\t",
  77. ".reg .pred \t%p2; \n\t",
  78. "setp.ne.u32 \t%p1, $pred, 0; \n\t",
  79. "bar.red.and.pred \t%p2, 0, %p1; \n\t",
  80. "selp.u32 \t$dst, 1, 0, %p2; \n\t",
  81. "}}"),
  82. [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
  83. def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  84. !strconcat("{{ \n\t",
  85. ".reg .pred \t%p1; \n\t",
  86. ".reg .pred \t%p2; \n\t",
  87. "setp.ne.u32 \t%p1, $pred, 0; \n\t",
  88. "bar.red.or.pred \t%p2, 0, %p1; \n\t",
  89. "selp.u32 \t$dst, 1, 0, %p2; \n\t",
  90. "}}"),
  91. [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
  92. def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
  93. [(int_nvvm_bar_sync imm:$i)]>;
  94. def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
  95. [(int_nvvm_bar_warp_sync imm:$i)]>,
  96. Requires<[hasPTX60, hasSM30]>;
  97. def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
  98. [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
  99. Requires<[hasPTX60, hasSM30]>;
  100. def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
  101. [(int_nvvm_barrier_sync imm:$i)]>,
  102. Requires<[hasPTX60, hasSM30]>;
  103. def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
  104. [(int_nvvm_barrier_sync Int32Regs:$i)]>,
  105. Requires<[hasPTX60, hasSM30]>;
  106. def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
  107. "barrier.sync \t$id, $cnt;",
  108. [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
  109. Requires<[hasPTX60, hasSM30]>;
  110. def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
  111. "barrier.sync \t$id, $cnt;",
  112. [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
  113. Requires<[hasPTX60, hasSM30]>;
  114. def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
  115. "barrier.sync \t$id, $cnt;",
  116. [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
  117. Requires<[hasPTX60, hasSM30]>;
  118. def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
  119. "barrier.sync \t$id, $cnt;",
  120. [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
  121. Requires<[hasPTX60, hasSM30]>;
  122. class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
  123. bit offset_imm, bit mask_imm, bit threadmask_imm>
  124. : NVPTXInst<(outs), (ins), "?", []> {
  125. NVPTXRegClass rc = !cond(
  126. !eq(reg, "i32"): Int32Regs,
  127. !eq(reg, "f32"): Float32Regs);
  128. string IntrName = "int_nvvm_shfl_"
  129. # !if(sync, "sync_", "")
  130. # mode
  131. # "_" # reg
  132. # !if(return_pred, "p", "");
  133. Intrinsic Intr = !cast<Intrinsic>(IntrName);
  134. let InOperandList = !con(
  135. !if(sync,
  136. !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
  137. (ins)),
  138. (ins rc:$src),
  139. !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
  140. !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
  141. );
  142. let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  143. let AsmString = "shfl."
  144. # !if(sync, "sync.", "")
  145. # mode # ".b32\t"
  146. # "$dst"
  147. # !if(return_pred, "|$pred", "") # ", "
  148. # "$src, $offset, $mask"
  149. # !if(sync, ", $threadmask", "")
  150. # ";"
  151. ;
  152. let Pattern = [!con(
  153. !foreach(tmp, OutOperandList,
  154. !subst(outs, set,
  155. !subst(i32imm, imm, tmp))),
  156. (set !foreach(tmp, InOperandList,
  157. !subst(ins, Intr,
  158. !subst(i32imm, imm, tmp))))
  159. )];
  160. }
  161. foreach sync = [false, true] in {
  162. foreach mode = ["up", "down", "bfly", "idx"] in {
  163. foreach regclass = ["i32", "f32"] in {
  164. foreach return_pred = [false, true] in {
  165. foreach offset_imm = [false, true] in {
  166. foreach mask_imm = [false, true] in {
  167. foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
  168. def : SHFL_INSTR<sync, mode, regclass, return_pred,
  169. offset_imm, mask_imm, threadmask_imm>,
  170. Requires<!if(sync, [hasSM30, hasPTX60], [hasSM30, hasSHFL])>;
  171. }
  172. }
  173. }
  174. }
  175. }
  176. }
  177. }
  178. // vote.{all,any,uni,ballot}
  179. multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  180. def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
  181. "vote." # mode # " \t$dest, $pred;",
  182. [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
  183. Requires<[hasPTX60, hasSM30]>;
  184. }
  185. defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
  186. defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
  187. defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
  188. defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
  189. // vote.sync.{all,any,uni,ballot}
  190. multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  191. def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
  192. "vote.sync." # mode # " \t$dest, $pred, $mask;",
  193. [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
  194. Requires<[hasPTX60, hasSM30]>;
  195. def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
  196. "vote.sync." # mode #" \t$dest, $pred, $mask;",
  197. [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
  198. Requires<[hasPTX60, hasSM30]>;
  199. }
  200. defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
  201. defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
  202. defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
  203. defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
  204. multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
  205. Operand ImmOp> {
  206. def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
  207. "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
  208. [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
  209. Requires<[hasPTX60, hasSM70]>;
  210. def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
  211. "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
  212. [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
  213. Requires<[hasPTX60, hasSM70]>;
  214. def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
  215. "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
  216. [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
  217. Requires<[hasPTX60, hasSM70]>;
  218. def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
  219. "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
  220. [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
  221. Requires<[hasPTX60, hasSM70]>;
  222. }
  223. defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
  224. i32imm>;
  225. defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
  226. i64imm>;
  227. multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
  228. Operand ImmOp> {
  229. def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
  230. (ins i32imm:$mask, ImmOp:$value),
  231. "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
  232. [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
  233. Requires<[hasPTX60, hasSM70]>;
  234. def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
  235. (ins Int32Regs:$mask, ImmOp:$value),
  236. "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
  237. [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
  238. Requires<[hasPTX60, hasSM70]>;
  239. def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
  240. (ins i32imm:$mask, regclass:$value),
  241. "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
  242. [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
  243. Requires<[hasPTX60, hasSM70]>;
  244. def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
  245. (ins Int32Regs:$mask, regclass:$value),
  246. "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
  247. [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
  248. Requires<[hasPTX60, hasSM70]>;
  249. }
  250. defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
  251. i32imm>;
  252. defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
  253. i64imm>;
  254. multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  255. def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
  256. "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
  257. [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
  258. Requires<[hasPTX70, hasSM80]>;
  259. }
  260. defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
  261. defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
  262. defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
  263. defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
  264. defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
  265. defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
  266. defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
  267. defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
  268. } // isConvergent = true
  269. //-----------------------------------
  270. // Explicit Memory Fence Functions
  271. //-----------------------------------
  272. class MEMBAR<string StrOp, Intrinsic IntOP> :
  273. NVPTXInst<(outs), (ins),
  274. StrOp, [(IntOP)]>;
  275. def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
  276. def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
  277. def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
  278. //-----------------------------------
  279. // Async Copy Functions
  280. //-----------------------------------
  281. multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  282. def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
  283. !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
  284. [(Intrin Int32Regs:$addr)]>,
  285. Requires<[hasPTX70, hasSM80]>;
  286. def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
  287. !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
  288. [(Intrin Int64Regs:$addr)]>,
  289. Requires<[hasPTX70, hasSM80]>;
  290. }
  291. defm CP_ASYNC_MBARRIER_ARRIVE :
  292. CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
  293. defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  294. CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
  295. defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  296. CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
  297. defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  298. CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
  299. multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  300. def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
  301. !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
  302. [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
  303. Requires<[hasPTX70, hasSM80]>;
  304. def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
  305. !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
  306. [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
  307. Requires<[hasPTX70, hasSM80]>;
  308. }
  309. defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  310. CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;
  311. defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  312. CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;
  313. defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  314. CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
  315. multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  316. def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
  317. !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
  318. [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
  319. Requires<[hasPTX70, hasSM80]>;
  320. def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
  321. !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
  322. [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
  323. Requires<[hasPTX70, hasSM80]>;
  324. }
  325. defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  326. CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
  327. def CP_ASYNC_COMMIT_GROUP :
  328. NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
  329. Requires<[hasPTX70, hasSM80]>;
  330. def CP_ASYNC_WAIT_GROUP :
  331. NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
  332. [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  333. Requires<[hasPTX70, hasSM80]>;
  334. def CP_ASYNC_WAIT_ALL :
  335. NVPTXInst<(outs), (ins), "cp.async.wait_all;",
  336. [(int_nvvm_cp_async_wait_all)]>,
  337. Requires<[hasPTX70, hasSM80]>;
  338. //-----------------------------------
  339. // MBarrier Functions
  340. //-----------------------------------
  341. multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  342. def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
  343. !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
  344. [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
  345. Requires<[hasPTX70, hasSM80]>;
  346. def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
  347. !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
  348. [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
  349. Requires<[hasPTX70, hasSM80]>;
  350. }
  351. defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
  352. defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
  353. int_nvvm_mbarrier_init_shared>;
  354. multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  355. def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
  356. !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
  357. [(Intrin Int32Regs:$addr)]>,
  358. Requires<[hasPTX70, hasSM80]>;
  359. def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
  360. !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
  361. [(Intrin Int64Regs:$addr)]>,
  362. Requires<[hasPTX70, hasSM80]>;
  363. }
  364. defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
  365. defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
  366. int_nvvm_mbarrier_inval_shared>;
  367. multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  368. def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
  369. !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
  370. [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
  371. Requires<[hasPTX70, hasSM80]>;
  372. def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
  373. !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
  374. [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
  375. Requires<[hasPTX70, hasSM80]>;
  376. }
  377. defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
  378. defm MBARRIER_ARRIVE_SHARED :
  379. MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
  380. multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  381. def _32 : NVPTXInst<(outs Int64Regs:$state),
  382. (ins Int32Regs:$addr, Int32Regs:$count),
  383. !strconcat("mbarrier.arrive.noComplete", AddrSpace,
  384. ".b64 $state, [$addr], $count;"),
  385. [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
  386. Requires<[hasPTX70, hasSM80]>;
  387. def _64 : NVPTXInst<(outs Int64Regs:$state),
  388. (ins Int64Regs:$addr, Int32Regs:$count),
  389. !strconcat("mbarrier.arrive.noComplete", AddrSpace,
  390. ".b64 $state, [$addr], $count;"),
  391. [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
  392. Requires<[hasPTX70, hasSM80]>;
  393. }
  394. defm MBARRIER_ARRIVE_NOCOMPLETE :
  395. MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
  396. defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  397. MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;
  398. multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  399. def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
  400. !strconcat("mbarrier.arrive_drop", AddrSpace,
  401. ".b64 $state, [$addr];"),
  402. [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
  403. Requires<[hasPTX70, hasSM80]>;
  404. def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
  405. !strconcat("mbarrier.arrive_drop", AddrSpace,
  406. ".b64 $state, [$addr];"),
  407. [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
  408. Requires<[hasPTX70, hasSM80]>;
  409. }
  410. defm MBARRIER_ARRIVE_DROP :
  411. MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
  412. defm MBARRIER_ARRIVE_DROP_SHARED :
  413. MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
  414. multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  415. def _32 : NVPTXInst<(outs Int64Regs:$state),
  416. (ins Int32Regs:$addr, Int32Regs:$count),
  417. !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
  418. ".b64 $state, [$addr], $count;"),
  419. [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
  420. Requires<[hasPTX70, hasSM80]>;
  421. def _64 : NVPTXInst<(outs Int64Regs:$state),
  422. (ins Int64Regs:$addr, Int32Regs:$count),
  423. !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
  424. ".b64 $state, [$addr], $count;"),
  425. [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
  426. Requires<[hasPTX70, hasSM80]>;
  427. }
  428. defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  429. MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
  430. defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  431. MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
  432. int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
  433. multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  434. def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
  435. !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
  436. [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
  437. Requires<[hasPTX70, hasSM80]>;
  438. def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
  439. !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
  440. [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
  441. Requires<[hasPTX70, hasSM80]>;
  442. }
  443. defm MBARRIER_TEST_WAIT :
  444. MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
  445. defm MBARRIER_TEST_WAIT_SHARED :
  446. MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
  447. class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
  448. NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
  449. "mbarrier.pending_count.b64 $res, $state;",
  450. [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
  451. Requires<[hasPTX70, hasSM80]>;
  452. def MBARRIER_PENDING_COUNT :
  453. MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
  454. //-----------------------------------
  455. // Math Functions
  456. //-----------------------------------
  457. // Map min(1.0, max(0.0, x)) to sat(x)
  458. // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
  459. // NaN
  460. // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
  461. // Same story for fmax, fmin.
  462. def : Pat<(int_nvvm_fmin_f immFloat1,
  463. (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
  464. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  465. def : Pat<(int_nvvm_fmin_f immFloat1,
  466. (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
  467. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  468. def : Pat<(int_nvvm_fmin_f
  469. (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
  470. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  471. def : Pat<(int_nvvm_fmin_f
  472. (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
  473. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  474. def : Pat<(int_nvvm_fmin_d immDouble1,
  475. (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
  476. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  477. def : Pat<(int_nvvm_fmin_d immDouble1,
  478. (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
  479. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  480. def : Pat<(int_nvvm_fmin_d
  481. (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
  482. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  483. def : Pat<(int_nvvm_fmin_d
  484. (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
  485. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  486. // We need a full string for OpcStr here because we need to deal with case like
  487. // INT_PTX_RECIP.
  488. class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
  489. NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
  490. : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
  491. OpcStr,
  492. [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
  493. Requires<Preds>;
  494. // We need a full string for OpcStr here because we need to deal with the case
  495. // like INT_PTX_NATIVE_POWR_F.
  496. class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
  497. NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP,
  498. list<Predicate> Preds = []>
  499. : NVPTXInst<(outs t_regclass:$dst),
  500. (ins s0_regclass:$src0, s1_regclass:$src1),
  501. OpcStr,
  502. [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
  503. Requires<Preds>;
  504. class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
  505. NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
  506. NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
  507. : NVPTXInst<(outs t_regclass:$dst),
  508. (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
  509. OpcStr,
  510. [(set t_regclass:$dst,
  511. (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>,
  512. Requires<Preds>;
  513. //
  514. // MISC
  515. //
  516. def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
  517. Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
  518. //
  519. // Min Max
  520. //
  521. def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
  522. Float32Regs, Float32Regs, int_nvvm_fmin_f>;
  523. def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
  524. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
  525. def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
  526. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
  527. [hasPTX70, hasSM80]>;
  528. def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
  529. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
  530. [hasPTX70, hasSM80]>;
  531. def INT_NVVM_FMIN_XORSIGN_ABS_F :
  532. F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
  533. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
  534. [hasPTX72, hasSM86]>;
  535. def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
  536. F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
  537. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
  538. [hasPTX72, hasSM86]>;
  539. def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
  540. F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
  541. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
  542. [hasPTX72, hasSM86]>;
  543. def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
  544. F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
  545. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
  546. [hasPTX72, hasSM86]>;
  547. def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
  548. Float32Regs, Float32Regs, int_nvvm_fmax_f>;
  549. def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
  550. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
  551. def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
  552. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
  553. [hasPTX70, hasSM80]>;
  554. def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
  555. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
  556. [hasPTX70, hasSM80]>;
  557. def INT_NVVM_FMAX_XORSIGN_ABS_F :
  558. F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
  559. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
  560. [hasPTX72, hasSM86]>;
  561. def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
  562. F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
  563. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
  564. [hasPTX72, hasSM86]>;
  565. def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
  566. F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
  567. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
  568. [hasPTX72, hasSM86]>;
  569. def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
  570. F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
  571. Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
  572. [hasPTX72, hasSM86]>;
  573. def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
  574. Float64Regs, Float64Regs, int_nvvm_fmin_d>;
  575. def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
  576. Float64Regs, Float64Regs, int_nvvm_fmax_d>;
  577. //
  578. // Min Max f16, f16x2, bf16, bf16x2
  579. //
  580. class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
  581. list<Predicate> Preds = [hasPTX70, hasSM80]> {
  582. string Variant = V;
  583. Intrinsic Intr = I;
  584. NVPTXRegClass RegClass = RC;
  585. list<Predicate> Predicates = Preds;
  586. }
  587. multiclass MIN_MAX<string IntName> {
  588. foreach P = [
  589. MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
  590. int_nvvm_fmax_f16), Float16Regs>,
  591. MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
  592. int_nvvm_fmax_ftz_f16), Float16Regs>,
  593. MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
  594. int_nvvm_fmax_nan_f16), Float16Regs>,
  595. MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
  596. int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Float16Regs>,
  597. MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
  598. int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
  599. Float16Regs, [hasPTX72, hasSM86]>,
  600. MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
  601. int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
  602. Float16Regs, [hasPTX72, hasSM86]>,
  603. MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
  604. int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
  605. Float16Regs, [hasPTX72, hasSM86]>,
  606. MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
  607. int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
  608. int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Float16Regs, [hasPTX72, hasSM86]>,
  609. MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
  610. int_nvvm_fmax_f16x2), Float16x2Regs>,
  611. MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
  612. int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Float16x2Regs>,
  613. MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
  614. int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Float16x2Regs>,
  615. MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
  616. int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Float16x2Regs>,
  617. MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
  618. int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
  619. Float16x2Regs, [hasPTX72, hasSM86]>,
  620. MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
  621. int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
  622. Float16x2Regs, [hasPTX72, hasSM86]>,
  623. MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
  624. int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
  625. Float16x2Regs, [hasPTX72, hasSM86]>,
  626. MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
  627. int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
  628. int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
  629. Float16x2Regs, [hasPTX72, hasSM86]>,
  630. MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
  631. int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
  632. MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
  633. int_nvvm_fmax_nan_bf16), Int16Regs>,
  634. MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
  635. int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
  636. Int16Regs, [hasPTX72, hasSM86]>,
  637. MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
  638. int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
  639. Int16Regs, [hasPTX72, hasSM86]>,
  640. MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
  641. int_nvvm_fmax_bf16x2), Int32Regs>,
  642. MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
  643. int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
  644. MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
  645. int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
  646. Int32Regs, [hasPTX72, hasSM86]>,
  647. MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
  648. int_nvvm_fmin_nan_xorsign_abs_bf16x2,
  649. int_nvvm_fmax_nan_xorsign_abs_bf16x2),
  650. Int32Regs, [hasPTX72, hasSM86]>] in {
  651. def P.Variant : F_MATH_2<!strconcat(
  652. IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
  653. P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
  654. }
  655. }
  656. defm INT_NVVM_FMIN : MIN_MAX<"min">;
  657. defm INT_NVVM_FMAN : MIN_MAX<"max">;
  658. //
  659. // Multiplication
  660. //
  661. def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
  662. Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
  663. def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
  664. Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
  665. def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
  666. Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
  667. def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
  668. Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
  669. def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
  670. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
  671. def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
  672. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
  673. def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
  674. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
  675. def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
  676. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
  677. def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
  678. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
  679. def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
  680. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
  681. def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
  682. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
  683. def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
  684. Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
  685. def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
  686. Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
  687. def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
  688. Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
  689. def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
  690. Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
  691. def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
  692. Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
  693. def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
  694. Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
  695. def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
  696. Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
  697. //
  698. // Div
  699. //
  700. def INT_NVVM_DIV_APPROX_FTZ_F
  701. : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
  702. Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
  703. def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
  704. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
  705. def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
  706. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
  707. def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
  708. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
  709. def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
  710. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
  711. def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
  712. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
  713. def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
  714. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
  715. def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
  716. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
  717. def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
  718. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
  719. def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
  720. Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
  721. def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
  722. Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
  723. def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
  724. Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
  725. def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
  726. Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
  727. def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
  728. Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
  729. //
  730. // Sad
  731. //
  732. def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
  733. Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
  734. def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
  735. Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
  736. //
  737. // Floor Ceil
  738. //
  739. def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
  740. (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  741. def : Pat<(int_nvvm_floor_f Float32Regs:$a),
  742. (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
  743. def : Pat<(int_nvvm_floor_d Float64Regs:$a),
  744. (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
  745. def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
  746. (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  747. def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
  748. (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
  749. def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
  750. (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
  751. //
  752. // Abs
  753. //
  754. def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
  755. Float32Regs, int_nvvm_fabs_ftz_f>;
  756. def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
  757. Float32Regs, int_nvvm_fabs_f>;
  758. def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
  759. Float64Regs, int_nvvm_fabs_d>;
  760. //
  761. // Abs, Neg bf16, bf16x2
  762. //
  763. def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
  764. Int16Regs, int_nvvm_abs_bf16, [hasPTX70, hasSM80]>;
  765. def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
  766. Int32Regs, int_nvvm_abs_bf16x2, [hasPTX70, hasSM80]>;
  767. def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
  768. Int16Regs, int_nvvm_neg_bf16, [hasPTX70, hasSM80]>;
  769. def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
  770. Int32Regs, int_nvvm_neg_bf16x2, [hasPTX70, hasSM80]>;
  771. //
  772. // Round
  773. //
  774. def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
  775. (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  776. def : Pat<(int_nvvm_round_f Float32Regs:$a),
  777. (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
  778. def : Pat<(int_nvvm_round_d Float64Regs:$a),
  779. (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
  780. //
  781. // Trunc
  782. //
  783. def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
  784. (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  785. def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
  786. (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
  787. def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
  788. (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
  789. //
  790. // Saturate
  791. //
  792. def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
  793. (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
  794. def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
  795. (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
  796. def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
  797. (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
  798. //
  799. // Exp2 Log2
  800. //
  801. def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
  802. Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
  803. def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
  804. Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
  805. def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
  806. Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
  807. def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
  808. Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX70, hasSM75]>;
  809. def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
  810. Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX70, hasSM75]>;
  811. def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
  812. Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
  813. def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
  814. Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
  815. def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
  816. Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
  817. //
  818. // Sin Cos
  819. //
  820. def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
  821. Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
  822. def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
  823. Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
  824. def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
  825. Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
  826. def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
  827. Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
  828. //
  829. // Fma
  830. //
  831. class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
  832. list<Predicate> Preds = []> {
  833. string Variant = V;
  834. Intrinsic Intr = I;
  835. NVPTXRegClass RegClass = RC;
  836. list<Predicate> Predicates = Preds;
  837. }
  838. multiclass FMA_INST {
  839. foreach P = [
  840. FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
  841. FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
  842. FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
  843. FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
  844. FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
  845. FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
  846. FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
  847. FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
  848. FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
  849. FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
  850. FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
  851. FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
  852. FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Float16Regs, [hasPTX42, hasSM53]>,
  853. FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Float16Regs,
  854. [hasPTX42, hasSM53]>,
  855. FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Float16Regs,
  856. [hasPTX42, hasSM53]>,
  857. FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Float16Regs,
  858. [hasPTX42, hasSM53]>,
  859. FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Float16Regs,
  860. [hasPTX70, hasSM80]>,
  861. FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs,
  862. [hasPTX70, hasSM80]>,
  863. FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs,
  864. [hasPTX42, hasSM53]>,
  865. FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Float16x2Regs,
  866. [hasPTX42, hasSM53]>,
  867. FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Float16x2Regs,
  868. [hasPTX42, hasSM53]>,
  869. FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
  870. Float16x2Regs, [hasPTX42, hasSM53]>,
  871. FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Float16x2Regs,
  872. [hasPTX70, hasSM80]>,
  873. FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
  874. Float16x2Regs, [hasPTX70, hasSM80]>,
  875. FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX70, hasSM80]>,
  876. FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
  877. [hasPTX70, hasSM80]>,
  878. FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
  879. [hasPTX70, hasSM80]>,
  880. FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
  881. [hasPTX70, hasSM80]>
  882. ] in {
  883. def P.Variant :
  884. F_MATH_3<!strconcat("fma",
  885. !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"),
  886. P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
  887. }
  888. }
  889. defm INT_NVVM_FMA : FMA_INST;
  890. //
  891. // Rcp
  892. //
  893. def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
  894. Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
  895. def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
  896. Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
  897. def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
  898. Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
  899. def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
  900. Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
  901. def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
  902. Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
  903. def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
  904. Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
  905. def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
  906. Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
  907. def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
  908. Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
  909. def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
  910. Float64Regs, int_nvvm_rcp_rn_d>;
  911. def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
  912. Float64Regs, int_nvvm_rcp_rz_d>;
  913. def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
  914. Float64Regs, int_nvvm_rcp_rm_d>;
  915. def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
  916. Float64Regs, int_nvvm_rcp_rp_d>;
  917. def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
  918. Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
  919. def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
  920. Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
  921. //
  922. // Sqrt
  923. //
  924. def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
  925. Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
  926. def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
  927. Float32Regs, int_nvvm_sqrt_rn_f>;
  928. def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
  929. Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
  930. def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
  931. Float32Regs, int_nvvm_sqrt_rz_f>;
  932. def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
  933. Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
  934. def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
  935. Float32Regs, int_nvvm_sqrt_rm_f>;
  936. def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
  937. Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
  938. def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
  939. Float32Regs, int_nvvm_sqrt_rp_f>;
  940. def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
  941. Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
  942. def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
  943. Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
  944. def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
  945. Float64Regs, int_nvvm_sqrt_rn_d>;
  946. def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
  947. Float64Regs, int_nvvm_sqrt_rz_d>;
  948. def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
  949. Float64Regs, int_nvvm_sqrt_rm_d>;
  950. def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
  951. Float64Regs, int_nvvm_sqrt_rp_d>;
  952. // nvvm_sqrt intrinsic
  953. def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  954. (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
  955. def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  956. (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
  957. def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  958. (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
  959. def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  960. (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
  961. //
  962. // Rsqrt
  963. //
  964. def INT_NVVM_RSQRT_APPROX_FTZ_F
  965. : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
  966. int_nvvm_rsqrt_approx_ftz_f>;
  967. def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
  968. Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
  969. def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
  970. Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
  971. //
  972. // Add
  973. //
  974. def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
  975. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
  976. def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
  977. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
  978. def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
  979. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
  980. def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
  981. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
  982. def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
  983. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
  984. def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
  985. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
  986. def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
  987. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
  988. def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
  989. Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
  990. def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
  991. Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
  992. def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
  993. Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
  994. def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
  995. Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
  996. def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
  997. Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
  998. //
  999. // Convert
  1000. //
  1001. def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
  1002. (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
  1003. def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
  1004. (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
  1005. def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
  1006. (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
  1007. def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
  1008. (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
  1009. def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
  1010. (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
  1011. def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
  1012. (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
  1013. def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
  1014. (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
  1015. def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
  1016. (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
  1017. def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
  1018. (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
  1019. def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
  1020. (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
  1021. def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
  1022. (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
  1023. def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
  1024. (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
  1025. def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
  1026. (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
  1027. def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
  1028. (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
  1029. def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
  1030. (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
  1031. def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
  1032. (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
  1033. def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
  1034. (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
  1035. def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
  1036. (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
  1037. def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
  1038. (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
  1039. def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
  1040. (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
  1041. def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
  1042. (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
  1043. def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
  1044. (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
  1045. def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
  1046. (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
  1047. def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
  1048. (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
  1049. def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
  1050. (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  1051. def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
  1052. (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
  1053. def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
  1054. (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  1055. def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
  1056. (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
  1057. def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
  1058. (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  1059. def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
  1060. (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
  1061. def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
  1062. (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  1063. def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
  1064. (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
  1065. def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
  1066. (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  1067. def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
  1068. (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
  1069. def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
  1070. (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  1071. def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
  1072. (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
  1073. def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
  1074. (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  1075. def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
  1076. (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
  1077. def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
  1078. (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  1079. def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
  1080. (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
  1081. def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
  1082. (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
  1083. def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
  1084. (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
  1085. def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
  1086. (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
  1087. def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
  1088. (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
  1089. def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
  1090. (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
  1091. def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
  1092. (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
  1093. def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
  1094. (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
  1095. def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
  1096. (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
  1097. def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
  1098. (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
  1099. def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
  1100. (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
  1101. def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
  1102. (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
  1103. def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
  1104. (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
  1105. def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
  1106. (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
  1107. def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
  1108. (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
  1109. def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
  1110. (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
  1111. def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
  1112. (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
  1113. def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
  1114. (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
  1115. def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
  1116. (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
  1117. def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
  1118. (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
  1119. def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
  1120. (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
  1121. def CVT_tf32_f32 :
  1122. NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
  1123. "cvt.rna.tf32.f32 \t$dest, $a;",
  1124. [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
  1125. def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
  1126. Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
  1127. def INT_NVVM_D2I_LO : F_MATH_1<
  1128. !strconcat("{{\n\t",
  1129. ".reg .b32 %temp; \n\t",
  1130. "mov.b64 \t{$dst, %temp}, $src0;\n\t",
  1131. "}}"),
  1132. Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
  1133. def INT_NVVM_D2I_HI : F_MATH_1<
  1134. !strconcat("{{\n\t",
  1135. ".reg .b32 %temp; \n\t",
  1136. "mov.b64 \t{%temp, $dst}, $src0;\n\t",
  1137. "}}"),
  1138. Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
  1139. def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
  1140. (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  1141. def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
  1142. (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
  1143. def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
  1144. (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  1145. def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
  1146. (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
  1147. def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
  1148. (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  1149. def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
  1150. (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
  1151. def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
  1152. (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  1153. def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
  1154. (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
  1155. def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
  1156. (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
  1157. def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
  1158. (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
  1159. def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
  1160. (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
  1161. def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
  1162. (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
  1163. def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
  1164. (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
  1165. def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
  1166. (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
  1167. def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
  1168. (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
  1169. def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
  1170. (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
  1171. def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
  1172. (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
  1173. def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
  1174. (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
  1175. def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
  1176. (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
  1177. def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
  1178. (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
  1179. def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
  1180. (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
  1181. def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
  1182. (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
  1183. def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
  1184. (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
  1185. def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
  1186. (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
  1187. def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
  1188. (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
  1189. def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
  1190. (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
  1191. def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
  1192. (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
  1193. def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
  1194. (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
  1195. def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
  1196. (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
  1197. def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
  1198. (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
  1199. def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
  1200. (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
  1201. def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
  1202. (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
  // 64-bit integer -> f64 conversion intrinsics.
  // ll2d is selected to CVT_f64_s64 and ull2d to CVT_f64_u64, each with the
  // Cvt{RN,RZ,RM,RP} mode operand named after the intrinsic's suffix.
  def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
            (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
  def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
            (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
  def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
            (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
  def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
            (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

  def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
            (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
  def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
            (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
  def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
            (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
  def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
            (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
  // f32 -> f16 conversion intrinsics.
  // The CVT_f16_f32 result is wrapped in BITCONVERT_16_F2I; the _ftz intrinsic
  // uses the CvtRN_FTZ mode and the plain one uses CvtRN.
  def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
            (BITCONVERT_16_F2I
                (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
  def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
            (BITCONVERT_16_F2I
                (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
  //
  // Bitcast
  //
  // Each bitcast intrinsic is emitted as a same-width "mov.bNN" between two
  // register classes (32-bit: Int32Regs/Float32Regs, 64-bit:
  // Int64Regs/Float64Regs).
  // NOTE(review): F_MATH_1 is defined outside this section; the argument order
  // appears to be <asm, dst class, src class, intrinsic> - confirm there.

  def INT_NVVM_BITCAST_F2I
      : F_MATH_1<"mov.b32 \t$dst, $src0;",
                 Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
  def INT_NVVM_BITCAST_I2F
      : F_MATH_1<"mov.b32 \t$dst, $src0;",
                 Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

  def INT_NVVM_BITCAST_LL2D
      : F_MATH_1<"mov.b64 \t$dst, $src0;",
                 Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
  def INT_NVVM_BITCAST_D2LL
      : F_MATH_1<"mov.b64 \t$dst, $src0;",
                 Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
  //
  // FNS
  //
  // Common base for the fns.b32 instruction variants.
  //   ins      - input operand dag; it must name its operands $mask, $base and
  //              $offset because the asm string refers to them.
  //   Operands - the dag whose result is assigned to $dst in the selection
  //              pattern.
  // The instruction produces an Int32Regs result and is gated on
  // hasPTX60 and hasSM30.
  class INT_FNS_MBO<dag ins, dag Operands>
      : NVPTXInst<(outs Int32Regs:$dst),
                  ins,
                  "fns.b32 \t$dst, $mask, $base, $offset;",
                  [(set Int32Regs:$dst, Operands)]>,
        Requires<[hasPTX60, hasSM30]>;
  // One fns instruction per register/immediate combination of the three
  // operands. The three-letter suffix spells the kind of (mask, base, offset)
  // in that order: 'r' = Int32Regs register, 'i' = i32imm immediate.
  def INT_FNS_rrr
      : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                    (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                  Int32Regs:$offset)>;
  def INT_FNS_rri
      : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                    (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                  imm:$offset)>;
  def INT_FNS_rir
      : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                    (int_nvvm_fns Int32Regs:$mask, imm:$base,
                                  Int32Regs:$offset)>;
  def INT_FNS_rii
      : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                    (int_nvvm_fns Int32Regs:$mask, imm:$base,
                                  imm:$offset)>;
  def INT_FNS_irr
      : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                    (int_nvvm_fns imm:$mask, Int32Regs:$base,
                                  Int32Regs:$offset)>;
  def INT_FNS_iri
      : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                    (int_nvvm_fns imm:$mask, Int32Regs:$base,
                                  imm:$offset)>;
  def INT_FNS_iir
      : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                    (int_nvvm_fns imm:$mask, imm:$base,
                                  Int32Regs:$offset)>;
  def INT_FNS_iii
      : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                    (int_nvvm_fns imm:$mask, imm:$base,
                                  imm:$offset)>;
  //-----------------------------------
  // Atomic Functions
  //-----------------------------------

  // PatFrag wrappers that attach an address-space predicate to an atomic
  // fragment. The predicate code comes from AS_match, which calls
  // ChkMemSDNodeAddressSpace with the global, shared or generic address space.
  class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
      : PatFrag<ops, frag, AS_match.global>;

  class ATOMIC_SHARED_CHK<dag ops, dag frag>
      : PatFrag<ops, frag, AS_match.shared>;

  class ATOMIC_GENERIC_CHK<dag ops, dag frag>
      : PatFrag<ops, frag, AS_match.generic>;
  // Two-operand atomic (address + one value) for a single pointer width.
  //   ptrclass - register class of the $addr operand.
  //   regclass - register class of $dst and of the register form of $b.
  //   SpaceStr, OpcStr, TypeStr - pieces pasted after "atom", in that order, to
  //              form the mnemonic.
  //   IntOp    - fragment matched by the selection pattern.
  //   IMMType  - operand type of $b in the immediate form.
  //   IMM      - node used to match $b in the immediate form.
  //   Pred     - predicates required by both forms.
  // Emits "reg" ($b in a register) and "imm" ($b as an immediate); both print
  // the same asm string.
  multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                            string SpaceStr, string TypeStr, string OpcStr,
                            PatFrag IntOp, Operand IMMType, SDNode IMM,
                            list<Predicate> Pred> {
    def reg
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, regclass:$b),
                    "atom" # SpaceStr # OpcStr # TypeStr #
                        " \t$dst, [$addr], $b;",
                    [(set regclass:$dst,
                          (IntOp ptrclass:$addr, regclass:$b))]>,
          Requires<Pred>;

    def imm
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, IMMType:$b),
                    "atom" # SpaceStr # OpcStr # TypeStr #
                        " \t$dst, [$addr], $b;",
                    [(set regclass:$dst,
                          (IntOp ptrclass:$addr, IMM:$b))]>,
          Requires<Pred>;
  }
  // Instantiates F_ATOMIC_2_imp twice: "p32" with the address in Int32Regs and
  // "p64" with the address in Int64Regs. All other arguments are forwarded
  // unchanged; Pred defaults to an empty predicate list.
  multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr,
                        string TypeStr, string OpcStr, PatFrag IntOp,
                        Operand IMMType, SDNode IMM,
                        list<Predicate> Pred = []> {
    defm p32
        : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, IMM, Pred>;
    defm p64
        : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, IMM, Pred>;
  }
  // has 2 operands, neg the second one
  //
  // Only a register form is defined. The emitted asm is a braced block that
  // declares a local ".s<TypeStr>" register named temp, writes the negation of
  // $b into it with "neg.s<TypeStr>", and then issues
  // "atom<SpaceStr><OpcStr>.u<TypeStr>" with temp as the value operand.
  // TypeStr is therefore a bare width such as "32" or "64", without a type
  // letter or leading dot.
  multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                                string SpaceStr, string TypeStr, string OpcStr,
                                PatFrag IntOp, list<Predicate> Pred> {
    def reg
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, regclass:$b),
                    "{{ \n\t" #
                    ".reg \t.s" # TypeStr # " temp; \n\t" #
                    "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
                    "atom" # SpaceStr # OpcStr # ".u" # TypeStr #
                        " \t$dst, [$addr], temp; \n\t" #
                    "}}",
                    [(set regclass:$dst,
                          (IntOp ptrclass:$addr, regclass:$b))]>,
          Requires<Pred>;
  }
  // Instantiates F_ATOMIC_2_NEG_imp for both pointer widths: "p32" takes the
  // address in Int32Regs, "p64" in Int64Regs. Pred defaults to an empty list.
  multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                            string TypeStr, string OpcStr, PatFrag IntOp,
                            list<Predicate> Pred = []> {
    defm p32
        : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                             IntOp, Pred>;
    defm p64
        : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                             IntOp, Pred>;
  }
  // has 3 operands
  //
  // Three-operand atomic (address + two values) for a single pointer width.
  // Four forms are emitted, all printing the same asm string:
  //   reg  - $b and $c in registers
  //   imm1 - $b immediate, $c register
  //   imm2 - $b register,  $c immediate
  //   imm3 - $b and $c immediate
  // Immediate operands are declared as IMMType and matched with the "imm"
  // node.
  multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                            string SpaceStr, string TypeStr, string OpcStr,
                            PatFrag IntOp, Operand IMMType,
                            list<Predicate> Pred> {
    def reg
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, regclass:$b, regclass:$c),
                    "atom" # SpaceStr # OpcStr # TypeStr #
                        " \t$dst, [$addr], $b, $c;",
                    [(set regclass:$dst,
                          (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
          Requires<Pred>;

    def imm1
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                    "atom" # SpaceStr # OpcStr # TypeStr #
                        " \t$dst, [$addr], $b, $c;",
                    [(set regclass:$dst,
                          (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
          Requires<Pred>;

    def imm2
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                    "atom" # SpaceStr # OpcStr # TypeStr #
                        " \t$dst, [$addr], $b, $c;",
                    [(set regclass:$dst,
                          (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
          Requires<Pred>;

    def imm3
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                    "atom" # SpaceStr # OpcStr # TypeStr #
                        " \t$dst, [$addr], $b, $c;",
                    [(set regclass:$dst,
                          (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
          Requires<Pred>;
  }
  // Instantiates F_ATOMIC_3_imp for both pointer widths: "p32" takes the
  // address in Int32Regs, "p64" in Int64Regs. Pred defaults to an empty list.
  multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr,
                        string TypeStr, string OpcStr, PatFrag IntOp,
                        Operand IMMType, list<Predicate> Pred = []> {
    defm p32
        : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, Pred>;
    defm p64
        : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, Pred>;
  }
  // atom_add
  //
  // Address-space-checked fragments. Suffix _g / _s / _gen selects the
  // global / shared / generic check. The _32 and _64 fragments wrap
  // atomic_load_add_32 and atomic_load_add_64; the fragments without a width
  // wrap atomic_load_fadd.
  def atomic_load_add_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_add_32 node:$a, node:$b)>;
  def atomic_load_add_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_add_32 node:$a, node:$b)>;
  def atomic_load_add_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_add_32 node:$a, node:$b)>;

  def atomic_load_add_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_add_64 node:$a, node:$b)>;
  def atomic_load_add_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_add_64 node:$a, node:$b)>;
  def atomic_load_add_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_add_64 node:$a, node:$b)>;

  def atomic_load_add_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_fadd node:$a, node:$b)>;
  def atomic_load_add_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_fadd node:$a, node:$b)>;
  def atomic_load_add_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_fadd node:$a, node:$b)>;
  // atom.add instructions.
  // _G / _S / _GEN use the ".global" / ".shared" / empty space string together
  // with the fragment for that address space. The _GEN_*_USE_G variants match
  // the generic fragment but print ".global".

  // 32-bit integer (.u32)
  defm INT_PTX_ATOM_ADD_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                   atomic_load_add_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_ADD_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
                   atomic_load_add_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_ADD_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
                   atomic_load_add_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_ADD_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                   atomic_load_add_32_gen, i32imm, imm>;

  // 64-bit integer (.u64)
  defm INT_PTX_ATOM_ADD_G_64
      : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                   atomic_load_add_64_g, i64imm, imm>;
  defm INT_PTX_ATOM_ADD_S_64
      : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
                   atomic_load_add_64_s, i64imm, imm>;
  defm INT_PTX_ATOM_ADD_GEN_64
      : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
                   atomic_load_add_64_gen, i64imm, imm>;
  defm INT_PTX_ATOM_ADD_GEN_64_USE_G
      : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                   atomic_load_add_64_gen, i64imm, imm>;

  // f32
  defm INT_PTX_ATOM_ADD_G_F32
      : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
                   atomic_load_add_g, f32imm, fpimm>;
  defm INT_PTX_ATOM_ADD_S_F32
      : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
                   atomic_load_add_s, f32imm, fpimm>;
  defm INT_PTX_ATOM_ADD_GEN_F32
      : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
                   atomic_load_add_gen, f32imm, fpimm>;

  // f64, additionally gated on hasAtomAddF64
  defm INT_PTX_ATOM_ADD_G_F64
      : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
                   atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
  defm INT_PTX_ATOM_ADD_S_F64
      : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
                   atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
  defm INT_PTX_ATOM_ADD_GEN_F64
      : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
                   atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
  // atom_sub
  //
  // Address-space-checked fragments over atomic_load_sub_32 and
  // atomic_load_sub_64 (_g = global, _s = shared, _gen = generic).
  def atomic_load_sub_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_sub_32 node:$a, node:$b)>;
  def atomic_load_sub_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_sub_32 node:$a, node:$b)>;
  def atomic_load_sub_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_sub_32 node:$a, node:$b)>;

  def atomic_load_sub_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_sub_64 node:$a, node:$b)>;
  def atomic_load_sub_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_sub_64 node:$a, node:$b)>;
  def atomic_load_sub_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_sub_64 node:$a, node:$b)>;
  // Atomic subtract instructions. They are built from F_ATOMIC_2_NEG with the
  // ".add" opcode string, i.e. the value operand is negated and then added.
  // The _GEN_*_USE_G variants match the generic fragment but print ".global".
  defm INT_PTX_ATOM_SUB_G_32
      : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                       atomic_load_sub_32_g>;
  defm INT_PTX_ATOM_SUB_G_64
      : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                       atomic_load_sub_64_g>;
  defm INT_PTX_ATOM_SUB_GEN_32
      : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                       atomic_load_sub_32_gen>;
  defm INT_PTX_ATOM_SUB_GEN_32_USE_G
      : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                       atomic_load_sub_32_gen>;
  defm INT_PTX_ATOM_SUB_S_32
      : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                       atomic_load_sub_32_s>;
  defm INT_PTX_ATOM_SUB_S_64
      : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                       atomic_load_sub_64_s>;
  defm INT_PTX_ATOM_SUB_GEN_64
      : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                       atomic_load_sub_64_gen>;
  defm INT_PTX_ATOM_SUB_GEN_64_USE_G
      : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                       atomic_load_sub_64_gen>;
  // atom_swap
  //
  // Address-space-checked fragments over atomic_swap_32 and atomic_swap_64
  // (_g = global, _s = shared, _gen = generic).
  def atomic_swap_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_swap_32 node:$a, node:$b)>;
  def atomic_swap_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_swap_32 node:$a, node:$b)>;
  def atomic_swap_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_swap_32 node:$a, node:$b)>;

  def atomic_swap_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_swap_64 node:$a, node:$b)>;
  def atomic_swap_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_swap_64 node:$a, node:$b)>;
  def atomic_swap_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_swap_64 node:$a, node:$b)>;
  // atom.exch instructions for the swap fragments (.b32 / .b64).
  // The _GEN_*_USE_G variants match the generic fragment but print ".global".

  // 32-bit
  defm INT_PTX_ATOM_SWAP_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                   atomic_swap_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_SWAP_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
                   atomic_swap_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_SWAP_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
                   atomic_swap_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                   atomic_swap_32_gen, i32imm, imm>;

  // 64-bit
  defm INT_PTX_ATOM_SWAP_G_64
      : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                   atomic_swap_64_g, i64imm, imm>;
  defm INT_PTX_ATOM_SWAP_S_64
      : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
                   atomic_swap_64_s, i64imm, imm>;
  defm INT_PTX_ATOM_SWAP_GEN_64
      : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
                   atomic_swap_64_gen, i64imm, imm>;
  defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
      : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                   atomic_swap_64_gen, i64imm, imm>;
  // atom_max
  //
  // Address-space-checked fragments over atomic_load_max_{32,64} and
  // atomic_load_umax_{32,64} (_g = global, _s = shared, _gen = generic).
  def atomic_load_max_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_max_32 node:$a, node:$b)>;
  def atomic_load_max_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_max_32 node:$a, node:$b)>;
  def atomic_load_max_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_max_32 node:$a, node:$b)>;

  def atomic_load_max_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_max_64 node:$a, node:$b)>;
  def atomic_load_max_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_max_64 node:$a, node:$b)>;
  def atomic_load_max_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_max_64 node:$a, node:$b)>;

  def atomic_load_umax_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_umax_32 node:$a, node:$b)>;
  def atomic_load_umax_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_umax_32 node:$a, node:$b)>;
  def atomic_load_umax_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_umax_32 node:$a, node:$b)>;

  def atomic_load_umax_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_umax_64 node:$a, node:$b)>;
  def atomic_load_umax_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_umax_64 node:$a, node:$b)>;
  def atomic_load_umax_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_umax_64 node:$a, node:$b)>;
  // atom.max instructions. The max fragments use the .s32/.s64 type strings
  // and the umax fragments use .u32/.u64. Every 64-bit variant additionally
  // requires hasSM32. The _GEN_*_USE_G variants match the generic fragment but
  // print ".global".

  // max, 32-bit (.s32)
  defm INT_PTX_ATOM_LOAD_MAX_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                   atomic_load_max_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_MAX_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
                   atomic_load_max_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_MAX_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
                   atomic_load_max_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                   atomic_load_max_32_gen, i32imm, imm>;

  // max, 64-bit (.s64)
  defm INT_PTX_ATOM_LOAD_MAX_G_64
      : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                   atomic_load_max_64_g, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_MAX_S_64
      : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
                   atomic_load_max_64_s, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_MAX_GEN_64
      : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
                   atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
      : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                   atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;

  // umax, 32-bit (.u32)
  defm INT_PTX_ATOM_LOAD_UMAX_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                   atomic_load_umax_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_UMAX_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
                   atomic_load_umax_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
                   atomic_load_umax_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                   atomic_load_umax_32_gen, i32imm, imm>;

  // umax, 64-bit (.u64)
  defm INT_PTX_ATOM_LOAD_UMAX_G_64
      : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                   atomic_load_umax_64_g, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_UMAX_S_64
      : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
                   atomic_load_umax_64_s, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
      : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
                   atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
      : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                   atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
  // atom_min
  //
  // Address-space-checked fragments over atomic_load_min_{32,64} and
  // atomic_load_umin_{32,64} (_g = global, _s = shared, _gen = generic).
  def atomic_load_min_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_min_32 node:$a, node:$b)>;
  def atomic_load_min_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_min_32 node:$a, node:$b)>;
  def atomic_load_min_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_min_32 node:$a, node:$b)>;

  def atomic_load_min_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_min_64 node:$a, node:$b)>;
  def atomic_load_min_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_min_64 node:$a, node:$b)>;
  def atomic_load_min_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_min_64 node:$a, node:$b)>;

  def atomic_load_umin_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_umin_32 node:$a, node:$b)>;
  def atomic_load_umin_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_umin_32 node:$a, node:$b)>;
  def atomic_load_umin_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_umin_32 node:$a, node:$b)>;

  def atomic_load_umin_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_umin_64 node:$a, node:$b)>;
  def atomic_load_umin_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_umin_64 node:$a, node:$b)>;
  def atomic_load_umin_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_umin_64 node:$a, node:$b)>;
  // atom.min instructions. The min fragments use the .s32/.s64 type strings
  // and the umin fragments use .u32/.u64. Every 64-bit variant additionally
  // requires hasSM32. The _GEN_*_USE_G variants match the generic fragment but
  // print ".global".

  // min, 32-bit (.s32)
  defm INT_PTX_ATOM_LOAD_MIN_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                   atomic_load_min_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_MIN_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
                   atomic_load_min_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_MIN_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
                   atomic_load_min_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                   atomic_load_min_32_gen, i32imm, imm>;

  // min, 64-bit (.s64)
  defm INT_PTX_ATOM_LOAD_MIN_G_64
      : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                   atomic_load_min_64_g, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_MIN_S_64
      : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
                   atomic_load_min_64_s, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_MIN_GEN_64
      : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
                   atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
      : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                   atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;

  // umin, 32-bit (.u32)
  defm INT_PTX_ATOM_LOAD_UMIN_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                   atomic_load_umin_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_UMIN_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
                   atomic_load_umin_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
                   atomic_load_umin_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                   atomic_load_umin_32_gen, i32imm, imm>;

  // umin, 64-bit (.u64)
  defm INT_PTX_ATOM_LOAD_UMIN_G_64
      : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                   atomic_load_umin_64_g, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_UMIN_S_64
      : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
                   atomic_load_umin_64_s, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
      : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
                   atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
      : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                   atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
  // atom_inc atom_dec
  //
  // Address-space-checked fragments over the int_nvvm_atomic_load_inc_32 and
  // int_nvvm_atomic_load_dec_32 intrinsics (_g = global, _s = shared,
  // _gen = generic). Only 32-bit fragments are defined here.
  def atomic_load_inc_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
  def atomic_load_inc_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
  def atomic_load_inc_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;

  def atomic_load_dec_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
  def atomic_load_dec_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
  def atomic_load_dec_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
  // atom.inc / atom.dec instructions (.u32 only).
  // The _GEN_*_USE_G variants match the generic fragment but print ".global".

  // inc
  defm INT_PTX_ATOM_INC_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                   atomic_load_inc_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_INC_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
                   atomic_load_inc_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_INC_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
                   atomic_load_inc_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_INC_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                   atomic_load_inc_32_gen, i32imm, imm>;

  // dec
  defm INT_PTX_ATOM_DEC_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
                   atomic_load_dec_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_DEC_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
                   atomic_load_dec_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_DEC_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
                   atomic_load_dec_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_DEC_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
                   atomic_load_dec_32_gen, i32imm, imm>;
  // atom_and
  //
  // Address-space-checked fragments over atomic_load_and_32 and
  // atomic_load_and_64 (_g = global, _s = shared, _gen = generic).
  def atomic_load_and_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_and_32 node:$a, node:$b)>;
  def atomic_load_and_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_and_32 node:$a, node:$b)>;
  def atomic_load_and_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_and_32 node:$a, node:$b)>;

  def atomic_load_and_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_and_64 node:$a, node:$b)>;
  def atomic_load_and_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_and_64 node:$a, node:$b)>;
  def atomic_load_and_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_and_64 node:$a, node:$b)>;
  // atom.and instructions (.b32 / .b64). The 64-bit variants additionally
  // require hasSM32. The _GEN_*_USE_G variants match the generic fragment but
  // print ".global".

  // 32-bit
  defm INT_PTX_ATOM_AND_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
                   atomic_load_and_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_AND_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
                   atomic_load_and_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_AND_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
                   atomic_load_and_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_AND_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
                   atomic_load_and_32_gen, i32imm, imm>;

  // 64-bit
  defm INT_PTX_ATOM_AND_G_64
      : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
                   atomic_load_and_64_g, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_AND_S_64
      : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
                   atomic_load_and_64_s, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_AND_GEN_64
      : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
                   atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_AND_GEN_64_USE_G
      : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
                   atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
  // atom_or
  //
  // Address-space-checked fragments over atomic_load_or_32 and
  // atomic_load_or_64 (_g = global, _s = shared, _gen = generic).
  def atomic_load_or_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_or_32 node:$a, node:$b)>;
  def atomic_load_or_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_or_32 node:$a, node:$b)>;
  def atomic_load_or_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_or_32 node:$a, node:$b)>;

  def atomic_load_or_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_or_64 node:$a, node:$b)>;
  def atomic_load_or_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_or_64 node:$a, node:$b)>;
  def atomic_load_or_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_or_64 node:$a, node:$b)>;
  // atom.or instructions (.b32 / .b64). The 64-bit variants additionally
  // require hasSM32. The _GEN_*_USE_G variants match the generic fragment but
  // print ".global".

  // 32-bit
  defm INT_PTX_ATOM_OR_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
                   atomic_load_or_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_OR_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
                   atomic_load_or_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_OR_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
                   atomic_load_or_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_OR_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
                   atomic_load_or_32_s, i32imm, imm>;

  // 64-bit
  defm INT_PTX_ATOM_OR_G_64
      : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
                   atomic_load_or_64_g, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_OR_GEN_64
      : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
                   atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_OR_GEN_64_USE_G
      : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
                   atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_OR_S_64
      : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
                   atomic_load_or_64_s, i64imm, imm, [hasSM32]>;
  // atom_xor
  //
  // Address-space-checked fragments over atomic_load_xor_32 and
  // atomic_load_xor_64 (_g = global, _s = shared, _gen = generic).
  def atomic_load_xor_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_xor_32 node:$a, node:$b)>;
  def atomic_load_xor_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_xor_32 node:$a, node:$b)>;
  def atomic_load_xor_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_xor_32 node:$a, node:$b)>;

  def atomic_load_xor_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                          (atomic_load_xor_64 node:$a, node:$b)>;
  def atomic_load_xor_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                          (atomic_load_xor_64 node:$a, node:$b)>;
  def atomic_load_xor_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                           (atomic_load_xor_64 node:$a, node:$b)>;
  // atom.xor instructions (.b32 / .b64). The 64-bit variants additionally
  // require hasSM32. The _GEN_*_USE_G variants match the generic fragment but
  // print ".global".

  // 32-bit
  defm INT_PTX_ATOM_XOR_G_32
      : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
                   atomic_load_xor_32_g, i32imm, imm>;
  defm INT_PTX_ATOM_XOR_S_32
      : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
                   atomic_load_xor_32_s, i32imm, imm>;
  defm INT_PTX_ATOM_XOR_GEN_32
      : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
                   atomic_load_xor_32_gen, i32imm, imm>;
  defm INT_PTX_ATOM_XOR_GEN_32_USE_G
      : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
                   atomic_load_xor_32_gen, i32imm, imm>;

  // 64-bit
  defm INT_PTX_ATOM_XOR_G_64
      : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
                   atomic_load_xor_64_g, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_XOR_S_64
      : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
                   atomic_load_xor_64_s, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_XOR_GEN_64
      : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
                   atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
  defm INT_PTX_ATOM_XOR_GEN_64_USE_G
      : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
                   atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
  // atom_cas
  //
  // Address-space-checked, three-operand fragments over atomic_cmp_swap_32 and
  // atomic_cmp_swap_64 (_g = global, _s = shared, _gen = generic).
  def atomic_cmp_swap_32_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                          (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
  def atomic_cmp_swap_32_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                          (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
  def atomic_cmp_swap_32_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                           (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;

  def atomic_cmp_swap_64_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                          (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
  def atomic_cmp_swap_64_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                          (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
  def atomic_cmp_swap_64_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                           (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
  // atom.cas instructions (.b32 / .b64), built from the three-operand
  // F_ATOMIC_3 multiclass with no extra predicates. The _GEN_*_USE_G variants
  // match the generic fragment but print ".global".

  // 32-bit
  defm INT_PTX_ATOM_CAS_G_32
      : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
                   atomic_cmp_swap_32_g, i32imm>;
  defm INT_PTX_ATOM_CAS_S_32
      : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
                   atomic_cmp_swap_32_s, i32imm>;
  defm INT_PTX_ATOM_CAS_GEN_32
      : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
                   atomic_cmp_swap_32_gen, i32imm>;
  defm INT_PTX_ATOM_CAS_GEN_32_USE_G
      : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
                   atomic_cmp_swap_32_gen, i32imm>;

  // 64-bit
  defm INT_PTX_ATOM_CAS_G_64
      : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
                   atomic_cmp_swap_64_g, i64imm>;
  defm INT_PTX_ATOM_CAS_S_64
      : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
                   atomic_cmp_swap_64_s, i64imm>;
  defm INT_PTX_ATOM_CAS_GEN_64
      : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
                   atomic_cmp_swap_64_gen, i64imm>;
  defm INT_PTX_ATOM_CAS_GEN_64_USE_G
      : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
                   atomic_cmp_swap_64_gen, i64imm>;
  // Support for scoped atomic operations. Matches
  //   int_nvvm_atomic_{op}_{space}_{type}_{scope}
  // and converts it into the appropriate instruction.
  // NOTE: not all possible combinations are implemented.
  //   'space' is limited to generic as it's the only one needed to support
  //           CUDA.
  //   'scope' = 'gpu' is the default and is handled by the regular atomic
  //           instructions.

  // Shared instruction shape for the two- and three-operand scoped atomics.
  //   AsmStr   - complete asm string of the instruction.
  //   regclass - register class of the $result output.
  //   Preds    - predicates required by the instruction.
  //   ins      - input operand dag.
  //   Operands - dag whose value is assigned to $result in the pattern.
  class ATOM23_impl<string AsmStr, NVPTXRegClass regclass,
                    list<Predicate> Preds, dag ins, dag Operands>
      : NVPTXInst<(outs regclass:$result),
                  ins,
                  AsmStr,
                  [(set regclass:$result, Operands)]>,
        Requires<Preds>;
  // Define instruction variants for all addressing modes.
  //
  // Two-operand form: four anonymous instructions, covering a 32-bit or 64-bit
  // address register ($src) combined with $b as a register or an immediate.
  // The register variants are given AddedComplexity = 1; the immediate
  // variants keep the default complexity.
  multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                         NVPTXRegClass regclass, Operand ImmType,
                         SDNode Imm, ValueType ImmTy,
                         list<Predicate> Preds> {
    // $b in a register.
    let AddedComplexity = 1 in {
      def : ATOM23_impl<AsmStr, regclass, Preds,
                        (ins Int32Regs:$src, regclass:$b),
                        (Intr Int32Regs:$src, regclass:$b)>;
      def : ATOM23_impl<AsmStr, regclass, Preds,
                        (ins Int64Regs:$src, regclass:$b),
                        (Intr Int64Regs:$src, regclass:$b)>;
    }

    // $b as an immediate.
    // tablegen can't infer argument types from Intrinsic (though it can
    // from Instruction) so we have to enforce specific type on
    // immediates via explicit cast to ImmTy.
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b),
                      (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b),
                      (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
  }
  1739. multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
  1740. NVPTXRegClass regclass, Operand ImmType,
  1741. SDNode Imm, ValueType ImmTy,
  1742. list<Predicate> Preds> {
  1743. // Variants for register/immediate permutations of $b and $c
  1744. let AddedComplexity = 2 in {
  1745. def : ATOM23_impl<AsmStr, regclass, Preds,
  1746. (ins Int32Regs:$src, regclass:$b, regclass:$c),
  1747. (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
  1748. def : ATOM23_impl<AsmStr, regclass, Preds,
  1749. (ins Int64Regs:$src, regclass:$b, regclass:$c),
  1750. (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  1751. }
  1752. let AddedComplexity = 1 in {
  1753. def : ATOM23_impl<AsmStr, regclass, Preds,
  1754. (ins Int32Regs:$src, ImmType:$b, regclass:$c),
  1755. (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
  1756. def : ATOM23_impl<AsmStr, regclass, Preds,
  1757. (ins Int64Regs:$src, ImmType:$b, regclass:$c),
  1758. (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
  1759. def : ATOM23_impl<AsmStr, regclass, Preds,
  1760. (ins Int32Regs:$src, regclass:$b, ImmType:$c),
  1761. (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  1762. def : ATOM23_impl<AsmStr, regclass, Preds,
  1763. (ins Int64Regs:$src, regclass:$b, ImmType:$c),
  1764. (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  1765. }
  1766. def : ATOM23_impl<AsmStr, regclass, Preds,
  1767. (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
  1768. (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  1769. def : ATOM23_impl<AsmStr, regclass, Preds,
  1770. (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
  1771. (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  1772. }
  1773. // Constructs intrinsic name and instruction asm strings.
  1774. multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
  1775. string ScopeStr, string SpaceStr,
  1776. NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
  1777. ValueType ImmTy, list<Predicate> Preds> {
  1778. defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
  1779. # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
  1780. # "." # OpStr # "." # TypeStr
  1781. # " \t$result, [$src], $b;",
  1782. !cast<Intrinsic>(
  1783. "int_nvvm_atomic_" # OpStr
  1784. # "_" # SpaceStr # "_" # IntTypeStr
  1785. # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
  1786. regclass, ImmType, Imm, ImmTy, Preds>;
  1787. }
  1788. multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
  1789. string ScopeStr, string SpaceStr,
  1790. NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
  1791. ValueType ImmTy, list<Predicate> Preds> {
  1792. defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
  1793. # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
  1794. # "." # OpStr # "." # TypeStr
  1795. # " \t$result, [$src], $b, $c;",
  1796. !cast<Intrinsic>(
  1797. "int_nvvm_atomic_" # OpStr
  1798. # "_" # SpaceStr # "_" # IntTypeStr
  1799. # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
  1800. regclass, ImmType, Imm, ImmTy, Preds>;
  1801. }
  1802. // Constructs variants for different address spaces.
  1803. // For now we only need variants for generic space pointers.
  1804. multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
  1805. string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
  1806. SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  1807. defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
  1808. regclass, ImmType, Imm, ImmTy, Preds>;
  1809. }
  1810. multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
  1811. string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
  1812. SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  1813. defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
  1814. regclass, ImmType, Imm, ImmTy, Preds>;
  1815. }
  1816. // Constructs variants for different scopes of atomic op.
  1817. multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
  1818. NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
  1819. ValueType ImmTy, list<Predicate> Preds> {
  1820. // .gpu scope is default and is currently covered by existing
  1821. // atomics w/o explicitly specified scope.
  1822. defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
  1823. regclass, ImmType, Imm, ImmTy,
  1824. !listconcat(Preds,[hasAtomScope])>;
  1825. defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
  1826. regclass, ImmType, Imm, ImmTy,
  1827. !listconcat(Preds,[hasAtomScope])>;
  1828. }
  1829. multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
  1830. NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
  1831. list<Predicate> Preds> {
  1832. // No need to define ".gpu"-scoped atomics. They do the same thing
  1833. // as the regular, non-scoped atomics defined elsewhere.
  1834. defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
  1835. regclass, ImmType, Imm, ImmTy,
  1836. !listconcat(Preds,[hasAtomScope])>;
  1837. defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
  1838. regclass, ImmType, Imm, ImmTy,
  1839. !listconcat(Preds,[hasAtomScope])>;
  1840. }
  1841. // atom.add
  1842. multiclass ATOM2_add_impl<string OpStr> {
  1843. defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  1844. defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  1845. defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
  1846. defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
  1847. []>;
  1848. defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
  1849. [hasAtomAddF64]>;
  1850. }
  1851. // atom.{and,or,xor}
  1852. multiclass ATOM2_bitwise_impl<string OpStr> {
  1853. defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  1854. defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
  1855. [hasAtomBitwise64]>;
  1856. }
  1857. // atom.exch
  1858. multiclass ATOM2_exch_impl<string OpStr> {
  1859. defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  1860. defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
  1861. }
  1862. // atom.{min,max}
  1863. multiclass ATOM2_minmax_impl<string OpStr> {
  1864. defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  1865. defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  1866. defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
  1867. [hasAtomMinMax64]>;
  1868. defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
  1869. [hasAtomMinMax64]>;
  1870. }
  1871. // atom.{inc,dec}
  1872. multiclass ATOM2_incdec_impl<string OpStr> {
  1873. defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  1874. }
  1875. // atom.cas
  1876. multiclass ATOM3_cas_impl<string OpStr> {
  1877. defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  1878. defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
  1879. }
  1880. defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
  1881. defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
  1882. defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
  1883. defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
  1884. defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
  1885. defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
  1886. defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
  1887. defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
  1888. defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
  1889. defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
  1890. //-----------------------------------
  1891. // Support for ldu on sm_20 or later
  1892. //-----------------------------------
  1893. // Don't annotate ldu instructions as mayLoad, as they load from memory that is
  1894. // read-only in a kernel.
  1895. // Scalar
  1896. multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  1897. def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
  1898. !strconcat("ldu.global.", TyStr),
  1899. []>, Requires<[hasLDU]>;
  1900. def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
  1901. !strconcat("ldu.global.", TyStr),
  1902. []>, Requires<[hasLDU]>;
  1903. def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
  1904. !strconcat("ldu.global.", TyStr),
  1905. []>, Requires<[hasLDU]>;
  1906. def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
  1907. !strconcat("ldu.global.", TyStr),
  1908. []>, Requires<[hasLDU]>;
  1909. def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
  1910. !strconcat("ldu.global.", TyStr),
  1911. []>, Requires<[hasLDU]>;
  1912. }
  1913. defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
  1914. defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
  1915. defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
  1916. defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
  1917. defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
  1918. defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
  1919. defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
  1920. defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
  1921. defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
  1922. defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
  1923. // vector
  1924. // Elementized vector ldu
  1925. multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  1926. def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1927. (ins Int32Regs:$src),
  1928. !strconcat("ldu.global.", TyStr), []>;
  1929. def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1930. (ins Int64Regs:$src),
  1931. !strconcat("ldu.global.", TyStr), []>;
  1932. def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1933. (ins MEMri:$src),
  1934. !strconcat("ldu.global.", TyStr), []>;
  1935. def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1936. (ins MEMri64:$src),
  1937. !strconcat("ldu.global.", TyStr), []>;
  1938. def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  1939. (ins imemAny:$src),
  1940. !strconcat("ldu.global.", TyStr), []>;
  1941. }
  1942. multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  1943. def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1944. regclass:$dst4), (ins Int32Regs:$src),
  1945. !strconcat("ldu.global.", TyStr), []>;
  1946. def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1947. regclass:$dst4), (ins Int64Regs:$src),
  1948. !strconcat("ldu.global.", TyStr), []>;
  1949. def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1950. regclass:$dst4), (ins MEMri:$src),
  1951. !strconcat("ldu.global.", TyStr), []>;
  1952. def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1953. regclass:$dst4), (ins MEMri64:$src),
  1954. !strconcat("ldu.global.", TyStr), []>;
  1955. def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  1956. regclass:$dst4), (ins imemAny:$src),
  1957. !strconcat("ldu.global.", TyStr), []>;
  1958. }
  1959. defm INT_PTX_LDU_G_v2i8_ELE
  1960. : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
  1961. defm INT_PTX_LDU_G_v2i16_ELE
  1962. : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
  1963. defm INT_PTX_LDU_G_v2i32_ELE
  1964. : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
  1965. defm INT_PTX_LDU_G_v2f16_ELE
  1966. : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
  1967. defm INT_PTX_LDU_G_v2f16x2_ELE
  1968. : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
  1969. defm INT_PTX_LDU_G_v2f32_ELE
  1970. : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
  1971. defm INT_PTX_LDU_G_v2i64_ELE
  1972. : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
  1973. defm INT_PTX_LDU_G_v2f64_ELE
  1974. : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
  1975. defm INT_PTX_LDU_G_v4i8_ELE
  1976. : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
  1977. defm INT_PTX_LDU_G_v4i16_ELE
  1978. : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1979. Int16Regs>;
  1980. defm INT_PTX_LDU_G_v4i32_ELE
  1981. : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1982. Int32Regs>;
  1983. defm INT_PTX_LDU_G_v4f16_ELE
  1984. : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1985. Float16Regs>;
  1986. defm INT_PTX_LDU_G_v4f16x2_ELE
  1987. : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1988. Float16x2Regs>;
  1989. defm INT_PTX_LDU_G_v4f32_ELE
  1990. : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
  1991. Float32Regs>;
  1992. //-----------------------------------
  1993. // Support for ldg on sm_35 or later
  1994. //-----------------------------------
  1995. // Don't annotate ld.global.nc as mayLoad, because these loads go through the
  1996. // non-coherent texture cache, and therefore the values read must be read-only
  1997. // during the lifetime of the kernel.
  1998. multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  1999. def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
  2000. !strconcat("ld.global.nc.", TyStr),
  2001. []>, Requires<[hasLDG]>;
  2002. def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
  2003. !strconcat("ld.global.nc.", TyStr),
  2004. []>, Requires<[hasLDG]>;
  2005. def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
  2006. !strconcat("ld.global.nc.", TyStr),
  2007. []>, Requires<[hasLDG]>;
  2008. def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
  2009. !strconcat("ld.global.nc.", TyStr),
  2010. []>, Requires<[hasLDG]>;
  2011. def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
  2012. !strconcat("ld.global.nc.", TyStr),
  2013. []>, Requires<[hasLDG]>;
  2014. }
  2015. defm INT_PTX_LDG_GLOBAL_i8
  2016. : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
  2017. defm INT_PTX_LDG_GLOBAL_i16
  2018. : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
  2019. defm INT_PTX_LDG_GLOBAL_i32
  2020. : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
  2021. defm INT_PTX_LDG_GLOBAL_i64
  2022. : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
  2023. defm INT_PTX_LDG_GLOBAL_f16
  2024. : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
  2025. defm INT_PTX_LDG_GLOBAL_f16x2
  2026. : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
  2027. defm INT_PTX_LDG_GLOBAL_f32
  2028. : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
  2029. defm INT_PTX_LDG_GLOBAL_f64
  2030. : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
  2031. defm INT_PTX_LDG_GLOBAL_p32
  2032. : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
  2033. defm INT_PTX_LDG_GLOBAL_p64
  2034. : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
  2035. // vector
  2036. // Elementized vector ldg
  2037. multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  2038. def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  2039. (ins Int32Regs:$src),
  2040. !strconcat("ld.global.nc.", TyStr), []>;
  2041. def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  2042. (ins Int64Regs:$src),
  2043. !strconcat("ld.global.nc.", TyStr), []>;
  2044. def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  2045. (ins MEMri:$src),
  2046. !strconcat("ld.global.nc.", TyStr), []>;
  2047. def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  2048. (ins MEMri64:$src),
  2049. !strconcat("ld.global.nc.", TyStr), []>;
  2050. def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
  2051. (ins imemAny:$src),
  2052. !strconcat("ld.global.nc.", TyStr), []>;
  2053. }
  2054. multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  2055. def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  2056. regclass:$dst4), (ins Int32Regs:$src),
  2057. !strconcat("ld.global.nc.", TyStr), []>;
  2058. def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  2059. regclass:$dst4), (ins Int64Regs:$src),
  2060. !strconcat("ld.global.nc.", TyStr), []>;
  2061. def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  2062. regclass:$dst4), (ins MEMri:$src),
  2063. !strconcat("ld.global.nc.", TyStr), []>;
  2064. def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  2065. regclass:$dst4), (ins MEMri64:$src),
  2066. !strconcat("ld.global.nc.", TyStr), []>;
  2067. def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
  2068. regclass:$dst4), (ins imemAny:$src),
  2069. !strconcat("ld.global.nc.", TyStr), []>;
  2070. }
  2071. // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
  2072. defm INT_PTX_LDG_G_v2i8_ELE
  2073. : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
  2074. defm INT_PTX_LDG_G_v2i16_ELE
  2075. : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
  2076. defm INT_PTX_LDG_G_v2i32_ELE
  2077. : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
  2078. defm INT_PTX_LDG_G_v2f16_ELE
  2079. : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
  2080. defm INT_PTX_LDG_G_v2f16x2_ELE
  2081. : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
  2082. defm INT_PTX_LDG_G_v2f32_ELE
  2083. : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
  2084. defm INT_PTX_LDG_G_v2i64_ELE
  2085. : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
  2086. defm INT_PTX_LDG_G_v2f64_ELE
  2087. : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
  2088. defm INT_PTX_LDG_G_v4i8_ELE
  2089. : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
  2090. defm INT_PTX_LDG_G_v4i16_ELE
  2091. : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
  2092. defm INT_PTX_LDG_G_v4i32_ELE
  2093. : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
  2094. defm INT_PTX_LDG_G_v4f16_ELE
  2095. : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
  2096. defm INT_PTX_LDG_G_v4f16x2_ELE
  2097. : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
  2098. defm INT_PTX_LDG_G_v4f32_ELE
  2099. : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
  2100. multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  2101. def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
  2102. !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
  2103. [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  2104. def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
  2105. !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
  2106. [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  2107. def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
  2108. "{{ .reg .b64 %tmp;\n\t"
  2109. #" cvt.u64.u32 \t%tmp, $src;\n\t"
  2110. #" cvta." # Str # ".u64 \t$result, %tmp; }}",
  2111. [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
  2112. Requires<[useShortPtr]>;
  2113. }
  2114. multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  2115. def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
  2116. !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
  2117. [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  2118. def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
  2119. !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
  2120. [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  2121. def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
  2122. "{{ .reg .b64 %tmp;\n\t"
  2123. #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
  2124. #" cvt.u32.u64 \t$result, %tmp; }}",
  2125. [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
  2126. Requires<[useShortPtr]>;
  2127. }
  2128. defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
  2129. defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
  2130. defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
  2131. defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
  2132. defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
  2133. defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
  2134. defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
  2135. defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
  2136. // nvvm.ptr.gen.to.param
  2137. def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
  2138. (ins Int32Regs:$src),
  2139. "mov.u32 \t$result, $src;",
  2140. [(set Int32Regs:$result,
  2141. (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
  2142. def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
  2143. (ins Int64Regs:$src),
  2144. "mov.u64 \t$result, $src;",
  2145. [(set Int64Regs:$result,
  2146. (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
  2147. // nvvm.move intrinsics
  2148. def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
  2149. "mov.b16 \t$r, $s;",
  2150. [(set Int16Regs:$r,
  2151. (int_nvvm_move_i16 Int16Regs:$s))]>;
  2152. def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
  2153. "mov.b32 \t$r, $s;",
  2154. [(set Int32Regs:$r,
  2155. (int_nvvm_move_i32 Int32Regs:$s))]>;
  2156. def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
  2157. "mov.b64 \t$r, $s;",
  2158. [(set Int64Regs:$r,
  2159. (int_nvvm_move_i64 Int64Regs:$s))]>;
  2160. def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
  2161. "mov.f32 \t$r, $s;",
  2162. [(set Float32Regs:$r,
  2163. (int_nvvm_move_float Float32Regs:$s))]>;
  2164. def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
  2165. "mov.f64 \t$r, $s;",
  2166. [(set Float64Regs:$r,
  2167. (int_nvvm_move_double Float64Regs:$s))]>;
  2168. def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
  2169. "mov.u32 \t$r, $s;",
  2170. [(set Int32Regs:$r,
  2171. (int_nvvm_move_ptr Int32Regs:$s))]>;
  2172. def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
  2173. "mov.u64 \t$r, $s;",
  2174. [(set Int64Regs:$r,
  2175. (int_nvvm_move_ptr Int64Regs:$s))]>;
  2176. // @TODO: Are these actually needed, or will we always just see symbols
  2177. // copied to registers first?
  2178. /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
  2179. "mov.u32 \t$r, $s;",
  2180. [(set Int32Regs:$r,
  2181. (int_nvvm_move_ptr texternalsym:$s))]>;
  2182. def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
  2183. "mov.u64 \t$r, $s;",
  2184. [(set Int64Regs:$r,
  2185. (int_nvvm_move_ptr texternalsym:$s))]>;*/
  2186. // MoveParam %r1, param
  2187. // ptr_local_to_gen %r2, %r1
  2188. // ptr_gen_to_local %r3, %r2
  2189. // ->
  2190. // mov %r1, param
  2191. // @TODO: Revisit this. There is a type
  2192. // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
  2193. // instructions are not currently defined. However, we can use the ptr
  2194. // variants and the asm printer will do the right thing.
  2195. def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
  2196. (MoveParam texternalsym:$src)))),
  2197. (nvvm_move_ptr64 texternalsym:$src)>;
  2198. def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
  2199. (MoveParam texternalsym:$src)))),
  2200. (nvvm_move_ptr32 texternalsym:$src)>;
  2201. def texsurf_handles
  2202. : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
  2203. "mov.u64 \t$result, $src;", []>;
  2204. //-----------------------------------
  2205. // Compiler Error Warn
  2206. // - Just ignore them in codegen
  2207. //-----------------------------------
  2208. def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
  2209. "// llvm.nvvm.compiler.warn()",
  2210. [(int_nvvm_compiler_warn Int32Regs:$a)]>;
  2211. def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
  2212. "// llvm.nvvm.compiler.warn()",
  2213. [(int_nvvm_compiler_warn Int64Regs:$a)]>;
  2214. def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
  2215. "// llvm.nvvm.compiler.error()",
  2216. [(int_nvvm_compiler_error Int32Regs:$a)]>;
  2217. def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
  2218. "// llvm.nvvm.compiler.error()",
  2219. [(int_nvvm_compiler_error Int64Regs:$a)]>;
  2220. // isspacep
  2221. def ISSPACEP_CONST_32
  2222. : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
  2223. "isspacep.const \t$d, $a;",
  2224. [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
  2225. Requires<[hasPTX31]>;
  2226. def ISSPACEP_CONST_64
  2227. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  2228. "isspacep.const \t$d, $a;",
  2229. [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
  2230. Requires<[hasPTX31]>;
  2231. def ISSPACEP_GLOBAL_32
  2232. : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
  2233. "isspacep.global \t$d, $a;",
  2234. [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
  2235. def ISSPACEP_GLOBAL_64
  2236. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  2237. "isspacep.global \t$d, $a;",
  2238. [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
  2239. def ISSPACEP_LOCAL_32
  2240. : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
  2241. "isspacep.local \t$d, $a;",
  2242. [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
  2243. def ISSPACEP_LOCAL_64
  2244. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  2245. "isspacep.local \t$d, $a;",
  2246. [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
  2247. def ISSPACEP_SHARED_32
  2248. : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
  2249. "isspacep.shared \t$d, $a;",
  2250. [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
  2251. def ISSPACEP_SHARED_64
  2252. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  2253. "isspacep.shared \t$d, $a;",
  2254. [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
  2255. // Special register reads
  2256. def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
  2257. (ins SpecialRegs:$r),
  2258. "mov.b32 \t$d, $r;", []>;
  2259. def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
  2260. def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
  2261. def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
  2262. def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
  2263. def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
  2264. def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
  2265. def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
  2266. def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
  2267. def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
  2268. def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
  2269. def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
  2270. def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
  2271. def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
  2272. def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
  2273. def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
  2274. def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
  2275. def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
  2276. def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
  2277. def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
  2278. def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
  2279. def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
  2280. def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
  2281. def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
  2282. def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
  2283. def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
  2284. def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
  2285. def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
  2286. def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
  2287. def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
  2288. def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
  2289. def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
  2290. def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
  2291. // rotate builtin support
  2292. def ROTATE_B32_HW_IMM
  2293. : NVPTXInst<(outs Int32Regs:$dst),
  2294. (ins Int32Regs:$src, i32imm:$amt),
  2295. "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
  2296. [(set Int32Regs:$dst,
  2297. (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
  2298. Requires<[hasHWROT32]> ;
  2299. def ROTATE_B32_HW_REG
  2300. : NVPTXInst<(outs Int32Regs:$dst),
  2301. (ins Int32Regs:$src, Int32Regs:$amt),
  2302. "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
  2303. [(set Int32Regs:$dst,
  2304. (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
  2305. Requires<[hasHWROT32]> ;
  2306. def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
  2307. (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
  2308. Requires<[noHWROT32]> ;
  2309. def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
  2310. (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
  2311. Requires<[noHWROT32]> ;
  2312. let hasSideEffects = false in {
  2313. def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
  2314. !strconcat("{{\n\t",
  2315. ".reg .b32 %dummy;\n\t",
  2316. "mov.b64 \t{$dst,%dummy}, $src;\n\t",
  2317. "}}"),
  2318. []> ;
  2319. def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
  2320. !strconcat("{{\n\t",
  2321. ".reg .b32 %dummy;\n\t",
  2322. "mov.b64 \t{%dummy,$dst}, $src;\n\t",
  2323. "}}"),
  2324. []> ;
  2325. }
  2326. let hasSideEffects = false in {
  2327. def PACK_TWO_INT32
  2328. : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
  2329. "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
  2330. }
  2331. def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
  2332. (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
  2333. (GET_LO_INT64 Int64Regs:$src))> ;
  // 32-bit funnel shifts in wrap mode (needs sm_32 or newer). An out-of-range
  // amount does not trap, which is why these are marked side-effect free.
  // All four variants are gated on hasHWROT32 and carry no selection pattern;
  // the suffix says whether $amt is an immediate (IMM) or a register (REG).
  let hasSideEffects = false, Predicates = [hasHWROT32] in {
    // Left funnel shift.
    def SHF_L_WRAP_B32_IMM
        : NVPTXInst<(outs Int32Regs:$dst),
                    (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                    "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
                    []>;
    def SHF_L_WRAP_B32_REG
        : NVPTXInst<(outs Int32Regs:$dst),
                    (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                    "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
                    []>;

    // Right funnel shift.
    def SHF_R_WRAP_B32_IMM
        : NVPTXInst<(outs Int32Regs:$dst),
                    (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                    "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
                    []>;
    def SHF_R_WRAP_B32_REG
        : NVPTXInst<(outs Int32Regs:$dst),
                    (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                    "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
                    []>;
  }
  // 64-bit rotates built from 32-bit funnel shifts; used when hasHWROT32
  // holds. The source is split into halves, each result half is produced by
  // one funnel shift over the two halves, and the halves are packed again.
  // NOTE(review): per the PTX ISA, shf.*.wrap uses the amount modulo 32, so
  // these expansions look correct only for amounts below 32 -- confirm
  // whether larger amounts can reach these patterns.
  let Predicates = [hasHWROT32] in {
    // Rotate left, immediate amount.
    def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
              (PACK_TWO_INT32
                  (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                      (GET_LO_INT64 Int64Regs:$src),
                                      imm:$amt),
                  (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                      (GET_HI_INT64 Int64Regs:$src),
                                      imm:$amt))>;

    // Rotate left, register amount.
    def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
              (PACK_TWO_INT32
                  (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                      (GET_LO_INT64 Int64Regs:$src),
                                      Int32Regs:$amt),
                  (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                      (GET_HI_INT64 Int64Regs:$src),
                                      Int32Regs:$amt))>;

    // Rotate right, immediate amount.
    def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
              (PACK_TWO_INT32
                  (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                      (GET_HI_INT64 Int64Regs:$src),
                                      imm:$amt),
                  (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                      (GET_LO_INT64 Int64Regs:$src),
                                      imm:$amt))>;

    // Rotate right, register amount.
    def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
              (PACK_TWO_INT32
                  (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                      (GET_HI_INT64 Int64Regs:$src),
                                      Int32Regs:$amt),
                  (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                      (GET_LO_INT64 Int64Regs:$src),
                                      Int32Regs:$amt))>;
  }
  // Software 64-bit rotates, used when the noHWROT32 predicate holds.
  // The immediate forms both map onto ROT64imm_sw; rotate-right passes the
  // amount and its SUB_FRM_64 transform in the opposite order from
  // rotate-left. (SUB_FRM_64 is defined elsewhere; presumably 64 - amt --
  // confirm.)
  let Predicates = [noHWROT32] in {
    // Rotate left.
    def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
              (ROT64imm_sw Int64Regs:$src,
                           imm:$amt,
                           (SUB_FRM_64 node:$amt))>;
    def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
              (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>;

    // Rotate right.
    def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
              (ROT64imm_sw Int64Regs:$src,
                           (SUB_FRM_64 node:$amt),
                           imm:$amt)>;
    def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
              (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>;
  }
  2400. //-----------------------------------
  2401. // Texture Intrinsics
  2402. //-----------------------------------
  // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
  // also be defined in NVPTXReplaceImageHandles.cpp.
  2405. // texmode_independent
  2406. let IsTex = true, IsTexModeUnified = false in {
  2407. // Texture fetch instructions using handles
  // 1D texture fetch. Produces four results of class outtype ($r, $g, $b,
  // $a) from the handle operands in texsamp ($t and $s) followed by a single
  // coordinate $x of class intype. No selection pattern is attached.
  class TEX_1D_base<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp, (ins intype:$x)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
    def _RR
        : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI
        : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    def _IR
        : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    def _II
        : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations, named <result type>_<coordinate type>.
  defm TEX_1D_F32_S32
      : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_1D_F32_F32
      : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_1D_S32_S32
      : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_1D_S32_F32
      : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_1D_U32_S32
      : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_1D_U32_F32
      : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 1D texture fetch with a trailing $lod operand. Takes the handle operands
  // in texsamp ($t and $s), the coordinate $x and $lod (both of class intype)
  // and produces four outtype results. No selection pattern is attached.
  class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp, (ins intype:$x, intype:$lod)),
                  !strconcat(
                      inst,
                      " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
    def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_1D_F32_F32_LEVEL
      : TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_1D_S32_F32_LEVEL
      : TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_1D_U32_F32_LEVEL
      : TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 1D texture fetch with two extra operands, $gradx and $grady, each printed
  // as its own one-element brace group after the address. Inputs are the
  // handle operands in texsamp ($t and $s) followed by $x, $gradx and $grady
  // (all of class intype). No selection pattern is attached.
  class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins intype:$x, intype:$gradx, intype:$grady)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],",
                             " \\{$gradx\\}, \\{$grady\\};"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype> {
    def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
                               (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
                               (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
                               (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_1D_GRAD_base<inst, outtype, intype,
                               (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_1D_F32_F32_GRAD
      : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_1D_S32_F32_GRAD
      : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_1D_U32_F32_GRAD
      : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 1D array texture fetch. After the handle operands in texsamp ($t and $s)
  // come an Int32Regs operand $l (presumably the array layer index --
  // confirm) and the coordinate $x of class intype; both are printed in the
  // same brace group. No selection pattern is attached.
  class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp, (ins Int32Regs:$l, intype:$x)),
                  !strconcat(
                      inst,
                      " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
    def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations, named <result type>_<coordinate type>.
  defm TEX_1D_ARRAY_F32_F32
      : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_1D_ARRAY_F32_S32
      : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_1D_ARRAY_S32_S32
      : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_1D_ARRAY_S32_F32
      : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_1D_ARRAY_U32_S32
      : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_1D_ARRAY_U32_F32
      : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 1D array texture fetch with a trailing $lod operand. Inputs are the
  // handle operands in texsamp ($t and $s), an Int32Regs operand $l, the
  // coordinate $x and $lod (the last two of class intype).
  // No selection pattern is attached.
  class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$l, $x\\}], $lod;"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
    def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_1D_ARRAY_F32_F32_LEVEL
      : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
                           Float32Regs, Float32Regs>;
  defm TEX_1D_ARRAY_S32_F32_LEVEL
      : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
                           Int32Regs, Float32Regs>;
  defm TEX_1D_ARRAY_U32_F32_LEVEL
      : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
                           Int32Regs, Float32Regs>;
  // 1D array texture fetch with $gradx and $grady operands, each printed as
  // its own one-element brace group. Inputs are the handle operands in
  // texsamp ($t and $s), an Int32Regs operand $l, then $x, $gradx and $grady
  // (all of class intype). No selection pattern is attached.
  class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins Int32Regs:$l, intype:$x,
                            intype:$gradx, intype:$grady)),
                  !strconcat(
                      inst,
                      " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],",
                      " \\{$gradx\\}, \\{$grady\\};"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
    def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_1D_ARRAY_F32_F32_GRAD
      : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
                          Float32Regs, Float32Regs>;
  defm TEX_1D_ARRAY_S32_F32_GRAD
      : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
                          Int32Regs, Float32Regs>;
  defm TEX_1D_ARRAY_U32_F32_GRAD
      : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
                          Int32Regs, Float32Regs>;
  // 2D texture fetch. Produces four results of class outtype from the handle
  // operands in texsamp ($t and $s) and two coordinates, $x and $y, of class
  // intype. No selection pattern is attached.
  class TEX_2D_base<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp, (ins intype:$x, intype:$y)),
                  !strconcat(
                      inst,
                      " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
    def _RR
        : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI
        : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    def _IR
        : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    def _II
        : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations, named <result type>_<coordinate type>.
  defm TEX_2D_F32_F32
      : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_2D_F32_S32
      : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_2D_S32_S32
      : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_2D_S32_F32
      : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_2D_U32_S32
      : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_2D_U32_F32
      : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 2D texture fetch with a trailing $lod operand. Inputs are the handle
  // operands in texsamp ($t and $s) followed by $x, $y and $lod (all of class
  // intype). No selection pattern is attached.
  class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$x, $y\\}], $lod;"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
    def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_2D_F32_F32_LEVEL
      : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_2D_S32_F32_LEVEL
      : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_2D_U32_F32_LEVEL
      : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 2D texture fetch with two two-element operand groups, {$gradx0, $gradx1}
  // and {$grady0, $grady1}, printed after the address. Inputs are the handle
  // operands in texsamp ($t and $s) followed by $x, $y and the four gradient
  // operands (all of class intype). No selection pattern is attached.
  class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins intype:$x, intype:$y,
                            intype:$gradx0, intype:$gradx1,
                            intype:$grady0, intype:$grady1)),
                  !strconcat(
                      inst,
                      " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],",
                      " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype> {
    def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
                               (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
                               (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
                               (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_2D_GRAD_base<inst, outtype, intype,
                               (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_2D_F32_F32_GRAD
      : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_2D_S32_F32_GRAD
      : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_2D_U32_F32_GRAD
      : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 2D array texture fetch. Inputs are the handle operands in texsamp ($t and
  // $s), an Int32Regs operand $l, and coordinates $x and $y of class intype.
  // The printed coordinate group has four slots and repeats $y in the last
  // one (presumably padding for a four-element vector required by PTX --
  // confirm). No selection pattern is attached.
  class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$l, $x, $y, $y\\}];"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
    def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations, named <result type>_<coordinate type>.
  defm TEX_2D_ARRAY_F32_F32
      : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_F32_S32
      : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_2D_ARRAY_S32_S32
      : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_2D_ARRAY_S32_F32
      : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_U32_S32
      : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_2D_ARRAY_U32_F32
      : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 2D array texture fetch with a trailing $lod operand. Inputs are the
  // handle operands in texsamp ($t and $s), an Int32Regs operand $l, then $x,
  // $y and $lod of class intype. The printed coordinate group repeats $y in
  // its fourth slot. No selection pattern is attached.
  class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
    def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_2D_ARRAY_F32_F32_LEVEL
      : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
                           Float32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_S32_F32_LEVEL
      : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
                           Int32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_U32_F32_LEVEL
      : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
                           Int32Regs, Float32Regs>;
  // 2D array texture fetch with two two-element operand groups,
  // {$gradx0, $gradx1} and {$grady0, $grady1}. Inputs are the handle operands
  // in texsamp ($t and $s), an Int32Regs operand $l, then $x, $y and the four
  // gradient operands of class intype. The printed coordinate group repeats
  // $y in its fourth slot. No selection pattern is attached.
  class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins Int32Regs:$l, intype:$x, intype:$y,
                            intype:$gradx0, intype:$gradx1,
                            intype:$grady0, intype:$grady1)),
                  !strconcat(
                      inst,
                      " \t\\{$r, $g, $b, $a\\},",
                      " [$t, $s, \\{$l, $x, $y, $y\\}],",
                      " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
    def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_2D_ARRAY_F32_F32_GRAD
      : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
                          Float32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_S32_F32_GRAD
      : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
                          Int32Regs, Float32Regs>;
  defm TEX_2D_ARRAY_U32_F32_GRAD
      : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
                          Int32Regs, Float32Regs>;
  // 3D texture fetch. Inputs are the handle operands in texsamp ($t and $s)
  // and three coordinates, $x, $y and $z, of class intype. The printed
  // coordinate group has four slots and repeats $z in the last one
  // (presumably padding for a four-element vector required by PTX --
  // confirm). No selection pattern is attached.
  class TEX_3D_base<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$x, $y, $z, $z\\}];"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
    def _RR
        : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI
        : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
    def _IR
        : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
    def _II
        : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations, named <result type>_<coordinate type>.
  defm TEX_3D_F32_F32
      : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_3D_F32_S32
      : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_3D_S32_S32
      : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_3D_S32_F32
      : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_3D_U32_S32
      : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_3D_U32_F32
      : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 3D texture fetch with a trailing $lod operand. Inputs are the handle
  // operands in texsamp ($t and $s) followed by $x, $y, $z and $lod (all of
  // class intype). The printed coordinate group repeats $z in its fourth
  // slot. No selection pattern is attached.
  class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
    def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_3D_F32_F32_LEVEL
      : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_3D_S32_F32_LEVEL
      : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_3D_U32_F32_LEVEL
      : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // 3D texture fetch with two gradient operand groups. Inputs are the handle
  // operands in texsamp ($t and $s), the coordinates $x, $y, $z, and the six
  // operands $gradx0..2 and $grady0..2 (all of class intype). Every printed
  // brace group after the result list has four slots; the fourth repeats the
  // third operand ($z, $gradx2, $grady2). No selection pattern is attached.
  class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins intype:$x, intype:$y, intype:$z,
                            intype:$gradx0, intype:$gradx1, intype:$gradx2,
                            intype:$grady0, intype:$grady1, intype:$grady2)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$x, $y, $z, $z\\}],",
                             " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                             " \\{$grady0, $grady1, $grady2, $grady2\\};"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                         NVPTXRegClass intype> {
    def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
                               (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
                               (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
                               (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_3D_GRAD_base<inst, outtype, intype,
                               (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_3D_F32_F32_GRAD
      : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_3D_S32_F32_GRAD
      : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_3D_U32_F32_GRAD
      : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Cube texture fetch. Inputs are the handle operands in texsamp ($t and $s)
  // and three coordinates, $x, $y and $z, of class intype. The printed
  // coordinate group has four slots and repeats $z in the last one.
  // No selection pattern is attached.
  class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
                      NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$x, $y, $z, $z\\}];"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_CUBE<string inst, NVPTXRegClass outtype,
                      NVPTXRegClass intype> {
    def _RR : TEX_CUBE_base<inst, outtype, intype,
                            (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_CUBE_base<inst, outtype, intype,
                            (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_CUBE_base<inst, outtype, intype,
                            (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_CUBE_base<inst, outtype, intype,
                            (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_CUBE_F32_F32
      : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_CUBE_S32_F32
      : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_CUBE_U32_F32
      : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
  // Cube texture fetch with a trailing $lod operand. Inputs are the handle
  // operands in texsamp ($t and $s) followed by $x, $y, $z and $lod (all of
  // class intype). The printed coordinate group repeats $z in its fourth
  // slot. No selection pattern is attached.
  class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                  (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                  (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                  (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                  (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_CUBE_F32_F32_LEVEL
      : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
                       Float32Regs, Float32Regs>;
  defm TEX_CUBE_S32_F32_LEVEL
      : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
                       Int32Regs, Float32Regs>;
  defm TEX_CUBE_U32_F32_LEVEL
      : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
                       Int32Regs, Float32Regs>;
  // Cube array texture fetch. Inputs are the handle operands in texsamp ($t
  // and $s), an Int32Regs operand $l, and three coordinates $x, $y, $z of
  // class intype; the four are printed together as one brace group.
  // No selection pattern is attached.
  class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$l, $x, $y, $z\\}];"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                  (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                  (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                  (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                  (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_CUBE_ARRAY_F32_F32
      : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_CUBE_ARRAY_S32_F32
      : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_CUBE_ARRAY_U32_F32
      : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
  // Cube array texture fetch with a trailing $lod operand. Inputs are the
  // handle operands in texsamp ($t and $s), an Int32Regs operand $l, then $x,
  // $y, $z and $lod of class intype. No selection pattern is attached.
  class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(texsamp,
                       (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                            intype:$lod)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                        (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                        (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                        (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                        (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_CUBE_ARRAY_F32_F32_LEVEL
      : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                             Float32Regs, Float32Regs>;
  defm TEX_CUBE_ARRAY_S32_F32_LEVEL
      : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                             Int32Regs, Float32Regs>;
  defm TEX_CUBE_ARRAY_U32_F32_LEVEL
      : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                             Int32Regs, Float32Regs>;
  // 2D tld4 instruction. Produces four results of class outtype ($v0..$v3)
  // from the handle operands in texsamp ($t and $s) and two coordinates, $x
  // and $y, of class intype. No selection pattern is attached.
  class TLD4_2D_base<string inst, NVPTXRegClass outtype,
                     NVPTXRegClass intype, dag texsamp>
      : NVPTXInst<(outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
                  !con(texsamp, (ins intype:$x, intype:$y)),
                  !strconcat(
                      inst,
                      " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];"),
                  []>;

  // One instruction per operand-kind combination for ($t, $s):
  // R = Int64Regs register, I = i64imm immediate.
  multiclass TLD4_2D<string inst, NVPTXRegClass outtype,
                     NVPTXRegClass intype> {
    def _RR : TLD4_2D_base<inst, outtype, intype,
                           (ins Int64Regs:$t, Int64Regs:$s)>;
    def _RI : TLD4_2D_base<inst, outtype, intype,
                           (ins Int64Regs:$t, i64imm:$s)>;
    def _IR : TLD4_2D_base<inst, outtype, intype,
                           (ins i64imm:$t, Int64Regs:$s)>;
    def _II : TLD4_2D_base<inst, outtype, intype,
                           (ins i64imm:$t, i64imm:$s)>;
  }

  // Instantiations. The letter after TLD4_ mirrors the .r/.g/.b/.a modifier
  // in the mnemonic; coordinates are always f32.

  // f32 results.
  defm TLD4_R_2D_F32_F32
      : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_G_2D_F32_F32
      : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_B_2D_F32_F32
      : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_A_2D_F32_F32
      : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

  // s32 results.
  defm TLD4_R_2D_S32_F32
      : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_G_2D_S32_F32
      : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_B_2D_S32_F32
      : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_A_2D_S32_F32
      : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

  // u32 results.
  defm TLD4_R_2D_U32_F32
      : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_G_2D_U32_F32
      : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_B_2D_U32_F32
      : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_A_2D_U32_F32
      : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  2944. }
  2945. // texmode_unified
  2946. let IsTex = true, IsTexModeUnified = true in {
  2947. // Texture fetch instructions using handles
  // Unified-mode 1D texture fetch. Only a single handle operand ($t, supplied
  // through the tex dag) precedes the coordinate $x of class intype; no $s
  // operand is printed. Produces four results of class outtype.
  // No selection pattern is attached.
  class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins intype:$x)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];"),
                  []>;

  // Two instructions: _R takes $t as an Int64Regs register, _I as an i64imm
  // immediate.
  multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    def _R
        : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
    def _I
        : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
  }

  // Instantiations, named <result type>_<coordinate type>.
  defm TEX_UNIFIED_1D_F32_S32
      : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_F32_F32
      : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_S32_S32
      : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_S32_F32
      : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_U32_S32
      : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_U32_F32
      : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Unified-mode 1D texture fetch with a trailing $lod operand. Inputs are
  // the single handle operand $t (supplied through the tex dag) followed by
  // $x and $lod of class intype. No selection pattern is attached.
  class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins intype:$x, intype:$lod)),
                  !strconcat(
                      inst,
                      " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;"),
                  []>;

  // Two instructions: _R takes $t as an Int64Regs register, _I as an i64imm
  // immediate.
  multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_UNIFIED_1D_F32_F32_LEVEL
      : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32",
                             Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_S32_F32_LEVEL
      : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32",
                             Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_U32_F32_LEVEL
      : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32",
                             Int32Regs, Float32Regs>;
  // Unified-mode 1D texture fetch with $gradx and $grady operands, each
  // printed as its own one-element brace group. Inputs are the single handle
  // operand $t (supplied through the tex dag) followed by $x, $gradx and
  // $grady of class intype. No selection pattern is attached.
  class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, dag tex>
      : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                  !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
                  !strconcat(inst,
                             " \t\\{$r, $g, $b, $a\\},",
                             " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};"),
                  []>;

  // Two instructions: _R takes $t as an Int64Regs register, _I as an i64imm
  // immediate.
  multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
  }

  // Instantiations; only f32 coordinates are defined for this form.
  defm TEX_UNIFIED_1D_F32_F32_GRAD
      : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32",
                            Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_S32_F32_GRAD
      : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32",
                            Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_U32_F32_GRAD
      : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32",
                            Int32Regs, Float32Regs>;
  // Instruction shape for "tex.a1d": four outputs of `outtype`; the inputs
  // are the handle dag `tex`, an Int32Regs operand $l and coordinate $x.
  // $l is always Int32Regs regardless of `intype`.
  class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l, intype:$x)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }

  // "tex.a1d" instantiations: one per (result type, coordinate type) pair.
  defm TEX_UNIFIED_1D_ARRAY_F32_S32 : TEX_UNIFIED_1D_ARRAY<
      "tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_F32_F32 : TEX_UNIFIED_1D_ARRAY<
      "tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_S32_S32 : TEX_UNIFIED_1D_ARRAY<
      "tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_S32_F32 : TEX_UNIFIED_1D_ARRAY<
      "tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_U32_S32 : TEX_UNIFIED_1D_ARRAY<
      "tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_U32_F32 : TEX_UNIFIED_1D_ARRAY<
      "tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.level.a1d": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, an Int32Regs operand $l, coordinate $x
  // and $lod.
  class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .level form.
  defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
      "tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
      "tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
      "tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.grad.a1d": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, an Int32Regs operand $l, coordinate $x
  // and the gradient operands $gradx / $grady.
  class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\},",
                   " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                            (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                            (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .grad form.
  defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
      "tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
      "tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
      "tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.2d": four outputs of `outtype`; the inputs
  // are the handle dag `tex` followed by coordinates $x and $y.
  class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
  }

  // "tex.2d" instantiations: one per (result type, coordinate type) pair.
  defm TEX_UNIFIED_2D_F32_S32 : TEX_UNIFIED_2D<
      "tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_F32_F32 : TEX_UNIFIED_2D<
      "tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_S32_S32 : TEX_UNIFIED_2D<
      "tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_S32_F32 : TEX_UNIFIED_2D<
      "tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_U32_S32 : TEX_UNIFIED_2D<
      "tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_U32_F32 : TEX_UNIFIED_2D<
      "tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.level.2d": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, coordinates $x, $y and $lod.
  class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .level form.
  defm TEX_UNIFIED_2D_F32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
      "tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_S32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
      "tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_U32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
      "tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.grad.2d": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, coordinates $x, $y and two
  // two-component gradients ($gradx0/1 and $grady0/1).
  class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins intype:$x, intype:$y,
                  intype:$gradx0, intype:$gradx1,
                  intype:$grady0, intype:$grady1)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],",
                   " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .grad form.
  defm TEX_UNIFIED_2D_F32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
      "tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_S32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
      "tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_U32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
      "tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.a2d": four outputs of `outtype`; the inputs
  // are the handle dag `tex`, an Int32Regs operand $l and coordinates
  // $x, $y.
  // The printed coordinate vector has four slots and repeats $y in the last
  // one (presumably padding to the vector width PTX expects -- confirm
  // against the PTX ISA before changing).
  class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }

  // "tex.a2d" instantiations: one per (result type, coordinate type) pair.
  defm TEX_UNIFIED_2D_ARRAY_F32_S32 : TEX_UNIFIED_2D_ARRAY<
      "tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_F32_F32 : TEX_UNIFIED_2D_ARRAY<
      "tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_S32_S32 : TEX_UNIFIED_2D_ARRAY<
      "tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_S32_F32 : TEX_UNIFIED_2D_ARRAY<
      "tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_U32_S32 : TEX_UNIFIED_2D_ARRAY<
      "tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_U32_F32 : TEX_UNIFIED_2D_ARRAY<
      "tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.level.a2d": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, an Int32Regs operand $l, coordinates
  // $x, $y and $lod.
  // The printed coordinate vector repeats $y in its fourth slot (presumably
  // padding -- confirm against the PTX ISA before changing).
  class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\},",
                   " [$t, \\{$l, $x, $y, $y\\}], $lod;"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .level form.
  defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
      "tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
      "tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
      "tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.grad.a2d": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, an Int32Regs operand $l, coordinates
  // $x, $y and two two-component gradients ($gradx0/1 and $grady0/1).
  // The printed coordinate vector repeats $y in its fourth slot (presumably
  // padding -- confirm against the PTX ISA before changing).
  class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins Int32Regs:$l, intype:$x, intype:$y,
                  intype:$gradx0, intype:$gradx1,
                  intype:$grady0, intype:$grady1)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],",
                   " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                            (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                            (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .grad form.
  defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
      "tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
      "tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
      "tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.3d": four outputs of `outtype`; the inputs
  // are the handle dag `tex` followed by coordinates $x, $y, $z.
  // The printed coordinate vector has four slots and repeats $z in the last
  // one (presumably padding -- confirm against the PTX ISA before changing).
  class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y, intype:$z)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
  }

  // "tex.3d" instantiations: one per (result type, coordinate type) pair.
  defm TEX_UNIFIED_3D_F32_S32 : TEX_UNIFIED_3D<
      "tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
  defm TEX_UNIFIED_3D_F32_F32 : TEX_UNIFIED_3D<
      "tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_S32_S32 : TEX_UNIFIED_3D<
      "tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_3D_S32_F32 : TEX_UNIFIED_3D<
      "tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_U32_S32 : TEX_UNIFIED_3D<
      "tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
  defm TEX_UNIFIED_3D_U32_F32 : TEX_UNIFIED_3D<
      "tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.level.3d": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, coordinates $x, $y, $z and $lod.
  // The printed coordinate vector repeats $z in its fourth slot (presumably
  // padding -- confirm against the PTX ISA before changing).
  class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\},",
                   " [$t, \\{$x, $y, $z, $z\\}], $lod;"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .level form.
  defm TEX_UNIFIED_3D_F32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
      "tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_S32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
      "tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_U32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
      "tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.grad.3d": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, coordinates $x, $y, $z and two
  // three-component gradients ($gradx0..2 and $grady0..2).
  // Each printed vector has four slots and repeats its third component in
  // the last one (presumably padding -- confirm against the PTX ISA before
  // changing).
  class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins intype:$x, intype:$y, intype:$z,
                  intype:$gradx0, intype:$gradx1, intype:$gradx2,
                  intype:$grady0, intype:$grady1, intype:$grady2)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],",
                   " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                   " \\{$grady0, $grady1, $grady2, $grady2\\};"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .grad form.
  defm TEX_UNIFIED_3D_F32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
      "tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_S32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
      "tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_3D_U32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
      "tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.cube": four outputs of `outtype`; the inputs
  // are the handle dag `tex` followed by coordinates $x, $y, $z.
  // The printed coordinate vector has four slots and repeats $z in the last
  // one (presumably padding -- confirm against the PTX ISA before changing).
  class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y, intype:$z)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype,
                                   (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype,
                                   (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for "tex.cube".
  defm TEX_UNIFIED_CUBE_F32_F32 : TEX_UNIFIED_CUBE<
      "tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_S32_F32 : TEX_UNIFIED_CUBE<
      "tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_U32_F32 : TEX_UNIFIED_CUBE<
      "tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.level.cube": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, coordinates $x, $y, $z and $lod.
  // The printed coordinate vector repeats $z in its fourth slot (presumably
  // padding -- confirm against the PTX ISA before changing).
  class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                                    NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\},",
                   " [$t, \\{$x, $y, $z, $z\\}], $lod;"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                                    NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                         (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                         (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .level form.
  defm TEX_UNIFIED_CUBE_F32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
      "tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_S32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
      "tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_U32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
      "tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.acube": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, an Int32Regs operand $l and
  // coordinates $x, $y, $z. Here the four printed coordinate slots are all
  // distinct operands ($l, $x, $y, $z).
  class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                                    NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                                    NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                         (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                         (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for "tex.acube".
  defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 : TEX_UNIFIED_CUBE_ARRAY<
      "tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 : TEX_UNIFIED_CUBE_ARRAY<
      "tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 : TEX_UNIFIED_CUBE_ARRAY<
      "tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tex.level.acube": four outputs of `outtype`; the
  // inputs are the handle dag `tex`, an Int32Regs operand $l, coordinates
  // $x, $y, $z and $lod.
  class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                  intype:$lod)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\},",
                   " [$t, \\{$l, $x, $y, $z\\}], $lod;"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                          NVPTXRegClass intype> {
    def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                               (ins Int64Regs:$t)>;
    def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                               (ins i64imm:$t)>;
  }

  // Only f32 coordinates are instantiated for the .level form.
  defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<
      "tex.level.acube.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<
      "tex.level.acube.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<
      "tex.level.acube.v4.u32.f32", Int32Regs, Float32Regs>;
  // Instruction shape for "tld4.<c>.2d": four outputs of `outtype` named
  // $v0..$v3; the inputs are the handle dag `tex` followed by coordinates
  // $x and $y.
  class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
        !con(tex, (ins intype:$x, intype:$y)),
        !strconcat(inst,
                   " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $t respectively.
  multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
    def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
    def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
  }

  // f32 results, one instantiation per .r/.g/.b/.a mnemonic suffix.
  defm TLD4_UNIFIED_R_2D_F32_F32 : TLD4_UNIFIED_2D<
      "tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_UNIFIED_G_2D_F32_F32 : TLD4_UNIFIED_2D<
      "tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_UNIFIED_B_2D_F32_F32 : TLD4_UNIFIED_2D<
      "tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
  defm TLD4_UNIFIED_A_2D_F32_F32 : TLD4_UNIFIED_2D<
      "tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

  // s32 results (held in Int32Regs).
  defm TLD4_UNIFIED_R_2D_S32_F32 : TLD4_UNIFIED_2D<
      "tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_G_2D_S32_F32 : TLD4_UNIFIED_2D<
      "tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_B_2D_S32_F32 : TLD4_UNIFIED_2D<
      "tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_A_2D_S32_F32 : TLD4_UNIFIED_2D<
      "tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

  // u32 results (held in Int32Regs).
  defm TLD4_UNIFIED_R_2D_U32_F32 : TLD4_UNIFIED_2D<
      "tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_G_2D_U32_F32 : TLD4_UNIFIED_2D<
      "tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_B_2D_U32_F32 : TLD4_UNIFIED_2D<
      "tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  defm TLD4_UNIFIED_A_2D_U32_F32 : TLD4_UNIFIED_2D<
      "tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
  3408. }
  3409. //=== Surface load instructions
  3410. let IsSuld = true in {
  // Instruction shape for "suld.b.1d": a single output $r of `outtype`; the
  // inputs are the handle dag `surf` followed by an Int32Regs coordinate $x.
  class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$x)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$x\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $s respectively.
  multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants. Both b8 and b16 results are held in Int16Regs.
  defm SULD_1D_I8_CLAMP  : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
  defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
  defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
  defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_1D_I8_TRAP  : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
  defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
  defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
  defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_1D_I8_ZERO  : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
  defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
  defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
  defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
  // Instruction shape for "suld.b.a1d": a single output $r of `outtype`;
  // the inputs are the handle dag `surf` followed by Int32Regs operands $l
  // and $x.
  class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $s respectively.
  multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants. Both b8 and b16 results are held in Int16Regs.
  defm SULD_1D_ARRAY_I8_CLAMP : SULD_1D_ARRAY<
      "suld.b.a1d.b8.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_I16_CLAMP : SULD_1D_ARRAY<
      "suld.b.a1d.b16.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_I32_CLAMP : SULD_1D_ARRAY<
      "suld.b.a1d.b32.clamp", Int32Regs>;
  defm SULD_1D_ARRAY_I64_CLAMP : SULD_1D_ARRAY<
      "suld.b.a1d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_1D_ARRAY_I8_TRAP : SULD_1D_ARRAY<
      "suld.b.a1d.b8.trap", Int16Regs>;
  defm SULD_1D_ARRAY_I16_TRAP : SULD_1D_ARRAY<
      "suld.b.a1d.b16.trap", Int16Regs>;
  defm SULD_1D_ARRAY_I32_TRAP : SULD_1D_ARRAY<
      "suld.b.a1d.b32.trap", Int32Regs>;
  defm SULD_1D_ARRAY_I64_TRAP : SULD_1D_ARRAY<
      "suld.b.a1d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_1D_ARRAY_I8_ZERO : SULD_1D_ARRAY<
      "suld.b.a1d.b8.zero", Int16Regs>;
  defm SULD_1D_ARRAY_I16_ZERO : SULD_1D_ARRAY<
      "suld.b.a1d.b16.zero", Int16Regs>;
  defm SULD_1D_ARRAY_I32_ZERO : SULD_1D_ARRAY<
      "suld.b.a1d.b32.zero", Int32Regs>;
  defm SULD_1D_ARRAY_I64_ZERO : SULD_1D_ARRAY<
      "suld.b.a1d.b64.zero", Int64Regs>;
  // Instruction shape for "suld.b.2d": a single output $r of `outtype`; the
  // inputs are the handle dag `surf` followed by Int32Regs coordinates $x
  // and $y.
  class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $s respectively.
  multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
    def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants. Both b8 and b16 results are held in Int16Regs.
  defm SULD_2D_I8_CLAMP  : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
  defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
  defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
  defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_2D_I8_TRAP  : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
  defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
  defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
  defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_2D_I8_ZERO  : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
  defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
  defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
  defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
  // Instruction shape for "suld.b.a2d": a single output $r of `outtype`;
  // the inputs are the handle dag `surf` followed by Int32Regs operands $l,
  // $x and $y.
  // The printed coordinate vector has four slots and repeats $y in the last
  // one (presumably padding -- confirm against the PTX ISA before changing).
  class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $s respectively.
  multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
    def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants. Both b8 and b16 results are held in Int16Regs.
  defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<
      "suld.b.a2d.b8.clamp", Int16Regs>;
  defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<
      "suld.b.a2d.b16.clamp", Int16Regs>;
  defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<
      "suld.b.a2d.b32.clamp", Int32Regs>;
  defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<
      "suld.b.a2d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<
      "suld.b.a2d.b8.trap", Int16Regs>;
  defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<
      "suld.b.a2d.b16.trap", Int16Regs>;
  defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<
      "suld.b.a2d.b32.trap", Int32Regs>;
  defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<
      "suld.b.a2d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<
      "suld.b.a2d.b8.zero", Int16Regs>;
  defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<
      "suld.b.a2d.b16.zero", Int16Regs>;
  defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<
      "suld.b.a2d.b32.zero", Int32Regs>;
  defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<
      "suld.b.a2d.b64.zero", Int64Regs>;
  // Instruction shape for "suld.b.3d": a single output $r of `outtype`; the
  // inputs are the handle dag `surf` followed by Int32Regs coordinates $x,
  // $y and $z.
  // The printed coordinate vector has four slots and repeats $z in the last
  // one (presumably padding -- confirm against the PTX ISA before changing).
  class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $s respectively.
  multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
    def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants. Both b8 and b16 results are held in Int16Regs.
  defm SULD_3D_I8_CLAMP  : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
  defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
  defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
  defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_3D_I8_TRAP  : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
  defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
  defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
  defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_3D_I8_ZERO  : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
  defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
  defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
  defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
  3528. }
  3529. let IsSuld = 2 in {
  // Instruction shape for "suld.b.1d.v2": two outputs $r, $g of `outtype`;
  // the inputs are the handle dag `surf` followed by an Int32Regs
  // coordinate $x.
  class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$x)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $s respectively.
  multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants. Both b8 and b16 results are held in Int16Regs.
  defm SULD_1D_V2I8_CLAMP  : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
  defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
  defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
  defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_1D_V2I8_TRAP  : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
  defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
  defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
  defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_1D_V2I8_ZERO  : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
  defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
  defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
  defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
  // Instruction shape for "suld.b.a1d.v2": two outputs $r, $g of `outtype`;
  // the inputs are the handle dag `surf` followed by Int32Regs operands $l
  // and $x.
  class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$l, $x\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $s respectively.
  multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
    def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants. Both b8 and b16 results are held in Int16Regs.
  defm SULD_1D_ARRAY_V2I8_CLAMP : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b8.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_V2I16_CLAMP : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b16.clamp", Int16Regs>;
  defm SULD_1D_ARRAY_V2I32_CLAMP : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b32.clamp", Int32Regs>;
  defm SULD_1D_ARRAY_V2I64_CLAMP : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_1D_ARRAY_V2I8_TRAP : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b8.trap", Int16Regs>;
  defm SULD_1D_ARRAY_V2I16_TRAP : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b16.trap", Int16Regs>;
  defm SULD_1D_ARRAY_V2I32_TRAP : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b32.trap", Int32Regs>;
  defm SULD_1D_ARRAY_V2I64_TRAP : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_1D_ARRAY_V2I8_ZERO : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b8.zero", Int16Regs>;
  defm SULD_1D_ARRAY_V2I16_ZERO : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b16.zero", Int16Regs>;
  defm SULD_1D_ARRAY_V2I32_ZERO : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b32.zero", Int32Regs>;
  defm SULD_1D_ARRAY_V2I64_ZERO : SULD_1D_ARRAY_V2<
      "suld.b.a1d.v2.b64.zero", Int64Regs>;
  // Instruction shape for "suld.b.2d.v2": two outputs $r, $g of `outtype`;
  // the inputs are the handle dag `surf` followed by Int32Regs coordinates
  // $x and $y.
  class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y\\}];"),
        []>;

  // _R / _I select an Int64Regs or i64imm handle operand $s respectively.
  multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
    def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
    def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
  }

  // .clamp variants. Both b8 and b16 results are held in Int16Regs.
  defm SULD_2D_V2I8_CLAMP : SULD_2D_V2<
      "suld.b.2d.v2.b8.clamp", Int16Regs>;
  defm SULD_2D_V2I16_CLAMP : SULD_2D_V2<
      "suld.b.2d.v2.b16.clamp", Int16Regs>;
  defm SULD_2D_V2I32_CLAMP : SULD_2D_V2<
      "suld.b.2d.v2.b32.clamp", Int32Regs>;
  defm SULD_2D_V2I64_CLAMP : SULD_2D_V2<
      "suld.b.2d.v2.b64.clamp", Int64Regs>;

  // .trap variants.
  defm SULD_2D_V2I8_TRAP : SULD_2D_V2<
      "suld.b.2d.v2.b8.trap", Int16Regs>;
  defm SULD_2D_V2I16_TRAP : SULD_2D_V2<
      "suld.b.2d.v2.b16.trap", Int16Regs>;
  defm SULD_2D_V2I32_TRAP : SULD_2D_V2<
      "suld.b.2d.v2.b32.trap", Int32Regs>;
  defm SULD_2D_V2I64_TRAP : SULD_2D_V2<
      "suld.b.2d.v2.b64.trap", Int64Regs>;

  // .zero variants.
  defm SULD_2D_V2I8_ZERO : SULD_2D_V2<
      "suld.b.2d.v2.b8.zero", Int16Regs>;
  defm SULD_2D_V2I16_ZERO : SULD_2D_V2<
      "suld.b.2d.v2.b16.zero", Int16Regs>;
  defm SULD_2D_V2I32_ZERO : SULD_2D_V2<
      "suld.b.2d.v2.b32.zero", Int32Regs>;
  defm SULD_2D_V2I64_ZERO : SULD_2D_V2<
      "suld.b.2d.v2.b64.zero", Int64Regs>;
  3617. class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  3618. : NVPTXInst<(outs outtype:$r, outtype:$g),
  3619. !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
  3620. inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
  3621. []>;
  3622. multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  3623. def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  3624. def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
  3625. }
  3626. defm SULD_2D_ARRAY_V2I8_CLAMP
  3627. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
  3628. defm SULD_2D_ARRAY_V2I16_CLAMP
  3629. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
  3630. defm SULD_2D_ARRAY_V2I32_CLAMP
  3631. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
  3632. defm SULD_2D_ARRAY_V2I64_CLAMP
  3633. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
  3634. defm SULD_2D_ARRAY_V2I8_TRAP
  3635. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
  3636. defm SULD_2D_ARRAY_V2I16_TRAP
  3637. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
  3638. defm SULD_2D_ARRAY_V2I32_TRAP
  3639. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
  3640. defm SULD_2D_ARRAY_V2I64_TRAP
  3641. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
  3642. defm SULD_2D_ARRAY_V2I8_ZERO
  3643. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
  3644. defm SULD_2D_ARRAY_V2I16_ZERO
  3645. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
  3646. defm SULD_2D_ARRAY_V2I32_ZERO
  3647. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
  3648. defm SULD_2D_ARRAY_V2I64_ZERO
  3649. : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
  3650. class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  3651. : NVPTXInst<(outs outtype:$r, outtype:$g),
  3652. !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
  3653. inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
  3654. []>;
  3655. multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  3656. def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  3657. def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
  3658. }
  3659. defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
  3660. defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
  3661. defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
  3662. defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
  3663. defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
  3664. defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
  3665. defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
  3666. defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
  3667. defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
  3668. defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
  3669. defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
  3670. defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
  3671. }
  3672. let IsSuld = 3 in {
  3673. class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  3674. : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
  3675. !con(surf, (ins Int32Regs:$x)),
  3676. inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
  3677. []>;
  3678. multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  3679. def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  3680. def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
  3681. }
  3682. defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
  3683. defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
  3684. defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
  3685. defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
  3686. defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
  3687. defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
  3688. defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
  3689. defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
  3690. defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
  3691. class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  3692. : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
  3693. !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
  3694. inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
  3695. []>;
  3696. multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  3697. def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  3698. def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
  3699. }
  3700. defm SULD_1D_ARRAY_V4I8_CLAMP
  3701. : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
  3702. defm SULD_1D_ARRAY_V4I16_CLAMP
  3703. : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
  3704. defm SULD_1D_ARRAY_V4I32_CLAMP
  3705. : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
  3706. defm SULD_1D_ARRAY_V4I8_TRAP
  3707. : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
  3708. defm SULD_1D_ARRAY_V4I16_TRAP
  3709. : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
  3710. defm SULD_1D_ARRAY_V4I32_TRAP
  3711. : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
  3712. defm SULD_1D_ARRAY_V4I8_ZERO
  3713. : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
  3714. defm SULD_1D_ARRAY_V4I16_ZERO
  3715. : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
  3716. defm SULD_1D_ARRAY_V4I32_ZERO
  3717. : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
  3718. class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  3719. : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
  3720. !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
  3721. inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
  3722. []>;
  3723. multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  3724. def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  3725. def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
  3726. }
  3727. defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
  3728. defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
  3729. defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
  3730. defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
  3731. defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
  3732. defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
  3733. defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
  3734. defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
  3735. defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
  3736. class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  3737. : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
  3738. !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
  3739. inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
  3740. []>;
  3741. multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  3742. def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  3743. def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
  3744. }
  3745. defm SULD_2D_ARRAY_V4I8_CLAMP
  3746. : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
  3747. defm SULD_2D_ARRAY_V4I16_CLAMP
  3748. : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
  3749. defm SULD_2D_ARRAY_V4I32_CLAMP
  3750. : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
  3751. defm SULD_2D_ARRAY_V4I8_TRAP
  3752. : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
  3753. defm SULD_2D_ARRAY_V4I16_TRAP
  3754. : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
  3755. defm SULD_2D_ARRAY_V4I32_TRAP
  3756. : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
  3757. defm SULD_2D_ARRAY_V4I8_ZERO
  3758. : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
  3759. defm SULD_2D_ARRAY_V4I16_ZERO
  3760. : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
  3761. defm SULD_2D_ARRAY_V4I32_ZERO
  3762. : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
  3763. class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  3764. : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
  3765. !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
  3766. inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
  3767. []>;
  3768. multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  3769. def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  3770. def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
  3771. }
  3772. defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
  3773. defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
  3774. defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
  3775. defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
  3776. defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
  3777. defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
  3778. defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
  3779. defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
  3780. defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
  3781. }
  3782. //-----------------------------------
  3783. // Texture Query Intrinsics
  3784. //-----------------------------------
  3785. let IsSurfTexQuery = true in {
  3786. def TXQ_CHANNEL_ORDER_R
  3787. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3788. "txq.channel_order.b32 \t$d, [$a];",
  3789. []>;
  3790. def TXQ_CHANNEL_ORDER_I
  3791. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3792. "txq.channel_order.b32 \t$d, [$a];",
  3793. []>;
  3794. def TXQ_CHANNEL_DATA_TYPE_R
  3795. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3796. "txq.channel_data_type.b32 \t$d, [$a];",
  3797. []>;
  3798. def TXQ_CHANNEL_DATA_TYPE_I
  3799. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3800. "txq.channel_data_type.b32 \t$d, [$a];",
  3801. []>;
  3802. def TXQ_WIDTH_R
  3803. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3804. "txq.width.b32 \t$d, [$a];",
  3805. []>;
  3806. def TXQ_WIDTH_I
  3807. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3808. "txq.width.b32 \t$d, [$a];",
  3809. []>;
  3810. def TXQ_HEIGHT_R
  3811. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3812. "txq.height.b32 \t$d, [$a];",
  3813. []>;
  3814. def TXQ_HEIGHT_I
  3815. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3816. "txq.height.b32 \t$d, [$a];",
  3817. []>;
  3818. def TXQ_DEPTH_R
  3819. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3820. "txq.depth.b32 \t$d, [$a];",
  3821. []>;
  3822. def TXQ_DEPTH_I
  3823. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3824. "txq.depth.b32 \t$d, [$a];",
  3825. []>;
  3826. def TXQ_ARRAY_SIZE_R
  3827. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3828. "txq.array_size.b32 \t$d, [$a];",
  3829. []>;
  3830. def TXQ_ARRAY_SIZE_I
  3831. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3832. "txq.array_size.b32 \t$d, [$a];",
  3833. []>;
  3834. def TXQ_NUM_SAMPLES_R
  3835. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3836. "txq.num_samples.b32 \t$d, [$a];",
  3837. []>;
  3838. def TXQ_NUM_SAMPLES_I
  3839. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3840. "txq.num_samples.b32 \t$d, [$a];",
  3841. []>;
  3842. def TXQ_NUM_MIPMAP_LEVELS_R
  3843. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3844. "txq.num_mipmap_levels.b32 \t$d, [$a];",
  3845. []>;
  3846. def TXQ_NUM_MIPMAP_LEVELS_I
  3847. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3848. "txq.num_mipmap_levels.b32 \t$d, [$a];",
  3849. []>;
  3850. }
  3851. def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
  3852. (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
  3853. def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
  3854. (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
  3855. def : Pat<(int_nvvm_txq_width Int64Regs:$a),
  3856. (TXQ_WIDTH_R Int64Regs:$a)>;
  3857. def : Pat<(int_nvvm_txq_height Int64Regs:$a),
  3858. (TXQ_HEIGHT_R Int64Regs:$a)>;
  3859. def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
  3860. (TXQ_DEPTH_R Int64Regs:$a)>;
  3861. def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
  3862. (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
  3863. def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
  3864. (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
  3865. def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
  3866. (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
  3867. //-----------------------------------
  3868. // Surface Query Intrinsics
  3869. //-----------------------------------
  3870. let IsSurfTexQuery = true in {
  3871. def SUQ_CHANNEL_ORDER_R
  3872. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3873. "suq.channel_order.b32 \t$d, [$a];",
  3874. []>;
  3875. def SUQ_CHANNEL_ORDER_I
  3876. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3877. "suq.channel_order.b32 \t$d, [$a];",
  3878. []>;
  3879. def SUQ_CHANNEL_DATA_TYPE_R
  3880. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3881. "suq.channel_data_type.b32 \t$d, [$a];",
  3882. []>;
  3883. def SUQ_CHANNEL_DATA_TYPE_I
  3884. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3885. "suq.channel_data_type.b32 \t$d, [$a];",
  3886. []>;
  3887. def SUQ_WIDTH_R
  3888. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3889. "suq.width.b32 \t$d, [$a];",
  3890. []>;
  3891. def SUQ_WIDTH_I
  3892. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3893. "suq.width.b32 \t$d, [$a];",
  3894. []>;
  3895. def SUQ_HEIGHT_R
  3896. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3897. "suq.height.b32 \t$d, [$a];",
  3898. []>;
  3899. def SUQ_HEIGHT_I
  3900. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3901. "suq.height.b32 \t$d, [$a];",
  3902. []>;
  3903. def SUQ_DEPTH_R
  3904. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3905. "suq.depth.b32 \t$d, [$a];",
  3906. []>;
  3907. def SUQ_DEPTH_I
  3908. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3909. "suq.depth.b32 \t$d, [$a];",
  3910. []>;
  3911. def SUQ_ARRAY_SIZE_R
  3912. : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
  3913. "suq.array_size.b32 \t$d, [$a];",
  3914. []>;
  3915. def SUQ_ARRAY_SIZE_I
  3916. : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
  3917. "suq.array_size.b32 \t$d, [$a];",
  3918. []>;
  3919. }
  3920. def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
  3921. (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
  3922. def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
  3923. (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
  3924. def : Pat<(int_nvvm_suq_width Int64Regs:$a),
  3925. (SUQ_WIDTH_R Int64Regs:$a)>;
  3926. def : Pat<(int_nvvm_suq_height Int64Regs:$a),
  3927. (SUQ_HEIGHT_R Int64Regs:$a)>;
  3928. def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
  3929. (SUQ_DEPTH_R Int64Regs:$a)>;
  3930. def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
  3931. (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
  3932. //===- Handle Query -------------------------------------------------------===//
  3933. // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
  3934. def ISTYPEP_SAMPLER
  3935. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  3936. "istypep.samplerref \t$d, $a;",
  3937. [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
  3938. def ISTYPEP_SURFACE
  3939. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  3940. "istypep.surfref \t$d, $a;",
  3941. [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
  3942. def ISTYPEP_TEXTURE
  3943. : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
  3944. "istypep.texref \t$d, $a;",
  3945. [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
  3946. //===- Surface Stores -----------------------------------------------------===//
  3947. let IsSust = true in {
  3948. class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
  3949. : NVPTXInst<(outs),
  3950. !con(surf, (ins Int32Regs:$x, intype:$r)),
  3951. inst # " \t[$s, \\{$x\\}], \\{$r\\};",
  3952. []>;
  3953. multiclass SUST_1D<string inst, NVPTXRegClass intype> {
  3954. def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
  3955. def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
  3956. }
  3957. defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
  3958. defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
  3959. defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
  3960. defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
  3961. defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
  3962. defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
  3963. defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
  3964. defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
  3965. defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
  3966. defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
  3967. defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
  3968. defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
  3969. defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
  3970. defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
  3971. defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
  3972. class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  3973. : NVPTXInst<(outs),
  3974. !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
  3975. inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
  3976. []>;
  3977. multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
  3978. def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  3979. def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
  3980. }
  3981. defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
  3982. defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
  3983. defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
  3984. defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
  3985. defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
  3986. defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
  3987. defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
  3988. defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
  3989. defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
  3990. defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
  3991. defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
  3992. defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
  3993. defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
  3994. defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
  3995. defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
  3996. class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  3997. : NVPTXInst<(outs),
  3998. !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
  3999. intype:$b, intype:$a)),
  4000. inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
  4001. []>;
  4002. multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
  4003. def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  4004. def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
  4005. }
  4006. defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
  4007. defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
  4008. defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
  4009. defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
  4010. defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
  4011. defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
  4012. defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
  4013. defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
  4014. defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
  4015. defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
  4016. defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
  4017. defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
  4018. class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
  4019. : NVPTXInst<(outs),
  4020. !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
  4021. inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  4022. []>;
  4023. multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
  4024. def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  4025. def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
  4026. }
  4027. defm SUST_B_1D_ARRAY_B8_CLAMP
  4028. : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
  4029. defm SUST_B_1D_ARRAY_B16_CLAMP
  4030. : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
  4031. defm SUST_B_1D_ARRAY_B32_CLAMP
  4032. : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
  4033. defm SUST_B_1D_ARRAY_B64_CLAMP
  4034. : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
  4035. defm SUST_B_1D_ARRAY_B8_TRAP
  4036. : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
  4037. defm SUST_B_1D_ARRAY_B16_TRAP
  4038. : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
  4039. defm SUST_B_1D_ARRAY_B32_TRAP
  4040. : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
  4041. defm SUST_B_1D_ARRAY_B64_TRAP
  4042. : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
  4043. defm SUST_B_1D_ARRAY_B8_ZERO
  4044. : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
  4045. defm SUST_B_1D_ARRAY_B16_ZERO
  4046. : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
  4047. defm SUST_B_1D_ARRAY_B32_ZERO
  4048. : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
  4049. defm SUST_B_1D_ARRAY_B64_ZERO
  4050. : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
  4051. defm SUST_P_1D_ARRAY_B8_TRAP
  4052. : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
  4053. defm SUST_P_1D_ARRAY_B16_TRAP
  4054. : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
  4055. defm SUST_P_1D_ARRAY_B32_TRAP
  4056. : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
  4057. class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
  4058. : NVPTXInst<(outs),
  4059. !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
  4060. intype:$r, intype:$g)),
  4061. inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  4062. []>;
  4063. multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  4064. def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  4065. def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
  4066. }
  4067. defm SUST_B_1D_ARRAY_V2B8_CLAMP
  4068. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
  4069. defm SUST_B_1D_ARRAY_V2B16_CLAMP
  4070. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
  4071. defm SUST_B_1D_ARRAY_V2B32_CLAMP
  4072. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
  4073. defm SUST_B_1D_ARRAY_V2B64_CLAMP
  4074. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
  4075. defm SUST_B_1D_ARRAY_V2B8_TRAP
  4076. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
  4077. defm SUST_B_1D_ARRAY_V2B16_TRAP
  4078. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
  4079. defm SUST_B_1D_ARRAY_V2B32_TRAP
  4080. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
  4081. defm SUST_B_1D_ARRAY_V2B64_TRAP
  4082. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
  4083. defm SUST_B_1D_ARRAY_V2B8_ZERO
  4084. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
  4085. defm SUST_B_1D_ARRAY_V2B16_ZERO
  4086. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
  4087. defm SUST_B_1D_ARRAY_V2B32_ZERO
  4088. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
  4089. defm SUST_B_1D_ARRAY_V2B64_ZERO
  4090. : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
  4091. defm SUST_P_1D_ARRAY_V2B8_TRAP
  4092. : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
  4093. defm SUST_P_1D_ARRAY_V2B16_TRAP
  4094. : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
  4095. defm SUST_P_1D_ARRAY_V2B32_TRAP
  4096. : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
  4097. class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
  4098. : NVPTXInst<(outs),
  4099. !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
  4100. intype:$r, intype:$g, intype:$b, intype:$a)),
  4101. inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
  4102. []>;
  4103. multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  4104. def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  4105. def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
  4106. }
  4107. defm SUST_B_1D_ARRAY_V4B8_CLAMP
  4108. : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
  4109. defm SUST_B_1D_ARRAY_V4B16_CLAMP
  4110. : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
  4111. defm SUST_B_1D_ARRAY_V4B32_CLAMP
  4112. : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
  4113. defm SUST_B_1D_ARRAY_V4B8_TRAP
  4114. : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
  4115. defm SUST_B_1D_ARRAY_V4B16_TRAP
  4116. : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
  4117. defm SUST_B_1D_ARRAY_V4B32_TRAP
  4118. : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
  4119. defm SUST_B_1D_ARRAY_V4B8_ZERO
  4120. : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
  4121. defm SUST_B_1D_ARRAY_V4B16_ZERO
  4122. : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
  4123. defm SUST_B_1D_ARRAY_V4B32_ZERO
  4124. : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
  4125. defm SUST_P_1D_ARRAY_V4B8_TRAP
  4126. : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
  4127. defm SUST_P_1D_ARRAY_V4B16_TRAP
  4128. : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
  4129. defm SUST_P_1D_ARRAY_V4B32_TRAP
  4130. : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
  4131. class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
  4132. : NVPTXInst<(outs),
  4133. !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
  4134. inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
  4135. []>;
  4136. multiclass SUST_2D<string inst, NVPTXRegClass intype> {
  4137. def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
  4138. def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
  4139. }
  4140. defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
  4141. defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
  4142. defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
  4143. defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
  4144. defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
  4145. defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
  4146. defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
  4147. defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
  4148. defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
  4149. defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
  4150. defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
  4151. defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
  4152. defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
  4153. defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
  4154. defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
  4155. class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  4156. : NVPTXInst<(outs),
  4157. !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
  4158. intype:$r, intype:$g)),
  4159. inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  4160. []>;
  4161. multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
  4162. def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  4163. def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
  4164. }
  4165. defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
  4166. defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
  4167. defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
  4168. defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
  4169. defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
  4170. defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
  4171. defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
  4172. defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
  4173. defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
  4174. defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
  4175. defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
  4176. defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
  4177. defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
  4178. defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
  4179. defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
  4180. class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  4181. : NVPTXInst<(outs),
  4182. !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
  4183. intype:$r, intype:$g, intype:$b, intype:$a)),
  4184. inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
  4185. []>;
  4186. multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
  4187. def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  4188. def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
  4189. }
  // Instantiations of SUST_2D_V4. Only b8, b16 and b32 forms are defined; the
  // b8 and b16 forms both take Int16Regs values.

  // sust.b.2d.v4, .clamp
  defm SUST_B_2D_V4B8_CLAMP
    : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
  defm SUST_B_2D_V4B16_CLAMP
    : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
  defm SUST_B_2D_V4B32_CLAMP
    : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;

  // sust.b.2d.v4, .trap
  defm SUST_B_2D_V4B8_TRAP
    : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
  defm SUST_B_2D_V4B16_TRAP
    : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
  defm SUST_B_2D_V4B32_TRAP
    : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;

  // sust.b.2d.v4, .zero
  defm SUST_B_2D_V4B8_ZERO
    : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
  defm SUST_B_2D_V4B16_ZERO
    : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
  defm SUST_B_2D_V4B32_ZERO
    : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;

  // sust.p.2d.v4, .trap
  defm SUST_P_2D_V4B8_TRAP
    : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
  defm SUST_P_2D_V4B16_TRAP
    : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
  defm SUST_P_2D_V4B32_TRAP
    : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
  // Common shape of the single-value SUST_2D_ARRAY instructions.
  //   inst   - mnemonic text placed at the front of the assembly string.
  //   intype - register class of the stored value $r.
  //   surf   - dag that supplies the leading surface operand; the assembly
  //            string refers to it as $s.
  // The input list is `surf` followed by the Int32Regs operands $idx, $x, $y
  // and the value. The instruction has no outputs, and the trailing list
  // argument of NVPTXInst is left empty.
  //
  // The assembly string prints a four-element coordinate list in which $y
  // appears twice.
  // NOTE(review): presumably the coordinate operand has to be a four-element
  // vector whose last slot is ignored - confirm against the PTX ISA before
  // "fixing" the duplicate.
  class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r)),
                !strconcat(inst,
                           " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
                []>;
  // Emits the two surface-operand flavours of a SUST_2D_ARRAY_base
  // instruction:
  //   <name>_R - the surface operand $s is an Int64Regs register.
  //   <name>_I - the surface operand $s is an i64imm immediate.
  multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
    def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
  }
  // Instantiations of SUST_2D_ARRAY (mnemonic geometry "a2d"). The b8 and b16
  // forms both take Int16Regs values; b32 takes Int32Regs and b64 Int64Regs.

  // sust.b.a2d, .clamp
  defm SUST_B_2D_ARRAY_B8_CLAMP
      : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_B16_CLAMP
      : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_B32_CLAMP
      : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
  defm SUST_B_2D_ARRAY_B64_CLAMP
      : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;

  // sust.b.a2d, .trap
  defm SUST_B_2D_ARRAY_B8_TRAP
      : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_B16_TRAP
      : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_B32_TRAP
      : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
  defm SUST_B_2D_ARRAY_B64_TRAP
      : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;

  // sust.b.a2d, .zero
  defm SUST_B_2D_ARRAY_B8_ZERO
      : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_B16_ZERO
      : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_B32_ZERO
      : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
  defm SUST_B_2D_ARRAY_B64_ZERO
      : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;

  // sust.p.a2d, .trap (no b64 form is instantiated here)
  defm SUST_P_2D_ARRAY_B8_TRAP
      : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_B16_TRAP
      : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_B32_TRAP
      : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
  // Common shape of the two-value SUST_2D_ARRAY_V2 instructions.
  //   inst   - mnemonic text placed at the front of the assembly string.
  //   intype - register class of the stored values $r and $g.
  //   surf   - dag that supplies the leading surface operand; the assembly
  //            string refers to it as $s.
  // The input list is `surf` followed by the Int32Regs operands $idx, $x, $y
  // and the two values. The instruction has no outputs, and the trailing list
  // argument of NVPTXInst is left empty.
  //
  // The assembly string prints a four-element coordinate list in which $y
  // appears twice.
  // NOTE(review): presumably the coordinate operand has to be a four-element
  // vector whose last slot is ignored - confirm against the PTX ISA before
  // "fixing" the duplicate.
  class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g)),
                !strconcat(inst,
                           " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};"),
                []>;
  // Emits the two surface-operand flavours of a SUST_2D_ARRAY_V2_base
  // instruction:
  //   <name>_R - the surface operand $s is an Int64Regs register.
  //   <name>_I - the surface operand $s is an i64imm immediate.
  multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
    def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
  }
  // Instantiations of SUST_2D_ARRAY_V2 (mnemonic geometry "a2d.v2"). The b8
  // and b16 forms both take Int16Regs values; b32 takes Int32Regs and b64
  // Int64Regs.

  // sust.b.a2d.v2, .clamp
  defm SUST_B_2D_ARRAY_V2B8_CLAMP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B16_CLAMP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B32_CLAMP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
  defm SUST_B_2D_ARRAY_V2B64_CLAMP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

  // sust.b.a2d.v2, .trap
  defm SUST_B_2D_ARRAY_V2B8_TRAP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B16_TRAP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B32_TRAP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
  defm SUST_B_2D_ARRAY_V2B64_TRAP
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

  // sust.b.a2d.v2, .zero
  defm SUST_B_2D_ARRAY_V2B8_ZERO
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B16_ZERO
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_V2B32_ZERO
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
  defm SUST_B_2D_ARRAY_V2B64_ZERO
      : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

  // sust.p.a2d.v2, .trap (no b64 form is instantiated here)
  defm SUST_P_2D_ARRAY_V2B8_TRAP
      : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_V2B16_TRAP
      : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_V2B32_TRAP
      : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
  // Common shape of the four-value SUST_2D_ARRAY_V4 instructions.
  //   inst   - mnemonic text placed at the front of the assembly string.
  //   intype - register class of the stored values $r, $g, $b, $a.
  //   surf   - dag that supplies the leading surface operand; the assembly
  //            string refers to it as $s.
  // The input list is `surf` followed by the Int32Regs operands $idx, $x, $y
  // and the four values. The instruction has no outputs, and the trailing
  // list argument of NVPTXInst is left empty.
  //
  // The assembly string prints a four-element coordinate list in which $y
  // appears twice.
  // NOTE(review): presumably the coordinate operand has to be a four-element
  // vector whose last slot is ignored - confirm against the PTX ISA before
  // "fixing" the duplicate.
  class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(
                    inst,
                    " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};"),
                []>;
  // Emits the two surface-operand flavours of a SUST_2D_ARRAY_V4_base
  // instruction:
  //   <name>_R - the surface operand $s is an Int64Regs register.
  //   <name>_I - the surface operand $s is an i64imm immediate.
  multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
    def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
  }
  // Instantiations of SUST_2D_ARRAY_V4 (mnemonic geometry "a2d.v4"). Only b8,
  // b16 and b32 forms are defined; b8 and b16 both take Int16Regs values.

  // sust.b.a2d.v4, .clamp
  defm SUST_B_2D_ARRAY_V4B8_CLAMP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B16_CLAMP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B32_CLAMP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

  // sust.b.a2d.v4, .trap
  defm SUST_B_2D_ARRAY_V4B8_TRAP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B16_TRAP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B32_TRAP
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

  // sust.b.a2d.v4, .zero
  defm SUST_B_2D_ARRAY_V4B8_ZERO
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B16_ZERO
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
  defm SUST_B_2D_ARRAY_V4B32_ZERO
      : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

  // sust.p.a2d.v4, .trap
  defm SUST_P_2D_ARRAY_V4B8_TRAP
      : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_V4B16_TRAP
      : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
  defm SUST_P_2D_ARRAY_V4B32_TRAP
      : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
  // Common shape of the single-value SUST_3D instructions.
  //   inst   - mnemonic text placed at the front of the assembly string.
  //   intype - register class of the stored value $r.
  //   surf   - dag that supplies the leading surface operand; the assembly
  //            string refers to it as $s.
  // The input list is `surf` followed by the Int32Regs coordinates $x, $y, $z
  // and the value. The instruction has no outputs, and the trailing list
  // argument of NVPTXInst is left empty.
  //
  // The assembly string prints a four-element coordinate list in which $z
  // appears twice.
  // NOTE(review): presumably the coordinate operand has to be a four-element
  // vector whose last slot is ignored - confirm against the PTX ISA before
  // "fixing" the duplicate.
  class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r)),
                !strconcat(inst,
                           " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
                []>;
  // Emits the two surface-operand flavours of a SUST_3D_base instruction:
  //   <name>_R - the surface operand $s is an Int64Regs register.
  //   <name>_I - the surface operand $s is an i64imm immediate.
  multiclass SUST_3D<string inst, NVPTXRegClass intype> {
    def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
  }
  // Instantiations of SUST_3D. The b8 and b16 forms both take Int16Regs
  // values; b32 takes Int32Regs and b64 takes Int64Regs.

  // sust.b.3d, .clamp
  defm SUST_B_3D_B8_CLAMP
    : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
  defm SUST_B_3D_B16_CLAMP
    : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
  defm SUST_B_3D_B32_CLAMP
    : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
  defm SUST_B_3D_B64_CLAMP
    : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

  // sust.b.3d, .trap
  defm SUST_B_3D_B8_TRAP
    : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
  defm SUST_B_3D_B16_TRAP
    : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
  defm SUST_B_3D_B32_TRAP
    : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
  defm SUST_B_3D_B64_TRAP
    : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

  // sust.b.3d, .zero
  defm SUST_B_3D_B8_ZERO
    : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
  defm SUST_B_3D_B16_ZERO
    : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
  defm SUST_B_3D_B32_ZERO
    : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
  defm SUST_B_3D_B64_ZERO
    : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

  // sust.p.3d, .trap (no b64 form is instantiated here)
  defm SUST_P_3D_B8_TRAP
    : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
  defm SUST_P_3D_B16_TRAP
    : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
  defm SUST_P_3D_B32_TRAP
    : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
  // Common shape of the two-value SUST_3D_V2 instructions.
  //   inst   - mnemonic text placed at the front of the assembly string.
  //   intype - register class of the stored values $r and $g.
  //   surf   - dag that supplies the leading surface operand; the assembly
  //            string refers to it as $s.
  // The input list is `surf` followed by the Int32Regs coordinates $x, $y, $z
  // and the two values. The instruction has no outputs, and the trailing list
  // argument of NVPTXInst is left empty.
  //
  // The assembly string prints a four-element coordinate list in which $z
  // appears twice.
  // NOTE(review): presumably the coordinate operand has to be a four-element
  // vector whose last slot is ignored - confirm against the PTX ISA before
  // "fixing" the duplicate.
  class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r, intype:$g)),
                !strconcat(inst,
                           " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};"),
                []>;
  // Emits the two surface-operand flavours of a SUST_3D_V2_base instruction:
  //   <name>_R - the surface operand $s is an Int64Regs register.
  //   <name>_I - the surface operand $s is an i64imm immediate.
  multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
    def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
  }
  // Instantiations of SUST_3D_V2. The b8 and b16 forms both take Int16Regs
  // values; b32 takes Int32Regs and b64 takes Int64Regs.

  // sust.b.3d.v2, .clamp
  defm SUST_B_3D_V2B8_CLAMP
    : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
  defm SUST_B_3D_V2B16_CLAMP
    : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
  defm SUST_B_3D_V2B32_CLAMP
    : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
  defm SUST_B_3D_V2B64_CLAMP
    : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

  // sust.b.3d.v2, .trap
  defm SUST_B_3D_V2B8_TRAP
    : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
  defm SUST_B_3D_V2B16_TRAP
    : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
  defm SUST_B_3D_V2B32_TRAP
    : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
  defm SUST_B_3D_V2B64_TRAP
    : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

  // sust.b.3d.v2, .zero
  defm SUST_B_3D_V2B8_ZERO
    : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
  defm SUST_B_3D_V2B16_ZERO
    : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
  defm SUST_B_3D_V2B32_ZERO
    : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
  defm SUST_B_3D_V2B64_ZERO
    : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

  // sust.p.3d.v2, .trap (no b64 form is instantiated here)
  defm SUST_P_3D_V2B8_TRAP
    : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
  defm SUST_P_3D_V2B16_TRAP
    : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
  defm SUST_P_3D_V2B32_TRAP
    : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
  // Common shape of the four-value SUST_3D_V4 instructions.
  //   inst   - mnemonic text placed at the front of the assembly string.
  //   intype - register class of the stored values $r, $g, $b, $a.
  //   surf   - dag that supplies the leading surface operand; the assembly
  //            string refers to it as $s.
  // The input list is `surf` followed by the Int32Regs coordinates $x, $y, $z
  // and the four values. The instruction has no outputs, and the trailing
  // list argument of NVPTXInst is left empty.
  //
  // The assembly string prints a four-element coordinate list in which $z
  // appears twice.
  // NOTE(review): presumably the coordinate operand has to be a four-element
  // vector whose last slot is ignored - confirm against the PTX ISA before
  // "fixing" the duplicate.
  class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(
                    inst,
                    " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};"),
                []>;
  // Emits the two surface-operand flavours of a SUST_3D_V4_base instruction:
  //   <name>_R - the surface operand $s is an Int64Regs register.
  //   <name>_I - the surface operand $s is an i64imm immediate.
  multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
    def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
    def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
  }
  // Instantiations of SUST_3D_V4. Only b8, b16 and b32 forms are defined; the
  // b8 and b16 forms both take Int16Regs values.

  // sust.b.3d.v4, .clamp
  defm SUST_B_3D_V4B8_CLAMP
    : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
  defm SUST_B_3D_V4B16_CLAMP
    : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
  defm SUST_B_3D_V4B32_CLAMP
    : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

  // sust.b.3d.v4, .trap
  defm SUST_B_3D_V4B8_TRAP
    : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
  defm SUST_B_3D_V4B16_TRAP
    : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
  defm SUST_B_3D_V4B32_TRAP
    : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

  // sust.b.3d.v4, .zero
  defm SUST_B_3D_V4B8_ZERO
    : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
  defm SUST_B_3D_V4B16_ZERO
    : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
  defm SUST_B_3D_V4B32_ZERO
    : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

  // sust.p.3d.v4, .trap
  defm SUST_P_3D_V4B8_TRAP
    : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
  defm SUST_P_3D_V4B16_TRAP
    : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
  defm SUST_P_3D_V4B32_TRAP
    : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
  4388. }
  4389. // Surface store instruction patterns
  4390. // I'm not sure why we can't just include these in the instruction definitions,
  4391. // but TableGen complains of type errors :(
  4392. // .clamp variant
  // sust.b.1d, .clamp: each intrinsic is selected to the _R (Int64Regs surface
  // operand) instruction of the matching element type. Operands are passed
  // through in the same order: surface $s, coordinate $x, then the values.

  // Single value.
  def : Pat<(int_nvvm_sust_b_1d_i8_clamp
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
            (SUST_B_1D_B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_i16_clamp
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
            (SUST_B_1D_B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_i32_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
            (SUST_B_1D_B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_i64_clamp
               Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
            (SUST_B_1D_B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

  // Two values ($r, $g).
  def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_1D_V2B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_1D_V2B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g),
            (SUST_B_1D_V2B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g),
            (SUST_B_1D_V2B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g)>;

  // Four values ($r, $g, $b, $a).
  def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_1D_V4B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_1D_V4B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
            (SUST_B_1D_V4B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // sust.b.1d.array, .clamp: each intrinsic is selected to the _R (Int64Regs
  // surface operand) instruction of the matching element type. Operands are
  // passed through in the same order: surface $s, $l, coordinate $x, then the
  // values.

  // Single value.
  def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r),
            (SUST_B_1D_ARRAY_B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r),
            (SUST_B_1D_ARRAY_B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r),
            (SUST_B_1D_ARRAY_B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r),
            (SUST_B_1D_ARRAY_B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r)>;

  // Two values ($r, $g).
  def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_1D_ARRAY_V2B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_1D_ARRAY_V2B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g),
            (SUST_B_1D_ARRAY_V2B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g),
            (SUST_B_1D_ARRAY_V2B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g)>;

  // Four values ($r, $g, $b, $a).
  def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_1D_ARRAY_V4B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_1D_ARRAY_V4B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
            (SUST_B_1D_ARRAY_V4B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // sust.b.2d, .clamp: each intrinsic is selected to the _R (Int64Regs surface
  // operand) instruction of the matching element type. Operands are passed
  // through in the same order: surface $s, coordinates $x, $y, then the
  // values.

  // Single value.
  def : Pat<(int_nvvm_sust_b_2d_i8_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r),
            (SUST_B_2D_B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_2d_i16_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r),
            (SUST_B_2D_B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_2d_i32_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r),
            (SUST_B_2D_B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_2d_i64_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r),
            (SUST_B_2D_B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r)>;

  // Two values ($r, $g).
  def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_2D_V2B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_2D_V2B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g),
            (SUST_B_2D_V2B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r, Int64Regs:$g),
            (SUST_B_2D_V2B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r, Int64Regs:$g)>;

  // Four values ($r, $g, $b, $a).
  def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_2D_V4B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_2D_V4B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
            (SUST_B_2D_V4B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // sust.b.2d.array, .clamp: each intrinsic is selected to the _R (Int64Regs
  // surface operand) instruction of the matching element type. Operands are
  // passed through in the same order: surface $s, $l, coordinates $x, $y,
  // then the values.

  // Single value.
  def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r),
            (SUST_B_2D_ARRAY_B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r),
            (SUST_B_2D_ARRAY_B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r),
            (SUST_B_2D_ARRAY_B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r),
            (SUST_B_2D_ARRAY_B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r)>;

  // Two values ($r, $g).
  def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_2D_ARRAY_V2B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_2D_ARRAY_V2B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g),
            (SUST_B_2D_ARRAY_V2B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r, Int64Regs:$g),
            (SUST_B_2D_ARRAY_V2B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r, Int64Regs:$g)>;

  // Four values ($r, $g, $b, $a).
  def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_2D_ARRAY_V4B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_2D_ARRAY_V4B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
            (SUST_B_2D_ARRAY_V4B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // sust.b.3d, .clamp: each intrinsic is selected to the _R (Int64Regs surface
  // operand) instruction of the matching element type. Operands are passed
  // through in the same order: surface $s, coordinates $x, $y, $z, then the
  // values.

  // Single value.
  def : Pat<(int_nvvm_sust_b_3d_i8_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r),
            (SUST_B_3D_B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_3d_i16_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r),
            (SUST_B_3D_B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_3d_i32_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int32Regs:$r),
            (SUST_B_3D_B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int32Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_3d_i64_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int64Regs:$r),
            (SUST_B_3D_B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int64Regs:$r)>;

  // Two values ($r, $g).
  def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_3D_V2B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_3D_V2B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int32Regs:$r, Int32Regs:$g),
            (SUST_B_3D_V2B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int32Regs:$r, Int32Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int64Regs:$r, Int64Regs:$g),
            (SUST_B_3D_V2B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int64Regs:$r, Int64Regs:$g)>;

  // Four values ($r, $g, $b, $a).
  def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_3D_V4B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_3D_V4B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
            (SUST_B_3D_V4B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  4656. // .trap variant
  // sust.b.1d, .trap: each intrinsic is selected to the _R (Int64Regs surface
  // operand) instruction of the matching element type. Operands are passed
  // through in the same order: surface $s, coordinate $x, then the values.

  // Single value.
  def : Pat<(int_nvvm_sust_b_1d_i8_trap
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
            (SUST_B_1D_B8_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_i16_trap
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
            (SUST_B_1D_B16_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_i32_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
            (SUST_B_1D_B32_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_i64_trap
               Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
            (SUST_B_1D_B64_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

  // Two values ($r, $g).
  def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_1D_V2B8_TRAP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_1D_V2B16_TRAP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g),
            (SUST_B_1D_V2B32_TRAP_R
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
               Int64Regs:$s, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g),
            (SUST_B_1D_V2B64_TRAP_R
               Int64Regs:$s, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g)>;

  // Four values ($r, $g, $b, $a).
  def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_1D_V4B8_TRAP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_1D_V4B16_TRAP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
            (SUST_B_1D_V4B32_TRAP_R
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // sust.b.1d.array, .trap: each intrinsic is selected to the _R (Int64Regs
  // surface operand) instruction of the matching element type. Operands are
  // passed through in the same order: surface $s, $l, coordinate $x, then the
  // values.

  // Single value.
  def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r),
            (SUST_B_1D_ARRAY_B8_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r),
            (SUST_B_1D_ARRAY_B16_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r),
            (SUST_B_1D_ARRAY_B32_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r),
            (SUST_B_1D_ARRAY_B64_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r)>;

  // Two values ($r, $g).
  def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_1D_ARRAY_V2B8_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_1D_ARRAY_V2B16_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g),
            (SUST_B_1D_ARRAY_V2B32_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g),
            (SUST_B_1D_ARRAY_V2B64_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g)>;

  // Four values ($r, $g, $b, $a).
  def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_1D_ARRAY_V4B8_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_1D_ARRAY_V4B16_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
            (SUST_B_1D_ARRAY_V4B32_TRAP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // sust.b.2d, .trap: each intrinsic is selected to the _R (Int64Regs surface
  // operand) instruction of the matching element type. Operands are passed
  // through in the same order: surface $s, coordinates $x, $y, then the
  // values.

  // Single value.
  def : Pat<(int_nvvm_sust_b_2d_i8_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r),
            (SUST_B_2D_B8_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_2d_i16_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r),
            (SUST_B_2D_B16_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_2d_i32_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r),
            (SUST_B_2D_B32_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r)>;
  def : Pat<(int_nvvm_sust_b_2d_i64_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r),
            (SUST_B_2D_B64_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r)>;

  // Two values ($r, $g).
  def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_2D_V2B8_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g),
            (SUST_B_2D_V2B16_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g),
            (SUST_B_2D_V2B32_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r, Int32Regs:$g)>;
  def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r, Int64Regs:$g),
            (SUST_B_2D_V2B64_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r, Int64Regs:$g)>;

  // Four values ($r, $g, $b, $a).
  def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_2D_V4B8_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
            (SUST_B_2D_V4B16_TRAP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  4789. def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
  4790. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4791. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  4792. (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
  4793. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  // int_nvvm_sust_b_2d_array_<ty>_trap -> SUST_B_2D_ARRAY_<TY>_TRAP_R.
  // Covers the scalar and v2 forms for 8/16/32/64-bit data and the v4 forms
  // for 8/16/32-bit data. Operands ($s, $l, $x, $y, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32, 64] in {
    defvar DataRC = !if(!eq(bits, 64), Int64Regs,
                        !if(!eq(bits, 32), Int32Regs, Int16Regs));
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r),
              (!cast<Instruction>("SUST_B_2D_ARRAY_B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v2i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_B_2D_ARRAY_V2B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g)>;
  }
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v4i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_B_2D_ARRAY_V4B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  // int_nvvm_sust_b_3d_<ty>_trap -> SUST_B_3D_<TY>_TRAP_R.
  // Covers the scalar and v2 forms for 8/16/32/64-bit data and the v4 forms
  // for 8/16/32-bit data. Operands ($s, $x, $y, $z, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32, 64] in {
    defvar DataRC = !if(!eq(bits, 64), Int64Regs,
                        !if(!eq(bits, 32), Int32Regs, Int16Regs));
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r),
              (!cast<Instruction>("SUST_B_3D_B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v2i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_B_3D_V2B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r, DataRC:$g)>;
  }
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v4i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_B_3D_V4B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  4920. // .zero variant
  // int_nvvm_sust_b_1d_<ty>_zero -> SUST_B_1D_<TY>_ZERO_R.
  // Covers the scalar and v2 forms for 8/16/32/64-bit data and the v4 forms
  // for 8/16/32-bit data. Operands ($s, $x, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32, 64] in {
    defvar DataRC = !if(!eq(bits, 64), Int64Regs,
                        !if(!eq(bits, 32), Int32Regs, Int16Regs));
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r),
              (!cast<Instruction>("SUST_B_1D_B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_v2i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_B_1D_V2B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r, DataRC:$g)>;
  }
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_v4i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_B_1D_V4B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  // int_nvvm_sust_b_1d_array_<ty>_zero -> SUST_B_1D_ARRAY_<TY>_ZERO_R.
  // Covers the scalar and v2 forms for 8/16/32/64-bit data and the v4 forms
  // for 8/16/32-bit data. Operands ($s, $l, $x, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32, 64] in {
    defvar DataRC = !if(!eq(bits, 64), Int64Regs,
                        !if(!eq(bits, 32), Int32Regs, Int16Regs));
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r),
              (!cast<Instruction>("SUST_B_1D_ARRAY_B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v2i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_B_1D_ARRAY_V2B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r, DataRC:$g)>;
  }
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v4i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_B_1D_ARRAY_V4B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  // int_nvvm_sust_b_2d_<ty>_zero -> SUST_B_2D_<TY>_ZERO_R.
  // Covers the scalar and v2 forms for 8/16/32/64-bit data and the v4 forms
  // for 8/16/32-bit data. Operands ($s, $x, $y, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32, 64] in {
    defvar DataRC = !if(!eq(bits, 64), Int64Regs,
                        !if(!eq(bits, 32), Int32Regs, Int16Regs));
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r),
              (!cast<Instruction>("SUST_B_2D_B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v2i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_B_2D_V2B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g)>;
  }
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v4i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_B_2D_V4B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  // int_nvvm_sust_b_2d_array_<ty>_zero -> SUST_B_2D_ARRAY_<TY>_ZERO_R.
  // Covers the scalar and v2 forms for 8/16/32/64-bit data and the v4 forms
  // for 8/16/32-bit data. Operands ($s, $l, $x, $y, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32, 64] in {
    defvar DataRC = !if(!eq(bits, 64), Int64Regs,
                        !if(!eq(bits, 32), Int32Regs, Int16Regs));
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r),
              (!cast<Instruction>("SUST_B_2D_ARRAY_B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v2i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_B_2D_ARRAY_V2B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g)>;
  }
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v4i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_B_2D_ARRAY_V4B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  // int_nvvm_sust_b_3d_<ty>_zero -> SUST_B_3D_<TY>_ZERO_R.
  // Covers the scalar and v2 forms for 8/16/32/64-bit data and the v4 forms
  // for 8/16/32-bit data. Operands ($s, $x, $y, $z, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32, 64] in {
    defvar DataRC = !if(!eq(bits, 64), Int64Regs,
                        !if(!eq(bits, 32), Int32Regs, Int16Regs));
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r),
              (!cast<Instruction>("SUST_B_3D_B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v2i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_B_3D_V2B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r, DataRC:$g)>;
  }
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v4i" # bits # "_zero")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_B_3D_V4B" # bits # "_ZERO_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  // int_nvvm_sust_p_1d_<ty>_trap -> SUST_P_1D_<TY>_TRAP_R.
  // Covers the scalar, v2 and v4 forms for 8/16/32-bit data (this family has
  // no 64-bit patterns). Operands ($s, $x, data...) are forwarded unchanged;
  // 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r),
              (!cast<Instruction>("SUST_P_1D_B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v2i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_P_1D_V2B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r, DataRC:$g)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v4i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_P_1D_V4B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  // int_nvvm_sust_p_1d_array_<ty>_trap -> SUST_P_1D_ARRAY_<TY>_TRAP_R.
  // Covers the scalar, v2 and v4 forms for 8/16/32-bit data (this family has
  // no 64-bit patterns). Operands ($s, $l, $x, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r),
              (!cast<Instruction>("SUST_P_1D_ARRAY_B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v2i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_P_1D_ARRAY_V2B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r, DataRC:$g)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v4i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_P_1D_ARRAY_V4B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  // int_nvvm_sust_p_2d_<ty>_trap -> SUST_P_2D_<TY>_TRAP_R.
  // Covers the scalar, v2 and v4 forms for 8/16/32-bit data (this family has
  // no 64-bit patterns). Operands ($s, $x, $y, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r),
              (!cast<Instruction>("SUST_P_2D_B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_v2i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_P_2D_V2B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_v4i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_P_2D_V4B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  // int_nvvm_sust_p_2d_array_<ty>_trap -> SUST_P_2D_ARRAY_<TY>_TRAP_R.
  // Covers the scalar, v2 and v4 forms for 8/16/32-bit data (this family has
  // no 64-bit patterns). Operands ($s, $l, $x, $y, data...) are forwarded
  // unchanged; 8-bit and 16-bit data values are both carried in Int16Regs.
  foreach bits = [8, 16, 32] in {
    defvar DataRC = !if(!eq(bits, 32), Int32Regs, Int16Regs);
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_array_i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r),
              (!cast<Instruction>("SUST_P_2D_ARRAY_B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_array_v2i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g),
              (!cast<Instruction>("SUST_P_2D_ARRAY_V2B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g)>;
    def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_array_v4i" # bits # "_trap")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
              (!cast<Instruction>("SUST_P_2D_ARRAY_V4B" # bits # "_TRAP_R")
                 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
                 DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
  }
  5348. def : Pat<(int_nvvm_sust_p_3d_i8_trap
  5349. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5350. Int16Regs:$r),
  5351. (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
  5352. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5353. Int16Regs:$r)>;
  5354. def : Pat<(int_nvvm_sust_p_3d_i16_trap
  5355. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5356. Int16Regs:$r),
  5357. (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
  5358. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5359. Int16Regs:$r)>;
  5360. def : Pat<(int_nvvm_sust_p_3d_i32_trap
  5361. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5362. Int32Regs:$r),
  5363. (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
  5364. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5365. Int32Regs:$r)>;
  5366. def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
  5367. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5368. Int16Regs:$r, Int16Regs:$g),
  5369. (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
  5370. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5371. Int16Regs:$r, Int16Regs:$g)>;
  5372. def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
  5373. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5374. Int16Regs:$r, Int16Regs:$g),
  5375. (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
  5376. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5377. Int16Regs:$r, Int16Regs:$g)>;
  5378. def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
  5379. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5380. Int32Regs:$r, Int32Regs:$g),
  5381. (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
  5382. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5383. Int32Regs:$r, Int32Regs:$g)>;
  5384. def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
  5385. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5386. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  5387. (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
  5388. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5389. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  5390. def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
  5391. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5392. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  5393. (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
  5394. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5395. Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
  5396. def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
  5397. Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5398. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  5399. (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
  5400. Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
  5401. Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
  5402. //-----------------------------------
  5403. // Read Special Registers
  5404. //-----------------------------------
  5405. class PTX_READ_SREG_R64<string regname, Intrinsic intop>
  5406. : NVPTXInst<(outs Int64Regs:$d), (ins),
  5407. !strconcat("mov.u64 \t$d, %", regname, ";"),
  5408. [(set Int64Regs:$d, (intop))]>;
  5409. class PTX_READ_SREG_R32<string regname, Intrinsic intop>
  5410. : NVPTXInst<(outs Int32Regs:$d), (ins),
  5411. !strconcat("mov.u32 \t$d, %", regname, ";"),
  5412. [(set Int32Regs:$d, (intop))]>;
  5413. // TODO Add read vector-version of special registers
  5414. def INT_PTX_SREG_TID_X :
  5415. PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
  5416. def INT_PTX_SREG_TID_Y :
  5417. PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
  5418. def INT_PTX_SREG_TID_Z :
  5419. PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
  5420. def INT_PTX_SREG_TID_W :
  5421. PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
  5422. def INT_PTX_SREG_NTID_X :
  5423. PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
  5424. def INT_PTX_SREG_NTID_Y :
  5425. PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
  5426. def INT_PTX_SREG_NTID_Z :
  5427. PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
  5428. def INT_PTX_SREG_NTID_W :
  5429. PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
  5430. def INT_PTX_SREG_LANEID :
  5431. PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
  5432. def INT_PTX_SREG_WARPID :
  5433. PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
  5434. def INT_PTX_SREG_NWARPID :
  5435. PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
  5436. def INT_PTX_SREG_CTAID_X :
  5437. PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
  5438. def INT_PTX_SREG_CTAID_Y :
  5439. PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
  5440. def INT_PTX_SREG_CTAID_Z :
  5441. PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
  5442. def INT_PTX_SREG_CTAID_W :
  5443. PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
  5444. def INT_PTX_SREG_NCTAID_X :
  5445. PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
  5446. def INT_PTX_SREG_NCTAID_Y :
  5447. PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
  5448. def INT_PTX_SREG_NCTAID_Z :
  5449. PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
  5450. def INT_PTX_SREG_NCTAID_W :
  5451. PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
  5452. def INT_PTX_SREG_SMID :
  5453. PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
  5454. def INT_PTX_SREG_NSMID :
  5455. PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
  5456. def INT_PTX_SREG_GRIDID :
  5457. PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
  5458. def INT_PTX_SREG_LANEMASK_EQ :
  5459. PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
  5460. def INT_PTX_SREG_LANEMASK_LE :
  5461. PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
  5462. def INT_PTX_SREG_LANEMASK_LT :
  5463. PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
  5464. def INT_PTX_SREG_LANEMASK_GE :
  5465. PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
  5466. def INT_PTX_SREG_LANEMASK_GT :
  5467. PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
  5468. def INT_PTX_SREG_CLOCK :
  5469. PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
  5470. def INT_PTX_SREG_CLOCK64 :
  5471. PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
  5472. def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
  5473. def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
  5474. def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
  5475. def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
  5476. // TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
  5477. // handle the constant.
  5478. def INT_PTX_SREG_WARPSIZE :
  5479. NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
  5480. [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
  5481. // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
  5482. // In addition to target-independent fields provided by WMMA_REGS, it adds
  5483. // the fields commonly used to implement specific PTX instruction -- register
  5484. // types and names, constraints, parts of assembly, etc.
  5485. class WMMA_REGINFO<WMMA_REGS r, string op>
  5486. : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  5487. // NVPTX register types used to carry fragment data.
  5488. NVPTXRegClass regclass = !cond(
  5489. !eq(ptx_elt_type, "f16") : Float16x2Regs,
  5490. !eq(ptx_elt_type, "f32") : Float32Regs,
  5491. !eq(ptx_elt_type, "f64") : Float64Regs,
  5492. !eq(ptx_elt_type, "bf16") : Int32Regs,
  5493. !eq(ptx_elt_type, "tf32") : Int32Regs,
  5494. !eq(ptx_elt_type, "s32") : Int32Regs,
  5495. !eq(ptx_elt_type, "b16") : Int32Regs,
  5496. !eq(ptx_elt_type, "s8") : Int32Regs,
  5497. !eq(ptx_elt_type, "u8") : Int32Regs,
  5498. !eq(ptx_elt_type, "s4") : Int32Regs,
  5499. !eq(ptx_elt_type, "u4") : Int32Regs,
  5500. !eq(ptx_elt_type, "b1") : Int32Regs);
  5501. // Instruction input/output arguments for the fragment.
  5502. list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
  5503. // List of register names for the fragment -- ["ra0", "ra1",...]
  5504. list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
  5505. // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  5506. string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
  5507. // Predicates for particular fragment variant. Technically those are
  5508. // per-instruction predicates, but currently all fragments that can be used in
  5509. // a given instruction are subject to the same constraints, so an instruction
  5510. // can use predicates from any of its fragments. If/when this is no
  5511. // longer the case, we can concat all per-fragment predicates to enforce that
  5512. // all fragments of the instruction are viable.
  5513. list<Predicate> Predicates = !cond(
  5514. // fp16 -> fp16/fp32 @ m16n16k16
  5515. !and(!eq(geom, "m16n16k16"),
  5516. !or(!eq(ptx_elt_type, "f16"),
  5517. !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
  5518. !and(!eq(geom,"m8n8k4"),
  5519. !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],
  5520. // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
  5521. !and(!or(!eq(geom, "m8n32k16"),
  5522. !eq(geom, "m32n8k16")),
  5523. !or(!eq(ptx_elt_type, "f16"),
  5524. !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
  5525. // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
  5526. !and(!or(!eq(geom,"m16n16k16"),
  5527. !eq(geom,"m8n32k16"),
  5528. !eq(geom,"m32n8k16")),
  5529. !or(!eq(ptx_elt_type, "u8"),
  5530. !eq(ptx_elt_type, "s8"),
  5531. !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
  5532. !and(!or(!eq(geom,"m16n16k16"),
  5533. !eq(geom,"m8n32k16"),
  5534. !eq(geom,"m32n8k16")),
  5535. !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],
  5536. !and(!eq(geom,"m16n16k8"),
  5537. !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],
  5538. !and(!eq(geom,"m16n16k8"),
  5539. !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],
  5540. // b1 -> s32 @ m8n8k128(b1)
  5541. !and(!ne(op,"mma"),
  5542. !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],
  5543. // u4/s4 -> s32 @ m8n8k32 (u4/s4)
  5544. !and(!ne(op,"mma"),
  5545. !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],
  5546. !or(!eq(geom,"m16n8k8"),
  5547. !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],
  5548. !and(!ne(ptx_elt_type,"f64"),
  5549. !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],
  5550. // mma m8n8k32 requires higher PTX version
  5551. !and(!eq(op,"mma"),
  5552. !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],
  5553. !and(!eq(ptx_elt_type,"f64"),
  5554. !eq(geom, "m8n8k4")) : [hasSM80, hasPTX70],
  5555. !and(!eq(op,"mma"),
  5556. !or(!eq(geom, "m16n8k16"),
  5557. !eq(geom, "m16n8k4"),
  5558. !eq(geom, "m16n8k32"),
  5559. !eq(geom, "m16n8k64"),
  5560. !eq(geom, "m8n8k128"),
  5561. !eq(geom, "m16n8k128"),
  5562. !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],
  5563. !and(!eq(op,"ldmatrix"),
  5564. !eq(ptx_elt_type,"b16"),
  5565. !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);
  5566. // template DAGs for instruction inputs/output.
  5567. dag Outs = !dag(outs, ptx_regs, reg_names);
  5568. dag Ins = !dag(ins, ptx_regs, reg_names);
  5569. }
  5570. // Convert dag of arguments into a dag to match given intrinsic.
  5571. class BuildPatternI<Intrinsic Intr, dag Ins> {
  5572. // Build a dag pattern that matches the intrinsic call.
  5573. dag ret = !foreach(tmp, Ins,
  5574. !subst(imem, ADDRvar,
  5575. !subst(MEMri64, ADDRri64,
  5576. !subst(MEMri, ADDRri,
  5577. !subst(ins, Intr, tmp)))));
  5578. }
  5579. // Same as above, but uses PatFrag instead of an Intrinsic.
  5580. class BuildPatternPF<PatFrag Intr, dag Ins> {
  5581. // Build a dag pattern that matches the intrinsic call.
  5582. dag ret = !foreach(tmp, Ins,
  5583. !subst(imem, ADDRvar,
  5584. !subst(MEMri64, ADDRri64,
  5585. !subst(MEMri, ADDRri,
  5586. !subst(ins, Intr, tmp)))));
  5587. }
  5588. // Common WMMA-related fields used for building patterns for all MMA instructions.
  5589. class WMMA_INSTR<string _Intr, list<dag> _Args>
  5590. : NVPTXInst<(outs), (ins), "?", []> {
  5591. Intrinsic Intr = !cast<Intrinsic>(_Intr);
  5592. // Concatenate all arguments into a single dag.
  5593. dag Args = !foldl((ins), _Args, a, b, !con(a,b));
  5594. // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  5595. dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
  5596. }
  5597. //
  5598. // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
  5599. //
  5600. class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
  5601. DAGOperand SrcOp>
  5602. : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
  5603. [!con((ins SrcOp:$src),
  5604. !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
  5605. Requires<Frag.Predicates> {
  5606. // Load/store intrinsics are overloaded on pointer's address space.
  5607. // To match the right intrinsic, we need to build AS-constrained PatFrag.
  5608. // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
  5609. dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  5610. dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
  5611. // Build PatFrag that only matches particular address space.
  5612. PatFrag IntrFrag = PatFrag<PFOperands,
  5613. PFOperandsIntr,
  5614. !cond(!eq(Space, ".shared"): AS_match.shared,
  5615. !eq(Space, ".global"): AS_match.global,
  5616. true: AS_match.generic)>;
  5617. // Build AS-constrained pattern.
  5618. let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
  5619. let OutOperandList = Frag.Outs;
  5620. let InOperandList = !con(Args, (ins MmaCode:$ptx));
  5621. let AsmString = "wmma.load."
  5622. # Frag.frag
  5623. # ".sync"
  5624. # "${ptx:aligned}"
  5625. # "." # Layout
  5626. # "." # Frag.geom
  5627. # Space
  5628. # "." # Frag.ptx_elt_type # " \t"
  5629. # Frag.regstring
  5630. # ", [$src]"
  5631. # !if(WithStride, ", $ldm", "")
  5632. # ";";
  5633. }
  5634. //
  5635. // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
  5636. //
  5637. class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
  5638. bit WithStride, DAGOperand DstOp>
  5639. : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
  5640. [!con((ins DstOp:$dst),
  5641. Frag.Ins,
  5642. !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
  5643. Requires<Frag.Predicates> {
  5644. // Load/store intrinsics are overloaded on pointer's address space.
  5645. // To match the right intrinsic, we need to build AS-constrained PatFrag.
  5646. // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
  5647. dag PFOperands = !con((ops node:$dst),
  5648. !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
  5649. !if(WithStride, (ops node:$ldm), (ops)));
  5650. // Build PatFrag that only matches particular address space.
  5651. PatFrag IntrFrag = PatFrag<PFOperands,
  5652. !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
  5653. !cond(!eq(Space, ".shared"): AS_match.shared,
  5654. !eq(Space, ".global"): AS_match.global,
  5655. true: AS_match.generic)>;
  5656. // Build AS-constrained pattern.
  5657. let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
  5658. let InOperandList = !con(Args, (ins MmaCode:$ptx));
  5659. let OutOperandList = (outs);
  5660. let AsmString = "wmma.store.d.sync"
  5661. # "${ptx:aligned}"
  5662. # "." # Layout
  5663. # "." # Frag.geom
  5664. # Space
  5665. # "." # Frag.ptx_elt_type
  5666. # " \t[$dst],"
  5667. # Frag.regstring
  5668. # !if(WithStride, ", $ldm", "")
  5669. # ";";
  5670. }
  5671. // Create all load/store variants
  5672. defset list<WMMA_INSTR> MMA_LDSTs = {
  5673. foreach layout = ["row", "col"] in {
  5674. foreach stride = [false, true] in {
  5675. foreach space = [".global", ".shared", ""] in {
  5676. foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
  5677. foreach frag = NVVM_MMA_OPS.all_ld_ops in
  5678. if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
  5679. def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
  5680. foreach frag = NVVM_MMA_OPS.all_st_ops in
  5681. if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
  5682. def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
  5683. } // addr
  5684. } // space
  5685. } // stride
  5686. } // layout
  5687. } // defset
  5688. // B1 instruction variants need extra constraints.
  5689. class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  5690. string Op = b1op;
  5691. WMMA_REGINFO Frag = FragA;
  5692. list<Predicate> ret = !listconcat(
  5693. FragA.Predicates,
  5694. !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[])
  5695. );
  5696. }
  5697. // WMMA.MMA
  5698. class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
  5699. WMMA_REGINFO FragC, WMMA_REGINFO FragD,
  5700. string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  5701. : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
  5702. [FragA.Ins, FragB.Ins, FragC.Ins]>,
  5703. // Requires does not seem to have effect on Instruction w/o Patterns.
  5704. // We set it here anyways and propagate to the Pat<> we construct below.
  5705. Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  5706. let OutOperandList = FragD.Outs;
  5707. let InOperandList = !con(Args, (ins MmaCode:$ptx));
  5708. string TypeList = !cond(
  5709. !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
  5710. # "." # FragC.ptx_elt_type,
  5711. 1: "." # FragD.ptx_elt_type
  5712. # "." # FragA.ptx_elt_type
  5713. # "." # FragB.ptx_elt_type
  5714. # "." # FragC.ptx_elt_type,
  5715. );
  5716. let AsmString = "wmma.mma"
  5717. # b1op
  5718. # ".sync"
  5719. # "${ptx:aligned}"
  5720. # "." # ALayout
  5721. # "." # BLayout
  5722. # "." # FragA.geom
  5723. # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
  5724. # TypeList
  5725. # !if(Satfinite, ".satfinite", "") # "\n\t\t"
  5726. # FragD.regstring # ",\n\t\t"
  5727. # FragA.regstring # ",\n\t\t"
  5728. # FragB.regstring # ",\n\t\t"
  5729. # FragC.regstring # ";";
  5730. }
  5731. defset list<WMMA_INSTR> WMMAs = {
  5732. foreach layout_a = ["row", "col"] in {
  5733. foreach layout_b = ["row", "col"] in {
  5734. foreach satf = [0, 1] in {
  5735. foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
  5736. foreach op = NVVM_MMA_OPS.all_wmma_ops in {
  5737. foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
  5738. if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
  5739. def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
  5740. WMMA_REGINFO<op[1], "wmma.mma">,
  5741. WMMA_REGINFO<op[2], "wmma.mma">,
  5742. WMMA_REGINFO<op[3], "wmma.mma">,
  5743. layout_a, layout_b, satf, rnd, b1op>;
  5744. }
  5745. } // b1op
  5746. } // op
  5747. } // rnd
  5748. } // satf
  5749. } // layout_b
  5750. } // layout_a
  5751. } // defset
  5752. // MMA
  5753. class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
  5754. WMMA_REGINFO FragC, WMMA_REGINFO FragD,
  5755. string ALayout, string BLayout, int Satfinite, string b1op>
  5756. : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
  5757. [FragA.Ins, FragB.Ins, FragC.Ins]>,
  5758. // Requires does not seem to have effect on Instruction w/o Patterns.
  5759. // We set it here anyways and propagate to the Pat<> we construct below.
  5760. Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  5761. let OutOperandList = FragD.Outs;
  5762. let InOperandList = !con(Args, (ins MmaCode:$ptx));
  5763. string TypeList = "." # FragD.ptx_elt_type
  5764. # "." # FragA.ptx_elt_type
  5765. # "." # FragB.ptx_elt_type
  5766. # "." # FragC.ptx_elt_type;
  5767. let AsmString = "mma.sync.aligned."
  5768. # FragA.geom
  5769. # "." # ALayout
  5770. # "." # BLayout
  5771. # !if(Satfinite, ".satfinite", "")
  5772. # TypeList
  5773. # b1op # "\n\t\t"
  5774. # FragD.regstring # ",\n\t\t"
  5775. # FragA.regstring # ",\n\t\t"
  5776. # FragB.regstring # ",\n\t\t"
  5777. # FragC.regstring # ";";
  5778. }
  5779. defset list<WMMA_INSTR> MMAs = {
  5780. foreach layout_a = ["row", "col"] in {
  5781. foreach layout_b = ["row", "col"] in {
  5782. foreach satf = [0, 1] in {
  5783. foreach op = NVVM_MMA_OPS.all_mma_ops in {
  5784. foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
  5785. if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
  5786. def : MMA<WMMA_REGINFO<op[0], "mma">,
  5787. WMMA_REGINFO<op[1], "mma">,
  5788. WMMA_REGINFO<op[2], "mma">,
  5789. WMMA_REGINFO<op[3], "mma">,
  5790. layout_a, layout_b, satf, b1op>;
  5791. }
  5792. } // b1op
  5793. } // op
  5794. } // satf
  5795. } // layout_b
  5796. } // layout_a
  5797. } // defset
  5798. //
  5799. // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
  5800. //
  5801. class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
  5802. DAGOperand SrcOp>
  5803. : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
  5804. Requires<Frag.Predicates> {
  5805. // Build PatFrag that only matches particular address space.
  5806. PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
  5807. !cond(!eq(Space, ".shared"): AS_match.shared,
  5808. true: AS_match.generic)>;
  5809. // Build AS-constrained pattern.
  5810. let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
  5811. let OutOperandList = Frag.Outs;
  5812. let InOperandList = !con(Args, (ins MmaCode:$ptx));
  5813. let AsmString = "ldmatrix.sync.aligned."
  5814. # Frag.geom
  5815. # "." # Frag.frag
  5816. # !if(Transposed, ".trans", "")
  5817. # Space
  5818. # "." # Frag.ptx_elt_type
  5819. # " " # Frag.regstring # ", [$src];";
  5820. }
  5821. // Create all ldmatrix variants
  5822. defset list<WMMA_INSTR> LDMATRIXs = {
  5823. foreach transposed = [false, true] in {
  5824. foreach space = [".shared", ""] in {
  5825. foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
  5826. foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
  5827. if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
  5828. def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
  5829. addr>;
  5830. } // addr
  5831. } // space
  5832. } // transposed
  5833. } // defset
  5834. // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
  5835. // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
  5836. // the instruction record.
  5837. class MMA_PAT<WMMA_INSTR wi>
  5838. : Pat<wi.IntrinsicPattern,
  5839. !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
  5840. (wi ptx.version))>,
  5841. Requires<wi.Predicates>;
  5842. // Build intrinsic->instruction patterns for all MMA instructions.
  5843. foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  5844. def : MMA_PAT<mma>;