AMDGPUEmitPrintf.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. //===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Utility function to lower a printf call into a series of device
  10. // library calls on the AMDGPU target.
  11. //
  12. // WARNING: This file knows about certain library functions. It recognizes them
  13. // by name, and hardwires knowledge of their semantics.
  14. //
  15. //===----------------------------------------------------------------------===//
  16. #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
  17. #include "llvm/ADT/SparseBitVector.h"
  18. #include "llvm/Analysis/ValueTracking.h"
  19. using namespace llvm;
  20. #define DEBUG_TYPE "amdgpu-emit-printf"
  21. static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
  22. auto Int64Ty = Builder.getInt64Ty();
  23. auto Ty = Arg->getType();
  24. if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
  25. switch (IntTy->getBitWidth()) {
  26. case 32:
  27. return Builder.CreateZExt(Arg, Int64Ty);
  28. case 64:
  29. return Arg;
  30. }
  31. }
  32. if (Ty->getTypeID() == Type::DoubleTyID) {
  33. return Builder.CreateBitCast(Arg, Int64Ty);
  34. }
  35. if (isa<PointerType>(Ty)) {
  36. return Builder.CreatePtrToInt(Arg, Int64Ty);
  37. }
  38. llvm_unreachable("unexpected type");
  39. }
  40. static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
  41. auto Int64Ty = Builder.getInt64Ty();
  42. auto M = Builder.GetInsertBlock()->getModule();
  43. auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
  44. if (!M->getModuleFlag("amdgpu_hostcall")) {
  45. M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
  46. }
  47. return Builder.CreateCall(Fn, Version);
  48. }
  49. static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
  50. Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
  51. Value *Arg4, Value *Arg5, Value *Arg6,
  52. bool IsLast) {
  53. auto Int64Ty = Builder.getInt64Ty();
  54. auto Int32Ty = Builder.getInt32Ty();
  55. auto M = Builder.GetInsertBlock()->getModule();
  56. auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
  57. Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
  58. Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
  59. auto IsLastValue = Builder.getInt32(IsLast);
  60. auto NumArgsValue = Builder.getInt32(NumArgs);
  61. return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
  62. Arg4, Arg5, Arg6, IsLastValue});
  63. }
  64. static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
  65. bool IsLast) {
  66. auto Arg0 = fitArgInto64Bits(Builder, Arg);
  67. auto Zero = Builder.getInt64(0);
  68. return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
  69. Zero, IsLast);
  70. }
  71. // The device library does not provide strlen, so we build our own loop
  72. // here. While we are at it, we also include the terminating null in the length.
  73. static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
  74. auto *Prev = Builder.GetInsertBlock();
  75. Module *M = Prev->getModule();
  76. auto CharZero = Builder.getInt8(0);
  77. auto One = Builder.getInt64(1);
  78. auto Zero = Builder.getInt64(0);
  79. auto Int64Ty = Builder.getInt64Ty();
  80. // The length is either zero for a null pointer, or the computed value for an
  81. // actual string. We need a join block for a phi that represents the final
  82. // value.
  83. //
  84. // Strictly speaking, the zero does not matter since
  85. // __ockl_printf_append_string_n ignores the length if the pointer is null.
  86. BasicBlock *Join = nullptr;
  87. if (Prev->getTerminator()) {
  88. Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
  89. "strlen.join");
  90. Prev->getTerminator()->eraseFromParent();
  91. } else {
  92. Join = BasicBlock::Create(M->getContext(), "strlen.join",
  93. Prev->getParent());
  94. }
  95. BasicBlock *While =
  96. BasicBlock::Create(M->getContext(), "strlen.while",
  97. Prev->getParent(), Join);
  98. BasicBlock *WhileDone = BasicBlock::Create(
  99. M->getContext(), "strlen.while.done",
  100. Prev->getParent(), Join);
  101. // Emit an early return for when the pointer is null.
  102. Builder.SetInsertPoint(Prev);
  103. auto CmpNull =
  104. Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
  105. BranchInst::Create(Join, While, CmpNull, Prev);
  106. // Entry to the while loop.
  107. Builder.SetInsertPoint(While);
  108. auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
  109. PtrPhi->addIncoming(Str, Prev);
  110. auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
  111. PtrPhi->addIncoming(PtrNext, While);
  112. // Condition for the while loop.
  113. auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
  114. auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
  115. Builder.CreateCondBr(Cmp, WhileDone, While);
  116. // Add one to the computed length.
  117. Builder.SetInsertPoint(WhileDone, WhileDone->begin());
  118. auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
  119. auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
  120. auto Len = Builder.CreateSub(End, Begin);
  121. Len = Builder.CreateAdd(Len, One);
  122. // Final join.
  123. BranchInst::Create(Join, WhileDone);
  124. Builder.SetInsertPoint(Join, Join->begin());
  125. auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
  126. LenPhi->addIncoming(Len, WhileDone);
  127. LenPhi->addIncoming(Zero, Prev);
  128. return LenPhi;
  129. }
  130. static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
  131. Value *Length, bool isLast) {
  132. auto Int64Ty = Builder.getInt64Ty();
  133. auto CharPtrTy = Builder.getInt8PtrTy();
  134. auto Int32Ty = Builder.getInt32Ty();
  135. auto M = Builder.GetInsertBlock()->getModule();
  136. auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
  137. Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
  138. auto IsLastInt32 = Builder.getInt32(isLast);
  139. return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
  140. }
  141. static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
  142. bool IsLast) {
  143. Arg = Builder.CreateBitCast(
  144. Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace()));
  145. auto Length = getStrlenWithNull(Builder, Arg);
  146. return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
  147. }
  148. static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
  149. bool SpecIsCString, bool IsLast) {
  150. if (SpecIsCString && isa<PointerType>(Arg->getType())) {
  151. return appendString(Builder, Desc, Arg, IsLast);
  152. }
  153. // If the format specifies a string but the argument is not, the frontend will
  154. // have printed a warning. We just rely on undefined behaviour and send the
  155. // argument anyway.
  156. return appendArg(Builder, Desc, Arg, IsLast);
  157. }
  158. // Scan the format string to locate all specifiers, and mark the ones that
  159. // specify a string, i.e, the "%s" specifier with optional '*' characters.
  160. static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
  161. StringRef Str;
  162. if (!getConstantStringInfo(Fmt, Str) || Str.empty())
  163. return;
  164. static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
  165. size_t SpecPos = 0;
  166. // Skip the first argument, the format string.
  167. unsigned ArgIdx = 1;
  168. while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
  169. if (Str[SpecPos + 1] == '%') {
  170. SpecPos += 2;
  171. continue;
  172. }
  173. auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
  174. if (SpecEnd == StringRef::npos)
  175. return;
  176. auto Spec = Str.slice(SpecPos, SpecEnd + 1);
  177. ArgIdx += Spec.count('*');
  178. if (Str[SpecEnd] == 's') {
  179. BV.set(ArgIdx);
  180. }
  181. SpecPos = SpecEnd + 1;
  182. ++ArgIdx;
  183. }
  184. }
  185. Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder,
  186. ArrayRef<Value *> Args) {
  187. auto NumOps = Args.size();
  188. assert(NumOps >= 1);
  189. auto Fmt = Args[0];
  190. SparseBitVector<8> SpecIsCString;
  191. locateCStrings(SpecIsCString, Fmt);
  192. auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
  193. Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
  194. // FIXME: This invokes hostcall once for each argument. We can pack up to
  195. // seven scalar printf arguments in a single hostcall. See the signature of
  196. // callAppendArgs().
  197. for (unsigned int i = 1; i != NumOps; ++i) {
  198. bool IsLast = i == NumOps - 1;
  199. bool IsCString = SpecIsCString.test(i);
  200. Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
  201. }
  202. return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
  203. }