//===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Utility function to lower a printf call into a series of device
// library calls on the AMDGPU target.
//
// WARNING: This file knows about certain library functions. It recognizes them
// by name, and hardwires knowledge of their semantics.
//
//===----------------------------------------------------------------------===//
  16. #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
  17. #include "llvm/ADT/SparseBitVector.h"
  18. #include "llvm/Analysis/ValueTracking.h"
  19. using namespace llvm;
  20. #define DEBUG_TYPE "amdgpu-emit-printf"
  21. static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
  22. auto Int64Ty = Builder.getInt64Ty();
  23. auto Ty = Arg->getType();
  24. if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
  25. switch (IntTy->getBitWidth()) {
  26. case 32:
  27. return Builder.CreateZExt(Arg, Int64Ty);
  28. case 64:
  29. return Arg;
  30. }
  31. }
  32. if (Ty->getTypeID() == Type::DoubleTyID) {
  33. return Builder.CreateBitCast(Arg, Int64Ty);
  34. }
  35. if (isa<PointerType>(Ty)) {
  36. return Builder.CreatePtrToInt(Arg, Int64Ty);
  37. }
  38. llvm_unreachable("unexpected type");
  39. }
  40. static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
  41. auto Int64Ty = Builder.getInt64Ty();
  42. auto M = Builder.GetInsertBlock()->getModule();
  43. auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
  44. return Builder.CreateCall(Fn, Version);
  45. }
  46. static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
  47. Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
  48. Value *Arg4, Value *Arg5, Value *Arg6,
  49. bool IsLast) {
  50. auto Int64Ty = Builder.getInt64Ty();
  51. auto Int32Ty = Builder.getInt32Ty();
  52. auto M = Builder.GetInsertBlock()->getModule();
  53. auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
  54. Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
  55. Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
  56. auto IsLastValue = Builder.getInt32(IsLast);
  57. auto NumArgsValue = Builder.getInt32(NumArgs);
  58. return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
  59. Arg4, Arg5, Arg6, IsLastValue});
  60. }
  61. static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
  62. bool IsLast) {
  63. auto Arg0 = fitArgInto64Bits(Builder, Arg);
  64. auto Zero = Builder.getInt64(0);
  65. return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
  66. Zero, IsLast);
  67. }
  68. // The device library does not provide strlen, so we build our own loop
  69. // here. While we are at it, we also include the terminating null in the length.
  70. static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
  71. auto *Prev = Builder.GetInsertBlock();
  72. Module *M = Prev->getModule();
  73. auto CharZero = Builder.getInt8(0);
  74. auto One = Builder.getInt64(1);
  75. auto Zero = Builder.getInt64(0);
  76. auto Int64Ty = Builder.getInt64Ty();
  77. // The length is either zero for a null pointer, or the computed value for an
  78. // actual string. We need a join block for a phi that represents the final
  79. // value.
  80. //
  81. // Strictly speaking, the zero does not matter since
  82. // __ockl_printf_append_string_n ignores the length if the pointer is null.
  83. BasicBlock *Join = nullptr;
  84. if (Prev->getTerminator()) {
  85. Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
  86. "strlen.join");
  87. Prev->getTerminator()->eraseFromParent();
  88. } else {
  89. Join = BasicBlock::Create(M->getContext(), "strlen.join",
  90. Prev->getParent());
  91. }
  92. BasicBlock *While =
  93. BasicBlock::Create(M->getContext(), "strlen.while",
  94. Prev->getParent(), Join);
  95. BasicBlock *WhileDone = BasicBlock::Create(
  96. M->getContext(), "strlen.while.done",
  97. Prev->getParent(), Join);
  98. // Emit an early return for when the pointer is null.
  99. Builder.SetInsertPoint(Prev);
  100. auto CmpNull =
  101. Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
  102. BranchInst::Create(Join, While, CmpNull, Prev);
  103. // Entry to the while loop.
  104. Builder.SetInsertPoint(While);
  105. auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
  106. PtrPhi->addIncoming(Str, Prev);
  107. auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
  108. PtrPhi->addIncoming(PtrNext, While);
  109. // Condition for the while loop.
  110. auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
  111. auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
  112. Builder.CreateCondBr(Cmp, WhileDone, While);
  113. // Add one to the computed length.
  114. Builder.SetInsertPoint(WhileDone, WhileDone->begin());
  115. auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
  116. auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
  117. auto Len = Builder.CreateSub(End, Begin);
  118. Len = Builder.CreateAdd(Len, One);
  119. // Final join.
  120. BranchInst::Create(Join, WhileDone);
  121. Builder.SetInsertPoint(Join, Join->begin());
  122. auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
  123. LenPhi->addIncoming(Len, WhileDone);
  124. LenPhi->addIncoming(Zero, Prev);
  125. return LenPhi;
  126. }
  127. static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
  128. Value *Length, bool isLast) {
  129. auto Int64Ty = Builder.getInt64Ty();
  130. auto CharPtrTy = Builder.getInt8PtrTy();
  131. auto Int32Ty = Builder.getInt32Ty();
  132. auto M = Builder.GetInsertBlock()->getModule();
  133. auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
  134. Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
  135. auto IsLastInt32 = Builder.getInt32(isLast);
  136. return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
  137. }
  138. static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
  139. bool IsLast) {
  140. Arg = Builder.CreateBitCast(
  141. Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace()));
  142. auto Length = getStrlenWithNull(Builder, Arg);
  143. return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
  144. }
  145. static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
  146. bool SpecIsCString, bool IsLast) {
  147. if (SpecIsCString && isa<PointerType>(Arg->getType())) {
  148. return appendString(Builder, Desc, Arg, IsLast);
  149. }
  150. // If the format specifies a string but the argument is not, the frontend will
  151. // have printed a warning. We just rely on undefined behaviour and send the
  152. // argument anyway.
  153. return appendArg(Builder, Desc, Arg, IsLast);
  154. }
  155. // Scan the format string to locate all specifiers, and mark the ones that
  156. // specify a string, i.e, the "%s" specifier with optional '*' characters.
  157. static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
  158. StringRef Str;
  159. if (!getConstantStringInfo(Fmt, Str) || Str.empty())
  160. return;
  161. static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
  162. size_t SpecPos = 0;
  163. // Skip the first argument, the format string.
  164. unsigned ArgIdx = 1;
  165. while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
  166. if (Str[SpecPos + 1] == '%') {
  167. SpecPos += 2;
  168. continue;
  169. }
  170. auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
  171. if (SpecEnd == StringRef::npos)
  172. return;
  173. auto Spec = Str.slice(SpecPos, SpecEnd + 1);
  174. ArgIdx += Spec.count('*');
  175. if (Str[SpecEnd] == 's') {
  176. BV.set(ArgIdx);
  177. }
  178. SpecPos = SpecEnd + 1;
  179. ++ArgIdx;
  180. }
  181. }
  182. Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder,
  183. ArrayRef<Value *> Args) {
  184. auto NumOps = Args.size();
  185. assert(NumOps >= 1);
  186. auto Fmt = Args[0];
  187. SparseBitVector<8> SpecIsCString;
  188. locateCStrings(SpecIsCString, Fmt);
  189. auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
  190. Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
  191. // FIXME: This invokes hostcall once for each argument. We can pack up to
  192. // seven scalar printf arguments in a single hostcall. See the signature of
  193. // callAppendArgs().
  194. for (unsigned int i = 1; i != NumOps; ++i) {
  195. bool IsLast = i == NumOps - 1;
  196. bool IsCString = SpecIsCString.test(i);
  197. Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
  198. }
  199. return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
  200. }