PoisonChecking.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. //===- PoisonChecking.cpp - -----------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Implements a transform pass which instruments IR such that poison semantics
  10. // are made explicit. That is, it provides a (possibly partial) executable
  11. // semantics for every instruction w.r.t. poison as specified in the LLVM
  12. // LangRef. There are obvious parallels to the sanitizer tools, but this pass
  13. // is focused purely on the semantics of LLVM IR, not any particular source
  14. // language. If you're looking for something to see if your C/C++ contains
  15. // UB, this is not it.
  16. //
  17. // The rewritten semantics of each instruction will include the following
  18. // components:
  19. //
  20. // 1) The original instruction, unmodified.
  21. // 2) A propagation rule which translates dynamic information about the poison
  22. // state of each input to whether the dynamic output of the instruction
  23. // produces poison.
  24. // 3) A creation rule which validates any poison producing flags on the
  25. // instruction itself (e.g. checks for overflow on nsw).
  26. // 4) A check rule which traps (to a handler function) if this instruction must
  27. // execute undefined behavior given the poison state of it's inputs.
  28. //
  29. // This is a must analysis based transform; that is, the resulting code may
  30. // produce a false negative result (not report UB when actually exists
  31. // according to the LangRef spec), but should never produce a false positive
  32. // (report UB where it doesn't exist).
  33. //
  34. // Use cases for this pass include:
  35. // - Understanding (and testing!) the implications of the definition of poison
  36. // from the LangRef.
  37. // - Validating the output of a IR fuzzer to ensure that all programs produced
  38. // are well defined on the specific input used.
  39. // - Finding/confirming poison specific miscompiles by checking the poison
  40. // status of an input/IR pair is the same before and after an optimization
  41. // transform.
  42. // - Checking that a bugpoint reduction does not introduce UB which didn't
  43. // exist in the original program being reduced.
  44. //
  45. // The major sources of inaccuracy are currently:
  46. // - Most validation rules not yet implemented for instructions with poison
  47. // relavant flags. At the moment, only nsw/nuw on add/sub are supported.
  48. // - UB which is control dependent on a branch on poison is not yet
  49. // reported. Currently, only data flow dependence is modeled.
  50. // - Poison which is propagated through memory is not modeled. As such,
  51. // storing poison to memory and then reloading it will cause a false negative
  52. // as we consider the reloaded value to not be poisoned.
  53. // - Poison propagation across function boundaries is not modeled. At the
  54. // moment, all arguments and return values are assumed not to be poison.
  55. // - Undef is not modeled. In particular, the optimizer's freedom to pick
  56. // concrete values for undef bits so as to maximize potential for producing
  57. // poison is not modeled.
  58. //
  59. //===----------------------------------------------------------------------===//
  60. #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
  61. #include "llvm/ADT/DenseMap.h"
  62. #include "llvm/ADT/Statistic.h"
  63. #include "llvm/Analysis/MemoryBuiltins.h"
  64. #include "llvm/Analysis/ValueTracking.h"
  65. #include "llvm/IR/IRBuilder.h"
  66. #include "llvm/IR/InstVisitor.h"
  67. #include "llvm/IR/IntrinsicInst.h"
  68. #include "llvm/IR/PatternMatch.h"
  69. #include "llvm/Support/CommandLine.h"
  70. #include "llvm/Support/Debug.h"
  71. using namespace llvm;
  72. #define DEBUG_TYPE "poison-checking"
  73. static cl::opt<bool>
  74. LocalCheck("poison-checking-function-local",
  75. cl::init(false),
  76. cl::desc("Check that returns are non-poison (for testing)"));
  77. static bool isConstantFalse(Value* V) {
  78. assert(V->getType()->isIntegerTy(1));
  79. if (auto *CI = dyn_cast<ConstantInt>(V))
  80. return CI->isZero();
  81. return false;
  82. }
  83. static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
  84. if (Ops.size() == 0)
  85. return B.getFalse();
  86. unsigned i = 0;
  87. for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
  88. if (i == Ops.size())
  89. return B.getFalse();
  90. Value *Accum = Ops[i++];
  91. for (; i < Ops.size(); i++)
  92. if (!isConstantFalse(Ops[i]))
  93. Accum = B.CreateOr(Accum, Ops[i]);
  94. return Accum;
  95. }
  96. static void generateCreationChecksForBinOp(Instruction &I,
  97. SmallVectorImpl<Value*> &Checks) {
  98. assert(isa<BinaryOperator>(I));
  99. IRBuilder<> B(&I);
  100. Value *LHS = I.getOperand(0);
  101. Value *RHS = I.getOperand(1);
  102. switch (I.getOpcode()) {
  103. default:
  104. return;
  105. case Instruction::Add: {
  106. if (I.hasNoSignedWrap()) {
  107. auto *OverflowOp =
  108. B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
  109. Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
  110. }
  111. if (I.hasNoUnsignedWrap()) {
  112. auto *OverflowOp =
  113. B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
  114. Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
  115. }
  116. break;
  117. }
  118. case Instruction::Sub: {
  119. if (I.hasNoSignedWrap()) {
  120. auto *OverflowOp =
  121. B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
  122. Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
  123. }
  124. if (I.hasNoUnsignedWrap()) {
  125. auto *OverflowOp =
  126. B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
  127. Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
  128. }
  129. break;
  130. }
  131. case Instruction::Mul: {
  132. if (I.hasNoSignedWrap()) {
  133. auto *OverflowOp =
  134. B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
  135. Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
  136. }
  137. if (I.hasNoUnsignedWrap()) {
  138. auto *OverflowOp =
  139. B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
  140. Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
  141. }
  142. break;
  143. }
  144. case Instruction::UDiv: {
  145. if (I.isExact()) {
  146. auto *Check =
  147. B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
  148. ConstantInt::get(LHS->getType(), 0));
  149. Checks.push_back(Check);
  150. }
  151. break;
  152. }
  153. case Instruction::SDiv: {
  154. if (I.isExact()) {
  155. auto *Check =
  156. B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
  157. ConstantInt::get(LHS->getType(), 0));
  158. Checks.push_back(Check);
  159. }
  160. break;
  161. }
  162. case Instruction::AShr:
  163. case Instruction::LShr:
  164. case Instruction::Shl: {
  165. Value *ShiftCheck =
  166. B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
  167. ConstantInt::get(RHS->getType(),
  168. LHS->getType()->getScalarSizeInBits()));
  169. Checks.push_back(ShiftCheck);
  170. break;
  171. }
  172. };
  173. }
  174. /// Given an instruction which can produce poison on non-poison inputs
  175. /// (i.e. canCreatePoison returns true), generate runtime checks to produce
  176. /// boolean indicators of when poison would result.
  177. static void generateCreationChecks(Instruction &I,
  178. SmallVectorImpl<Value*> &Checks) {
  179. IRBuilder<> B(&I);
  180. if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
  181. generateCreationChecksForBinOp(I, Checks);
  182. // Handle non-binops separately
  183. switch (I.getOpcode()) {
  184. default:
  185. // Note there are a couple of missing cases here, once implemented, this
  186. // should become an llvm_unreachable.
  187. break;
  188. case Instruction::ExtractElement: {
  189. Value *Vec = I.getOperand(0);
  190. auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
  191. if (!VecVTy)
  192. break;
  193. Value *Idx = I.getOperand(1);
  194. unsigned NumElts = VecVTy->getNumElements();
  195. Value *Check =
  196. B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
  197. ConstantInt::get(Idx->getType(), NumElts));
  198. Checks.push_back(Check);
  199. break;
  200. }
  201. case Instruction::InsertElement: {
  202. Value *Vec = I.getOperand(0);
  203. auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
  204. if (!VecVTy)
  205. break;
  206. Value *Idx = I.getOperand(2);
  207. unsigned NumElts = VecVTy->getNumElements();
  208. Value *Check =
  209. B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
  210. ConstantInt::get(Idx->getType(), NumElts));
  211. Checks.push_back(Check);
  212. break;
  213. }
  214. };
  215. }
  216. static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
  217. auto Itr = ValToPoison.find(V);
  218. if (Itr != ValToPoison.end())
  219. return Itr->second;
  220. if (isa<Constant>(V)) {
  221. return ConstantInt::getFalse(V->getContext());
  222. }
  223. // Return false for unknwon values - this implements a non-strict mode where
  224. // unhandled IR constructs are simply considered to never produce poison. At
  225. // some point in the future, we probably want a "strict mode" for testing if
  226. // nothing else.
  227. return ConstantInt::getFalse(V->getContext());
  228. }
  229. static void CreateAssert(IRBuilder<> &B, Value *Cond) {
  230. assert(Cond->getType()->isIntegerTy(1));
  231. if (auto *CI = dyn_cast<ConstantInt>(Cond))
  232. if (CI->isAllOnesValue())
  233. return;
  234. Module *M = B.GetInsertBlock()->getModule();
  235. M->getOrInsertFunction("__poison_checker_assert",
  236. Type::getVoidTy(M->getContext()),
  237. Type::getInt1Ty(M->getContext()));
  238. Function *TrapFunc = M->getFunction("__poison_checker_assert");
  239. B.CreateCall(TrapFunc, Cond);
  240. }
  241. static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
  242. assert(Cond->getType()->isIntegerTy(1));
  243. CreateAssert(B, B.CreateNot(Cond));
  244. }
  245. static bool rewrite(Function &F) {
  246. auto * const Int1Ty = Type::getInt1Ty(F.getContext());
  247. DenseMap<Value *, Value *> ValToPoison;
  248. for (BasicBlock &BB : F)
  249. for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
  250. auto *OldPHI = cast<PHINode>(&*I);
  251. auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
  252. for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
  253. NewPHI->addIncoming(UndefValue::get(Int1Ty),
  254. OldPHI->getIncomingBlock(i));
  255. NewPHI->insertBefore(OldPHI);
  256. ValToPoison[OldPHI] = NewPHI;
  257. }
  258. for (BasicBlock &BB : F)
  259. for (Instruction &I : BB) {
  260. if (isa<PHINode>(I)) continue;
  261. IRBuilder<> B(cast<Instruction>(&I));
  262. // Note: There are many more sources of documented UB, but this pass only
  263. // attempts to find UB triggered by propagation of poison.
  264. SmallPtrSet<const Value *, 4> NonPoisonOps;
  265. getGuaranteedNonPoisonOps(&I, NonPoisonOps);
  266. for (const Value *Op : NonPoisonOps)
  267. CreateAssertNot(B, getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
  268. if (LocalCheck)
  269. if (auto *RI = dyn_cast<ReturnInst>(&I))
  270. if (RI->getNumOperands() != 0) {
  271. Value *Op = RI->getOperand(0);
  272. CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
  273. }
  274. SmallVector<Value*, 4> Checks;
  275. if (propagatesPoison(cast<Operator>(&I)))
  276. for (Value *V : I.operands())
  277. Checks.push_back(getPoisonFor(ValToPoison, V));
  278. if (canCreatePoison(cast<Operator>(&I)))
  279. generateCreationChecks(I, Checks);
  280. ValToPoison[&I] = buildOrChain(B, Checks);
  281. }
  282. for (BasicBlock &BB : F)
  283. for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
  284. auto *OldPHI = cast<PHINode>(&*I);
  285. if (!ValToPoison.count(OldPHI))
  286. continue; // skip the newly inserted phis
  287. auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
  288. for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
  289. auto *OldVal = OldPHI->getIncomingValue(i);
  290. NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
  291. }
  292. }
  293. return true;
  294. }
  295. PreservedAnalyses PoisonCheckingPass::run(Module &M,
  296. ModuleAnalysisManager &AM) {
  297. bool Changed = false;
  298. for (auto &F : M)
  299. Changed |= rewrite(F);
  300. return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
  301. }
  302. PreservedAnalyses PoisonCheckingPass::run(Function &F,
  303. FunctionAnalysisManager &AM) {
  304. return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
  305. }
  306. /* Major TODO Items:
  307. - Control dependent poison UB
  308. - Strict mode - (i.e. must analyze every operand)
  309. - Poison through memory
  310. - Function ABIs
  311. - Full coverage of intrinsics, etc.. (ouch)
  312. Instructions w/Unclear Semantics:
  313. - shufflevector - It would seem reasonable for an out of bounds mask element
  314. to produce poison, but the LangRef does not state.
  315. - all binary ops w/vector operands - The likely interpretation would be that
  316. any element overflowing should produce poison for the entire result, but
  317. the LangRef does not state.
  318. - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
  319. strange that only certian flags should be documented as producing poison.
  320. Cases of clear poison semantics not yet implemented:
  321. - Exact flags on ashr/lshr produce poison
  322. - NSW/NUW flags on shl produce poison
  323. - Inbounds flag on getelementptr produce poison
  324. - fptosi/fptoui (out of bounds input) produce poison
  325. - Scalable vector types for insertelement/extractelement
  326. - Floating point binary ops w/fmf nnan/noinfs flags produce poison
  327. */