Taint.cpp 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Defines basic, non-domain-specific mechanisms for tracking tainted values.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "Taint.h"
  13. #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
  14. #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
  15. using namespace clang;
  16. using namespace ento;
  17. using namespace taint;
  18. // Fully tainted symbols.
  19. REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
  20. // Partially tainted symbols.
  21. REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
  22. TaintTagType)
  23. REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
  24. void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
  25. const char *Sep) {
  26. TaintMapTy TM = State->get<TaintMap>();
  27. if (!TM.isEmpty())
  28. Out << "Tainted symbols:" << NL;
  29. for (const auto &I : TM)
  30. Out << I.first << " : " << I.second << NL;
  31. }
  32. void dumpTaint(ProgramStateRef State) { printTaint(State, llvm::errs()); }
  33. ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
  34. const LocationContext *LCtx,
  35. TaintTagType Kind) {
  36. return addTaint(State, State->getSVal(S, LCtx), Kind);
  37. }
  38. ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
  39. TaintTagType Kind) {
  40. SymbolRef Sym = V.getAsSymbol();
  41. if (Sym)
  42. return addTaint(State, Sym, Kind);
  43. // If the SVal represents a structure, try to mass-taint all values within the
  44. // structure. For now it only works efficiently on lazy compound values that
  45. // were conjured during a conservative evaluation of a function - either as
  46. // return values of functions that return structures or arrays by value, or as
  47. // values of structures or arrays passed into the function by reference,
  48. // directly or through pointer aliasing. Such lazy compound values are
  49. // characterized by having exactly one binding in their captured store within
  50. // their parent region, which is a conjured symbol default-bound to the base
  51. // region of the parent region.
  52. if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
  53. if (Optional<SVal> binding =
  54. State->getStateManager().getStoreManager().getDefaultBinding(
  55. *LCV)) {
  56. if (SymbolRef Sym = binding->getAsSymbol())
  57. return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
  58. }
  59. }
  60. const MemRegion *R = V.getAsRegion();
  61. return addTaint(State, R, Kind);
  62. }
  63. ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
  64. TaintTagType Kind) {
  65. if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
  66. return addTaint(State, SR->getSymbol(), Kind);
  67. return State;
  68. }
  69. ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
  70. TaintTagType Kind) {
  71. // If this is a symbol cast, remove the cast before adding the taint. Taint
  72. // is cast agnostic.
  73. while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
  74. Sym = SC->getOperand();
  75. ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
  76. assert(NewState);
  77. return NewState;
  78. }
  79. ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
  80. SymbolRef Sym = V.getAsSymbol();
  81. if (Sym)
  82. return removeTaint(State, Sym);
  83. const MemRegion *R = V.getAsRegion();
  84. return removeTaint(State, R);
  85. }
  86. ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
  87. if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
  88. return removeTaint(State, SR->getSymbol());
  89. return State;
  90. }
  91. ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
  92. // If this is a symbol cast, remove the cast before adding the taint. Taint
  93. // is cast agnostic.
  94. while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
  95. Sym = SC->getOperand();
  96. ProgramStateRef NewState = State->remove<TaintMap>(Sym);
  97. assert(NewState);
  98. return NewState;
  99. }
  100. ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
  101. SymbolRef ParentSym,
  102. const SubRegion *SubRegion,
  103. TaintTagType Kind) {
  104. // Ignore partial taint if the entire parent symbol is already tainted.
  105. if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
  106. if (*T == Kind)
  107. return State;
  108. // Partial taint applies if only a portion of the symbol is tainted.
  109. if (SubRegion == SubRegion->getBaseRegion())
  110. return addTaint(State, ParentSym, Kind);
  111. const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
  112. TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
  113. TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
  114. Regs = F.add(Regs, SubRegion, Kind);
  115. ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
  116. assert(NewState);
  117. return NewState;
  118. }
  119. bool taint::isTainted(ProgramStateRef State, const Stmt *S,
  120. const LocationContext *LCtx, TaintTagType Kind) {
  121. SVal val = State->getSVal(S, LCtx);
  122. return isTainted(State, val, Kind);
  123. }
  124. bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
  125. if (SymbolRef Sym = V.getAsSymbol())
  126. return isTainted(State, Sym, Kind);
  127. if (const MemRegion *Reg = V.getAsRegion())
  128. return isTainted(State, Reg, Kind);
  129. return false;
  130. }
  131. bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
  132. TaintTagType K) {
  133. if (!Reg)
  134. return false;
  135. // Element region (array element) is tainted if either the base or the offset
  136. // are tainted.
  137. if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
  138. return isTainted(State, ER->getSuperRegion(), K) ||
  139. isTainted(State, ER->getIndex(), K);
  140. if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
  141. return isTainted(State, SR->getSymbol(), K);
  142. if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
  143. return isTainted(State, ER->getSuperRegion(), K);
  144. return false;
  145. }
  146. bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
  147. if (!Sym)
  148. return false;
  149. // Traverse all the symbols this symbol depends on to see if any are tainted.
  150. for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
  151. SE = Sym->symbol_end();
  152. SI != SE; ++SI) {
  153. if (!isa<SymbolData>(*SI))
  154. continue;
  155. if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
  156. if (*Tag == Kind)
  157. return true;
  158. }
  159. if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
  160. // If this is a SymbolDerived with a tainted parent, it's also tainted.
  161. if (isTainted(State, SD->getParentSymbol(), Kind))
  162. return true;
  163. // If this is a SymbolDerived with the same parent symbol as another
  164. // tainted SymbolDerived and a region that's a sub-region of that tainted
  165. // symbol, it's also tainted.
  166. if (const TaintedSubRegions *Regs =
  167. State->get<DerivedSymTaint>(SD->getParentSymbol())) {
  168. const TypedValueRegion *R = SD->getRegion();
  169. for (auto I : *Regs) {
  170. // FIXME: The logic to identify tainted regions could be more
  171. // complete. For example, this would not currently identify
  172. // overlapping fields in a union as tainted. To identify this we can
  173. // check for overlapping/nested byte offsets.
  174. if (Kind == I.second && R->isSubRegionOf(I.first))
  175. return true;
  176. }
  177. }
  178. }
  179. // If memory region is tainted, data is also tainted.
  180. if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
  181. if (isTainted(State, SRV->getRegion(), Kind))
  182. return true;
  183. }
  184. // If this is a SymbolCast from a tainted value, it's also tainted.
  185. if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
  186. if (isTainted(State, SC->getOperand(), Kind))
  187. return true;
  188. }
  189. }
  190. return false;
  191. }
  192. PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
  193. BugReporterContext &BRC,
  194. PathSensitiveBugReport &BR) {
  195. // Find the ExplodedNode where the taint was first introduced
  196. if (!isTainted(N->getState(), V) ||
  197. isTainted(N->getFirstPred()->getState(), V))
  198. return nullptr;
  199. const Stmt *S = N->getStmtForDiagnostics();
  200. if (!S)
  201. return nullptr;
  202. const LocationContext *NCtx = N->getLocationContext();
  203. PathDiagnosticLocation L =
  204. PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
  205. if (!L.isValid() || !L.asLocation().isValid())
  206. return nullptr;
  207. return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
  208. }