// DataFlowSanitizer.cpp (excerpt) — non-code page-scrape artifacts
// (file-size banner and concatenated line-number gutter) removed.
  1. //===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. /// \file
  10. /// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
  11. /// analysis.
  12. ///
  13. /// Unlike other Sanitizer tools, this tool is not designed to detect a specific
  14. /// class of bugs on its own. Instead, it provides a generic dynamic data flow
  15. /// analysis framework to be used by clients to help detect application-specific
  16. /// issues within their own code.
  17. ///
  18. /// The analysis is based on automatic propagation of data flow labels (also
  19. /// known as taint labels) through a program as it performs computation.
  20. ///
  21. /// Argument and return value labels are passed through TLS variables
  22. /// __dfsan_arg_tls and __dfsan_retval_tls.
  23. ///
  24. /// Each byte of application memory is backed by a shadow memory byte. The
  25. /// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
  26. /// laid out as follows:
  27. ///
  28. /// +--------------------+ 0x800000000000 (top of memory)
  29. /// | application 3 |
  30. /// +--------------------+ 0x700000000000
  31. /// | invalid |
  32. /// +--------------------+ 0x610000000000
  33. /// | origin 1 |
  34. /// +--------------------+ 0x600000000000
  35. /// | application 2 |
  36. /// +--------------------+ 0x510000000000
  37. /// | shadow 1 |
  38. /// +--------------------+ 0x500000000000
  39. /// | invalid |
  40. /// +--------------------+ 0x400000000000
  41. /// | origin 3 |
  42. /// +--------------------+ 0x300000000000
  43. /// | shadow 3 |
  44. /// +--------------------+ 0x200000000000
  45. /// | origin 2 |
  46. /// +--------------------+ 0x110000000000
  47. /// | invalid |
  48. /// +--------------------+ 0x100000000000
  49. /// | shadow 2 |
  50. /// +--------------------+ 0x010000000000
  51. /// | application 1 |
  52. /// +--------------------+ 0x000000000000
  53. ///
  54. /// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
  55. /// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
  56. ///
  57. /// For more information, please refer to the design document:
  58. /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
  59. //
  60. //===----------------------------------------------------------------------===//
  61. #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
  62. #include "llvm/ADT/DenseMap.h"
  63. #include "llvm/ADT/DenseSet.h"
  64. #include "llvm/ADT/DepthFirstIterator.h"
  65. #include "llvm/ADT/None.h"
  66. #include "llvm/ADT/SmallPtrSet.h"
  67. #include "llvm/ADT/SmallVector.h"
  68. #include "llvm/ADT/StringExtras.h"
  69. #include "llvm/ADT/StringRef.h"
  70. #include "llvm/ADT/Triple.h"
  71. #include "llvm/ADT/iterator.h"
  72. #include "llvm/Analysis/ValueTracking.h"
  73. #include "llvm/IR/Argument.h"
  74. #include "llvm/IR/Attributes.h"
  75. #include "llvm/IR/BasicBlock.h"
  76. #include "llvm/IR/Constant.h"
  77. #include "llvm/IR/Constants.h"
  78. #include "llvm/IR/DataLayout.h"
  79. #include "llvm/IR/DerivedTypes.h"
  80. #include "llvm/IR/Dominators.h"
  81. #include "llvm/IR/Function.h"
  82. #include "llvm/IR/GlobalAlias.h"
  83. #include "llvm/IR/GlobalValue.h"
  84. #include "llvm/IR/GlobalVariable.h"
  85. #include "llvm/IR/IRBuilder.h"
  86. #include "llvm/IR/InlineAsm.h"
  87. #include "llvm/IR/InstVisitor.h"
  88. #include "llvm/IR/InstrTypes.h"
  89. #include "llvm/IR/Instruction.h"
  90. #include "llvm/IR/Instructions.h"
  91. #include "llvm/IR/IntrinsicInst.h"
  92. #include "llvm/IR/LLVMContext.h"
  93. #include "llvm/IR/MDBuilder.h"
  94. #include "llvm/IR/Module.h"
  95. #include "llvm/IR/PassManager.h"
  96. #include "llvm/IR/Type.h"
  97. #include "llvm/IR/User.h"
  98. #include "llvm/IR/Value.h"
  99. #include "llvm/InitializePasses.h"
  100. #include "llvm/Pass.h"
  101. #include "llvm/Support/Alignment.h"
  102. #include "llvm/Support/Casting.h"
  103. #include "llvm/Support/CommandLine.h"
  104. #include "llvm/Support/ErrorHandling.h"
  105. #include "llvm/Support/SpecialCaseList.h"
  106. #include "llvm/Support/VirtualFileSystem.h"
  107. #include "llvm/Transforms/Instrumentation.h"
  108. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  109. #include "llvm/Transforms/Utils/Local.h"
  110. #include <algorithm>
  111. #include <cassert>
  112. #include <cstddef>
  113. #include <cstdint>
  114. #include <iterator>
  115. #include <memory>
  116. #include <set>
  117. #include <string>
  118. #include <utility>
  119. #include <vector>
using namespace llvm;

// Alignment used for the per-argument / return-value shadow slots in TLS.
// This must be consistent with ShadowWidthBits.
static const Align ShadowTLSAlignment = Align(2);

// Minimum alignment for origin slots; origins are 32-bit values (4 bytes).
static const Align MinOriginAlignment = Align(4);

// The size of TLS variables. These constants must be kept in sync with the ones
// in dfsan.cpp.
static const unsigned ArgTLSSize = 800;
static const unsigned RetvalTLSSize = 800;
  128. // The -dfsan-preserve-alignment flag controls whether this pass assumes that
  129. // alignment requirements provided by the input IR are correct. For example,
  130. // if the input IR contains a load with alignment 8, this flag will cause
  131. // the shadow load to have alignment 16. This flag is disabled by default as
  132. // we have unfortunately encountered too much code (including Clang itself;
  133. // see PR14291) which performs misaligned access.
  134. static cl::opt<bool> ClPreserveAlignment(
  135. "dfsan-preserve-alignment",
  136. cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
  137. cl::init(false));
  138. // The ABI list files control how shadow parameters are passed. The pass treats
  139. // every function labelled "uninstrumented" in the ABI list file as conforming
  140. // to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
  141. // additional annotations for those functions, a call to one of those functions
  142. // will produce a warning message, as the labelling behaviour of the function is
  143. // unknown. The other supported annotations for uninstrumented functions are
  144. // "functional" and "discard", which are described below under
  145. // DataFlowSanitizer::WrapperKind.
  146. // Functions will often be labelled with both "uninstrumented" and one of
  147. // "functional" or "discard". This will leave the function unchanged by this
  148. // pass, and create a wrapper function that will call the original.
  149. //
  150. // Instrumented functions can also be annotated as "force_zero_labels", which
  151. // will make all shadow and return values set zero labels.
  152. // Functions should never be labelled with both "force_zero_labels" and
  153. // "uninstrumented" or any of the unistrumented wrapper kinds.
  154. static cl::list<std::string> ClABIListFiles(
  155. "dfsan-abilist",
  156. cl::desc("File listing native ABI functions and how the pass treats them"),
  157. cl::Hidden);
  158. // Controls whether the pass includes or ignores the labels of pointers in load
  159. // instructions.
  160. static cl::opt<bool> ClCombinePointerLabelsOnLoad(
  161. "dfsan-combine-pointer-labels-on-load",
  162. cl::desc("Combine the label of the pointer with the label of the data when "
  163. "loading from memory."),
  164. cl::Hidden, cl::init(true));
  165. // Controls whether the pass includes or ignores the labels of pointers in
  166. // stores instructions.
  167. static cl::opt<bool> ClCombinePointerLabelsOnStore(
  168. "dfsan-combine-pointer-labels-on-store",
  169. cl::desc("Combine the label of the pointer with the label of the data when "
  170. "storing in memory."),
  171. cl::Hidden, cl::init(false));
  172. // Controls whether the pass propagates labels of offsets in GEP instructions.
  173. static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
  174. "dfsan-combine-offset-labels-on-gep",
  175. cl::desc(
  176. "Combine the label of the offset with the label of the pointer when "
  177. "doing pointer arithmetic."),
  178. cl::Hidden, cl::init(true));
// Debugging aid: reports (via __dfsan_nonzero_label) whenever a tainted value
// is observed at a parameter, load, or return.
static cl::opt<bool> ClDebugNonzeroLabels(
    "dfsan-debug-nonzero-labels",
    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
             "load or return with a nonzero label"),
    cl::Hidden);

// Experimental feature that inserts callbacks for certain data events.
// Currently callbacks are only inserted for loads, stores, memory transfers
// (i.e. memcpy and memmove), and comparisons.
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
//   void __dfsan_load_callback(dfsan_label Label, void* addr);
//   void __dfsan_store_callback(dfsan_label Label, void* addr);
//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
    "dfsan-event-callbacks",
    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
    cl::Hidden, cl::init(false));

// Experimental feature that inserts callbacks for conditionals, including:
// conditional branch, switch, select.
// This must be true for dfsan_set_conditional_callback() to have effect.
static cl::opt<bool> ClConditionalCallbacks(
    "dfsan-conditional-callbacks",
    cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
    cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
    "dfsan-track-select-control-flow",
    cl::desc("Propagate labels from condition values of select instructions "
             "to results."),
    cl::Hidden, cl::init(true));

// TODO: This default value follows MSan. DFSan may use a different value.
static cl::opt<int> ClInstrumentWithCallThreshold(
    "dfsan-instrument-with-call-threshold",
    cl::desc("If the function being instrumented requires more than "
             "this number of origin stores, use callbacks instead of "
             "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// Controls how to track origins.
// * 0: do not track origins.
// * 1: track origins at memory store operations.
// * 2: track origins at memory load and store operations.
//      TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
                                   cl::desc("Track origins of labels"),
                                   cl::Hidden, cl::init(0));

// When set, uninstrumented personality routines referenced only as landing-pad
// personalities are left alone rather than wrapped.
static cl::opt<bool> ClIgnorePersonalityRoutine(
    "dfsan-ignore-personality-routine",
    cl::desc("If a personality routine is marked uninstrumented from the ABI "
             "list, do not create a wrapper for it."),
    cl::Hidden, cl::init(false));
  231. static StringRef getGlobalTypeString(const GlobalValue &G) {
  232. // Types of GlobalVariables are always pointer types.
  233. Type *GType = G.getValueType();
  234. // For now we support excluding struct types only.
  235. if (StructType *SGType = dyn_cast<StructType>(GType)) {
  236. if (!SGType->isLiteral())
  237. return SGType->getName();
  238. }
  239. return "<unknown type>";
  240. }
namespace {

// Memory map parameters used in application-to-shadow address calculation.
//   Offset = (Addr & ~AndMask) ^ XorMask
//   Shadow = ShadowBase + Offset
//   Origin = (OriginBase + Offset) & ~3ULL   // origins are 4-byte aligned
struct MemoryMapParams {
  uint64_t AndMask;    // Bits cleared from the application address.
  uint64_t XorMask;    // XORed into the masked address to form the offset.
  uint64_t ShadowBase; // Added to the offset to reach shadow memory.
  uint64_t OriginBase; // Added to the offset to reach origin memory.
};

} // end anonymous namespace
// x86_64 Linux memory layout parameters: shadow is reached with a plain XOR
// (MEM_TO_SHADOW(mem) = mem ^ 0x500000000000), and the origin region sits at a
// fixed offset from the shadow region (SHADOW_TO_ORIGIN = + 0x100000000000).
// NOLINTNEXTLINE(readability-identifier-naming)
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};
  261. namespace {
  262. class DFSanABIList {
  263. std::unique_ptr<SpecialCaseList> SCL;
  264. public:
  265. DFSanABIList() = default;
  266. void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
  267. /// Returns whether either this function or its source file are listed in the
  268. /// given category.
  269. bool isIn(const Function &F, StringRef Category) const {
  270. return isIn(*F.getParent(), Category) ||
  271. SCL->inSection("dataflow", "fun", F.getName(), Category);
  272. }
  273. /// Returns whether this global alias is listed in the given category.
  274. ///
  275. /// If GA aliases a function, the alias's name is matched as a function name
  276. /// would be. Similarly, aliases of globals are matched like globals.
  277. bool isIn(const GlobalAlias &GA, StringRef Category) const {
  278. if (isIn(*GA.getParent(), Category))
  279. return true;
  280. if (isa<FunctionType>(GA.getValueType()))
  281. return SCL->inSection("dataflow", "fun", GA.getName(), Category);
  282. return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
  283. SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
  284. Category);
  285. }
  286. /// Returns whether this module is listed in the given category.
  287. bool isIn(const Module &M, StringRef Category) const {
  288. return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
  289. }
  290. };
  291. /// TransformedFunction is used to express the result of transforming one
  292. /// function type into another. This struct is immutable. It holds metadata
  293. /// useful for updating calls of the old function to the new type.
  294. struct TransformedFunction {
  295. TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
  296. std::vector<unsigned> ArgumentIndexMapping)
  297. : OriginalType(OriginalType), TransformedType(TransformedType),
  298. ArgumentIndexMapping(ArgumentIndexMapping) {}
  299. // Disallow copies.
  300. TransformedFunction(const TransformedFunction &) = delete;
  301. TransformedFunction &operator=(const TransformedFunction &) = delete;
  302. // Allow moves.
  303. TransformedFunction(TransformedFunction &&) = default;
  304. TransformedFunction &operator=(TransformedFunction &&) = default;
  305. /// Type of the function before the transformation.
  306. FunctionType *OriginalType;
  307. /// Type of the function after the transformation.
  308. FunctionType *TransformedType;
  309. /// Transforming a function may change the position of arguments. This
  310. /// member records the mapping from each argument's old position to its new
  311. /// position. Argument positions are zero-indexed. If the transformation
  312. /// from F to F' made the first argument of F into the third argument of F',
  313. /// then ArgumentIndexMapping[0] will equal 2.
  314. std::vector<unsigned> ArgumentIndexMapping;
  315. };
/// Given function attributes from a call site for the original function,
/// return function attributes appropriate for a call to the transformed
/// function: parameter attribute sets are permuted according to
/// ArgumentIndexMapping, varargs attribute sets are appended unchanged, and
/// the function/return attribute sets are carried over as-is.
AttributeList
transformFunctionAttributes(const TransformedFunction &TransformedFunction,
                            LLVMContext &Ctx, AttributeList CallSiteAttrs) {

  // Construct a vector of AttributeSet for each function argument.
  std::vector<llvm::AttributeSet> ArgumentAttributes(
      TransformedFunction.TransformedType->getNumParams());

  // Copy attributes from the parameter of the original function to the
  // transformed version. 'ArgumentIndexMapping' holds the mapping from
  // old argument position to new.
  for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
       I < IE; ++I) {
    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
  }

  // Copy annotations on varargs arguments.
  // NOTE(review): the loop bound is getNumAttrSets(), which also counts the
  // function and return attribute sets; this mirrors the original logic and
  // relies on AttributeList's index layout -- confirm before changing.
  for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
                IE = CallSiteAttrs.getNumAttrSets();
       I < IE; ++I) {
    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
  }

  return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
                            CallSiteAttrs.getRetAttrs(),
                            llvm::makeArrayRef(ArgumentAttributes));
}
  343. class DataFlowSanitizer {
  344. friend struct DFSanFunction;
  345. friend class DFSanVisitor;
  346. enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };
  347. enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };
  348. /// How should calls to uninstrumented functions be handled?
  349. enum WrapperKind {
  350. /// This function is present in an uninstrumented form but we don't know
  351. /// how it should be handled. Print a warning and call the function anyway.
  352. /// Don't label the return value.
  353. WK_Warning,
  354. /// This function does not write to (user-accessible) memory, and its return
  355. /// value is unlabelled.
  356. WK_Discard,
  357. /// This function does not write to (user-accessible) memory, and the label
  358. /// of its return value is the union of the label of its arguments.
  359. WK_Functional,
  360. /// Instead of calling the function, a custom wrapper __dfsw_F is called,
  361. /// where F is the name of the function. This function may wrap the
  362. /// original function or provide its own implementation. WK_Custom uses an
  363. /// extra pointer argument to return the shadow. This allows the wrapped
  364. /// form of the function type to be expressed in C.
  365. WK_Custom
  366. };
  367. Module *Mod;
  368. LLVMContext *Ctx;
  369. Type *Int8Ptr;
  370. IntegerType *OriginTy;
  371. PointerType *OriginPtrTy;
  372. ConstantInt *ZeroOrigin;
  373. /// The shadow type for all primitive types and vector types.
  374. IntegerType *PrimitiveShadowTy;
  375. PointerType *PrimitiveShadowPtrTy;
  376. IntegerType *IntptrTy;
  377. ConstantInt *ZeroPrimitiveShadow;
  378. Constant *ArgTLS;
  379. ArrayType *ArgOriginTLSTy;
  380. Constant *ArgOriginTLS;
  381. Constant *RetvalTLS;
  382. Constant *RetvalOriginTLS;
  383. FunctionType *DFSanUnionLoadFnTy;
  384. FunctionType *DFSanLoadLabelAndOriginFnTy;
  385. FunctionType *DFSanUnimplementedFnTy;
  386. FunctionType *DFSanSetLabelFnTy;
  387. FunctionType *DFSanNonzeroLabelFnTy;
  388. FunctionType *DFSanVarargWrapperFnTy;
  389. FunctionType *DFSanConditionalCallbackFnTy;
  390. FunctionType *DFSanConditionalCallbackOriginFnTy;
  391. FunctionType *DFSanCmpCallbackFnTy;
  392. FunctionType *DFSanLoadStoreCallbackFnTy;
  393. FunctionType *DFSanMemTransferCallbackFnTy;
  394. FunctionType *DFSanChainOriginFnTy;
  395. FunctionType *DFSanChainOriginIfTaintedFnTy;
  396. FunctionType *DFSanMemOriginTransferFnTy;
  397. FunctionType *DFSanMaybeStoreOriginFnTy;
  398. FunctionCallee DFSanUnionLoadFn;
  399. FunctionCallee DFSanLoadLabelAndOriginFn;
  400. FunctionCallee DFSanUnimplementedFn;
  401. FunctionCallee DFSanSetLabelFn;
  402. FunctionCallee DFSanNonzeroLabelFn;
  403. FunctionCallee DFSanVarargWrapperFn;
  404. FunctionCallee DFSanLoadCallbackFn;
  405. FunctionCallee DFSanStoreCallbackFn;
  406. FunctionCallee DFSanMemTransferCallbackFn;
  407. FunctionCallee DFSanConditionalCallbackFn;
  408. FunctionCallee DFSanConditionalCallbackOriginFn;
  409. FunctionCallee DFSanCmpCallbackFn;
  410. FunctionCallee DFSanChainOriginFn;
  411. FunctionCallee DFSanChainOriginIfTaintedFn;
  412. FunctionCallee DFSanMemOriginTransferFn;
  413. FunctionCallee DFSanMaybeStoreOriginFn;
  414. SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
  415. MDNode *ColdCallWeights;
  416. MDNode *OriginStoreWeights;
  417. DFSanABIList ABIList;
  418. DenseMap<Value *, Function *> UnwrappedFnMap;
  419. AttributeMask ReadOnlyNoneAttrs;
  420. /// Memory map parameters used in calculation mapping application addresses
  421. /// to shadow addresses and origin addresses.
  422. const MemoryMapParams *MapParams;
  423. Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
  424. Value *getShadowAddress(Value *Addr, Instruction *Pos);
  425. Value *getShadowAddress(Value *Addr, Instruction *Pos, Value *ShadowOffset);
  426. std::pair<Value *, Value *>
  427. getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
  428. bool isInstrumented(const Function *F);
  429. bool isInstrumented(const GlobalAlias *GA);
  430. bool isForceZeroLabels(const Function *F);
  431. FunctionType *getTrampolineFunctionType(FunctionType *T);
  432. TransformedFunction getCustomFunctionType(FunctionType *T);
  433. WrapperKind getWrapperKind(Function *F);
  434. void addGlobalNameSuffix(GlobalValue *GV);
  435. Function *buildWrapperFunction(Function *F, StringRef NewFName,
  436. GlobalValue::LinkageTypes NewFLink,
  437. FunctionType *NewFT);
  438. Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
  439. void initializeCallbackFunctions(Module &M);
  440. void initializeRuntimeFunctions(Module &M);
  441. void injectMetadataGlobals(Module &M);
  442. bool initializeModule(Module &M);
  443. /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
  444. /// from it. Returns the origin's loaded value.
  445. Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
  446. Value **OriginAddr);
  447. /// Returns whether the given load byte size is amenable to inlined
  448. /// optimization patterns.
  449. bool hasLoadSizeForFastPath(uint64_t Size);
  450. /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
  451. bool shouldTrackOrigins();
  452. /// Returns a zero constant with the shadow type of OrigTy.
  453. ///
  454. /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2,...}
  455. /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
  456. /// getZeroShadow(other type) = i16(0)
  457. Constant *getZeroShadow(Type *OrigTy);
  458. /// Returns a zero constant with the shadow type of V's type.
  459. Constant *getZeroShadow(Value *V);
  460. /// Checks if V is a zero shadow.
  461. bool isZeroShadow(Value *V);
  462. /// Returns the shadow type of OrigTy.
  463. ///
  464. /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
  465. /// getShadowTy([n x T]) = [n x getShadowTy(T)]
  466. /// getShadowTy(other type) = i16
  467. Type *getShadowTy(Type *OrigTy);
  468. /// Returns the shadow type of of V's type.
  469. Type *getShadowTy(Value *V);
  470. const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;
  471. public:
  472. DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);
  473. bool runImpl(Module &M);
  474. };
/// Per-function instrumentation state for DataFlowSanitizer.
///
/// Holds the maps from original IR values to their shadow and origin values,
/// the caches used to avoid recomputing/recollapsing shadows, and the
/// bookkeeping needed to fix up PHI nodes after the function is visited.
struct DFSanFunction {
  DataFlowSanitizer &DFS;
  Function *F;
  // Dominator tree for F, recalculated in the constructor; used to check
  // whether cached shadow values dominate their use points.
  DominatorTree DT;
  bool IsNativeABI;
  bool IsForceZeroLabels;
  // Lazily-created allocas for the return label/origin (nullptr until used).
  AllocaInst *LabelReturnAlloca = nullptr;
  AllocaInst *OriginReturnAlloca = nullptr;
  // Maps an original value to its shadow value / its origin value.
  DenseMap<Value *, Value *> ValShadowMap;
  DenseMap<Value *, Value *> ValOriginMap;
  // Maps an original alloca to the alloca holding its shadow / its origin.
  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;

  // A PHI together with its shadow/origin PHIs whose incoming values must be
  // filled in after all blocks have been instrumented.
  struct PHIFixupElement {
    PHINode *Phi;
    PHINode *ShadowPhi;
    PHINode *OriginPhi;
  };
  std::vector<PHIFixupElement> PHIFixups;

  // Instructions the visitor should skip (already handled elsewhere).
  DenseSet<Instruction *> SkipInsts;
  // Values whose shadows may be checked for non-zero labels
  // (presumably used by the ClDebugNonzeroLabels post-pass — see
  // CachedCollapsedShadows below).
  std::vector<Value *> NonZeroChecks;

  struct CachedShadow {
    BasicBlock *Block; // The block where Shadow is defined.
    Value *Shadow;
  };
  /// Maps a value to its latest shadow value in terms of domination tree.
  DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
  /// Maps a value to the latest collapsed shadow value it was converted to in
  /// terms of domination tree. When ClDebugNonzeroLabels is on, this cache is
  /// used at a post process where CFG blocks are split. So it does not cache
  /// BasicBlock like CachedShadows, but uses domination between values.
  DenseMap<Value *, Value *> CachedCollapsedShadows;
  // Per-value set of shadow values; semantics depend on combineShadows
  // (defined elsewhere in this file).
  DenseMap<Value *, std::set<Value *>> ShadowElements;

  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
                bool IsForceZeroLabels)
      : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
        IsForceZeroLabels(IsForceZeroLabels) {
    DT.recalculate(*F);
  }

  /// Computes the shadow address for a given function argument.
  ///
  /// Shadow = ArgTLS+ArgOffset.
  Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);

  /// Computes the shadow address for a return value.
  Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);

  /// Computes the origin address for a given function argument.
  ///
  /// Origin = ArgOriginTLS[ArgNo].
  Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);

  /// Computes the origin address for a return value.
  Value *getRetvalOriginTLS();

  Value *getOrigin(Value *V);
  void setOrigin(Instruction *I, Value *Origin);
  /// Generates IR to compute the origin of the last operand with a taint
  /// label.
  Value *combineOperandOrigins(Instruction *Inst);
  /// Before the instruction Pos, generates IR to compute the last origin with
  /// a taint label. Labels and origins are from vectors Shadows and Origins
  /// correspondingly. The generated IR is like
  ///   Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
  /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
  /// zeros with other bitwidths.
  Value *combineOrigins(const std::vector<Value *> &Shadows,
                        const std::vector<Value *> &Origins, Instruction *Pos,
                        ConstantInt *Zero = nullptr);

  Value *getShadow(Value *V);
  void setShadow(Instruction *I, Value *Shadow);
  /// Generates IR to compute the union of the two given shadows, inserting it
  /// before Pos. The combined value is with primitive type.
  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
  /// Combines the shadow values of V1 and V2, then converts the combined
  /// value with primitive type into a shadow value with the original type T.
  Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                   Instruction *Pos);
  Value *combineOperandShadows(Instruction *Inst);

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  ///
  /// When tracking loads is enabled, the returned origin is a chain at the
  /// current stack if the returned shadow is tainted.
  std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Instruction *Pos);

  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                  Align InstAlignment, Value *PrimitiveShadow,
                                  Value *Origin, Instruction *Pos);

  /// Applies PrimitiveShadow to all primitive subtypes of T, returning
  /// the expanded shadow value.
  ///
  /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
  /// EFP([n x T], PS) = [n x EFP(T,PS)]
  /// EFP(other types, PS) = PS
  Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                   Instruction *Pos);

  /// Collapses Shadow into a single primitive shadow value, unioning all
  /// primitive shadow values in the process. Returns the final primitive
  /// shadow value.
  ///
  /// CTP({V1,V2, ...}) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP([V1,V2,...]) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP(other types, PS) = PS
  Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos);

  void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
                                Instruction *Pos);

  Align getShadowAlign(Align InstAlignment);

  // If ClConditionalCallbacks is enabled, insert a callback after a given
  // branch instruction using the given conditional expression.
  void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);

private:
  /// Collapses the shadow with aggregate type into a single primitive shadow
  /// value.
  template <class AggregateType>
  Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                 IRBuilder<> &IRB);

  Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);

  /// Returns the shadow value of an argument A.
  Value *getShadowForTLSArgument(Argument *A);

  /// The fast path of loading shadows.
  std::pair<Value *, Value *>
  loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
                 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
                 Instruction *Pos);

  Align getOriginAlign(Align InstAlignment);

  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate
  /// load is __dfsan_load_label_and_origin. This function returns the union
  /// of all labels and the origin of the first taint label. However this is
  /// an additional call with many instructions. To ensure common cases are
  /// fast, checks if it is possible to load labels and origins without using
  /// the callback function.
  ///
  /// When enabling tracking load instructions, we always use
  /// __dfsan_load_label_and_origin to reduce code size.
  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);

  /// Returns a chain at the current stack with previous origin V.
  Value *updateOrigin(Value *V, IRBuilder<> &IRB);

  /// Returns a chain at the current stack with previous origin V if Shadow is
  /// tainted.
  Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);

  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
  /// Origin otherwise.
  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);

  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
  /// Size).
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
                   uint64_t StoreOriginSize, Align Alignment);

  /// Stores Origin in terms of its Shadow value.
  /// * Do not write origins for zero shadows because we do not trace origins
  ///   for untainted sinks.
  /// * Use __dfsan_maybe_store_origin if there are too many origin store
  ///   instrumentations.
  void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow,
                   Value *Origin, Value *StoreOriginAddr, Align InstAlignment);

  /// Convert a scalar value to an i1 by comparing with 0.
  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");

  bool shouldInstrumentWithCall();

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  std::pair<Value *, Value *>
  loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
                                   Align InstAlignment, Instruction *Pos);
  // Count of origin stores emitted so far; compared against
  // ClInstrumentWithCallThreshold in shouldInstrumentWithCall().
  int NumOriginStores = 0;
};
/// Instruction visitor that emits the DFSan instrumentation for a single
/// function, dispatching on instruction kind via InstVisitor.
class DFSanVisitor : public InstVisitor<DFSanVisitor> {
public:
  DFSanFunction &DFSF;

  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}

  const DataLayout &getDataLayout() const {
    return DFSF.F->getParent()->getDataLayout();
  }

  // Combines shadow values and origins for all of I's operands.
  void visitInstOperands(Instruction &I);

  void visitUnaryOperator(UnaryOperator &UO);
  void visitBinaryOperator(BinaryOperator &BO);
  void visitBitCastInst(BitCastInst &BCI);
  void visitCastInst(CastInst &CI);
  void visitCmpInst(CmpInst &CI);
  void visitLandingPadInst(LandingPadInst &LPI);
  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
  void visitLoadInst(LoadInst &LI);
  void visitStoreInst(StoreInst &SI);
  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
  void visitReturnInst(ReturnInst &RI);
  void visitCallBase(CallBase &CB);
  void visitPHINode(PHINode &PN);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitShuffleVectorInst(ShuffleVectorInst &I);
  void visitExtractValueInst(ExtractValueInst &I);
  void visitInsertValueInst(InsertValueInst &I);
  void visitAllocaInst(AllocaInst &I);
  void visitSelectInst(SelectInst &I);
  void visitMemSetInst(MemSetInst &I);
  void visitMemTransferInst(MemTransferInst &I);
  void visitBranchInst(BranchInst &BR);
  void visitSwitchInst(SwitchInst &SW);

private:
  // Shared handler for cmpxchg and atomicrmw instructions.
  void visitCASOrRMW(Align InstAlignment, Instruction &I);

  // Returns false when this is an invoke of a custom function.
  bool visitWrappedCallBase(Function &F, CallBase &CB);

  // Combines origins for all of I's operands.
  void visitInstOperandOrigins(Instruction &I);

  // Appends shadow arguments for a call to F to Args (used when building
  // calls to wrapped functions).
  void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  // Appends origin arguments for a call to F to Args.
  void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);
};
  684. } // end anonymous namespace
  685. DataFlowSanitizer::DataFlowSanitizer(
  686. const std::vector<std::string> &ABIListFiles) {
  687. std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
  688. llvm::append_range(AllABIListFiles, ClABIListFiles);
  689. // FIXME: should we propagate vfs::FileSystem to this constructor?
  690. ABIList.set(
  691. SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
  692. }
  693. FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
  694. assert(!T->isVarArg());
  695. SmallVector<Type *, 4> ArgTypes;
  696. ArgTypes.push_back(T->getPointerTo());
  697. ArgTypes.append(T->param_begin(), T->param_end());
  698. ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
  699. Type *RetType = T->getReturnType();
  700. if (!RetType->isVoidTy())
  701. ArgTypes.push_back(PrimitiveShadowPtrTy);
  702. if (shouldTrackOrigins()) {
  703. ArgTypes.append(T->getNumParams(), OriginTy);
  704. if (!RetType->isVoidTy())
  705. ArgTypes.push_back(OriginPtrTy);
  706. }
  707. return FunctionType::get(T->getReturnType(), ArgTypes, false);
  708. }
// Builds the signature of the "custom" wrapper for a function of type T:
// the original parameters (with function-pointer parameters rewritten to a
// trampoline pointer plus an opaque i8*), followed by per-parameter shadows,
// vararg/return shadow pointers, and - when origin tracking is enabled -
// the corresponding origin arguments.
TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
  SmallVector<Type *, 4> ArgTypes;

  // Some parameters of the custom function being constructed are
  // parameters of T.  Record the mapping from parameters of T to
  // parameters of the custom function, so that parameter attributes
  // at call sites can be updated.
  std::vector<unsigned> ArgumentIndexMapping;
  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
    Type *ParamType = T->getParamType(I);
    FunctionType *FT;
    if (isa<PointerType>(ParamType) &&
        (FT = dyn_cast<FunctionType>(ParamType->getPointerElementType()))) {
      // A function-pointer parameter becomes two parameters: a pointer to
      // its trampoline and an i8* carrying the original callee.
      ArgumentIndexMapping.push_back(ArgTypes.size());
      ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
      ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
    } else {
      ArgumentIndexMapping.push_back(ArgTypes.size());
      ArgTypes.push_back(ParamType);
    }
  }
  // One primitive shadow argument per original parameter.
  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
    ArgTypes.push_back(PrimitiveShadowTy);
  // Variadic functions pass the shadows of their variadic arguments through
  // a pointer.
  if (T->isVarArg())
    ArgTypes.push_back(PrimitiveShadowPtrTy);
  // Non-void functions receive an out-pointer for the return shadow.
  Type *RetType = T->getReturnType();
  if (!RetType->isVoidTy())
    ArgTypes.push_back(PrimitiveShadowPtrTy);

  if (shouldTrackOrigins()) {
    // Origin arguments mirror the shadow arguments above.
    for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
      ArgTypes.push_back(OriginTy);
    if (T->isVarArg())
      ArgTypes.push_back(OriginPtrTy);
    if (!RetType->isVoidTy())
      ArgTypes.push_back(OriginPtrTy);
  }

  return TransformedFunction(
      T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
      ArgumentIndexMapping);
}
  748. bool DataFlowSanitizer::isZeroShadow(Value *V) {
  749. Type *T = V->getType();
  750. if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
  751. if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
  752. return CI->isZero();
  753. return false;
  754. }
  755. return isa<ConstantAggregateZero>(V);
  756. }
  757. bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
  758. uint64_t ShadowSize = Size * ShadowWidthBytes;
  759. return ShadowSize % 8 == 0 || ShadowSize == 4;
  760. }
// Returns whether the pass tracks origins. The command-line flag is read
// once into a function-local static, so the answer stays stable for the
// lifetime of the process even if the option object were modified later.
bool DataFlowSanitizer::shouldTrackOrigins() {
  static const bool ShouldTrackOrigins = ClTrackOrigins;
  return ShouldTrackOrigins;
}
  765. Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
  766. if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
  767. return ZeroPrimitiveShadow;
  768. Type *ShadowTy = getShadowTy(OrigTy);
  769. return ConstantAggregateZero::get(ShadowTy);
  770. }
// Returns the all-zero shadow constant matching V's type.
Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
  return getZeroShadow(V->getType());
}
  774. static Value *expandFromPrimitiveShadowRecursive(
  775. Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
  776. Value *PrimitiveShadow, IRBuilder<> &IRB) {
  777. if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
  778. return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);
  779. if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
  780. for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
  781. Indices.push_back(Idx);
  782. Shadow = expandFromPrimitiveShadowRecursive(
  783. Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
  784. Indices.pop_back();
  785. }
  786. return Shadow;
  787. }
  788. if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
  789. for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
  790. Indices.push_back(Idx);
  791. Shadow = expandFromPrimitiveShadowRecursive(
  792. Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
  793. Indices.pop_back();
  794. }
  795. return Shadow;
  796. }
  797. llvm_unreachable("Unexpected shadow type");
  798. }
  799. bool DFSanFunction::shouldInstrumentWithCall() {
  800. return ClInstrumentWithCallThreshold >= 0 &&
  801. NumOriginStores >= ClInstrumentWithCallThreshold;
  802. }
// Expands PrimitiveShadow into a shadow value shaped like T's shadow type,
// inserting any needed IR before Pos (see the declaration's EFP equations).
Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                                Instruction *Pos) {
  Type *ShadowTy = DFS.getShadowTy(T);

  // Non-aggregate shadow types need no expansion.
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return PrimitiveShadow;

  // A zero primitive shadow expands to the constant zero aggregate, with no
  // IR emitted.
  if (DFS.isZeroShadow(PrimitiveShadow))
    return DFS.getZeroShadow(ShadowTy);

  IRBuilder<> IRB(Pos);
  SmallVector<unsigned, 4> Indices;
  Value *Shadow = UndefValue::get(ShadowTy);
  Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
                                              PrimitiveShadow, IRB);

  // Caches the primitive shadow value that built the shadow value, so a
  // later collapseToPrimitiveShadow of this value can reuse it.
  CachedCollapsedShadows[Shadow] = PrimitiveShadow;
  return Shadow;
}
  819. template <class AggregateType>
  820. Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
  821. IRBuilder<> &IRB) {
  822. if (!AT->getNumElements())
  823. return DFS.ZeroPrimitiveShadow;
  824. Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
  825. Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);
  826. for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
  827. Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
  828. Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
  829. Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
  830. }
  831. return Aggregator;
  832. }
  833. Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
  834. IRBuilder<> &IRB) {
  835. Type *ShadowTy = Shadow->getType();
  836. if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
  837. return Shadow;
  838. if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
  839. return collapseAggregateShadow<>(AT, Shadow, IRB);
  840. if (StructType *ST = dyn_cast<StructType>(ShadowTy))
  841. return collapseAggregateShadow<>(ST, Shadow, IRB);
  842. llvm_unreachable("Unexpected shadow type");
  843. }
// Collapses Shadow to a primitive shadow value, inserting any needed IR
// before Pos. Results are memoized per shadow value; a cached result is only
// reused when it dominates Pos.
Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                Instruction *Pos) {
  Type *ShadowTy = Shadow->getType();
  // Non-aggregate shadows are already primitive.
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return Shadow;

  // Checks if the cached collapsed shadow value dominates Pos.
  Value *&CS = CachedCollapsedShadows[Shadow];
  if (CS && DT.dominates(CS, Pos))
    return CS;

  IRBuilder<> IRB(Pos);
  Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
  // Caches the converted primitive shadow value.
  CS = PrimitiveShadow;
  return PrimitiveShadow;
}
  859. void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
  860. Value *Condition) {
  861. if (!ClConditionalCallbacks) {
  862. return;
  863. }
  864. IRBuilder<> IRB(&I);
  865. Value *CondShadow = getShadow(Condition);
  866. if (DFS.shouldTrackOrigins()) {
  867. Value *CondOrigin = getOrigin(Condition);
  868. IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
  869. {CondShadow, CondOrigin});
  870. } else {
  871. IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
  872. }
  873. }
  874. Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
  875. if (!OrigTy->isSized())
  876. return PrimitiveShadowTy;
  877. if (isa<IntegerType>(OrigTy))
  878. return PrimitiveShadowTy;
  879. if (isa<VectorType>(OrigTy))
  880. return PrimitiveShadowTy;
  881. if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
  882. return ArrayType::get(getShadowTy(AT->getElementType()),
  883. AT->getNumElements());
  884. if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
  885. SmallVector<Type *, 4> Elements;
  886. for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
  887. Elements.push_back(getShadowTy(ST->getElementType(I)));
  888. return StructType::get(*Ctx, Elements);
  889. }
  890. return PrimitiveShadowTy;
  891. }
// Returns the shadow type corresponding to V's type.
Type *DataFlowSanitizer::getShadowTy(Value *V) {
  return getShadowTy(V->getType());
}
// Per-module setup: validates the target (Linux/x86-64 only), caches the
// module/context and commonly used types and constants, and builds the
// function types for every DFSan runtime entry point. Always returns true.
bool DataFlowSanitizer::initializeModule(Module &M) {
  Triple TargetTriple(M.getTargetTriple());
  const DataLayout &DL = M.getDataLayout();

  // DFSan only supports Linux on x86-64.
  if (TargetTriple.getOS() != Triple::Linux)
    report_fatal_error("unsupported operating system");
  if (TargetTriple.getArch() != Triple::x86_64)
    report_fatal_error("unsupported architecture");
  MapParams = &Linux_X86_64_MemoryMapParams;

  Mod = &M;
  Ctx = &M.getContext();
  // Frequently used types and zero constants.
  Int8Ptr = Type::getInt8PtrTy(*Ctx);
  OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
  OriginPtrTy = PointerType::getUnqual(OriginTy);
  PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
  IntptrTy = DL.getIntPtrType(*Ctx);
  ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
  ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);

  // Signatures of the runtime support functions; the matching declarations
  // are created in initializeRuntimeFunctions/initializeCallbackFunctions.
  Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
                                         /*isVarArg=*/false);
  Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
  // Returns label and origin packed into a single i64.
  DFSanLoadLabelAndOriginFnTy =
      FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
                        /*isVarArg=*/false);
  DFSanUnimplementedFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
                                Type::getInt8PtrTy(*Ctx), IntptrTy};
  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
                                        DFSanSetLabelArgs, /*isVarArg=*/false);
  DFSanNonzeroLabelFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
  DFSanVarargWrapperFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  DFSanConditionalCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanConditionalCallbackOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
      /*isVarArg=*/false);
  DFSanCmpCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  DFSanChainOriginFnTy =
      FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
  Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanChainOriginIfTaintedFnTy = FunctionType::get(
      OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
  Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
                                        Int8Ptr, IntptrTy, OriginTy};
  DFSanMaybeStoreOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
  Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemOriginTransferFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
  Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
  DFSanLoadStoreCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
                        /*isVarArg=*/false);
  Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanMemTransferCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
                        /*isVarArg=*/false);

  // Branch weights (1 : 1000) biasing against the runtime-call/cold paths.
  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  return true;
}
  964. bool DataFlowSanitizer::isInstrumented(const Function *F) {
  965. return !ABIList.isIn(*F, "uninstrumented");
  966. }
  967. bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
  968. return !ABIList.isIn(*GA, "uninstrumented");
  969. }
  970. bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
  971. return ABIList.isIn(*F, "force_zero_labels");
  972. }
  973. DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
  974. if (ABIList.isIn(*F, "functional"))
  975. return WK_Functional;
  976. if (ABIList.isIn(*F, "discard"))
  977. return WK_Discard;
  978. if (ABIList.isIn(*F, "custom"))
  979. return WK_Custom;
  980. return WK_Warning;
  981. }
// Renames GV by appending ".dfsan" and patches any ".symver" directive in
// the module inline asm that refers to the old name.
void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
  std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
  GV->setName(GVName + Suffix);

  // Try to change the name of the function in module inline asm.  We only do
  // this for specific asm directives, currently only ".symver", to try to
  // avoid corrupting asm which happens to contain the symbol name as a
  // substring.  Note that the substitution for .symver assumes that the
  // versioned symbol also has an instrumented name.
  std::string Asm = GV->getParent()->getModuleInlineAsm();
  std::string SearchStr = ".symver " + GVName + ",";
  size_t Pos = Asm.find(SearchStr);
  if (Pos != std::string::npos) {
    // Rewrite ".symver <name>," -> ".symver <name>.dfsan," and then append
    // the suffix before the '@' version separator as well.
    Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
    Pos = Asm.find("@");

    if (Pos == std::string::npos)
      report_fatal_error(Twine("unsupported .symver: ", Asm));

    Asm.replace(Pos, 1, Suffix + "@");
    GV->getParent()->setModuleInlineAsm(Asm);
  }
}
// Creates a wrapper function named NewFName, with type NewFT and linkage
// NewFLink, that forwards its leading arguments to F. Variadic functions
// cannot be forwarded, so their wrapper calls __dfsan_vararg_wrapper (which
// reports the problem) and ends in unreachable.
Function *
DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
                                        GlobalValue::LinkageTypes NewFLink,
                                        FunctionType *NewFT) {
  FunctionType *FT = F->getFunctionType();
  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
                                    NewFName, F->getParent());
  NewF->copyAttributesFrom(F);
  // Drop return attributes that are invalid for the wrapper's return type.
  NewF->removeRetAttrs(
      AttributeFuncs::typeIncompatible(NewFT->getReturnType()));

  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
  if (F->isVarArg()) {
    // Variadic arguments cannot be forwarded: report via the runtime and
    // terminate the block with unreachable.
    NewF->removeFnAttr("split-stack");
    CallInst::Create(DFSanVarargWrapperFn,
                     IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
                     BB);
    new UnreachableInst(*Ctx, BB);
  } else {
    // Forward the first FT->getNumParams() arguments of the wrapper to F,
    // returning F's result when it is non-void.
    auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
    std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());
    CallInst *CI = CallInst::Create(F, Args, "", BB);
    if (FT->getReturnType()->isVoidTy())
      ReturnInst::Create(*Ctx, BB);
    else
      ReturnInst::Create(*Ctx, CI, BB);
  }

  return NewF;
}
// Returns (creating on first use) the trampoline named FName for function
// type FT. The trampoline takes the wrapped callee as its first argument,
// followed by the original arguments, their primitive shadows, and -
// depending on return type and origin tracking - shadow/origin return slots
// (see getTrampolineFunctionType). It calls the callee and stores the
// collapsed return shadow/origin through the provided pointers.
Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
                                                          StringRef FName) {
  FunctionType *FTT = getTrampolineFunctionType(FT);
  FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
  Function *F = dyn_cast<Function>(C.getCallee());
  // Only emit a body the first time; subsequent lookups find a definition.
  if (F && F->isDeclaration()) {
    F->setLinkage(GlobalValue::LinkOnceODRLinkage);
    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
    std::vector<Value *> Args;
    // Skip the first trampoline argument (the callee) and collect the next
    // FT->getNumParams() arguments for the forwarded call.
    Function::arg_iterator AI = F->arg_begin() + 1;
    for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
      Args.push_back(&*AI);
    CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
    Type *RetType = FT->getReturnType();
    ReturnInst *RI = RetType->isVoidTy() ? ReturnInst::Create(*Ctx, BB)
                                         : ReturnInst::Create(*Ctx, CI, BB);

    // F is called by a wrapped custom function with primitive shadows. So
    // its arguments and return value need conversion.
    DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true,
                       /*IsForceZeroLabels=*/false);
    // AI now points at the first shadow argument; expand each primitive
    // shadow to the shadow type of the corresponding value argument and
    // record it.
    Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI;
    ++ValAI;
    for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) {
      Value *Shadow =
          DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI);
      DFSF.ValShadowMap[&*ValAI] = Shadow;
    }
    // After the loop, ShadowAI points at the return-shadow slot (if any).
    Function::arg_iterator RetShadowAI = ShadowAI;
    const bool ShouldTrackOrigins = shouldTrackOrigins();
    if (ShouldTrackOrigins) {
      // Origin arguments follow the shadows (and the return-shadow slot for
      // non-void functions); record each one in the origin map directly.
      ValAI = F->arg_begin();
      ++ValAI;
      Function::arg_iterator OriginAI = ShadowAI;
      if (!RetType->isVoidTy())
        ++OriginAI;
      for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++OriginAI, --N) {
        DFSF.ValOriginMap[&*ValAI] = &*OriginAI;
      }
    }
    DFSanVisitor(DFSF).visitCallInst(*CI);
    if (!RetType->isVoidTy()) {
      // Store the collapsed return shadow (and, when tracking, the return
      // origin) through the out-pointers appended to the signature.
      Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(
          DFSF.getShadow(RI->getReturnValue()), RI);
      new StoreInst(PrimitiveShadow, &*RetShadowAI, RI);
      if (ShouldTrackOrigins) {
        Value *Origin = DFSF.getOrigin(RI->getReturnValue());
        new StoreInst(Origin, &*std::prev(F->arg_end()), RI);
      }
    }
  }

  return cast<Constant>(C.getCallee());
}
  1082. // Initialize DataFlowSanitizer runtime functions and declare them in the module
  1083. void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
  1084. {
  1085. AttributeList AL;
  1086. AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
  1087. AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
  1088. AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
  1089. DFSanUnionLoadFn =
  1090. Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
  1091. }
  1092. {
  1093. AttributeList AL;
  1094. AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
  1095. AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
  1096. AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
  1097. DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
  1098. "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
  1099. }
  1100. DFSanUnimplementedFn =
  1101. Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
  1102. {
  1103. AttributeList AL;
  1104. AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
  1105. AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
  1106. DFSanSetLabelFn =
  1107. Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
  1108. }
  1109. DFSanNonzeroLabelFn =
  1110. Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
  1111. DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
  1112. DFSanVarargWrapperFnTy);
  1113. {
  1114. AttributeList AL;
  1115. AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
  1116. AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
  1117. DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
  1118. DFSanChainOriginFnTy, AL);
  1119. }
  1120. {
  1121. AttributeList AL;
  1122. AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
  1123. AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
  1124. AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
  1125. DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
  1126. "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
  1127. }
  1128. DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
  1129. "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
  1130. {
  1131. AttributeList AL;
  1132. AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
  1133. AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
  1134. DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
  1135. "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
  1136. }
  1137. DFSanRuntimeFunctions.insert(
  1138. DFSanUnionLoadFn.getCallee()->stripPointerCasts());
  1139. DFSanRuntimeFunctions.insert(
  1140. DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
  1141. DFSanRuntimeFunctions.insert(
  1142. DFSanUnimplementedFn.getCallee()->stripPointerCasts());
  1143. DFSanRuntimeFunctions.insert(
  1144. DFSanSetLabelFn.getCallee()->stripPointerCasts());
  1145. DFSanRuntimeFunctions.insert(
  1146. DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
  1147. DFSanRuntimeFunctions.insert(
  1148. DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
  1149. DFSanRuntimeFunctions.insert(
  1150. DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
  1151. DFSanRuntimeFunctions.insert(
  1152. DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
  1153. DFSanRuntimeFunctions.insert(
  1154. DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
  1155. DFSanRuntimeFunctions.insert(
  1156. DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
  1157. DFSanRuntimeFunctions.insert(
  1158. DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
  1159. DFSanRuntimeFunctions.insert(
  1160. DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
  1161. DFSanRuntimeFunctions.insert(
  1162. DFSanChainOriginFn.getCallee()->stripPointerCasts());
  1163. DFSanRuntimeFunctions.insert(
  1164. DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
  1165. DFSanRuntimeFunctions.insert(
  1166. DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
  1167. DFSanRuntimeFunctions.insert(
  1168. DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
  1169. }
  1170. // Initializes event callback functions and declare them in the module
  1171. void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
  1172. DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback",
  1173. DFSanLoadStoreCallbackFnTy);
  1174. DFSanStoreCallbackFn = Mod->getOrInsertFunction("__dfsan_store_callback",
  1175. DFSanLoadStoreCallbackFnTy);
  1176. DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
  1177. "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
  1178. DFSanCmpCallbackFn =
  1179. Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);
  1180. DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
  1181. "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy);
  1182. DFSanConditionalCallbackOriginFn =
  1183. Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
  1184. DFSanConditionalCallbackOriginFnTy);
  1185. }
  1186. void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
  1187. // These variables can be used:
  1188. // - by the runtime (to discover what the shadow width was, during
  1189. // compilation)
  1190. // - in testing (to avoid hardcoding the shadow width and type but instead
  1191. // extract them by pattern matching)
  1192. Type *IntTy = Type::getInt32Ty(*Ctx);
  1193. (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bits", IntTy, [&] {
  1194. return new GlobalVariable(
  1195. M, IntTy, /*isConstant=*/true, GlobalValue::WeakODRLinkage,
  1196. ConstantInt::get(IntTy, ShadowWidthBits), "__dfsan_shadow_width_bits");
  1197. });
  1198. (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bytes", IntTy, [&] {
  1199. return new GlobalVariable(M, IntTy, /*isConstant=*/true,
  1200. GlobalValue::WeakODRLinkage,
  1201. ConstantInt::get(IntTy, ShadowWidthBytes),
  1202. "__dfsan_shadow_width_bytes");
  1203. });
  1204. }
// Pass entry point. Declares the TLS globals and runtime interface, rewrites
// function ABIs (adding the ".dfsan" suffix or building wrappers per the ABI
// list), then instruments every eligible function body. Returns true if the
// module was changed.
bool DataFlowSanitizer::runImpl(Module &M) {
  initializeModule(M);

  // Modules on the "skip" ABI list are left completely untouched.
  if (ABIList.isIn(M, "skip"))
    return false;

  // Snapshot sizes so we can report "changed" if we add globals/functions.
  const unsigned InitialGlobalSize = M.global_size();
  const unsigned InitialModuleSize = M.size();

  bool Changed = false;

  // Fetches (or creates) a global and forces initial-exec TLS on it,
  // recording whether that constitutes a change to the module.
  auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
                                            Type *Ty) -> Constant * {
    Constant *C = Mod->getOrInsertGlobal(Name, Ty);
    if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
      Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
      G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
    }
    return C;
  };

  // These globals must be kept in sync with the ones in dfsan.cpp.
  ArgTLS =
      GetOrInsertGlobal("__dfsan_arg_tls",
                        ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
  RetvalTLS = GetOrInsertGlobal(
      "__dfsan_retval_tls",
      ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
  ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
  ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
  RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);

  // Publish the origin-tracking mode so the runtime can read it.
  (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
    Changed = true;
    return new GlobalVariable(
        M, OriginTy, true, GlobalValue::WeakODRLinkage,
        ConstantInt::getSigned(OriginTy,
                               shouldTrackOrigins() ? ClTrackOrigins : 0),
        "__dfsan_track_origins");
  });

  injectMetadataGlobals(M);

  // Callback declarations must exist before initializeRuntimeFunctions
  // registers their callees in DFSanRuntimeFunctions.
  initializeCallbackFunctions(M);
  initializeRuntimeFunctions(M);

  std::vector<Function *> FnsToInstrument;
  SmallPtrSet<Function *, 2> FnsWithNativeABI;
  SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
  SmallPtrSet<Constant *, 1> PersonalityFns;
  for (Function &F : M)
    if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F)) {
      FnsToInstrument.push_back(&F);
      if (F.hasPersonalityFn())
        PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());
    }

  // Optionally leave non-instrumented personality routines alone.
  if (ClIgnorePersonalityRoutine) {
    for (auto *C : PersonalityFns) {
      assert(isa<Function>(C) && "Personality routine is not a function!");
      Function *F = cast<Function>(C);
      if (!isInstrumented(F))
        FnsToInstrument.erase(
            std::remove(FnsToInstrument.begin(), FnsToInstrument.end(), F),
            FnsToInstrument.end());
    }
  }

  // Give function aliases prefixes when necessary, and build wrappers where the
  // instrumentedness is inconsistent.
  for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
    // Don't stop on weak. We assume people aren't playing games with the
    // instrumentedness of overridden weak aliases.
    auto *F = dyn_cast<Function>(GA.getAliaseeObject());
    if (!F)
      continue;

    bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
    if (GAInst && FInst) {
      addGlobalNameSuffix(&GA);
    } else if (GAInst != FInst) {
      // Non-instrumented alias of an instrumented function, or vice versa.
      // Replace the alias with a native-ABI wrapper of the aliasee. The pass
      // below will take care of instrumenting it.
      Function *NewF =
          buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
      GA.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA.getType()));
      NewF->takeName(&GA);
      GA.eraseFromParent();
      FnsToInstrument.push_back(NewF);
    }
  }

  ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
      .addAttribute(Attribute::ReadNone);

  // First, change the ABI of every function in the module. ABI-listed
  // functions keep their original ABI and get a wrapper function.
  for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
                                         FE = FnsToInstrument.end();
       FI != FE; ++FI) {
    Function &F = **FI;
    FunctionType *FT = F.getFunctionType();

    bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
                              FT->getReturnType()->isVoidTy());

    if (isInstrumented(&F)) {
      if (isForceZeroLabels(&F))
        FnsWithForceZeroLabel.insert(&F);

      // Instrumented functions get a '.dfsan' suffix. This allows us to more
      // easily identify cases of mismatching ABIs. This naming scheme is
      // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
      addGlobalNameSuffix(&F);
    } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
      // Build a wrapper function for F. The wrapper simply calls F, and is
      // added to FnsToInstrument so that any instrumentation according to its
      // WrapperKind is done in the second pass below.

      // If the function being wrapped has local linkage, then preserve the
      // function's linkage in the wrapper function.
      GlobalValue::LinkageTypes WrapperLinkage =
          F.hasLocalLinkage() ? F.getLinkage()
                              : GlobalValue::LinkOnceODRLinkage;

      Function *NewF = buildWrapperFunction(
          &F,
          (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
              std::string(F.getName()),
          WrapperLinkage, FT);
      NewF->removeFnAttrs(ReadOnlyNoneAttrs);

      Value *WrappedFnCst =
          ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
      F.replaceAllUsesWith(WrappedFnCst);

      UnwrappedFnMap[WrappedFnCst] = &F;
      *FI = NewF;

      if (!F.isDeclaration()) {
        // This function is probably defining an interposition of an
        // uninstrumented function and hence needs to keep the original ABI.
        // But any functions it may call need to use the instrumented ABI, so
        // we instrument it in a mode which preserves the original ABI.
        FnsWithNativeABI.insert(&F);

        // This code needs to rebuild the iterators, as they may be invalidated
        // by the push_back, taking care that the new range does not include
        // any functions added by this code.
        size_t N = FI - FnsToInstrument.begin(),
               Count = FE - FnsToInstrument.begin();
        FnsToInstrument.push_back(&F);
        FI = FnsToInstrument.begin() + N;
        FE = FnsToInstrument.begin() + Count;
      }
      // Hopefully, nobody will try to indirectly call a vararg
      // function... yet.
    } else if (FT->isVarArg()) {
      UnwrappedFnMap[&F] = &F;
      *FI = nullptr;
    }
  }

  // Second pass: instrument the bodies of the surviving functions.
  for (Function *F : FnsToInstrument) {
    if (!F || F->isDeclaration())
      continue;

    removeUnreachableBlocks(*F);

    DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
                       FnsWithForceZeroLabel.count(F));

    // DFSanVisitor may create new basic blocks, which confuses df_iterator.
    // Build a copy of the list before iterating over it.
    SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));

    for (BasicBlock *BB : BBList) {
      Instruction *Inst = &BB->front();
      while (true) {
        // DFSanVisitor may split the current basic block, changing the current
        // instruction's next pointer and moving the next instruction to the
        // tail block from which we should continue.
        Instruction *Next = Inst->getNextNode();
        // DFSanVisitor may delete Inst, so keep track of whether it was a
        // terminator.
        bool IsTerminator = Inst->isTerminator();
        if (!DFSF.SkipInsts.count(Inst))
          DFSanVisitor(DFSF).visit(Inst);
        if (IsTerminator)
          break;
        Inst = Next;
      }
    }

    // We will not necessarily be able to compute the shadow for every phi node
    // until we have visited every block. Therefore, the code that handles phi
    // nodes adds them to the PHIFixups list so that they can be properly
    // handled here.
    for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
      for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
           ++Val) {
        P.ShadowPhi->setIncomingValue(
            Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
        if (P.OriginPhi)
          P.OriginPhi->setIncomingValue(
              Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
      }
    }

    // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
    // places (i.e. instructions in basic blocks we haven't even begun visiting
    // yet).  To make our life easier, do this work in a pass after the main
    // instrumentation.
    if (ClDebugNonzeroLabels) {
      for (Value *V : DFSF.NonZeroChecks) {
        // Insert right after the value's definition (or at function entry for
        // arguments), skipping past phis and allocas.
        Instruction *Pos;
        if (Instruction *I = dyn_cast<Instruction>(V))
          Pos = I->getNextNode();
        else
          Pos = &DFSF.F->getEntryBlock().front();
        while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
          Pos = Pos->getNextNode();
        IRBuilder<> IRB(Pos);
        Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
        Value *Ne =
            IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
        BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
            Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
        IRBuilder<> ThenIRB(BI);
        ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
      }
    }
  }

  return Changed || !FnsToInstrument.empty() ||
         M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
}
  1412. Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
  1413. Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
  1414. if (ArgOffset)
  1415. Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
  1416. return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
  1417. "_dfsarg");
  1418. }
  1419. Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
  1420. return IRB.CreatePointerCast(
  1421. DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
  1422. }
// Returns the TLS global holding the origin of the current return value.
Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }
  1424. Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
  1425. return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo,
  1426. "_dfsarg_o");
  1427. }
// Returns the origin associated with V, computing and caching it on first
// use. Values other than arguments and instructions carry the zero origin.
Value *DFSanFunction::getOrigin(Value *V) {
  assert(DFS.shouldTrackOrigins());
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.ZeroOrigin;
  // NOTE: operator[] creates the map entry if absent; the reference lets us
  // cache the computed origin in place below.
  Value *&Origin = ValOriginMap[V];
  if (!Origin) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      // Native-ABI functions receive no origin TLS from their callers.
      if (IsNativeABI)
        return DFS.ZeroOrigin;
      if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
        // Load the argument's origin from the TLS array at function entry so
        // the load dominates all uses.
        Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
        IRBuilder<> IRB(ArgOriginTLSPos);
        Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
        Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
      } else {
        // Overflow
        Origin = DFS.ZeroOrigin;
      }
    } else {
      Origin = DFS.ZeroOrigin;
    }
  }
  return Origin;
}
  1452. void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
  1453. if (!DFS.shouldTrackOrigins())
  1454. return;
  1455. assert(!ValOriginMap.count(I));
  1456. assert(Origin->getType() == DFS.OriginTy);
  1457. ValOriginMap[I] = Origin;
  1458. }
// Returns the shadow for argument A, loaded from the __dfsan_arg_tls buffer
// at the byte offset corresponding to A's position. Falls back to the zero
// shadow when the argument (or any preceding one) overflows ArgTLS, or when
// A's type is unsized.
Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
  unsigned ArgOffset = 0;
  const DataLayout &DL = F->getParent()->getDataLayout();
  for (auto &FArg : F->args()) {
    if (!FArg.getType()->isSized()) {
      // Unsized argument: it has no TLS slot; the target gets a zero shadow.
      if (A == &FArg)
        break;
      continue;
    }

    unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
    if (A != &FArg) {
      // Skip over preceding arguments, keeping each slot TLS-aligned.
      ArgOffset += alignTo(Size, ShadowTLSAlignment);
      if (ArgOffset > ArgTLSSize)
        break; // ArgTLS overflows, uses a zero shadow.
      continue;
    }

    if (ArgOffset + Size > ArgTLSSize)
      break; // ArgTLS overflows, uses a zero shadow.

    // Load at function entry so the shadow dominates all uses.
    Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
    IRBuilder<> IRB(ArgTLSPos);
    Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
    return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
                                 ShadowTLSAlignment);
  }

  return DFS.getZeroShadow(A);
}
// Returns (and caches) the shadow for V. Values that are neither arguments
// nor instructions (e.g. constants), and all values in force-zero-label
// functions, get the zero shadow; native-ABI functions read no TLS shadow
// for their arguments.
Value *DFSanFunction::getShadow(Value *V) {
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.getZeroShadow(V);
  if (IsForceZeroLabels)
    return DFS.getZeroShadow(V);
  // NOTE: operator[] creates the map entry if absent; the reference lets us
  // cache the computed shadow in place below.
  Value *&Shadow = ValShadowMap[V];
  if (!Shadow) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      if (IsNativeABI)
        return DFS.getZeroShadow(V);
      Shadow = getShadowForTLSArgument(A);
      // Argument shadows participate in -dfsan-debug-nonzero-labels checks.
      NonZeroChecks.push_back(Shadow);
    } else {
      Shadow = DFS.getZeroShadow(V);
    }
  }
  return Shadow;
}
// Records Shadow as the shadow of instruction I. An instruction's shadow may
// only be set once.
void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
  assert(!ValShadowMap.count(I));
  ValShadowMap[I] = Shadow;
}
  1507. /// Compute the integer shadow offset that corresponds to a given
  1508. /// application address.
  1509. ///
  1510. /// Offset = (Addr & ~AndMask) ^ XorMask
  1511. Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
  1512. assert(Addr != RetvalTLS && "Reinstrumenting?");
  1513. Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
  1514. uint64_t AndMask = MapParams->AndMask;
  1515. if (AndMask)
  1516. OffsetLong =
  1517. IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));
  1518. uint64_t XorMask = MapParams->XorMask;
  1519. if (XorMask)
  1520. OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
  1521. return OffsetLong;
  1522. }
  1523. std::pair<Value *, Value *>
  1524. DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
  1525. Instruction *Pos) {
  1526. // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
  1527. IRBuilder<> IRB(Pos);
  1528. Value *ShadowOffset = getShadowOffset(Addr, IRB);
  1529. Value *ShadowLong = ShadowOffset;
  1530. uint64_t ShadowBase = MapParams->ShadowBase;
  1531. if (ShadowBase != 0) {
  1532. ShadowLong =
  1533. IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
  1534. }
  1535. IntegerType *ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  1536. Value *ShadowPtr =
  1537. IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
  1538. Value *OriginPtr = nullptr;
  1539. if (shouldTrackOrigins()) {
  1540. Value *OriginLong = ShadowOffset;
  1541. uint64_t OriginBase = MapParams->OriginBase;
  1542. if (OriginBase != 0)
  1543. OriginLong =
  1544. IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
  1545. const Align Alignment = llvm::assumeAligned(InstAlignment.value());
  1546. // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
  1547. // So Mask is unnecessary.
  1548. if (Alignment < MinOriginAlignment) {
  1549. uint64_t Mask = MinOriginAlignment.value() - 1;
  1550. OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
  1551. }
  1552. OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
  1553. }
  1554. return std::make_pair(ShadowPtr, OriginPtr);
  1555. }
  1556. Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos,
  1557. Value *ShadowOffset) {
  1558. IRBuilder<> IRB(Pos);
  1559. return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
  1560. }
  1561. Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
  1562. IRBuilder<> IRB(Pos);
  1563. Value *ShadowOffset = getShadowOffset(Addr, IRB);
  1564. return getShadowAddress(Addr, Pos, ShadowOffset);
  1565. }
  1566. Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
  1567. Instruction *Pos) {
  1568. Value *PrimitiveValue = combineShadows(V1, V2, Pos);
  1569. return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
  1570. }
// Generates IR to compute the union of the two given shadows, inserting it
// before Pos. The combined value is with primitive type.
Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
  // Unioning with the zero shadow, or with itself, is the identity.
  if (DFS.isZeroShadow(V1))
    return collapseToPrimitiveShadow(V2, Pos);
  if (DFS.isZeroShadow(V2))
    return collapseToPrimitiveShadow(V1, Pos);
  if (V1 == V2)
    return collapseToPrimitiveShadow(V1, Pos);

  // ShadowElements records, for previously-built unions, the set of shadow
  // values they already cover. If one operand subsumes the other, reuse it
  // instead of emitting a redundant OR.
  auto V1Elems = ShadowElements.find(V1);
  auto V2Elems = ShadowElements.find(V2);
  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
    if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
                      V2Elems->second.begin(), V2Elems->second.end())) {
      return collapseToPrimitiveShadow(V1, Pos);
    }
    if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
                      V1Elems->second.begin(), V1Elems->second.end())) {
      return collapseToPrimitiveShadow(V2, Pos);
    }
  } else if (V1Elems != ShadowElements.end()) {
    if (V1Elems->second.count(V2))
      return collapseToPrimitiveShadow(V1, Pos);
  } else if (V2Elems != ShadowElements.end()) {
    if (V2Elems->second.count(V1))
      return collapseToPrimitiveShadow(V2, Pos);
  }

  // Canonicalize the pair so (V1, V2) and (V2, V1) share one cache slot.
  auto Key = std::make_pair(V1, V2);
  if (V1 > V2)
    std::swap(Key.first, Key.second);
  CachedShadow &CCS = CachedShadows[Key];
  // Only reuse the cached union if its block dominates the insertion point.
  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
    return CCS.Shadow;

  // Converts inputs shadows to shadows with primitive types.
  Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
  Value *PV2 = collapseToPrimitiveShadow(V2, Pos);

  IRBuilder<> IRB(Pos);
  CCS.Block = Pos->getParent();
  CCS.Shadow = IRB.CreateOr(PV1, PV2);

  // Record the shadow values covered by the new union for future
  // subsumption checks.
  std::set<Value *> UnionElems;
  if (V1Elems != ShadowElements.end()) {
    UnionElems = V1Elems->second;
  } else {
    UnionElems.insert(V1);
  }
  if (V2Elems != ShadowElements.end()) {
    UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
  } else {
    UnionElems.insert(V2);
  }
  ShadowElements[CCS.Shadow] = std::move(UnionElems);

  return CCS.Shadow;
}
  1624. // A convenience function which folds the shadows of each of the operands
  1625. // of the provided instruction Inst, inserting the IR before Inst. Returns
  1626. // the computed union Value.
  1627. Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
  1628. if (Inst->getNumOperands() == 0)
  1629. return DFS.getZeroShadow(Inst);
  1630. Value *Shadow = getShadow(Inst->getOperand(0));
  1631. for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
  1632. Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)), Inst);
  1633. return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
  1634. }
  1635. void DFSanVisitor::visitInstOperands(Instruction &I) {
  1636. Value *CombinedShadow = DFSF.combineOperandShadows(&I);
  1637. DFSF.setShadow(&I, CombinedShadow);
  1638. visitInstOperandOrigins(I);
  1639. }
  1640. Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
  1641. const std::vector<Value *> &Origins,
  1642. Instruction *Pos, ConstantInt *Zero) {
  1643. assert(Shadows.size() == Origins.size());
  1644. size_t Size = Origins.size();
  1645. if (Size == 0)
  1646. return DFS.ZeroOrigin;
  1647. Value *Origin = nullptr;
  1648. if (!Zero)
  1649. Zero = DFS.ZeroPrimitiveShadow;
  1650. for (size_t I = 0; I != Size; ++I) {
  1651. Value *OpOrigin = Origins[I];
  1652. Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
  1653. if (ConstOpOrigin && ConstOpOrigin->isNullValue())
  1654. continue;
  1655. if (!Origin) {
  1656. Origin = OpOrigin;
  1657. continue;
  1658. }
  1659. Value *OpShadow = Shadows[I];
  1660. Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
  1661. IRBuilder<> IRB(Pos);
  1662. Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
  1663. Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
  1664. }
  1665. return Origin ? Origin : DFS.ZeroOrigin;
  1666. }
  1667. Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
  1668. size_t Size = Inst->getNumOperands();
  1669. std::vector<Value *> Shadows(Size);
  1670. std::vector<Value *> Origins(Size);
  1671. for (unsigned I = 0; I != Size; ++I) {
  1672. Shadows[I] = getShadow(Inst->getOperand(I));
  1673. Origins[I] = getOrigin(Inst->getOperand(I));
  1674. }
  1675. return combineOrigins(Shadows, Origins, Inst);
  1676. }
  1677. void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
  1678. if (!DFSF.DFS.shouldTrackOrigins())
  1679. return;
  1680. Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
  1681. DFSF.setOrigin(&I, CombinedOrigin);
  1682. }
  1683. Align DFSanFunction::getShadowAlign(Align InstAlignment) {
  1684. const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
  1685. return Align(Alignment.value() * DFS.ShadowWidthBytes);
  1686. }
  1687. Align DFSanFunction::getOriginAlign(Align InstAlignment) {
  1688. const Align Alignment = llvm::assumeAligned(InstAlignment.value());
  1689. return Align(std::max(MinOriginAlignment, Alignment));
  1690. }
  1691. bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
  1692. Align InstAlignment) {
  1693. // When enabling tracking load instructions, we always use
  1694. // __dfsan_load_label_and_origin to reduce code size.
  1695. if (ClTrackOrigins == 2)
  1696. return true;
  1697. assert(Size != 0);
  1698. // * if Size == 1, it is sufficient to load its origin aligned at 4.
  1699. // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
  1700. // load its origin aligned at 4. If not, although origins may be lost, it
  1701. // should not happen very often.
  1702. // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
  1703. // Size % 4 == 0, it is more efficient to load origins without callbacks.
  1704. // * Otherwise we use __dfsan_load_label_and_origin.
  1705. // This should ensure that common cases run efficiently.
  1706. if (Size <= 2)
  1707. return false;
  1708. const Align Alignment = llvm::assumeAligned(InstAlignment.value());
  1709. return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
  1710. }
  1711. Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
  1712. Value **OriginAddr) {
  1713. IRBuilder<> IRB(Pos);
  1714. *OriginAddr =
  1715. IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
  1716. return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
  1717. }
// Fast-path shadow (and origin) load: reads the shadow for Size application
// bytes starting at ShadowAddr using wide (32/64-bit) loads, ORs the chunks
// together, then folds the wide value down to a single primitive shadow.
// Returns {combined primitive shadow, combined origin}.
std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
    Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
    Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
  const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;

  assert(Size >= 4 && "Not large enough load size for fast path!");

  // Used for origin tracking.
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
  // but this function is only used in a subset of cases that make it possible
  // to optimize the instrumentation.
  //
  // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
  // per byte) is either:
  // - a multiple of 8  (common)
  // - equal to 4  (only for load32)
  //
  // For the second case, we can fit the wide shadow in a 32-bit integer. In all
  // other cases, we use a 64-bit integer to hold the wide shadow.
  Type *WideShadowTy =
      ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);

  IRBuilder<> IRB(Pos);
  Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo());
  Value *CombinedWideShadow =
      IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);

  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;

  auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
    if (BytesPerWideShadow > 4) {
      assert(BytesPerWideShadow == 8);
      // The wide shadow relates to two origin pointers: one for the first four
      // application bytes, and one for the latest four. We use a left shift to
      // get just the shadow bytes that correspond to the first origin pointer,
      // and then the entire shadow for the second origin pointer (which will be
      // chosen by combineOrigins() iff the least-significant half of the wide
      // shadow was empty but the other half was not).
      Value *WideShadowLo = IRB.CreateShl(
          WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
      Shadows.push_back(WideShadow);
      Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));

      Shadows.push_back(WideShadowLo);
      Origins.push_back(Origin);
    } else {
      Shadows.push_back(WideShadow);
      Origins.push_back(Origin);
    }
  };

  if (ShouldTrackOrigins)
    AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);

  // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
  // then OR individual shadows within the combined WideShadow by binary ORing.
  // This is fewer instructions than ORing shadows individually, since it
  // needs logN shift/or instructions (N being the bytes of the combined wide
  // shadow).
  for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
       ByteOfs += BytesPerWideShadow) {
    WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
                             ConstantInt::get(DFS.IntptrTy, 1));
    Value *NextWideShadow =
        IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
    if (ShouldTrackOrigins) {
      Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
      AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
    }
  }
  // Fold the wide shadow down: repeatedly OR the upper half onto the lower
  // half until a single primitive shadow remains in the low bits.
  for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
       Width >>= 1) {
    Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
  }
  return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
          ShouldTrackOrigins
              ? combineOrigins(Shadows, Origins, Pos,
                               ConstantInt::getSigned(IRB.getInt64Ty(), 0))
              : DFS.ZeroOrigin};
}
// Computes the shadow (and origin, when tracking is enabled) for a load of
// Size bytes from Addr, without emitting any load-tracking callbacks.
// Returns {shadow, origin}; origin is nullptr when origin tracking is off.
std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
    Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();

  // Non-escaped loads: the alloca's shadow (and origin) live in dedicated
  // alloca slots recorded in AllocaShadowMap / AllocaOriginMap.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
      const auto OI = AllocaOriginMap.find(AI);
      assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
      return {ShadowLI, ShouldTrackOrigins
                            ? IRB.CreateLoad(DFS.OriginTy, OI->second)
                            : nullptr};
    }
  }

  // Load from constant addresses: functions, block addresses, and constant
  // globals cannot carry taint, so the result is the zero shadow.
  SmallVector<const Value *, 2> Objs;
  getUnderlyingObjects(Addr, Objs);
  bool AllConstants = true;
  for (const Value *Obj : Objs) {
    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
      continue;
    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
      continue;

    AllConstants = false;
    break;
  }
  if (AllConstants)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  // Zero-sized loads cannot be tainted.
  if (Size == 0)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  // Use callback to load if this is not an optimizable case for origin
  // tracking. The callback packs both results into one integer: the shadow
  // sits above the low OriginWidthBits, the origin occupies the low bits.
  if (ShouldTrackOrigins &&
      useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
    IRBuilder<> IRB(Pos);
    CallInst *Call =
        IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
                       {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                        ConstantInt::get(DFS.IntptrTy, Size)});
    Call->addRetAttr(Attribute::ZExt);
    return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
                            DFS.PrimitiveShadowTy),
            IRB.CreateTrunc(Call, DFS.OriginTy)};
  }

  // Other cases that support loading shadows or origins in a fast way.
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  const Align OriginAlign = getOriginAlign(InstAlignment);
  Value *Origin = nullptr;
  if (ShouldTrackOrigins) {
    IRBuilder<> IRB(Pos);
    Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
  }

  // When the byte size is small enough, we can load the shadow directly with
  // just a few instructions.
  switch (Size) {
  case 1: {
    LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
    LI->setAlignment(ShadowAlign);
    return {LI, Origin};
  }
  case 2: {
    // Load both shadow elements and union them.
    IRBuilder<> IRB(Pos);
    Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
                                       ConstantInt::get(DFS.IntptrTy, 1));
    Value *Load =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
    Value *Load1 =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
    return {combineShadows(Load, Load1, Pos), Origin};
  }
  }

  // Wide-load fast path for sizes the target supports; otherwise fall back
  // to the runtime union-load function.
  bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);
  if (HasSizeForFastPath)
    return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
                          OriginAlign, Origin, Pos);

  IRBuilder<> IRB(Pos);
  CallInst *FallbackCall = IRB.CreateCall(
      DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
  FallbackCall->addRetAttr(Attribute::ZExt);
  return {FallbackCall, Origin};
}
  1884. std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
  1885. uint64_t Size,
  1886. Align InstAlignment,
  1887. Instruction *Pos) {
  1888. Value *PrimitiveShadow, *Origin;
  1889. std::tie(PrimitiveShadow, Origin) =
  1890. loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
  1891. if (DFS.shouldTrackOrigins()) {
  1892. if (ClTrackOrigins == 2) {
  1893. IRBuilder<> IRB(Pos);
  1894. auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
  1895. if (!ConstantShadow || !ConstantShadow->isZeroValue())
  1896. Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
  1897. }
  1898. }
  1899. return {PrimitiveShadow, Origin};
  1900. }
  1901. static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
  1902. switch (AO) {
  1903. case AtomicOrdering::NotAtomic:
  1904. return AtomicOrdering::NotAtomic;
  1905. case AtomicOrdering::Unordered:
  1906. case AtomicOrdering::Monotonic:
  1907. case AtomicOrdering::Acquire:
  1908. return AtomicOrdering::Acquire;
  1909. case AtomicOrdering::Release:
  1910. case AtomicOrdering::AcquireRelease:
  1911. return AtomicOrdering::AcquireRelease;
  1912. case AtomicOrdering::SequentiallyConsistent:
  1913. return AtomicOrdering::SequentiallyConsistent;
  1914. }
  1915. llvm_unreachable("Unknown ordering");
  1916. }
// Instruments a load: computes the shadow/origin of the loaded bytes and
// binds them to the load's result value.
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
  auto &DL = LI.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(LI.getType());
  // Zero-sized loads cannot be tainted.
  if (Size == 0) {
    DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
    DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
    return;
  }

  // When an application load is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensures shadow loads return either labels of the
  // initial application data or zeros.
  if (LI.isAtomic())
    LI.setOrdering(addAcquireOrdering(LI.getOrdering()));

  // For atomic loads, shadow instrumentation is placed after the load itself.
  Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *PrimitiveShadow, *Origin;
  std::tie(PrimitiveShadow, Origin) =
      DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  if (ShouldTrackOrigins) {
    Shadows.push_back(PrimitiveShadow);
    Origins.push_back(Origin);
  }
  // Optionally fold the pointer operand's own label into the result's.
  if (ClCombinePointerLabelsOnLoad) {
    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
    PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
    }
  }
  if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
    DFSF.NonZeroChecks.push_back(PrimitiveShadow);

  // Expand the primitive shadow to match the loaded type's shadow shape.
  Value *Shadow =
      DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
  DFSF.setShadow(&LI, Shadow);

  if (ShouldTrackOrigins) {
    DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
  }

  if (ClEventCallbacks) {
    IRBuilder<> IRB(Pos);
    Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
    IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
  }
}
  1965. Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
  1966. IRBuilder<> &IRB) {
  1967. assert(DFS.shouldTrackOrigins());
  1968. return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
  1969. }
  1970. Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
  1971. if (!DFS.shouldTrackOrigins())
  1972. return V;
  1973. return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
  1974. }
  1975. Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
  1976. const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  1977. const DataLayout &DL = F->getParent()->getDataLayout();
  1978. unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  1979. if (IntptrSize == OriginSize)
  1980. return Origin;
  1981. assert(IntptrSize == OriginSize * 2);
  1982. Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
  1983. return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
  1984. }
// Writes Origin into the origin shadow covering StoreOriginSize bytes at
// StoreOriginAddr. When alignment allows, the origin is duplicated to
// pointer width so two slots are painted per store; the remainder is painted
// one origin slot at a time.
void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
                                Value *StoreOriginAddr,
                                uint64_t StoreOriginSize, Align Alignment) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getParent()->getDataLayout();
  const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  assert(IntptrAlignment >= MinOriginAlignment);
  assert(IntptrSize >= OriginSize);

  // Ofs counts origin slots already painted by the wide-store loop below.
  unsigned Ofs = 0;
  Align CurrentAlignment = Alignment;
  // Fast path: bulk-store the duplicated origin at pointer width.
  if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
    Value *IntptrOrigin = originToIntptr(IRB, Origin);
    Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
        StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
    for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
      Value *Ptr =
          I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
            : IntptrStoreOriginPtr;
      IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
      Ofs += IntptrSize / OriginSize;
      // After the first store, subsequent addresses are intptr-aligned.
      CurrentAlignment = IntptrAlignment;
    }
  }

  // Paint whatever remains (or everything, on the slow path), rounding the
  // byte count up to whole origin slots.
  for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
       ++I) {
    Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
                   : StoreOriginAddr;
    IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
    CurrentAlignment = MinOriginAlignment;
  }
}
  2017. Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
  2018. const Twine &Name) {
  2019. Type *VTy = V->getType();
  2020. assert(VTy->isIntegerTy());
  2021. if (VTy->getIntegerBitWidth() == 1)
  2022. // Just converting a bool to a bool, so do nothing.
  2023. return V;
  2024. return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
  2025. }
// Stores Origin for Size application bytes at StoreOriginAddr, but only when
// the (collapsed) shadow is non-zero.
void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
                                Value *Shadow, Value *Origin,
                                Value *StoreOriginAddr, Align InstAlignment) {
  // Do not write origins for zero shadows because we do not trace origins for
  // untainted sinks.
  const Align OriginAlignment = getOriginAlign(InstAlignment);
  Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
  IRBuilder<> IRB(Pos);
  // If the shadow is a compile-time constant, decide statically: paint
  // unconditionally when non-zero, emit nothing when zero.
  if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
    if (!ConstantShadow->isZeroValue())
      paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
                  OriginAlignment);
    return;
  }

  if (shouldInstrumentWithCall()) {
    // Let the runtime test the shadow and store the origin.
    IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn,
                   {CollapsedShadow,
                    IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(DFS.IntptrTy, Size), Origin});
  } else {
    // Inline form: branch on shadow != 0 and paint origins in the taken arm.
    Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
    Instruction *CheckTerm = SplitBlockAndInsertIfThen(
        Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT);
    IRBuilder<> IRBNew(CheckTerm);
    paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
                OriginAlignment);
    ++NumOriginStores;
  }
}
  2055. void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
  2056. Align ShadowAlign,
  2057. Instruction *Pos) {
  2058. IRBuilder<> IRB(Pos);
  2059. IntegerType *ShadowTy =
  2060. IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
  2061. Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
  2062. Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
  2063. Value *ExtShadowAddr =
  2064. IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
  2065. IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
  2066. // Do not write origins for 0 shadows because we do not trace origins for
  2067. // untainted sinks.
  2068. }
// Stores PrimitiveShadow (and, when tracking, Origin) for Size application
// bytes at Addr, using the fastest applicable strategy.
void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Value *PrimitiveShadow,
                                               Value *Origin,
                                               Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;

  // Non-escaped allocas keep their shadow in a dedicated alloca slot.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      IRB.CreateStore(PrimitiveShadow, SI->second);

      // Do not write origins for 0 shadows because we do not trace origins
      // for untainted sinks.
      if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
        const auto OI = AllocaOriginMap.find(AI);
        assert(OI != AllocaOriginMap.end() && Origin);
        IRB.CreateStore(Origin, OI->second);
      }
      return;
    }
  }

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  // A statically-zero shadow can be written with one wide store, and needs
  // no origin write.
  if (DFS.isZeroShadow(PrimitiveShadow)) {
    storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
    return;
  }

  IRBuilder<> IRB(Pos);
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  const unsigned ShadowVecSize = 8;
  assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
         "Shadow vector is too large!");

  uint64_t Offset = 0;
  uint64_t LeftSize = Size;
  // Store the shadow in 8-element vector chunks while enough bytes remain.
  if (LeftSize >= ShadowVecSize) {
    auto *ShadowVecTy =
        FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
    // Splat the primitive shadow across all vector lanes.
    Value *ShadowVec = UndefValue::get(ShadowVecTy);
    for (unsigned I = 0; I != ShadowVecSize; ++I) {
      ShadowVec = IRB.CreateInsertElement(
          ShadowVec, PrimitiveShadow,
          ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
    }
    Value *ShadowVecAddr =
        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
    do {
      Value *CurShadowVecAddr =
          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
      IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
      LeftSize -= ShadowVecSize;
      ++Offset;
    } while (LeftSize >= ShadowVecSize);
    // Convert the vector-chunk count back to an element offset for the
    // scalar tail loop below.
    Offset *= ShadowVecSize;
  }
  // Store any remaining shadow one element at a time.
  while (LeftSize > 0) {
    Value *CurShadowAddr =
        IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
    IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
    --LeftSize;
    ++Offset;
  }

  if (ShouldTrackOrigins) {
    storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
                InstAlignment);
  }
}
  2136. static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {
  2137. switch (AO) {
  2138. case AtomicOrdering::NotAtomic:
  2139. return AtomicOrdering::NotAtomic;
  2140. case AtomicOrdering::Unordered:
  2141. case AtomicOrdering::Monotonic:
  2142. case AtomicOrdering::Release:
  2143. return AtomicOrdering::Release;
  2144. case AtomicOrdering::Acquire:
  2145. case AtomicOrdering::AcquireRelease:
  2146. return AtomicOrdering::AcquireRelease;
  2147. case AtomicOrdering::SequentiallyConsistent:
  2148. return AtomicOrdering::SequentiallyConsistent;
  2149. }
  2150. llvm_unreachable("Unknown ordering");
  2151. }
// Instruments a store: writes the stored value's shadow/origin into the
// shadow memory corresponding to the destination address.
void DFSanVisitor::visitStoreInst(StoreInst &SI) {
  auto &DL = SI.getModule()->getDataLayout();
  Value *Val = SI.getValueOperand();
  uint64_t Size = DL.getTypeStoreSize(Val->getType());
  if (Size == 0)
    return;

  // When an application store is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensures shadow loads return either labels of the
  // initial application data or zeros.
  if (SI.isAtomic())
    SI.setOrdering(addReleaseOrdering(SI.getOrdering()));

  // Atomic stores write zero shadow (see below), so origins are not tracked
  // for them either.
  const bool ShouldTrackOrigins =
      DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  Value *Shadow =
      SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);

  if (ShouldTrackOrigins) {
    Shadows.push_back(Shadow);
    Origins.push_back(DFSF.getOrigin(Val));
  }

  // Optionally fold the pointer operand's label into the stored shadow.
  Value *PrimitiveShadow;
  if (ClCombinePointerLabelsOnStore) {
    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
    }
    PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
  } else {
    PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
  }
  Value *Origin = nullptr;
  if (ShouldTrackOrigins)
    Origin = DFSF.combineOrigins(Shadows, Origins, &SI);
  DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
                                  PrimitiveShadow, Origin, &SI);
  if (ClEventCallbacks) {
    IRBuilder<> IRB(&SI);
    Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
    IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
  }
}
  2197. void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
  2198. assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
  2199. Value *Val = I.getOperand(1);
  2200. const auto &DL = I.getModule()->getDataLayout();
  2201. uint64_t Size = DL.getTypeStoreSize(Val->getType());
  2202. if (Size == 0)
  2203. return;
  2204. // Conservatively set data at stored addresses and return with zero shadow to
  2205. // prevent shadow data races.
  2206. IRBuilder<> IRB(&I);
  2207. Value *Addr = I.getOperand(0);
  2208. const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
  2209. DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, &I);
  2210. DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
  2211. DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
  2212. }
  2213. void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
  2214. visitCASOrRMW(I.getAlign(), I);
  2215. // TODO: The ordering change follows MSan. It is possible not to change
  2216. // ordering because we always set and use 0 shadows.
  2217. I.setOrdering(addReleaseOrdering(I.getOrdering()));
  2218. }
  2219. void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
  2220. visitCASOrRMW(I.getAlign(), I);
  2221. // TODO: The ordering change follows MSan. It is possible not to change
  2222. // ordering because we always set and use 0 shadows.
  2223. I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
  2224. }
// Unary operators propagate their operand's shadow/origin to the result.
void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
  visitInstOperands(UO);
}
// Binary operators combine both operands' shadows/origins into the result.
void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
  visitInstOperands(BO);
}
  2231. void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
  2232. // Special case: if this is the bitcast (there is exactly 1 allowed) between
  2233. // a musttail call and a ret, don't instrument. New instructions are not
  2234. // allowed after a musttail call.
  2235. if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
  2236. if (CI->isMustTailCall())
  2237. return;
  2238. visitInstOperands(BCI);
  2239. }
// All other casts propagate the operand's shadow/origin to the result.
void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }
  2241. void DFSanVisitor::visitCmpInst(CmpInst &CI) {
  2242. visitInstOperands(CI);
  2243. if (ClEventCallbacks) {
  2244. IRBuilder<> IRB(&CI);
  2245. Value *CombinedShadow = DFSF.getShadow(&CI);
  2246. IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
  2247. }
  2248. }
  2249. void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
  2250. // We do not need to track data through LandingPadInst.
  2251. //
  2252. // For the C++ exceptions, if a value is thrown, this value will be stored
  2253. // in a memory location provided by __cxa_allocate_exception(...) (on the
  2254. // throw side) or __cxa_begin_catch(...) (on the catch side).
  2255. // This memory will have a shadow, so with the loads and stores we will be
  2256. // able to propagate labels on data thrown through exceptions, without any
  2257. // special handling of the LandingPadInst.
  2258. //
  2259. // The second element in the pair result of the LandingPadInst is a
  2260. // register value, but it is for a type ID and should never be tainted.
  2261. DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
  2262. DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
  2263. }
  2264. void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  2265. if (ClCombineOffsetLabelsOnGEP) {
  2266. visitInstOperands(GEPI);
  2267. return;
  2268. }
  2269. // Only propagate shadow/origin of base pointer value but ignore those of
  2270. // offset operands.
  2271. Value *BasePointer = GEPI.getPointerOperand();
  2272. DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
  2273. if (DFSF.DFS.shouldTrackOrigins())
  2274. DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
  2275. }
// extractelement propagates operand shadows/origins to the result.
void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
  visitInstOperands(I);
}
// insertelement propagates operand shadows/origins to the result.
void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
  visitInstOperands(I);
}
// shufflevector propagates operand shadows/origins to the result.
void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
  visitInstOperands(I);
}
  2285. void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
  2286. IRBuilder<> IRB(&I);
  2287. Value *Agg = I.getAggregateOperand();
  2288. Value *AggShadow = DFSF.getShadow(Agg);
  2289. Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
  2290. DFSF.setShadow(&I, ResShadow);
  2291. visitInstOperandOrigins(I);
  2292. }
  2293. void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
  2294. IRBuilder<> IRB(&I);
  2295. Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
  2296. Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
  2297. Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
  2298. DFSF.setShadow(&I, Res);
  2299. visitInstOperandOrigins(I);
  2300. }
  2301. void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
  2302. bool AllLoadsStores = true;
  2303. for (User *U : I.users()) {
  2304. if (isa<LoadInst>(U))
  2305. continue;
  2306. if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
  2307. if (SI->getPointerOperand() == &I)
  2308. continue;
  2309. }
  2310. AllLoadsStores = false;
  2311. break;
  2312. }
  2313. if (AllLoadsStores) {
  2314. IRBuilder<> IRB(&I);
  2315. DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
  2316. if (DFSF.DFS.shouldTrackOrigins()) {
  2317. DFSF.AllocaOriginMap[&I] =
  2318. IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
  2319. }
  2320. }
  2321. DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
  2322. DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
  2323. }
// Computes the select result's shadow/origin. Depending on
// ClTrackSelectControlFlow, the condition's label may be folded in as well.
void DFSanVisitor::visitSelectInst(SelectInst &I) {
  Value *CondShadow = DFSF.getShadow(I.getCondition());
  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
  Value *ShadowSel = nullptr;
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *TrueOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
  Value *FalseOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;

  DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());

  if (isa<VectorType>(I.getCondition()->getType())) {
    // Vector conditions select per lane; conservatively combine both arms'
    // shadows (and origins) rather than selecting between them.
    ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
                                               FalseShadow, &I);
    if (ShouldTrackOrigins) {
      Shadows.push_back(TrueShadow);
      Shadows.push_back(FalseShadow);
      Origins.push_back(TrueOrigin);
      Origins.push_back(FalseOrigin);
    }
  } else {
    if (TrueShadow == FalseShadow) {
      // Both arms carry the same shadow; no shadow select is needed.
      ShadowSel = TrueShadow;
      if (ShouldTrackOrigins) {
        Shadows.push_back(TrueShadow);
        Origins.push_back(TrueOrigin);
      }
    } else {
      // Mirror the application select on the shadow (and origin) values.
      ShadowSel =
          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
      if (ShouldTrackOrigins) {
        Shadows.push_back(ShadowSel);
        Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
                                             FalseOrigin, "", &I));
      }
    }
  }
  // Optionally fold the condition's label into the result's.
  DFSF.setShadow(&I, ClTrackSelectControlFlow
                         ? DFSF.combineShadowsThenConvert(
                               I.getType(), CondShadow, ShadowSel, &I)
                         : ShadowSel);
  if (ShouldTrackOrigins) {
    if (ClTrackSelectControlFlow) {
      Shadows.push_back(CondShadow);
      Origins.push_back(DFSF.getOrigin(I.getCondition()));
    }
    DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I));
  }
}
  2375. void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
  2376. IRBuilder<> IRB(&I);
  2377. Value *ValShadow = DFSF.getShadow(I.getValue());
  2378. Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
  2379. ? DFSF.getOrigin(I.getValue())
  2380. : DFSF.DFS.ZeroOrigin;
  2381. IRB.CreateCall(
  2382. DFSF.DFS.DFSanSetLabelFn,
  2383. {ValShadow, ValOrigin,
  2384. IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
  2385. IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
  2386. }
// Instruments memcpy/memmove: copies the source range's shadow (and origins)
// alongside the application bytes.
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
  IRBuilder<> IRB(&I);

  // CopyOrMoveOrigin transfers origins by referring to their shadows. So we
  // need to move origins before moving shadows.
  if (DFSF.DFS.shouldTrackOrigins()) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemOriginTransferFn,
        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
  }

  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
  // The shadow region is ShadowWidthBytes times the application length.
  Value *LenShadow =
      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
                                                    DFSF.DFS.ShadowWidthBytes));
  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
  // Emit a second mem-transfer intrinsic of the same kind over the shadow
  // region, scaling the alignments to the shadow's granularity.
  auto *MTI = cast<MemTransferInst>(
      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
  if (ClPreserveAlignment) {
    MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes);
    MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes);
  } else {
    MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes));
    MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes));
  }
  if (ClEventCallbacks) {
    IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
                   {RawDestShadow,
                    IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
  }
}
  2422. void DFSanVisitor::visitBranchInst(BranchInst &BR) {
  2423. if (!BR.isConditional())
  2424. return;
  2425. DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());
  2426. }
// Switches may need a conditional callback on the scrutinee's label.
void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
  DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());
}
  2430. static bool isAMustTailRetVal(Value *RetVal) {
  2431. // Tail call may have a bitcast between return.
  2432. if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
  2433. RetVal = I->getOperand(0);
  2434. }
  2435. if (auto *I = dyn_cast<CallInst>(RetVal)) {
  2436. return I->isMustTailCall();
  2437. }
  2438. return false;
  2439. }
// Passes the return value's shadow (and origin) back to the caller through
// the retval TLS slots.
void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
    // Don't emit the instrumentation for musttail call returns.
    if (isAMustTailRetVal(RI.getReturnValue()))
      return;

    Value *S = DFSF.getShadow(RI.getReturnValue());
    IRBuilder<> IRB(&RI);
    Type *RT = DFSF.F->getFunctionType()->getReturnType();
    unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
    // Shadows larger than the TLS slot are not stored at all; at the call
    // site, oversized return shadows are set to zero.
    if (Size <= RetvalTLSSize) {
      IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
    }
    if (DFSF.DFS.shouldTrackOrigins()) {
      Value *O = DFSF.getOrigin(RI.getReturnValue());
      IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
    }
  }
}
// Appends shadow arguments for call CB to Args: one primitive shadow per
// fixed parameter, a pointer to an array of shadows for any varargs, and an
// output pointer for the return value's shadow.
void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Adds non-variable argument shadows.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB));

  // Adds variable argument shadows: stored in a stack array whose address is
  // passed as a single extra argument.
  if (FT->isVarArg()) {
    auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
                                     CB.arg_size() - FT->getNumParams());
    auto *LabelVAAlloca =
        new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
                       "labelva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
      IRB.CreateStore(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB),
                      LabelVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
  }

  // Adds the return value shadow: an output slot, created once per function
  // and reused across call sites.
  if (!FT->getReturnType()->isVoidTy()) {
    if (!DFSF.LabelReturnAlloca) {
      DFSF.LabelReturnAlloca = new AllocaInst(
          DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
          "labelreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.LabelReturnAlloca);
  }
}
  2492. void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
  2493. std::vector<Value *> &Args,
  2494. IRBuilder<> &IRB) {
  2495. FunctionType *FT = F.getFunctionType();
  2496. auto *I = CB.arg_begin();
  2497. // Add non-variable argument origins.
  2498. for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
  2499. Args.push_back(DFSF.getOrigin(*I));
  2500. // Add variable argument origins.
  2501. if (FT->isVarArg()) {
  2502. auto *OriginVATy =
  2503. ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
  2504. auto *OriginVAAlloca =
  2505. new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
  2506. "originva", &DFSF.F->getEntryBlock().front());
  2507. for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
  2508. auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
  2509. IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
  2510. }
  2511. Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
  2512. }
  2513. // Add the return value origin.
  2514. if (!FT->getReturnType()->isVoidTy()) {
  2515. if (!DFSF.OriginReturnAlloca) {
  2516. DFSF.OriginReturnAlloca = new AllocaInst(
  2517. DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
  2518. "originreturn", &DFSF.F->getEntryBlock().front());
  2519. }
  2520. Args.push_back(DFSF.OriginReturnAlloca);
  2521. }
  2522. }
// Instruments a call to a function with a known wrapper kind (from the ABI
// list). Returns true if the call was fully handled here; false means the
// caller should fall back to generic call instrumentation.
bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
  IRBuilder<> IRB(&CB);
  switch (DFSF.DFS.getWrapperKind(&F)) {
  case DataFlowSanitizer::WK_Warning:
    // Unknown function: emit a runtime warning call and treat the result as
    // having zero shadow/origin.
    CB.setCalledFunction(&F);
    IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
                   IRB.CreateGlobalStringPtr(F.getName()));
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Discard:
    // Discard labels: the return value gets zero shadow/origin.
    CB.setCalledFunction(&F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Functional:
    // Functional: propagate operand labels to the result like any other
    // instruction.
    CB.setCalledFunction(&F);
    visitInstOperands(CB);
    return true;
  case DataFlowSanitizer::WK_Custom:
    // Don't try to handle invokes of custom functions, it's too complicated.
    // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
    // wrapper.
    CallInst *CI = dyn_cast<CallInst>(&CB);
    if (!CI)
      return false;

    const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
    FunctionType *FT = F.getFunctionType();
    TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
    // The wrapper is named __dfso_<fn> when origins are tracked, __dfsw_<fn>
    // otherwise.
    std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
    CustomFName += F.getName();
    FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
        CustomFName, CustomFn.TransformedType);
    if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
      CustomFn->copyAttributesFrom(&F);

      // Custom functions returning non-void will write to the return label.
      if (!FT->getReturnType()->isVoidTy()) {
        CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
      }
    }

    std::vector<Value *> Args;

    // Adds non-variable arguments. A function-pointer argument is passed as a
    // (trampoline, i8*-cast pointer) pair built via
    // getOrBuildTrampolineFunction.
    auto *I = CB.arg_begin();
    for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
      Type *T = (*I)->getType();
      FunctionType *ParamFT;
      if (isa<PointerType>(T) &&
          (ParamFT = dyn_cast<FunctionType>(T->getPointerElementType()))) {
        // Trampoline name encodes the argument position and the callee name.
        std::string TName = "dfst";
        TName += utostr(FT->getNumParams() - N);
        TName += "$";
        TName += F.getName();
        Constant *Trampoline =
            DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
        Args.push_back(Trampoline);
        Args.push_back(
            IRB.CreateBitCast(*I, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
      } else {
        Args.push_back(*I);
      }
    }

    // Adds shadow arguments (records where they start for the ZExt fixup
    // below).
    const unsigned ShadowArgStart = Args.size();
    addShadowArguments(F, CB, Args, IRB);

    // Adds origin arguments.
    const unsigned OriginArgStart = Args.size();
    if (ShouldTrackOrigins)
      addOriginArguments(F, CB, Args, IRB);

    // Adds variable arguments (passed through unchanged, after the
    // shadow/origin arguments).
    append_range(Args, drop_begin(CB.args(), FT->getNumParams()));

    CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
    CustomCI->setCallingConv(CI->getCallingConv());
    CustomCI->setAttributes(transformFunctionAttributes(
        CustomFn, CI->getContext(), CI->getAttributes()));

    // Update the parameter attributes of the custom call instruction to
    // zero extend the shadow parameters. This is required for targets
    // which consider PrimitiveShadowTy an illegal type.
    for (unsigned N = 0; N < FT->getNumParams(); N++) {
      const unsigned ArgNo = ShadowArgStart + N;
      if (CustomCI->getArgOperand(ArgNo)->getType() ==
          DFSF.DFS.PrimitiveShadowTy)
        CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
      if (ShouldTrackOrigins) {
        const unsigned OriginArgNo = OriginArgStart + N;
        if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
            DFSF.DFS.OriginTy)
          CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
      }
    }

    // Loads the return value shadow and origin from the out-parameter allocas
    // that addShadowArguments/addOriginArguments passed to the wrapper.
    if (!FT->getReturnType()->isVoidTy()) {
      LoadInst *LabelLoad =
          IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
      DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
                                   FT->getReturnType(), LabelLoad, &CB));
      if (ShouldTrackOrigins) {
        LoadInst *OriginLoad =
            IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
        DFSF.setOrigin(CustomCI, OriginLoad);
      }
    }

    // The wrapper call replaces the original call entirely.
    CI->replaceAllUsesWith(CustomCI);
    CI->eraseFromParent();
    return true;
  }
  return false;
}
// Instruments a call or invoke: stores argument shadows (and origins) into
// TLS before the call and loads the return shadow (and origin) from TLS after
// it. Wrapped functions from the ABI list are dispatched to
// visitWrappedCallBase first.
void DFSanVisitor::visitCallBase(CallBase &CB) {
  Function *F = CB.getCalledFunction();
  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
    // Intrinsics and inline asm are treated like ordinary instructions: the
    // result label is derived from the operand labels.
    visitInstOperands(CB);
    return;
  }

  // Calls to this function are synthesized in wrappers, and we shouldn't
  // instrument them.
  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
    return;

  DenseMap<Value *, Function *>::iterator UnwrappedFnIt =
      DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
  if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
    if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
      return;

  IRBuilder<> IRB(&CB);

  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  FunctionType *FT = CB.getFunctionType();
  const DataLayout &DL = getDataLayout();

  // Stores argument shadows.
  unsigned ArgOffset = 0;
  for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
    if (ShouldTrackOrigins) {
      // Ignore overflowed origins; origins are only stored for arguments with
      // a non-zero shadow that fit in the origin TLS array.
      Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
      if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
          !DFSF.DFS.isZeroShadow(ArgShadow))
        IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
                        DFSF.getArgOriginTLS(I, IRB));
    }

    unsigned Size =
        DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
    // Stop storing if arguments' size overflows. Inside a function, arguments
    // after overflow have zero shadow values.
    if (ArgOffset + Size > ArgTLSSize)
      break;
    IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
                           DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
                           ShadowTLSAlignment);
    ArgOffset += alignTo(Size, ShadowTLSAlignment);
  }

  // Find the insertion point for the return-shadow load: right after the
  // call, or at the start of an invoke's normal destination (splitting the
  // edge if the destination has multiple predecessors).
  Instruction *Next = nullptr;
  if (!CB.getType()->isVoidTy()) {
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      if (II->getNormalDest()->getSinglePredecessor()) {
        Next = &II->getNormalDest()->front();
      } else {
        BasicBlock *NewBB =
            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
        Next = &NewBB->front();
      }
    } else {
      assert(CB.getIterator() != CB.getParent()->end());
      Next = CB.getNextNode();
    }

    // Don't emit the epilogue for musttail call returns.
    if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
      return;

    // Loads the return value shadow.
    IRBuilder<> NextIRB(Next);
    unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
    if (Size > RetvalTLSSize) {
      // Set overflowed return shadow to be zero.
      DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    } else {
      LoadInst *LI = NextIRB.CreateAlignedLoad(
          DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
          ShadowTLSAlignment, "_dfsret");
      // The shadow load itself must not be instrumented, but its result may
      // feed the non-zero-label checks.
      DFSF.SkipInsts.insert(LI);
      DFSF.setShadow(&CB, LI);
      DFSF.NonZeroChecks.push_back(LI);
    }

    if (ShouldTrackOrigins) {
      LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
                                        DFSF.getRetvalOriginTLS(), "_dfsret_o");
      DFSF.SkipInsts.insert(LI);
      DFSF.setOrigin(&CB, LI);
    }
  }
}
  2710. void DFSanVisitor::visitPHINode(PHINode &PN) {
  2711. Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
  2712. PHINode *ShadowPN =
  2713. PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);
  2714. // Give the shadow phi node valid predecessors to fool SplitEdge into working.
  2715. Value *UndefShadow = UndefValue::get(ShadowTy);
  2716. for (BasicBlock *BB : PN.blocks())
  2717. ShadowPN->addIncoming(UndefShadow, BB);
  2718. DFSF.setShadow(&PN, ShadowPN);
  2719. PHINode *OriginPN = nullptr;
  2720. if (DFSF.DFS.shouldTrackOrigins()) {
  2721. OriginPN =
  2722. PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN);
  2723. Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
  2724. for (BasicBlock *BB : PN.blocks())
  2725. OriginPN->addIncoming(UndefOrigin, BB);
  2726. DFSF.setOrigin(&PN, OriginPN);
  2727. }
  2728. DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
  2729. }
  2730. namespace {
  2731. class DataFlowSanitizerLegacyPass : public ModulePass {
  2732. private:
  2733. std::vector<std::string> ABIListFiles;
  2734. public:
  2735. static char ID;
  2736. DataFlowSanitizerLegacyPass(
  2737. const std::vector<std::string> &ABIListFiles = std::vector<std::string>())
  2738. : ModulePass(ID), ABIListFiles(ABIListFiles) {}
  2739. bool runOnModule(Module &M) override {
  2740. return DataFlowSanitizer(ABIListFiles).runImpl(M);
  2741. }
  2742. };
  2743. } // namespace
// Pass identification token; its address (not value) identifies the pass.
char DataFlowSanitizerLegacyPass::ID;

// Registers the legacy pass under the "dfsan" command-line name.
INITIALIZE_PASS(DataFlowSanitizerLegacyPass, "dfsan",
                "DataFlowSanitizer: dynamic data flow analysis.", false, false)
// Factory for the legacy pass; caller takes ownership of the returned pass.
ModulePass *llvm::createDataFlowSanitizerLegacyPassPass(
    const std::vector<std::string> &ABIListFiles) {
  return new DataFlowSanitizerLegacyPass(ABIListFiles);
}
  2751. PreservedAnalyses DataFlowSanitizerPass::run(Module &M,
  2752. ModuleAnalysisManager &AM) {
  2753. if (DataFlowSanitizer(ABIListFiles).runImpl(M)) {
  2754. return PreservedAnalyses::none();
  2755. }
  2756. return PreservedAnalyses::all();
  2757. }