RangeConstraintManager.cpp 118 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143
  1. //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines RangeConstraintManager, a class that tracks simple
  10. // equality and inequality constraints on symbolic values of ProgramState.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "clang/Basic/JsonSupport.h"
  14. #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
  15. #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
  16. #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
  17. #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
  18. #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
  19. #include "llvm/ADT/FoldingSet.h"
  20. #include "llvm/ADT/ImmutableSet.h"
  21. #include "llvm/ADT/STLExtras.h"
  22. #include "llvm/ADT/StringExtras.h"
  23. #include "llvm/ADT/SmallSet.h"
  24. #include "llvm/Support/Compiler.h"
  25. #include "llvm/Support/raw_ostream.h"
  26. #include <algorithm>
  27. #include <iterator>
  28. using namespace clang;
  29. using namespace ento;
  30. // This class can be extended with other tables which will help to reason
  31. // about ranges more precisely.
  32. class OperatorRelationsTable {
  33. static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
  34. BO_GE < BO_EQ && BO_EQ < BO_NE,
  35. "This class relies on operators order. Rework it otherwise.");
  36. public:
  37. enum TriStateKind {
  38. False = 0,
  39. True,
  40. Unknown,
  41. };
  42. private:
  43. // CmpOpTable holds states which represent the corresponding range for
  44. // branching an exploded graph. We can reason about the branch if there is
  45. // a previously known fact of the existence of a comparison expression with
  46. // operands used in the current expression.
  47. // E.g. assuming (x < y) is true that means (x != y) is surely true.
  48. // if (x previous_operation y) // < | != | >
  49. // if (x operation y) // != | > | <
  50. // tristate // True | Unknown | False
  51. //
  52. // CmpOpTable represents next:
  53. // __|< |> |<=|>=|==|!=|UnknownX2|
  54. // < |1 |0 |* |0 |0 |* |1 |
  55. // > |0 |1 |0 |* |0 |* |1 |
  56. // <=|1 |0 |1 |* |1 |* |0 |
  57. // >=|0 |1 |* |1 |1 |* |0 |
  58. // ==|0 |0 |* |* |1 |0 |1 |
  59. // !=|1 |1 |* |* |0 |1 |0 |
  60. //
  61. // Columns stands for a previous operator.
  62. // Rows stands for a current operator.
  63. // Each row has exactly two `Unknown` cases.
  64. // UnknownX2 means that both `Unknown` previous operators are met in code,
  65. // and there is a special column for that, for example:
  66. // if (x >= y)
  67. // if (x != y)
  68. // if (x <= y)
  69. // False only
  70. static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
  71. const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
  72. // < > <= >= == != UnknownX2
  73. {True, False, Unknown, False, False, Unknown, True}, // <
  74. {False, True, False, Unknown, False, Unknown, True}, // >
  75. {True, False, True, Unknown, True, Unknown, False}, // <=
  76. {False, True, Unknown, True, True, Unknown, False}, // >=
  77. {False, False, Unknown, Unknown, True, False, True}, // ==
  78. {True, True, Unknown, Unknown, False, True, False}, // !=
  79. };
  80. static size_t getIndexFromOp(BinaryOperatorKind OP) {
  81. return static_cast<size_t>(OP - BO_LT);
  82. }
  83. public:
  84. constexpr size_t getCmpOpCount() const { return CmpOpCount; }
  85. static BinaryOperatorKind getOpFromIndex(size_t Index) {
  86. return static_cast<BinaryOperatorKind>(Index + BO_LT);
  87. }
  88. TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP,
  89. BinaryOperatorKind QueriedOP) const {
  90. return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
  91. }
  92. TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const {
  93. return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
  94. }
  95. };
  96. //===----------------------------------------------------------------------===//
  97. // RangeSet implementation
  98. //===----------------------------------------------------------------------===//
  // Out-of-class definition of the Factory's EmptySet member; the empty braces
  // leave it without any ranges.
  // NOTE(review): presumably this is the container handed out by
  // getEmptySet() - confirm in the header.
  RangeSet::ContainerType RangeSet::Factory::EmptySet{};
  100. RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) {
  101. ContainerType Result;
  102. Result.reserve(LHS.size() + RHS.size());
  103. std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
  104. std::back_inserter(Result));
  105. return makePersistent(std::move(Result));
  106. }
  107. RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) {
  108. ContainerType Result;
  109. Result.reserve(Original.size() + 1);
  110. const_iterator Lower = llvm::lower_bound(Original, Element);
  111. Result.insert(Result.end(), Original.begin(), Lower);
  112. Result.push_back(Element);
  113. Result.insert(Result.end(), Lower, Original.end());
  114. return makePersistent(std::move(Result));
  115. }
  116. RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
  117. return add(Original, Range(Point));
  118. }
  119. RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) {
  120. ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
  121. return makePersistent(std::move(Result));
  122. }
  123. RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) {
  124. ContainerType Result;
  125. Result.push_back(R);
  126. Result = unite(*Original.Impl, Result);
  127. return makePersistent(std::move(Result));
  128. }
  129. RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
  130. return unite(Original, Range(ValueFactory.getValue(Point)));
  131. }
  132. RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
  133. llvm::APSInt To) {
  134. return unite(Original,
  135. Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
  136. }
  /// Exchanges two iterator ranges: after the call (First, FirstEnd) denote
  /// what (Second, SecondEnd) denoted before, and vice versa.
  template <typename T>
  void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
    // The two swaps are independent of each other.
    std::swap(FirstEnd, SecondEnd);
    std::swap(First, Second);
  }
  142. RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
  143. const ContainerType &RHS) {
  144. if (LHS.empty())
  145. return RHS;
  146. if (RHS.empty())
  147. return LHS;
  148. using llvm::APSInt;
  149. using iterator = ContainerType::const_iterator;
  150. iterator First = LHS.begin();
  151. iterator FirstEnd = LHS.end();
  152. iterator Second = RHS.begin();
  153. iterator SecondEnd = RHS.end();
  154. APSIntType Ty = APSIntType(First->From());
  155. const APSInt Min = Ty.getMinValue();
  156. // Handle a corner case first when both range sets start from MIN.
  157. // This helps to avoid complicated conditions below. Specifically, this
  158. // particular check for `MIN` is not needed in the loop below every time
  159. // when we do `Second->From() - One` operation.
  160. if (Min == First->From() && Min == Second->From()) {
  161. if (First->To() > Second->To()) {
  162. // [ First ]--->
  163. // [ Second ]----->
  164. // MIN^
  165. // The Second range is entirely inside the First one.
  166. // Check if Second is the last in its RangeSet.
  167. if (++Second == SecondEnd)
  168. // [ First ]--[ First + 1 ]--->
  169. // [ Second ]--------------------->
  170. // MIN^
  171. // The Union is equal to First's RangeSet.
  172. return LHS;
  173. } else {
  174. // case 1: [ First ]----->
  175. // case 2: [ First ]--->
  176. // [ Second ]--->
  177. // MIN^
  178. // The First range is entirely inside or equal to the Second one.
  179. // Check if First is the last in its RangeSet.
  180. if (++First == FirstEnd)
  181. // [ First ]----------------------->
  182. // [ Second ]--[ Second + 1 ]---->
  183. // MIN^
  184. // The Union is equal to Second's RangeSet.
  185. return RHS;
  186. }
  187. }
  188. const APSInt One = Ty.getValue(1);
  189. ContainerType Result;
  190. // This is called when there are no ranges left in one of the ranges.
  191. // Append the rest of the ranges from another range set to the Result
  192. // and return with that.
  193. const auto AppendTheRest = [&Result](iterator I, iterator E) {
  194. Result.append(I, E);
  195. return Result;
  196. };
  197. while (true) {
  198. // We want to keep the following invariant at all times:
  199. // ---[ First ------>
  200. // -----[ Second --->
  201. if (First->From() > Second->From())
  202. swapIterators(First, FirstEnd, Second, SecondEnd);
  203. // The Union definitely starts with First->From().
  204. // ----------[ First ------>
  205. // ------------[ Second --->
  206. // ----------[ Union ------>
  207. // UnionStart^
  208. const llvm::APSInt &UnionStart = First->From();
  209. // Loop where the invariant holds.
  210. while (true) {
  211. // Skip all enclosed ranges.
  212. // ---[ First ]--->
  213. // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
  214. while (First->To() >= Second->To()) {
  215. // Check if Second is the last in its RangeSet.
  216. if (++Second == SecondEnd) {
  217. // Append the Union.
  218. // ---[ Union ]--->
  219. // -----[ Second ]----->
  220. // --------[ First ]--->
  221. // UnionEnd^
  222. Result.emplace_back(UnionStart, First->To());
  223. // ---[ Union ]----------------->
  224. // --------------[ First + 1]--->
  225. // Append all remaining ranges from the First's RangeSet.
  226. return AppendTheRest(++First, FirstEnd);
  227. }
  228. }
  229. // Check if First and Second are disjoint. It means that we find
  230. // the end of the Union. Exit the loop and append the Union.
  231. // ---[ First ]=------------->
  232. // ------------=[ Second ]--->
  233. // ----MinusOne^
  234. if (First->To() < Second->From() - One)
  235. break;
  236. // First is entirely inside the Union. Go next.
  237. // ---[ Union ----------->
  238. // ---- [ First ]-------->
  239. // -------[ Second ]----->
  240. // Check if First is the last in its RangeSet.
  241. if (++First == FirstEnd) {
  242. // Append the Union.
  243. // ---[ Union ]--->
  244. // -----[ First ]------->
  245. // --------[ Second ]--->
  246. // UnionEnd^
  247. Result.emplace_back(UnionStart, Second->To());
  248. // ---[ Union ]------------------>
  249. // --------------[ Second + 1]--->
  250. // Append all remaining ranges from the Second's RangeSet.
  251. return AppendTheRest(++Second, SecondEnd);
  252. }
  253. // We know that we are at one of the two cases:
  254. // case 1: --[ First ]--------->
  255. // case 2: ----[ First ]------->
  256. // --------[ Second ]---------->
  257. // In both cases First starts after Second->From().
  258. // Make sure that the loop invariant holds.
  259. swapIterators(First, FirstEnd, Second, SecondEnd);
  260. }
  261. // Here First and Second are disjoint.
  262. // Append the Union.
  263. // ---[ Union ]--------------->
  264. // -----------------[ Second ]--->
  265. // ------[ First ]--------------->
  266. // UnionEnd^
  267. Result.emplace_back(UnionStart, First->To());
  268. // Check if First is the last in its RangeSet.
  269. if (++First == FirstEnd)
  270. // ---[ Union ]--------------->
  271. // --------------[ Second ]--->
  272. // Append all remaining ranges from the Second's RangeSet.
  273. return AppendTheRest(Second, SecondEnd);
  274. }
  275. llvm_unreachable("Normally, we should not reach here");
  276. }
  277. RangeSet RangeSet::Factory::getRangeSet(Range From) {
  278. ContainerType Result;
  279. Result.push_back(From);
  280. return makePersistent(std::move(Result));
  281. }
  282. RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
  283. llvm::FoldingSetNodeID ID;
  284. void *InsertPos;
  285. From.Profile(ID);
  286. ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
  287. if (!Result) {
  288. // It is cheaper to fully construct the resulting range on stack
  289. // and move it to the freshly allocated buffer if we don't have
  290. // a set like this already.
  291. Result = construct(std::move(From));
  292. Cache.InsertNode(Result, InsertPos);
  293. }
  294. return Result;
  295. }
  296. RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
  297. void *Buffer = Arena.Allocate();
  298. return new (Buffer) ContainerType(std::move(From));
  299. }
  300. const llvm::APSInt &RangeSet::getMinValue() const {
  301. assert(!isEmpty());
  302. return begin()->From();
  303. }
  304. const llvm::APSInt &RangeSet::getMaxValue() const {
  305. assert(!isEmpty());
  306. return std::prev(end())->To();
  307. }
  308. bool RangeSet::containsImpl(llvm::APSInt &Point) const {
  309. if (isEmpty() || !pin(Point))
  310. return false;
  311. Range Dummy(Point);
  312. const_iterator It = llvm::upper_bound(*this, Dummy);
  313. if (It == begin())
  314. return false;
  315. return std::prev(It)->Includes(Point);
  316. }
  317. bool RangeSet::pin(llvm::APSInt &Point) const {
  318. APSIntType Type(getMinValue());
  319. if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
  320. return false;
  321. Type.apply(Point);
  322. return true;
  323. }
  324. bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
  325. // This function has nine cases, the cartesian product of range-testing
  326. // both the upper and lower bounds against the symbol's type.
  327. // Each case requires a different pinning operation.
  328. // The function returns false if the described range is entirely outside
  329. // the range of values for the associated symbol.
  330. APSIntType Type(getMinValue());
  331. APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
  332. APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);
  333. switch (LowerTest) {
  334. case APSIntType::RTR_Below:
  335. switch (UpperTest) {
  336. case APSIntType::RTR_Below:
  337. // The entire range is outside the symbol's set of possible values.
  338. // If this is a conventionally-ordered range, the state is infeasible.
  339. if (Lower <= Upper)
  340. return false;
  341. // However, if the range wraps around, it spans all possible values.
  342. Lower = Type.getMinValue();
  343. Upper = Type.getMaxValue();
  344. break;
  345. case APSIntType::RTR_Within:
  346. // The range starts below what's possible but ends within it. Pin.
  347. Lower = Type.getMinValue();
  348. Type.apply(Upper);
  349. break;
  350. case APSIntType::RTR_Above:
  351. // The range spans all possible values for the symbol. Pin.
  352. Lower = Type.getMinValue();
  353. Upper = Type.getMaxValue();
  354. break;
  355. }
  356. break;
  357. case APSIntType::RTR_Within:
  358. switch (UpperTest) {
  359. case APSIntType::RTR_Below:
  360. // The range wraps around, but all lower values are not possible.
  361. Type.apply(Lower);
  362. Upper = Type.getMaxValue();
  363. break;
  364. case APSIntType::RTR_Within:
  365. // The range may or may not wrap around, but both limits are valid.
  366. Type.apply(Lower);
  367. Type.apply(Upper);
  368. break;
  369. case APSIntType::RTR_Above:
  370. // The range starts within what's possible but ends above it. Pin.
  371. Type.apply(Lower);
  372. Upper = Type.getMaxValue();
  373. break;
  374. }
  375. break;
  376. case APSIntType::RTR_Above:
  377. switch (UpperTest) {
  378. case APSIntType::RTR_Below:
  379. // The range wraps but is outside the symbol's set of possible values.
  380. return false;
  381. case APSIntType::RTR_Within:
  382. // The range starts above what's possible but ends within it (wrap).
  383. Lower = Type.getMinValue();
  384. Type.apply(Upper);
  385. break;
  386. case APSIntType::RTR_Above:
  387. // The entire range is outside the symbol's set of possible values.
  388. // If this is a conventionally-ordered range, the state is infeasible.
  389. if (Lower <= Upper)
  390. return false;
  391. // However, if the range wraps around, it spans all possible values.
  392. Lower = Type.getMinValue();
  393. Upper = Type.getMaxValue();
  394. break;
  395. }
  396. break;
  397. }
  398. return true;
  399. }
  400. RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower,
  401. llvm::APSInt Upper) {
  402. if (What.isEmpty() || !What.pin(Lower, Upper))
  403. return getEmptySet();
  404. ContainerType DummyContainer;
  405. if (Lower <= Upper) {
  406. // [Lower, Upper] is a regular range.
  407. //
  408. // Shortcut: check that there is even a possibility of the intersection
  409. // by checking the two following situations:
  410. //
  411. // <---[ What ]---[------]------>
  412. // Lower Upper
  413. // -or-
  414. // <----[------]----[ What ]---->
  415. // Lower Upper
  416. if (What.getMaxValue() < Lower || Upper < What.getMinValue())
  417. return getEmptySet();
  418. DummyContainer.push_back(
  419. Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
  420. } else {
  421. // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
  422. //
  423. // Shortcut: check that there is even a possibility of the intersection
  424. // by checking the following situation:
  425. //
  426. // <------]---[ What ]---[------>
  427. // Upper Lower
  428. if (What.getMaxValue() < Lower && Upper < What.getMinValue())
  429. return getEmptySet();
  430. DummyContainer.push_back(
  431. Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
  432. DummyContainer.push_back(
  433. Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
  434. }
  435. return intersect(*What.Impl, DummyContainer);
  436. }
  437. RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
  438. const RangeSet::ContainerType &RHS) {
  439. ContainerType Result;
  440. Result.reserve(std::max(LHS.size(), RHS.size()));
  441. const_iterator First = LHS.begin(), Second = RHS.begin(),
  442. FirstEnd = LHS.end(), SecondEnd = RHS.end();
  443. // If we ran out of ranges in one set, but not in the other,
  444. // it means that those elements are definitely not in the
  445. // intersection.
  446. while (First != FirstEnd && Second != SecondEnd) {
  447. // We want to keep the following invariant at all times:
  448. //
  449. // ----[ First ---------------------->
  450. // --------[ Second ----------------->
  451. if (Second->From() < First->From())
  452. swapIterators(First, FirstEnd, Second, SecondEnd);
  453. // Loop where the invariant holds:
  454. do {
  455. // Check for the following situation:
  456. //
  457. // ----[ First ]--------------------->
  458. // ---------------[ Second ]--------->
  459. //
  460. // which means that...
  461. if (Second->From() > First->To()) {
  462. // ...First is not in the intersection.
  463. //
  464. // We should move on to the next range after First and break out of the
  465. // loop because the invariant might not be true.
  466. ++First;
  467. break;
  468. }
  469. // We have a guaranteed intersection at this point!
  470. // And this is the current situation:
  471. //
  472. // ----[ First ]----------------->
  473. // -------[ Second ------------------>
  474. //
  475. // Additionally, it definitely starts with Second->From().
  476. const llvm::APSInt &IntersectionStart = Second->From();
  477. // It is important to know which of the two ranges' ends
  478. // is greater. That "longer" range might have some other
  479. // intersections, while the "shorter" range might not.
  480. if (Second->To() > First->To()) {
  481. // Here we make a decision to keep First as the "longer"
  482. // range.
  483. swapIterators(First, FirstEnd, Second, SecondEnd);
  484. }
  485. // At this point, we have the following situation:
  486. //
  487. // ---- First ]-------------------->
  488. // ---- Second ]--[ Second+1 ---------->
  489. //
  490. // We don't know the relationship between First->From and
  491. // Second->From and we don't know whether Second+1 intersects
  492. // with First.
  493. //
  494. // However, we know that [IntersectionStart, Second->To] is
  495. // a part of the intersection...
  496. Result.push_back(Range(IntersectionStart, Second->To()));
  497. ++Second;
  498. // ...and that the invariant will hold for a valid Second+1
  499. // because First->From <= Second->To < (Second+1)->From.
  500. } while (Second != SecondEnd);
  501. }
  502. if (Result.empty())
  503. return getEmptySet();
  504. return makePersistent(std::move(Result));
  505. }
  506. RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) {
  507. // Shortcut: let's see if the intersection is even possible.
  508. if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
  509. RHS.getMaxValue() < LHS.getMinValue())
  510. return getEmptySet();
  511. return intersect(*LHS.Impl, *RHS.Impl);
  512. }
  513. RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) {
  514. if (LHS.containsImpl(Point))
  515. return getRangeSet(ValueFactory.getValue(Point));
  516. return getEmptySet();
  517. }
  518. RangeSet RangeSet::Factory::negate(RangeSet What) {
  519. if (What.isEmpty())
  520. return getEmptySet();
  521. const llvm::APSInt SampleValue = What.getMinValue();
  522. const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
  523. const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);
  524. ContainerType Result;
  525. Result.reserve(What.size() + (SampleValue == MIN));
  526. // Handle a special case for MIN value.
  527. const_iterator It = What.begin();
  528. const_iterator End = What.end();
  529. const llvm::APSInt &From = It->From();
  530. const llvm::APSInt &To = It->To();
  531. if (From == MIN) {
  532. // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
  533. if (To == MAX) {
  534. return What;
  535. }
  536. const_iterator Last = std::prev(End);
  537. // Try to find and unite the following ranges:
  538. // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
  539. if (Last->To() == MAX) {
  540. // It means that in the original range we have ranges
  541. // [MIN, A], ... , [B, MAX]
  542. // And the result should be [MIN, -B], ..., [-A, MAX]
  543. Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
  544. // We already negated Last, so we can skip it.
  545. End = Last;
  546. } else {
  547. // Add a separate range for the lowest value.
  548. Result.emplace_back(MIN, MIN);
  549. }
  550. // Skip adding the second range in case when [From, To] are [MIN, MIN].
  551. if (To != MIN) {
  552. Result.emplace_back(ValueFactory.getValue(-To), MAX);
  553. }
  554. // Skip the first range in the loop.
  555. ++It;
  556. }
  557. // Negate all other ranges.
  558. for (; It != End; ++It) {
  559. // Negate int values.
  560. const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
  561. const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());
  562. // Add a negated range.
  563. Result.emplace_back(NewFrom, NewTo);
  564. }
  565. llvm::sort(Result);
  566. return makePersistent(std::move(Result));
  567. }
  568. RangeSet RangeSet::Factory::deletePoint(RangeSet From,
  569. const llvm::APSInt &Point) {
  570. if (!From.contains(Point))
  571. return From;
  572. llvm::APSInt Upper = Point;
  573. llvm::APSInt Lower = Point;
  574. ++Upper;
  575. --Lower;
  576. // Notice that the lower bound is greater than the upper bound.
  577. return intersect(From, Upper, Lower);
  578. }
  579. LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
  580. OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
  581. }
  582. LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
  583. LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
  584. OS << "{ ";
  585. llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
  586. OS << " }";
  587. }
  588. LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
  // Program-state traits used by the equivalence-class bookkeeping below.
  //
  // SymbolSet: a set of SymbolRef (used as the value type of ClassMembers).
  589. REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)
  // EquivalenceClass is defined further down in this file; it is declared
  // here so that the registrations below can name it.
  590. namespace {
  591. class EquivalenceClass;
  592. } // end anonymous namespace
  // ClassMap: SymbolRef -> EquivalenceClass.
  593. REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
  // ClassMembers: EquivalenceClass -> SymbolSet.
  594. REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
  // ConstraintRange: EquivalenceClass -> RangeSet. Read and written through
  // State->get<ConstraintRange>() / State->set<ConstraintRange>().
  595. REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)
  // ClassSet: a set of EquivalenceClass (used as the value type of
  // DisequalityMap).
  596. REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
  // DisequalityMap: EquivalenceClass -> ClassSet.
  597. REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
  598. namespace {
  599. /// This class encapsulates a set of symbols equal to each other.
  600. ///
  601. /// The main idea of the approach requiring such classes is in narrowing
  602. /// and sharing constraints between symbols within the class. Also we can
  603. /// conclude that there is no practical need in storing constraints for
  604. /// every member of the class separately.
  605. ///
  606. /// Main terminology:
  607. ///
  608. /// * "Equivalence class" is an object of this class, which can be efficiently
  609. /// compared to other classes. It represents the whole class without
  610. /// storing the actual in it. The members of the class however can be
  611. /// retrieved from the state.
  612. ///
  613. /// * "Class members" are the symbols corresponding to the class. This means
  614. /// that A == B for every member symbols A and B from the class. Members of
  615. /// each class are stored in the state.
  616. ///
  617. /// * "Trivial class" is a class that has and ever had only one same symbol.
  618. ///
  619. /// * "Merge operation" merges two classes into one. It is the main operation
  620. /// to produce non-trivial classes.
  621. /// If, at some point, we can assume that two symbols from two distinct
  622. /// classes are equal, we can merge these classes.
  623. class EquivalenceClass : public llvm::FoldingSetNode {
  624. public:
  625. /// Find equivalence class for the given symbol in the given state.
  626. LLVM_NODISCARD static inline EquivalenceClass find(ProgramStateRef State,
  627. SymbolRef Sym);
  628. /// Merge classes for the given symbols and return a new state.
  629. LLVM_NODISCARD static inline ProgramStateRef merge(RangeSet::Factory &F,
  630. ProgramStateRef State,
  631. SymbolRef First,
  632. SymbolRef Second);
  633. // Merge this class with the given class and return a new state.
  634. LLVM_NODISCARD inline ProgramStateRef
  635. merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);
  636. /// Return a set of class members for the given state.
  637. LLVM_NODISCARD inline SymbolSet getClassMembers(ProgramStateRef State) const;
  638. /// Return true if the current class is trivial in the given state.
  639. /// A class is trivial if and only if there is not any member relations stored
  640. /// to it in State/ClassMembers.
  641. /// An equivalence class with one member might seem as it does not hold any
  642. /// meaningful information, i.e. that is a tautology. However, during the
  643. /// removal of dead symbols we do not remove classes with one member for
  644. /// resource and performance reasons. Consequently, a class with one member is
  645. /// not necessarily trivial. It could happen that we have a class with two
  646. /// members and then during the removal of dead symbols we remove one of its
  647. /// members. In this case, the class is still non-trivial (it still has the
  648. /// mappings in ClassMembers), even though it has only one member.
  649. LLVM_NODISCARD inline bool isTrivial(ProgramStateRef State) const;
  650. /// Return true if the current class is trivial and its only member is dead.
  651. LLVM_NODISCARD inline bool isTriviallyDead(ProgramStateRef State,
  652. SymbolReaper &Reaper) const;
  653. LLVM_NODISCARD static inline ProgramStateRef
  654. markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
  655. SymbolRef Second);
  656. LLVM_NODISCARD static inline ProgramStateRef
  657. markDisequal(RangeSet::Factory &F, ProgramStateRef State,
  658. EquivalenceClass First, EquivalenceClass Second);
  659. LLVM_NODISCARD inline ProgramStateRef
  660. markDisequal(RangeSet::Factory &F, ProgramStateRef State,
  661. EquivalenceClass Other) const;
  662. LLVM_NODISCARD static inline ClassSet
  663. getDisequalClasses(ProgramStateRef State, SymbolRef Sym);
  664. LLVM_NODISCARD inline ClassSet
  665. getDisequalClasses(ProgramStateRef State) const;
  666. LLVM_NODISCARD inline ClassSet
  667. getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;
  668. LLVM_NODISCARD static inline Optional<bool> areEqual(ProgramStateRef State,
  669. EquivalenceClass First,
  670. EquivalenceClass Second);
  671. LLVM_NODISCARD static inline Optional<bool>
  672. areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
  673. /// Remove one member from the class.
  674. LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State,
  675. const SymbolRef Old);
  676. /// Iterate over all symbols and try to simplify them.
  677. LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB,
  678. RangeSet::Factory &F,
  679. ProgramStateRef State,
  680. EquivalenceClass Class);
  681. void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
  682. LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
  683. dumpToStream(State, llvm::errs());
  684. }
  685. /// Check equivalence data for consistency.
  686. LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED static bool
  687. isClassDataConsistent(ProgramStateRef State);
  688. LLVM_NODISCARD QualType getType() const {
  689. return getRepresentativeSymbol()->getType();
  690. }
  691. EquivalenceClass() = delete;
  692. EquivalenceClass(const EquivalenceClass &) = default;
  693. EquivalenceClass &operator=(const EquivalenceClass &) = delete;
  694. EquivalenceClass(EquivalenceClass &&) = default;
  695. EquivalenceClass &operator=(EquivalenceClass &&) = delete;
  696. bool operator==(const EquivalenceClass &Other) const {
  697. return ID == Other.ID;
  698. }
  699. bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
  700. bool operator!=(const EquivalenceClass &Other) const {
  701. return !operator==(Other);
  702. }
  703. static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
  704. ID.AddInteger(CID);
  705. }
  706. void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }
  707. private:
  708. /* implicit */ EquivalenceClass(SymbolRef Sym)
  709. : ID(reinterpret_cast<uintptr_t>(Sym)) {}
  710. /// This function is intended to be used ONLY within the class.
  711. /// The fact that ID is a pointer to a symbol is an implementation detail
  712. /// and should stay that way.
  713. /// In the current implementation, we use it to retrieve the only member
  714. /// of the trivial class.
  715. SymbolRef getRepresentativeSymbol() const {
  716. return reinterpret_cast<SymbolRef>(ID);
  717. }
  718. static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);
  719. inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
  720. SymbolSet Members, EquivalenceClass Other,
  721. SymbolSet OtherMembers);
  722. static inline bool
  723. addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
  724. RangeSet::Factory &F, ProgramStateRef State,
  725. EquivalenceClass First, EquivalenceClass Second);
  726. /// This is a unique identifier of the class.
  727. uintptr_t ID;
  728. };
  729. //===----------------------------------------------------------------------===//
  730. // Constraint functions
  731. //===----------------------------------------------------------------------===//
  732. LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED bool
  733. areFeasible(ConstraintRangeTy Constraints) {
  734. return llvm::none_of(
  735. Constraints,
  736. [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
  737. return ClassConstraint.second.isEmpty();
  738. });
  739. }
  740. LLVM_NODISCARD inline const RangeSet *getConstraint(ProgramStateRef State,
  741. EquivalenceClass Class) {
  742. return State->get<ConstraintRange>(Class);
  743. }
  744. LLVM_NODISCARD inline const RangeSet *getConstraint(ProgramStateRef State,
  745. SymbolRef Sym) {
  746. return getConstraint(State, EquivalenceClass::find(State, Sym));
  747. }
  748. LLVM_NODISCARD ProgramStateRef setConstraint(ProgramStateRef State,
  749. EquivalenceClass Class,
  750. RangeSet Constraint) {
  751. return State->set<ConstraintRange>(Class, Constraint);
  752. }
  753. LLVM_NODISCARD ProgramStateRef setConstraints(ProgramStateRef State,
  754. ConstraintRangeTy Constraints) {
  755. return State->set<ConstraintRange>(Constraints);
  756. }
  757. //===----------------------------------------------------------------------===//
  758. // Equality/disequality abstraction
  759. //===----------------------------------------------------------------------===//
  760. /// A small helper function for detecting symbolic (dis)equality.
  761. ///
  762. /// Equality check can have different forms (like a == b or a - b) and this
  763. /// class encapsulates those away if the only thing the user wants to check -
  764. /// whether it's equality/diseqiality or not.
  765. ///
  766. /// \returns true if assuming this Sym to be true means equality of operands
  767. /// false if it means disequality of operands
  768. /// None otherwise
  769. Optional<bool> meansEquality(const SymSymExpr *Sym) {
  770. switch (Sym->getOpcode()) {
  771. case BO_Sub:
  772. // This case is: A - B != 0 -> disequality check.
  773. return false;
  774. case BO_EQ:
  775. // This case is: A == B != 0 -> equality check.
  776. return true;
  777. case BO_NE:
  778. // This case is: A != B != 0 -> diseqiality check.
  779. return false;
  780. default:
  781. return llvm::None;
  782. }
  783. }
  784. //===----------------------------------------------------------------------===//
  785. // Intersection functions
  786. //===----------------------------------------------------------------------===//
  // Forward declaration of the <RangeSet, SecondTy, ...> overload, which is
  // defined below, after the <RangeSet, RangeSet, ...> overload that may call
  // it with the remaining arguments.
  787. template <class SecondTy, class... RestTy>
  788. LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
  789. SecondTy Second, RestTy... Tail);
  790. template <class... RangeTy> struct IntersectionTraits;
  791. template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
  792. // Found RangeSet, no need to check any further
  793. using Type = RangeSet;
  794. };
  795. template <> struct IntersectionTraits<> {
  796. // We ran out of types, and we didn't find any RangeSet, so the result should
  797. // be optional.
  798. using Type = Optional<RangeSet>;
  799. };
  800. template <class OptionalOrPointer, class... TailTy>
  801. struct IntersectionTraits<OptionalOrPointer, TailTy...> {
  802. // If current type is Optional or a raw pointer, we should keep looking.
  803. using Type = typename IntersectionTraits<TailTy...>::Type;
  804. };
  805. template <class EndTy>
  806. LLVM_NODISCARD inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
  807. // If the list contains only RangeSet or Optional<RangeSet>, simply return
  808. // that range set.
  809. return End;
  810. }
  811. LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED inline Optional<RangeSet>
  812. intersect(RangeSet::Factory &F, const RangeSet *End) {
  813. // This is an extraneous conversion from a raw pointer into Optional<RangeSet>
  814. if (End) {
  815. return *End;
  816. }
  817. return llvm::None;
  818. }
  819. template <class... RestTy>
  820. LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
  821. RangeSet Second, RestTy... Tail) {
  822. // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
  823. // of the function and can be sure that the result is RangeSet.
  824. return intersect(F, F.intersect(Head, Second), Tail...);
  825. }
  826. template <class SecondTy, class... RestTy>
  827. LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
  828. SecondTy Second, RestTy... Tail) {
  829. if (Second) {
  830. // Here we call the <RangeSet,RangeSet,...> version of the function...
  831. return intersect(F, Head, *Second, Tail...);
  832. }
  833. // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
  834. // means that the result is definitely RangeSet.
  835. return intersect(F, Head, Tail...);
  836. }
  837. /// Main generic intersect function.
  838. /// It intersects all of the given range sets. If some of the given arguments
  839. /// don't hold a range set (nullptr or llvm::None), the function will skip them.
  840. ///
  841. /// Available representations for the arguments are:
  842. /// * RangeSet
  843. /// * Optional<RangeSet>
  844. /// * RangeSet *
  845. /// Pointer to a RangeSet is automatically assumed to be nullable and will get
  846. /// checked as well as the optional version. If this behaviour is undesired,
  847. /// please dereference the pointer in the call.
  848. ///
  849. /// Return type depends on the arguments' types. If we can be sure in compile
  850. /// time that there will be a range set as a result, the returning type is
  851. /// simply RangeSet, in other cases we have to back off to Optional<RangeSet>.
  852. ///
  853. /// Please, prefer optional range sets to raw pointers. If the last argument is
  854. /// a raw pointer and all previous arguments are None, it will cost one
  855. /// additional check to convert RangeSet * into Optional<RangeSet>.
  856. template <class HeadTy, class SecondTy, class... RestTy>
  857. LLVM_NODISCARD inline
  858. typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
  859. intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
  860. RestTy... Tail) {
  861. if (Head) {
  862. return intersect(F, *Head, Second, Tail...);
  863. }
  864. return intersect(F, Second, Tail...);
  865. }
  866. //===----------------------------------------------------------------------===//
  867. // Symbolic reasoning logic
  868. //===----------------------------------------------------------------------===//
  869. /// A little component aggregating all of the reasoning we have about
  870. /// the ranges of symbolic expressions.
  871. ///
  872. /// Even when we don't know the exact values of the operands, we still
  873. /// can get a pretty good estimate of the result's range.
  874. class SymbolicRangeInferrer
  875. : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
  876. public:
  877. template <class SourceType>
  878. static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
  879. SourceType Origin) {
  880. SymbolicRangeInferrer Inferrer(F, State);
  881. return Inferrer.infer(Origin);
  882. }
  883. RangeSet VisitSymExpr(SymbolRef Sym) {
  884. // If we got to this function, the actual type of the symbolic
  885. // expression is not supported for advanced inference.
  886. // In this case, we simply backoff to the default "let's simply
  887. // infer the range from the expression's type".
  888. return infer(Sym->getType());
  889. }
  890. RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
  891. return VisitBinaryOperator(Sym);
  892. }
  893. RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
  894. return VisitBinaryOperator(Sym);
  895. }
  896. RangeSet VisitSymSymExpr(const SymSymExpr *Sym) {
  897. return intersect(
  898. RangeFactory,
  899. // If Sym is (dis)equality, we might have some information
  900. // on that in our equality classes data structure.
  901. getRangeForEqualities(Sym),
  902. // And we should always check what we can get from the operands.
  903. VisitBinaryOperator(Sym));
  904. }
  905. private:
  906. SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
  907. : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
  908. /// Infer range information from the given integer constant.
  909. ///
  910. /// It's not a real "inference", but is here for operating with
  911. /// sub-expressions in a more polymorphic manner.
  912. RangeSet inferAs(const llvm::APSInt &Val, QualType) {
  913. return {RangeFactory, Val};
  914. }
  915. /// Infer range information from symbol in the context of the given type.
  916. RangeSet inferAs(SymbolRef Sym, QualType DestType) {
  917. QualType ActualType = Sym->getType();
  918. // Check that we can reason about the symbol at all.
  919. if (ActualType->isIntegralOrEnumerationType() ||
  920. Loc::isLocType(ActualType)) {
  921. return infer(Sym);
  922. }
  923. // Otherwise, let's simply infer from the destination type.
  924. // We couldn't figure out nothing else about that expression.
  925. return infer(DestType);
  926. }
  927. RangeSet infer(SymbolRef Sym) {
  928. return intersect(
  929. RangeFactory,
  930. // Of course, we should take the constraint directly associated with
  931. // this symbol into consideration.
  932. getConstraint(State, Sym),
  933. // If Sym is a difference of symbols A - B, then maybe we have range
  934. // set stored for B - A.
  935. //
  936. // If we have range set stored for both A - B and B - A then
  937. // calculate the effective range set by intersecting the range set
  938. // for A - B and the negated range set of B - A.
  939. getRangeForNegatedSub(Sym),
  940. // If Sym is a comparison expression (except <=>),
  941. // find any other comparisons with the same operands.
  942. // See function description.
  943. getRangeForComparisonSymbol(Sym),
  944. // Apart from the Sym itself, we can infer quite a lot if we look
  945. // into subexpressions of Sym.
  946. Visit(Sym));
  947. }
  948. RangeSet infer(EquivalenceClass Class) {
  949. if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
  950. return *AssociatedConstraint;
  951. return infer(Class.getType());
  952. }
  953. /// Infer range information solely from the type.
  954. RangeSet infer(QualType T) {
  955. // Lazily generate a new RangeSet representing all possible values for the
  956. // given symbol type.
  957. RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
  958. ValueFactory.getMaxValue(T));
  959. // References are known to be non-zero.
  960. if (T->isReferenceType())
  961. return assumeNonZero(Result, T);
  962. return Result;
  963. }
  964. template <class BinarySymExprTy>
  965. RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
  966. // TODO #1: VisitBinaryOperator implementation might not make a good
  967. // use of the inferred ranges. In this case, we might be calculating
  968. // everything for nothing. This being said, we should introduce some
  969. // sort of laziness mechanism here.
  970. //
  971. // TODO #2: We didn't go into the nested expressions before, so it
  972. // might cause us spending much more time doing the inference.
  973. // This can be a problem for deeply nested expressions that are
  974. // involved in conditions and get tested continuously. We definitely
  975. // need to address this issue and introduce some sort of caching
  976. // in here.
  977. QualType ResultType = Sym->getType();
  978. return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
  979. Sym->getOpcode(),
  980. inferAs(Sym->getRHS(), ResultType), ResultType);
  981. }
  982. RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
  983. RangeSet RHS, QualType T) {
  984. switch (Op) {
  985. case BO_Or:
  986. return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
  987. case BO_And:
  988. return VisitBinaryOperator<BO_And>(LHS, RHS, T);
  989. case BO_Rem:
  990. return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
  991. default:
  992. return infer(T);
  993. }
  994. }
  995. //===----------------------------------------------------------------------===//
  996. // Ranges and operators
  997. //===----------------------------------------------------------------------===//
  998. /// Return a rough approximation of the given range set.
  999. ///
  1000. /// For the range set:
  1001. /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
  1002. /// it will return the range [x_0, y_N].
  1003. static Range fillGaps(RangeSet Origin) {
  1004. assert(!Origin.isEmpty());
  1005. return {Origin.getMinValue(), Origin.getMaxValue()};
  1006. }
  1007. /// Try to convert given range into the given type.
  1008. ///
  1009. /// It will return llvm::None only when the trivial conversion is possible.
  1010. llvm::Optional<Range> convert(const Range &Origin, APSIntType To) {
  1011. if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
  1012. To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
  1013. return llvm::None;
  1014. }
  1015. return Range(ValueFactory.Convert(To, Origin.From()),
  1016. ValueFactory.Convert(To, Origin.To()));
  1017. }
  1018. template <BinaryOperator::Opcode Op>
  1019. RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
  1020. // We should propagate information about unfeasbility of one of the
  1021. // operands to the resulting range.
  1022. if (LHS.isEmpty() || RHS.isEmpty()) {
  1023. return RangeFactory.getEmptySet();
  1024. }
  1025. Range CoarseLHS = fillGaps(LHS);
  1026. Range CoarseRHS = fillGaps(RHS);
  1027. APSIntType ResultType = ValueFactory.getAPSIntType(T);
  1028. // We need to convert ranges to the resulting type, so we can compare values
  1029. // and combine them in a meaningful (in terms of the given operation) way.
  1030. auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
  1031. auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
  1032. // It is hard to reason about ranges when conversion changes
  1033. // borders of the ranges.
  1034. if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
  1035. return infer(T);
  1036. }
  1037. return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
  1038. }
  1039. template <BinaryOperator::Opcode Op>
  1040. RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
  1041. return infer(T);
  1042. }
  1043. /// Return a symmetrical range for the given range and type.
  1044. ///
  1045. /// If T is signed, return the smallest range [-x..x] that covers the original
  1046. /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
  1047. /// exist due to original range covering min(T)).
  1048. ///
  1049. /// If T is unsigned, return the smallest range [0..x] that covers the
  1050. /// original range.
  1051. Range getSymmetricalRange(Range Origin, QualType T) {
  1052. APSIntType RangeType = ValueFactory.getAPSIntType(T);
  1053. if (RangeType.isUnsigned()) {
  1054. return Range(ValueFactory.getMinValue(RangeType), Origin.To());
  1055. }
  1056. if (Origin.From().isMinSignedValue()) {
  1057. // If mini is a minimal signed value, absolute value of it is greater
  1058. // than the maximal signed value. In order to avoid these
  1059. // complications, we simply return the whole range.
  1060. return {ValueFactory.getMinValue(RangeType),
  1061. ValueFactory.getMaxValue(RangeType)};
  1062. }
  1063. // At this point, we are sure that the type is signed and we can safely
  1064. // use unary - operator.
  1065. //
  1066. // While calculating absolute maximum, we can use the following formula
  1067. // because of these reasons:
  1068. // * If From >= 0 then To >= From and To >= -From.
  1069. // AbsMax == To == max(To, -From)
  1070. // * If To <= 0 then -From >= -To and -From >= From.
  1071. // AbsMax == -From == max(-From, To)
  1072. // * Otherwise, From <= 0, To >= 0, and
  1073. // AbsMax == max(abs(From), abs(To))
  1074. llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());
  1075. // Intersection is guaranteed to be non-empty.
  1076. return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
  1077. }
  1078. /// Return a range set subtracting zero from \p Domain.
  1079. RangeSet assumeNonZero(RangeSet Domain, QualType T) {
  1080. APSIntType IntType = ValueFactory.getAPSIntType(T);
  1081. return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
  1082. }
  1083. // FIXME: Once SValBuilder supports unary minus, we should use SValBuilder to
  1084. // obtain the negated symbolic expression instead of constructing the
  1085. // symbol manually. This will allow us to support finding ranges of not
  1086. // only negated SymSymExpr-type expressions, but also of other, simpler
  1087. // expressions which we currently do not know how to negate.
  1088. Optional<RangeSet> getRangeForNegatedSub(SymbolRef Sym) {
  1089. if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(Sym)) {
  1090. if (SSE->getOpcode() == BO_Sub) {
  1091. QualType T = Sym->getType();
  1092. // Do not negate unsigned ranges
  1093. if (!T->isUnsignedIntegerOrEnumerationType() &&
  1094. !T->isSignedIntegerOrEnumerationType())
  1095. return llvm::None;
  1096. SymbolManager &SymMgr = State->getSymbolManager();
  1097. SymbolRef NegatedSym =
  1098. SymMgr.getSymSymExpr(SSE->getRHS(), BO_Sub, SSE->getLHS(), T);
  1099. if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym)) {
  1100. return RangeFactory.negate(*NegatedRange);
  1101. }
  1102. }
  1103. }
  1104. return llvm::None;
  1105. }
  1106. // Returns ranges only for binary comparison operators (except <=>)
  1107. // when left and right operands are symbolic values.
  1108. // Finds any other comparisons with the same operands.
  1109. // Then do logical calculations and refuse impossible branches.
  1110. // E.g. (x < y) and (x > y) at the same time are impossible.
  1111. // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
  1112. // E.g. (x == y) and (y == x) are just reversed but the same.
  1113. // It covers all possible combinations (see CmpOpTable description).
  1114. // Note that `x` and `y` can also stand for subexpressions,
  1115. // not only for actual symbols.
  1116. Optional<RangeSet> getRangeForComparisonSymbol(SymbolRef Sym) {
  1117. const auto *SSE = dyn_cast<SymSymExpr>(Sym);
  1118. if (!SSE)
  1119. return llvm::None;
  1120. const BinaryOperatorKind CurrentOP = SSE->getOpcode();
  1121. // We currently do not support <=> (C++20).
  1122. if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
  1123. return llvm::None;
  1124. static const OperatorRelationsTable CmpOpTable{};
  1125. const SymExpr *LHS = SSE->getLHS();
  1126. const SymExpr *RHS = SSE->getRHS();
  1127. QualType T = SSE->getType();
  1128. SymbolManager &SymMgr = State->getSymbolManager();
  1129. // We use this variable to store the last queried operator (`QueriedOP`)
  1130. // for which the `getCmpOpState` returned with `Unknown`. If there are two
  1131. // different OPs that returned `Unknown` then we have to query the special
  1132. // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
  1133. // never returns `Unknown`, so `CurrentOP` is a good initial value.
  1134. BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;
  1135. // Loop goes through all of the columns exept the last one ('UnknownX2').
  1136. // We treat `UnknownX2` column separately at the end of the loop body.
  1137. for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {
  1138. // Let's find an expression e.g. (x < y).
  1139. BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
  1140. const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
  1141. const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);
  1142. // If ranges were not previously found,
  1143. // try to find a reversed expression (y > x).
  1144. if (!QueriedRangeSet) {
  1145. const BinaryOperatorKind ROP =
  1146. BinaryOperator::reverseComparisonOp(QueriedOP);
  1147. SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
  1148. QueriedRangeSet = getConstraint(State, SymSym);
  1149. }
  1150. if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
  1151. continue;
  1152. const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
  1153. const bool isInFalseBranch =
  1154. ConcreteValue ? (*ConcreteValue == 0) : false;
  1155. // If it is a false branch, we shall be guided by opposite operator,
  1156. // because the table is made assuming we are in the true branch.
  1157. // E.g. when (x <= y) is false, then (x > y) is true.
  1158. if (isInFalseBranch)
  1159. QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);
  1160. OperatorRelationsTable::TriStateKind BranchState =
  1161. CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);
  1162. if (BranchState == OperatorRelationsTable::Unknown) {
  1163. if (LastQueriedOpToUnknown != CurrentOP &&
  1164. LastQueriedOpToUnknown != QueriedOP) {
  1165. // If we got the Unknown state for both different operators.
  1166. // if (x <= y) // assume true
  1167. // if (x != y) // assume true
  1168. // if (x < y) // would be also true
  1169. // Get a state from `UnknownX2` column.
  1170. BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
  1171. } else {
  1172. LastQueriedOpToUnknown = QueriedOP;
  1173. continue;
  1174. }
  1175. }
  1176. return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
  1177. : getFalseRange(T);
  1178. }
  1179. return llvm::None;
  1180. }
  1181. Optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
  1182. Optional<bool> Equality = meansEquality(Sym);
  1183. if (!Equality)
  1184. return llvm::None;
  1185. if (Optional<bool> AreEqual =
  1186. EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
  1187. // Here we cover two cases at once:
  1188. // * if Sym is equality and its operands are known to be equal -> true
  1189. // * if Sym is disequality and its operands are disequal -> true
  1190. if (*AreEqual == *Equality) {
  1191. return getTrueRange(Sym->getType());
  1192. }
  1193. // Opposite combinations result in false.
  1194. return getFalseRange(Sym->getType());
  1195. }
  1196. return llvm::None;
  1197. }
  1198. RangeSet getTrueRange(QualType T) {
  1199. RangeSet TypeRange = infer(T);
  1200. return assumeNonZero(TypeRange, T);
  1201. }
  1202. RangeSet getFalseRange(QualType T) {
  1203. const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
  1204. return RangeSet(RangeFactory, Zero);
  1205. }
  1206. BasicValueFactory &ValueFactory;
  1207. RangeSet::Factory &RangeFactory;
  1208. ProgramStateRef State;
  1209. };
  1210. //===----------------------------------------------------------------------===//
  1211. // Range-based reasoning about symbolic operations
  1212. //===----------------------------------------------------------------------===//
  1213. template <>
  1214. RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
  1215. QualType T) {
  1216. APSIntType ResultType = ValueFactory.getAPSIntType(T);
  1217. llvm::APSInt Zero = ResultType.getZeroValue();
  1218. bool IsLHSPositiveOrZero = LHS.From() >= Zero;
  1219. bool IsRHSPositiveOrZero = RHS.From() >= Zero;
  1220. bool IsLHSNegative = LHS.To() < Zero;
  1221. bool IsRHSNegative = RHS.To() < Zero;
  1222. // Check if both ranges have the same sign.
  1223. if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
  1224. (IsLHSNegative && IsRHSNegative)) {
  1225. // The result is definitely greater or equal than any of the operands.
  1226. const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
  1227. // We estimate maximal value for positives as the maximal value for the
  1228. // given type. For negatives, we estimate it with -1 (e.g. 0x11111111).
  1229. //
  1230. // TODO: We basically, limit the resulting range from below, but don't do
  1231. // anything with the upper bound.
  1232. //
  1233. // For positive operands, it can be done as follows: for the upper
  1234. // bound of LHS and RHS we calculate the most significant bit set.
  1235. // Let's call it the N-th bit. Then we can estimate the maximal
  1236. // number to be 2^(N+1)-1, i.e. the number with all the bits up to
  1237. // the N-th bit set.
  1238. const llvm::APSInt &Max = IsLHSNegative
  1239. ? ValueFactory.getValue(--Zero)
  1240. : ValueFactory.getMaxValue(ResultType);
  1241. return {RangeFactory, ValueFactory.getValue(Min), Max};
  1242. }
  1243. // Otherwise, let's check if at least one of the operands is negative.
  1244. if (IsLHSNegative || IsRHSNegative) {
  1245. // This means that the result is definitely negative as well.
  1246. return {RangeFactory, ValueFactory.getMinValue(ResultType),
  1247. ValueFactory.getValue(--Zero)};
  1248. }
  1249. RangeSet DefaultRange = infer(T);
  1250. // It is pretty hard to reason about operands with different signs
  1251. // (and especially with possibly different signs). We simply check if it
  1252. // can be zero. In order to conclude that the result could not be zero,
  1253. // at least one of the operands should be definitely not zero itself.
  1254. if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
  1255. return assumeNonZero(DefaultRange, T);
  1256. }
  1257. // Nothing much else to do here.
  1258. return DefaultRange;
  1259. }
  1260. template <>
  1261. RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
  1262. Range RHS,
  1263. QualType T) {
  1264. APSIntType ResultType = ValueFactory.getAPSIntType(T);
  1265. llvm::APSInt Zero = ResultType.getZeroValue();
  1266. bool IsLHSPositiveOrZero = LHS.From() >= Zero;
  1267. bool IsRHSPositiveOrZero = RHS.From() >= Zero;
  1268. bool IsLHSNegative = LHS.To() < Zero;
  1269. bool IsRHSNegative = RHS.To() < Zero;
  1270. // Check if both ranges have the same sign.
  1271. if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
  1272. (IsLHSNegative && IsRHSNegative)) {
  1273. // The result is definitely less or equal than any of the operands.
  1274. const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
  1275. // We conservatively estimate lower bound to be the smallest positive
  1276. // or negative value corresponding to the sign of the operands.
  1277. const llvm::APSInt &Min = IsLHSNegative
  1278. ? ValueFactory.getMinValue(ResultType)
  1279. : ValueFactory.getValue(Zero);
  1280. return {RangeFactory, Min, Max};
  1281. }
  1282. // Otherwise, let's check if at least one of the operands is positive.
  1283. if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
  1284. // This makes result definitely positive.
  1285. //
  1286. // We can also reason about a maximal value by finding the maximal
  1287. // value of the positive operand.
  1288. const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
  1289. // The minimal value on the other hand is much harder to reason about.
  1290. // The only thing we know for sure is that the result is positive.
  1291. return {RangeFactory, ValueFactory.getValue(Zero),
  1292. ValueFactory.getValue(Max)};
  1293. }
  1294. // Nothing much else to do here.
  1295. return infer(T);
  1296. }
  1297. template <>
  1298. RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
  1299. Range RHS,
  1300. QualType T) {
  1301. llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
  1302. Range ConservativeRange = getSymmetricalRange(RHS, T);
  1303. llvm::APSInt Max = ConservativeRange.To();
  1304. llvm::APSInt Min = ConservativeRange.From();
  1305. if (Max == Zero) {
  1306. // It's an undefined behaviour to divide by 0 and it seems like we know
  1307. // for sure that RHS is 0. Let's say that the resulting range is
  1308. // simply infeasible for that matter.
  1309. return RangeFactory.getEmptySet();
  1310. }
  1311. // At this point, our conservative range is closed. The result, however,
  1312. // couldn't be greater than the RHS' maximal absolute value. Because of
  1313. // this reason, we turn the range into open (or half-open in case of
  1314. // unsigned integers).
  1315. //
  1316. // While we operate on integer values, an open interval (a, b) can be easily
  1317. // represented by the closed interval [a + 1, b - 1]. And this is exactly
  1318. // what we do next.
  1319. //
  1320. // If we are dealing with unsigned case, we shouldn't move the lower bound.
  1321. if (Min.isSigned()) {
  1322. ++Min;
  1323. }
  1324. --Max;
  1325. bool IsLHSPositiveOrZero = LHS.From() >= Zero;
  1326. bool IsRHSPositiveOrZero = RHS.From() >= Zero;
  1327. // Remainder operator results with negative operands is implementation
  1328. // defined. Positive cases are much easier to reason about though.
  1329. if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
  1330. // If maximal value of LHS is less than maximal value of RHS,
  1331. // the result won't get greater than LHS.To().
  1332. Max = std::min(LHS.To(), Max);
  1333. // We want to check if it is a situation similar to the following:
  1334. //
  1335. // <------------|---[ LHS ]--------[ RHS ]----->
  1336. // -INF 0 +INF
  1337. //
  1338. // In this situation, we can conclude that (LHS / RHS) == 0 and
  1339. // (LHS % RHS) == LHS.
  1340. Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
  1341. }
  1342. // Nevertheless, the symmetrical range for RHS is a conservative estimate
  1343. // for any sign of either LHS, or RHS.
  1344. return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
  1345. }
  1346. //===----------------------------------------------------------------------===//
  1347. // Constraint manager implementation details
  1348. //===----------------------------------------------------------------------===//
  1349. class RangeConstraintManager : public RangedConstraintManager {
  1350. public:
  1351. RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
  1352. : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
  1353. //===------------------------------------------------------------------===//
  1354. // Implementation for interface from ConstraintManager.
  1355. //===------------------------------------------------------------------===//
  1356. bool haveEqualConstraints(ProgramStateRef S1,
  1357. ProgramStateRef S2) const override {
  1358. // NOTE: ClassMembers are as simple as back pointers for ClassMap,
  1359. // so comparing constraint ranges and class maps should be
  1360. // sufficient.
  1361. return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
  1362. S1->get<ClassMap>() == S2->get<ClassMap>();
  1363. }
  1364. bool canReasonAbout(SVal X) const override;
  1365. ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
  1366. const llvm::APSInt *getSymVal(ProgramStateRef State,
  1367. SymbolRef Sym) const override;
  1368. ProgramStateRef removeDeadBindings(ProgramStateRef State,
  1369. SymbolReaper &SymReaper) override;
  1370. void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
  1371. unsigned int Space = 0, bool IsDot = false) const override;
  1372. void printConstraints(raw_ostream &Out, ProgramStateRef State,
  1373. const char *NL = "\n", unsigned int Space = 0,
  1374. bool IsDot = false) const;
  1375. void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
  1376. const char *NL = "\n", unsigned int Space = 0,
  1377. bool IsDot = false) const;
  1378. void printDisequalities(raw_ostream &Out, ProgramStateRef State,
  1379. const char *NL = "\n", unsigned int Space = 0,
  1380. bool IsDot = false) const;
  1381. //===------------------------------------------------------------------===//
  1382. // Implementation for interface from RangedConstraintManager.
  1383. //===------------------------------------------------------------------===//
  1384. ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
  1385. const llvm::APSInt &V,
  1386. const llvm::APSInt &Adjustment) override;
  1387. ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
  1388. const llvm::APSInt &V,
  1389. const llvm::APSInt &Adjustment) override;
  1390. ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
  1391. const llvm::APSInt &V,
  1392. const llvm::APSInt &Adjustment) override;
  1393. ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
  1394. const llvm::APSInt &V,
  1395. const llvm::APSInt &Adjustment) override;
  1396. ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
  1397. const llvm::APSInt &V,
  1398. const llvm::APSInt &Adjustment) override;
  1399. ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
  1400. const llvm::APSInt &V,
  1401. const llvm::APSInt &Adjustment) override;
  1402. ProgramStateRef assumeSymWithinInclusiveRange(
  1403. ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
  1404. const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
  1405. ProgramStateRef assumeSymOutsideInclusiveRange(
  1406. ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
  1407. const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
  1408. private:
  1409. RangeSet::Factory F;
  1410. RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
  1411. RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
  1412. ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
  1413. RangeSet Range);
  1414. ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
  1415. RangeSet Range);
  1416. RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
  1417. const llvm::APSInt &Int,
  1418. const llvm::APSInt &Adjustment);
  1419. RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
  1420. const llvm::APSInt &Int,
  1421. const llvm::APSInt &Adjustment);
  1422. RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
  1423. const llvm::APSInt &Int,
  1424. const llvm::APSInt &Adjustment);
  1425. RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
  1426. const llvm::APSInt &Int,
  1427. const llvm::APSInt &Adjustment);
  1428. RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
  1429. const llvm::APSInt &Int,
  1430. const llvm::APSInt &Adjustment);
  1431. };
  1432. //===----------------------------------------------------------------------===//
  1433. // Constraint assignment logic
  1434. //===----------------------------------------------------------------------===//
  1435. /// ConstraintAssignorBase is a small utility class that unifies visitor
  1436. /// for ranges with a visitor for constraints (rangeset/range/constant).
  1437. ///
  1438. /// It is designed to have one derived class, but generally it can have more.
  1439. /// Derived class can control which types we handle by defining methods of the
  1440. /// following form:
  1441. ///
  1442. /// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
  1443. /// CONSTRAINT Constraint);
  1444. ///
  1445. /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
  1446. /// CONSTRAINT is the type of constraint (RangeSet/Range/Const)
  1447. /// return value signifies whether we should try other handle methods
  1448. /// (i.e. false would mean to stop right after calling this method)
  1449. template <class Derived> class ConstraintAssignorBase {
  1450. public:
  1451. using Const = const llvm::APSInt &;
  1452. #define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
  1453. #define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \
  1454. if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \
  1455. return false
  1456. void assign(SymbolRef Sym, RangeSet Constraint) {
  1457. assignImpl(Sym, Constraint);
  1458. }
  1459. bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
  1460. switch (Sym->getKind()) {
  1461. #define SYMBOL(Id, Parent) \
  1462. case SymExpr::Id##Kind: \
  1463. DISPATCH(Id);
  1464. #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
  1465. }
  1466. llvm_unreachable("Unknown SymExpr kind!");
  1467. }
  1468. #define DEFAULT_ASSIGN(Id) \
  1469. bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \
  1470. return true; \
  1471. } \
  1472. bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
  1473. bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
  1474. // When we dispatch for constraint types, we first try to check
  1475. // if the new constraint is the constant and try the corresponding
  1476. // assignor methods. If it didn't interrupt, we can proceed to the
  1477. // range, and finally to the range set.
  1478. #define CONSTRAINT_DISPATCH(Id) \
  1479. if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \
  1480. ASSIGN(Id, Const, Sym, *Const); \
  1481. } \
  1482. if (Constraint.size() == 1) { \
  1483. ASSIGN(Id, Range, Sym, *Constraint.begin()); \
  1484. } \
  1485. ASSIGN(Id, RangeSet, Sym, Constraint)
  1486. // Our internal assign method first tries to call assignor methods for all
  1487. // constraint types that apply. And if not interrupted, continues with its
  1488. // parent class.
  1489. #define SYMBOL(Id, Parent) \
  1490. bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \
  1491. CONSTRAINT_DISPATCH(Id); \
  1492. DISPATCH(Parent); \
  1493. } \
  1494. DEFAULT_ASSIGN(Id)
  1495. #define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
  1496. #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
  1497. // Default implementations for the top class that doesn't have parents.
  1498. bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
  1499. CONSTRAINT_DISPATCH(SymExpr);
  1500. return true;
  1501. }
  1502. DEFAULT_ASSIGN(SymExpr);
  1503. #undef DISPATCH
  1504. #undef CONSTRAINT_DISPATCH
  1505. #undef DEFAULT_ASSIGN
  1506. #undef ASSIGN
  1507. };
  1508. /// A little component aggregating all of the reasoning we have about
  1509. /// assigning new constraints to symbols.
  1510. ///
  1511. /// The main purpose of this class is to associate constraints to symbols,
  1512. /// and impose additional constraints on other symbols, when we can imply
  1513. /// them.
  1514. ///
  1515. /// It has a nice symmetry with SymbolicRangeInferrer. When the latter
  1516. /// can provide more precise ranges by looking into the operands of the
  1517. /// expression in question, ConstraintAssignor looks into the operands
  1518. /// to see if we can imply more from the new constraint.
  1519. class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
  1520. public:
  1521. template <class ClassOrSymbol>
  1522. LLVM_NODISCARD static ProgramStateRef
  1523. assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
  1524. ClassOrSymbol CoS, RangeSet NewConstraint) {
  1525. if (!State || NewConstraint.isEmpty())
  1526. return nullptr;
  1527. ConstraintAssignor Assignor{State, Builder, F};
  1528. return Assignor.assign(CoS, NewConstraint);
  1529. }
  1530. /// Handle expressions like: a % b != 0.
  1531. template <typename SymT>
  1532. bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
  1533. if (Sym->getOpcode() != BO_Rem)
  1534. return true;
  1535. // a % b != 0 implies that a != 0.
  1536. if (!Constraint.containsZero()) {
  1537. SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
  1538. if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
  1539. State = State->assume(*NonLocSymSVal, true);
  1540. if (!State)
  1541. return false;
  1542. }
  1543. }
  1544. return true;
  1545. }
  1546. inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
  1547. inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
  1548. RangeSet Constraint) {
  1549. return handleRemainderOp(Sym, Constraint);
  1550. }
  1551. inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
  1552. RangeSet Constraint);
  1553. private:
  1554. ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
  1555. RangeSet::Factory &F)
  1556. : State(State), Builder(Builder), RangeFactory(F) {}
  1557. using Base = ConstraintAssignorBase<ConstraintAssignor>;
  1558. /// Base method for handling new constraints for symbols.
  1559. LLVM_NODISCARD ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
  1560. // All constraints are actually associated with equivalence classes, and
  1561. // that's what we are going to do first.
  1562. State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
  1563. if (!State)
  1564. return nullptr;
  1565. // And after that we can check what other things we can get from this
  1566. // constraint.
  1567. Base::assign(Sym, NewConstraint);
  1568. return State;
  1569. }
  1570. /// Base method for handling new constraints for classes.
  1571. LLVM_NODISCARD ProgramStateRef assign(EquivalenceClass Class,
  1572. RangeSet NewConstraint) {
  1573. // There is a chance that we might need to update constraints for the
  1574. // classes that are known to be disequal to Class.
  1575. //
  1576. // In order for this to be even possible, the new constraint should
  1577. // be simply a constant because we can't reason about range disequalities.
  1578. if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
  1579. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  1580. ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
  1581. // Add new constraint.
  1582. Constraints = CF.add(Constraints, Class, NewConstraint);
  1583. for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
  1584. RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
  1585. RangeFactory, State, DisequalClass);
  1586. UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
  1587. // If we end up with at least one of the disequal classes to be
  1588. // constrained with an empty range-set, the state is infeasible.
  1589. if (UpdatedConstraint.isEmpty())
  1590. return nullptr;
  1591. Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
  1592. }
  1593. assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
  1594. "a state with infeasible constraints");
  1595. return setConstraints(State, Constraints);
  1596. }
  1597. return setConstraint(State, Class, NewConstraint);
  1598. }
  1599. ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
  1600. SymbolRef RHS) {
  1601. return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
  1602. }
  1603. ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
  1604. SymbolRef RHS) {
  1605. return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
  1606. }
  1607. LLVM_NODISCARD Optional<bool> interpreteAsBool(RangeSet Constraint) {
  1608. assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
  1609. if (Constraint.getConcreteValue())
  1610. return !Constraint.getConcreteValue()->isZero();
  1611. if (!Constraint.containsZero())
  1612. return true;
  1613. return llvm::None;
  1614. }
  1615. ProgramStateRef State;
  1616. SValBuilder &Builder;
  1617. RangeSet::Factory &RangeFactory;
  1618. };
  1619. bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
  1620. const llvm::APSInt &Constraint) {
  1621. llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
  1622. // Iterate over all equivalence classes and try to simplify them.
  1623. ClassMembersTy Members = State->get<ClassMembers>();
  1624. for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
  1625. EquivalenceClass Class = ClassToSymbolSet.first;
  1626. State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
  1627. if (!State)
  1628. return false;
  1629. SimplifiedClasses.insert(Class);
  1630. }
  1631. // Trivial equivalence classes (those that have only one symbol member) are
  1632. // not stored in the State. Thus, we must skim through the constraints as
  1633. // well. And we try to simplify symbols in the constraints.
  1634. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  1635. for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
  1636. EquivalenceClass Class = ClassConstraint.first;
  1637. if (SimplifiedClasses.count(Class)) // Already simplified.
  1638. continue;
  1639. State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
  1640. if (!State)
  1641. return false;
  1642. }
  1643. // We may have trivial equivalence classes in the disequality info as
  1644. // well, and we need to simplify them.
  1645. DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
  1646. for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
  1647. DisequalityInfo) {
  1648. EquivalenceClass Class = DisequalityEntry.first;
  1649. ClassSet DisequalClasses = DisequalityEntry.second;
  1650. State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
  1651. if (!State)
  1652. return false;
  1653. }
  1654. return true;
  1655. }
  1656. bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
  1657. RangeSet Constraint) {
  1658. if (!handleRemainderOp(Sym, Constraint))
  1659. return false;
  1660. Optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
  1661. if (!ConstraintAsBool)
  1662. return true;
  1663. if (Optional<bool> Equality = meansEquality(Sym)) {
  1664. // Here we cover two cases:
  1665. // * if Sym is equality and the new constraint is true -> Sym's operands
  1666. // should be marked as equal
  1667. // * if Sym is disequality and the new constraint is false -> Sym's
  1668. // operands should be also marked as equal
  1669. if (*Equality == *ConstraintAsBool) {
  1670. State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
  1671. } else {
  1672. // Other combinations leave as with disequal operands.
  1673. State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
  1674. }
  1675. if (!State)
  1676. return false;
  1677. }
  1678. return true;
  1679. }
  1680. } // end anonymous namespace
  1681. std::unique_ptr<ConstraintManager>
  1682. ento::CreateRangeConstraintManager(ProgramStateManager &StMgr,
  1683. ExprEngine *Eng) {
  1684. return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
  1685. }
  1686. ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
  1687. ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
  1688. ConstraintMap Result = F.getEmptyMap();
  1689. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  1690. for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
  1691. EquivalenceClass Class = ClassConstraint.first;
  1692. SymbolSet ClassMembers = Class.getClassMembers(State);
  1693. assert(!ClassMembers.isEmpty() &&
  1694. "Class must always have at least one member!");
  1695. SymbolRef Representative = *ClassMembers.begin();
  1696. Result = F.add(Result, Representative, ClassConstraint.second);
  1697. }
  1698. return Result;
  1699. }
  1700. //===----------------------------------------------------------------------===//
1701. // EquivalenceClass implementation details
  1702. //===----------------------------------------------------------------------===//
  1703. LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
  1704. raw_ostream &os) const {
  1705. SymbolSet ClassMembers = getClassMembers(State);
  1706. for (const SymbolRef &MemberSym : ClassMembers) {
  1707. MemberSym->dump();
  1708. os << "\n";
  1709. }
  1710. }
  1711. inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
  1712. SymbolRef Sym) {
  1713. assert(State && "State should not be null");
  1714. assert(Sym && "Symbol should not be null");
  1715. // We store far from all Symbol -> Class mappings
  1716. if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
  1717. return *NontrivialClass;
  1718. // This is a trivial class of Sym.
  1719. return Sym;
  1720. }
  1721. inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
  1722. ProgramStateRef State,
  1723. SymbolRef First,
  1724. SymbolRef Second) {
  1725. EquivalenceClass FirstClass = find(State, First);
  1726. EquivalenceClass SecondClass = find(State, Second);
  1727. return FirstClass.merge(F, State, SecondClass);
  1728. }
  1729. inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
  1730. ProgramStateRef State,
  1731. EquivalenceClass Other) {
  1732. // It is already the same class.
  1733. if (*this == Other)
  1734. return State;
  1735. // FIXME: As of now, we support only equivalence classes of the same type.
  1736. // This limitation is connected to the lack of explicit casts in
  1737. // our symbolic expression model.
  1738. //
  1739. // That means that for `int x` and `char y` we don't distinguish
  1740. // between these two very different cases:
  1741. // * `x == y`
  1742. // * `(char)x == y`
  1743. //
  1744. // The moment we introduce symbolic casts, this restriction can be
  1745. // lifted.
  1746. if (getType() != Other.getType())
  1747. return State;
  1748. SymbolSet Members = getClassMembers(State);
  1749. SymbolSet OtherMembers = Other.getClassMembers(State);
  1750. // We estimate the size of the class by the height of tree containing
  1751. // its members. Merging is not a trivial operation, so it's easier to
  1752. // merge the smaller class into the bigger one.
  1753. if (Members.getHeight() >= OtherMembers.getHeight()) {
  1754. return mergeImpl(F, State, Members, Other, OtherMembers);
  1755. } else {
  1756. return Other.mergeImpl(F, State, OtherMembers, *this, Members);
  1757. }
  1758. }
  1759. inline ProgramStateRef
  1760. EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
  1761. ProgramStateRef State, SymbolSet MyMembers,
  1762. EquivalenceClass Other, SymbolSet OtherMembers) {
  1763. // Essentially what we try to recreate here is some kind of union-find
  1764. // data structure. It does have certain limitations due to persistence
  1765. // and the need to remove elements from classes.
  1766. //
  1767. // In this setting, EquialityClass object is the representative of the class
  1768. // or the parent element. ClassMap is a mapping of class members to their
  1769. // parent. Unlike the union-find structure, they all point directly to the
  1770. // class representative because we don't have an opportunity to actually do
  1771. // path compression when dealing with immutability. This means that we
  1772. // compress paths every time we do merges. It also means that we lose
  1773. // the main amortized complexity benefit from the original data structure.
  1774. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  1775. ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
  1776. // 1. If the merged classes have any constraints associated with them, we
  1777. // need to transfer them to the class we have left.
  1778. //
  1779. // Intersection here makes perfect sense because both of these constraints
  1780. // must hold for the whole new class.
  1781. if (Optional<RangeSet> NewClassConstraint =
  1782. intersect(RangeFactory, getConstraint(State, *this),
  1783. getConstraint(State, Other))) {
  1784. // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
  1785. // range inferrer shouldn't generate ranges incompatible with
  1786. // equivalence classes. However, at the moment, due to imperfections
  1787. // in the solver, it is possible and the merge function can also
  1788. // return infeasible states aka null states.
  1789. if (NewClassConstraint->isEmpty())
  1790. // Infeasible state
  1791. return nullptr;
  1792. // No need in tracking constraints of a now-dissolved class.
  1793. Constraints = CRF.remove(Constraints, Other);
  1794. // Assign new constraints for this class.
  1795. Constraints = CRF.add(Constraints, *this, *NewClassConstraint);
  1796. assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
  1797. "a state with infeasible constraints");
  1798. State = State->set<ConstraintRange>(Constraints);
  1799. }
  1800. // 2. Get ALL equivalence-related maps
  1801. ClassMapTy Classes = State->get<ClassMap>();
  1802. ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
  1803. ClassMembersTy Members = State->get<ClassMembers>();
  1804. ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();
  1805. DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
  1806. DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();
  1807. ClassSet::Factory &CF = State->get_context<ClassSet>();
  1808. SymbolSet::Factory &F = getMembersFactory(State);
  1809. // 2. Merge members of the Other class into the current class.
  1810. SymbolSet NewClassMembers = MyMembers;
  1811. for (SymbolRef Sym : OtherMembers) {
  1812. NewClassMembers = F.add(NewClassMembers, Sym);
  1813. // *this is now the class for all these new symbols.
  1814. Classes = CMF.add(Classes, Sym, *this);
  1815. }
  1816. // 3. Adjust member mapping.
  1817. //
  1818. // No need in tracking members of a now-dissolved class.
  1819. Members = MF.remove(Members, Other);
  1820. // Now only the current class is mapped to all the symbols.
  1821. Members = MF.add(Members, *this, NewClassMembers);
  1822. // 4. Update disequality relations
  1823. ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
  1824. // We are about to merge two classes but they are already known to be
  1825. // non-equal. This is a contradiction.
  1826. if (DisequalToOther.contains(*this))
  1827. return nullptr;
  1828. if (!DisequalToOther.isEmpty()) {
  1829. ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
  1830. DisequalityInfo = DF.remove(DisequalityInfo, Other);
  1831. for (EquivalenceClass DisequalClass : DisequalToOther) {
  1832. DisequalToThis = CF.add(DisequalToThis, DisequalClass);
  1833. // Disequality is a symmetric relation meaning that if
  1834. // DisequalToOther not null then the set for DisequalClass is not
  1835. // empty and has at least Other.
  1836. ClassSet OriginalSetLinkedToOther =
  1837. *DisequalityInfo.lookup(DisequalClass);
  1838. // Other will be eliminated and we should replace it with the bigger
  1839. // united class.
  1840. ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
  1841. NewSet = CF.add(NewSet, *this);
  1842. DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
  1843. }
  1844. DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
  1845. State = State->set<DisequalityMap>(DisequalityInfo);
  1846. }
  1847. // 5. Update the state
  1848. State = State->set<ClassMap>(Classes);
  1849. State = State->set<ClassMembers>(Members);
  1850. return State;
  1851. }
  1852. inline SymbolSet::Factory &
  1853. EquivalenceClass::getMembersFactory(ProgramStateRef State) {
  1854. return State->get_context<SymbolSet>();
  1855. }
  1856. SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
  1857. if (const SymbolSet *Members = State->get<ClassMembers>(*this))
  1858. return *Members;
  1859. // This class is trivial, so we need to construct a set
  1860. // with just that one symbol from the class.
  1861. SymbolSet::Factory &F = getMembersFactory(State);
  1862. return F.add(F.getEmptySet(), getRepresentativeSymbol());
  1863. }
  1864. bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
  1865. return State->get<ClassMembers>(*this) == nullptr;
  1866. }
  1867. bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
  1868. SymbolReaper &Reaper) const {
  1869. return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
  1870. }
  1871. inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
  1872. ProgramStateRef State,
  1873. SymbolRef First,
  1874. SymbolRef Second) {
  1875. return markDisequal(RF, State, find(State, First), find(State, Second));
  1876. }
  1877. inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
  1878. ProgramStateRef State,
  1879. EquivalenceClass First,
  1880. EquivalenceClass Second) {
  1881. return First.markDisequal(RF, State, Second);
  1882. }
  1883. inline ProgramStateRef
  1884. EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
  1885. EquivalenceClass Other) const {
  1886. // If we know that two classes are equal, we can only produce an infeasible
  1887. // state.
  1888. if (*this == Other) {
  1889. return nullptr;
  1890. }
  1891. DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
  1892. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  1893. // Disequality is a symmetric relation, so if we mark A as disequal to B,
  1894. // we should also mark B as disequalt to A.
  1895. if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
  1896. Other) ||
  1897. !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
  1898. *this))
  1899. return nullptr;
  1900. assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
  1901. "a state with infeasible constraints");
  1902. State = State->set<DisequalityMap>(DisequalityInfo);
  1903. State = State->set<ConstraintRange>(Constraints);
  1904. return State;
  1905. }
  1906. inline bool EquivalenceClass::addToDisequalityInfo(
  1907. DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
  1908. RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
  1909. EquivalenceClass Second) {
  1910. // 1. Get all of the required factories.
  1911. DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
  1912. ClassSet::Factory &CF = State->get_context<ClassSet>();
  1913. ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
  1914. // 2. Add Second to the set of classes disequal to First.
  1915. const ClassSet *CurrentSet = Info.lookup(First);
  1916. ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
  1917. NewSet = CF.add(NewSet, Second);
  1918. Info = F.add(Info, First, NewSet);
  1919. // 3. If Second is known to be a constant, we can delete this point
  1920. // from the constraint asociated with First.
  1921. //
  1922. // So, if Second == 10, it means that First != 10.
  1923. // At the same time, the same logic does not apply to ranges.
  1924. if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
  1925. if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
  1926. RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
  1927. RF, State, First.getRepresentativeSymbol());
  1928. FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
  1929. // If the First class is about to be constrained with an empty
  1930. // range-set, the state is infeasible.
  1931. if (FirstConstraint.isEmpty())
  1932. return false;
  1933. Constraints = CRF.add(Constraints, First, FirstConstraint);
  1934. }
  1935. return true;
  1936. }
  1937. inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
  1938. SymbolRef FirstSym,
  1939. SymbolRef SecondSym) {
  1940. return EquivalenceClass::areEqual(State, find(State, FirstSym),
  1941. find(State, SecondSym));
  1942. }
  1943. inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
  1944. EquivalenceClass First,
  1945. EquivalenceClass Second) {
  1946. // The same equivalence class => symbols are equal.
  1947. if (First == Second)
  1948. return true;
  1949. // Let's check if we know anything about these two classes being not equal to
  1950. // each other.
  1951. ClassSet DisequalToFirst = First.getDisequalClasses(State);
  1952. if (DisequalToFirst.contains(Second))
  1953. return false;
  1954. // It is not clear.
  1955. return llvm::None;
  1956. }
  1957. LLVM_NODISCARD ProgramStateRef
  1958. EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
  1959. SymbolSet ClsMembers = getClassMembers(State);
  1960. assert(ClsMembers.contains(Old));
  1961. // We don't remove `Old`'s Sym->Class relation for two reasons:
  1962. // 1) This way constraints for the old symbol can still be found via it's
  1963. // equivalence class that it used to be the member of.
  1964. // 2) Performance and resource reasons. We can spare one removal and thus one
  1965. // additional tree in the forest of `ClassMap`.
  1966. // Remove `Old`'s Class->Sym relation.
  1967. SymbolSet::Factory &F = getMembersFactory(State);
  1968. ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
  1969. ClsMembers = F.remove(ClsMembers, Old);
  1970. // Ensure another precondition of the removeMember function (we can check
  1971. // this only with isEmpty, thus we have to do the remove first).
  1972. assert(!ClsMembers.isEmpty() &&
  1973. "Class should have had at least two members before member removal");
  1974. // Overwrite the existing members assigned to this class.
  1975. ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
  1976. ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
  1977. State = State->set<ClassMembers>(ClassMembersMap);
  1978. return State;
  1979. }
  1980. // Re-evaluate an SVal with top-level `State->assume` logic.
  1981. LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
  1982. const RangeSet *Constraint,
  1983. SVal TheValue) {
  1984. if (!Constraint)
  1985. return State;
  1986. const auto DefinedVal = TheValue.castAs<DefinedSVal>();
  1987. // If the SVal is 0, we can simply interpret that as `false`.
  1988. if (Constraint->encodesFalseRange())
  1989. return State->assume(DefinedVal, false);
  1990. // If the constraint does not encode 0 then we can interpret that as `true`
  1991. // AND as a Range(Set).
  1992. if (Constraint->encodesTrueRange()) {
  1993. State = State->assume(DefinedVal, true);
  1994. if (!State)
  1995. return nullptr;
  1996. // Fall through, re-assume based on the range values as well.
  1997. }
  1998. // Overestimate the individual Ranges with the RangeSet' lowest and
  1999. // highest values.
  2000. return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
  2001. Constraint->getMaxValue(), true);
  2002. }
  2003. // Iterate over all symbols and try to simplify them. Once a symbol is
  2004. // simplified then we check if we can merge the simplified symbol's equivalence
  2005. // class to this class. This way, we simplify not just the symbols but the
  2006. // classes as well: we strive to keep the number of the classes to be the
  2007. // absolute minimum.
  2008. LLVM_NODISCARD ProgramStateRef
  2009. EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
  2010. ProgramStateRef State, EquivalenceClass Class) {
  2011. SymbolSet ClassMembers = Class.getClassMembers(State);
  2012. for (const SymbolRef &MemberSym : ClassMembers) {
  2013. const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
  2014. const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
  2015. // The symbol is collapsed to a constant, check if the current State is
  2016. // still feasible.
  2017. if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
  2018. const llvm::APSInt &SV = CI->getValue();
  2019. const RangeSet *ClassConstraint = getConstraint(State, Class);
  2020. // We have found a contradiction.
  2021. if (ClassConstraint && !ClassConstraint->contains(SV))
  2022. return nullptr;
  2023. }
  2024. if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
  2025. // The simplified symbol should be the member of the original Class,
  2026. // however, it might be in another existing class at the moment. We
  2027. // have to merge these classes.
  2028. ProgramStateRef OldState = State;
  2029. State = merge(F, State, MemberSym, SimplifiedMemberSym);
  2030. if (!State)
  2031. return nullptr;
  2032. // No state change, no merge happened actually.
  2033. if (OldState == State)
  2034. continue;
  2035. assert(find(State, MemberSym) == find(State, SimplifiedMemberSym));
  2036. // Remove the old and more complex symbol.
  2037. State = find(State, MemberSym).removeMember(State, MemberSym);
  2038. // Query the class constraint again b/c that may have changed during the
  2039. // merge above.
  2040. const RangeSet *ClassConstraint = getConstraint(State, Class);
  2041. // Re-evaluate an SVal with top-level `State->assume`, this ignites
  2042. // a RECURSIVE algorithm that will reach a FIXPOINT.
  2043. //
  2044. // About performance and complexity: Let us assume that in a State we
  2045. // have N non-trivial equivalence classes and that all constraints and
  2046. // disequality info is related to non-trivial classes. In the worst case,
  2047. // we can simplify only one symbol of one class in each iteration. The
  2048. // number of symbols in one class cannot grow b/c we replace the old
  2049. // symbol with the simplified one. Also, the number of the equivalence
  2050. // classes can decrease only, b/c the algorithm does a merge operation
  2051. // optionally. We need N iterations in this case to reach the fixpoint.
  2052. // Thus, the steps needed to be done in the worst case is proportional to
  2053. // N*N.
  2054. //
  2055. // This worst case scenario can be extended to that case when we have
  2056. // trivial classes in the constraints and in the disequality map. This
  2057. // case can be reduced to the case with a State where there are only
  2058. // non-trivial classes. This is because a merge operation on two trivial
  2059. // classes results in one non-trivial class.
  2060. State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
  2061. if (!State)
  2062. return nullptr;
  2063. }
  2064. }
  2065. return State;
  2066. }
  2067. inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
  2068. SymbolRef Sym) {
  2069. return find(State, Sym).getDisequalClasses(State);
  2070. }
  2071. inline ClassSet
  2072. EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
  2073. return getDisequalClasses(State->get<DisequalityMap>(),
  2074. State->get_context<ClassSet>());
  2075. }
  2076. inline ClassSet
  2077. EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
  2078. ClassSet::Factory &Factory) const {
  2079. if (const ClassSet *DisequalClasses = Map.lookup(*this))
  2080. return *DisequalClasses;
  2081. return Factory.getEmptySet();
  2082. }
  2083. bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
  2084. ClassMembersTy Members = State->get<ClassMembers>();
  2085. for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
  2086. for (SymbolRef Member : ClassMembersPair.second) {
  2087. // Every member of the class should have a mapping back to the class.
  2088. if (find(State, Member) == ClassMembersPair.first) {
  2089. continue;
  2090. }
  2091. return false;
  2092. }
  2093. }
  2094. DisequalityMapTy Disequalities = State->get<DisequalityMap>();
  2095. for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
  2096. EquivalenceClass Class = DisequalityInfo.first;
  2097. ClassSet DisequalClasses = DisequalityInfo.second;
  2098. // There is no use in keeping empty sets in the map.
  2099. if (DisequalClasses.isEmpty())
  2100. return false;
  2101. // Disequality is symmetrical, i.e. for every Class A and B that A != B,
  2102. // B != A should also be true.
  2103. for (EquivalenceClass DisequalClass : DisequalClasses) {
  2104. const ClassSet *DisequalToDisequalClasses =
  2105. Disequalities.lookup(DisequalClass);
  2106. // It should be a set of at least one element: Class
  2107. if (!DisequalToDisequalClasses ||
  2108. !DisequalToDisequalClasses->contains(Class))
  2109. return false;
  2110. }
  2111. }
  2112. return true;
  2113. }
  2114. //===----------------------------------------------------------------------===//
  2115. // RangeConstraintManager implementation
  2116. //===----------------------------------------------------------------------===//
  2117. bool RangeConstraintManager::canReasonAbout(SVal X) const {
  2118. Optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
  2119. if (SymVal && SymVal->isExpression()) {
  2120. const SymExpr *SE = SymVal->getSymbol();
  2121. if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
  2122. switch (SIE->getOpcode()) {
  2123. // We don't reason yet about bitwise-constraints on symbolic values.
  2124. case BO_And:
  2125. case BO_Or:
  2126. case BO_Xor:
  2127. return false;
  2128. // We don't reason yet about these arithmetic constraints on
  2129. // symbolic values.
  2130. case BO_Mul:
  2131. case BO_Div:
  2132. case BO_Rem:
  2133. case BO_Shl:
  2134. case BO_Shr:
  2135. return false;
  2136. // All other cases.
  2137. default:
  2138. return true;
  2139. }
  2140. }
  2141. if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
  2142. // FIXME: Handle <=> here.
  2143. if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
  2144. BinaryOperator::isRelationalOp(SSE->getOpcode())) {
  2145. // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
  2146. // We've recently started producing Loc <> NonLoc comparisons (that
  2147. // result from casts of one of the operands between eg. intptr_t and
  2148. // void *), but we can't reason about them yet.
  2149. if (Loc::isLocType(SSE->getLHS()->getType())) {
  2150. return Loc::isLocType(SSE->getRHS()->getType());
  2151. }
  2152. }
  2153. }
  2154. return false;
  2155. }
  2156. return true;
  2157. }
  2158. ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
  2159. SymbolRef Sym) {
  2160. const RangeSet *Ranges = getConstraint(State, Sym);
  2161. // If we don't have any information about this symbol, it's underconstrained.
  2162. if (!Ranges)
  2163. return ConditionTruthVal();
  2164. // If we have a concrete value, see if it's zero.
  2165. if (const llvm::APSInt *Value = Ranges->getConcreteValue())
  2166. return *Value == 0;
  2167. BasicValueFactory &BV = getBasicVals();
  2168. APSIntType IntType = BV.getAPSIntType(Sym->getType());
  2169. llvm::APSInt Zero = IntType.getZeroValue();
  2170. // Check if zero is in the set of possible values.
  2171. if (!Ranges->contains(Zero))
  2172. return false;
  2173. // Zero is a possible value, but it is not the /only/ possible value.
  2174. return ConditionTruthVal();
  2175. }
  2176. const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
  2177. SymbolRef Sym) const {
  2178. const RangeSet *T = getConstraint(St, Sym);
  2179. return T ? T->getConcreteValue() : nullptr;
  2180. }
  2181. //===----------------------------------------------------------------------===//
  2182. // Remove dead symbols from existing constraints
  2183. //===----------------------------------------------------------------------===//
  2184. /// Scan all symbols referenced by the constraints. If the symbol is not alive
  2185. /// as marked in LSymbols, mark it as dead in DSymbols.
  2186. ProgramStateRef
  2187. RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
  2188. SymbolReaper &SymReaper) {
  2189. ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
  2190. ClassMembersTy NewClassMembersMap = ClassMembersMap;
  2191. ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
  2192. SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
  2193. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  2194. ConstraintRangeTy NewConstraints = Constraints;
  2195. ConstraintRangeTy::Factory &ConstraintFactory =
  2196. State->get_context<ConstraintRange>();
  2197. ClassMapTy Map = State->get<ClassMap>();
  2198. ClassMapTy NewMap = Map;
  2199. ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
  2200. DisequalityMapTy Disequalities = State->get<DisequalityMap>();
  2201. DisequalityMapTy::Factory &DisequalityFactory =
  2202. State->get_context<DisequalityMap>();
  2203. ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
  2204. bool ClassMapChanged = false;
  2205. bool MembersMapChanged = false;
  2206. bool ConstraintMapChanged = false;
  2207. bool DisequalitiesChanged = false;
  2208. auto removeDeadClass = [&](EquivalenceClass Class) {
  2209. // Remove associated constraint ranges.
  2210. Constraints = ConstraintFactory.remove(Constraints, Class);
  2211. ConstraintMapChanged = true;
  2212. // Update disequality information to not hold any information on the
  2213. // removed class.
  2214. ClassSet DisequalClasses =
  2215. Class.getDisequalClasses(Disequalities, ClassSetFactory);
  2216. if (!DisequalClasses.isEmpty()) {
  2217. for (EquivalenceClass DisequalClass : DisequalClasses) {
  2218. ClassSet DisequalToDisequalSet =
  2219. DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
  2220. // DisequalToDisequalSet is guaranteed to be non-empty for consistent
  2221. // disequality info.
  2222. assert(!DisequalToDisequalSet.isEmpty());
  2223. ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
  2224. // No need in keeping an empty set.
  2225. if (NewSet.isEmpty()) {
  2226. Disequalities =
  2227. DisequalityFactory.remove(Disequalities, DisequalClass);
  2228. } else {
  2229. Disequalities =
  2230. DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
  2231. }
  2232. }
  2233. // Remove the data for the class
  2234. Disequalities = DisequalityFactory.remove(Disequalities, Class);
  2235. DisequalitiesChanged = true;
  2236. }
  2237. };
  2238. // 1. Let's see if dead symbols are trivial and have associated constraints.
  2239. for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
  2240. Constraints) {
  2241. EquivalenceClass Class = ClassConstraintPair.first;
  2242. if (Class.isTriviallyDead(State, SymReaper)) {
  2243. // If this class is trivial, we can remove its constraints right away.
  2244. removeDeadClass(Class);
  2245. }
  2246. }
  2247. // 2. We don't need to track classes for dead symbols.
  2248. for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
  2249. SymbolRef Sym = SymbolClassPair.first;
  2250. if (SymReaper.isDead(Sym)) {
  2251. ClassMapChanged = true;
  2252. NewMap = ClassFactory.remove(NewMap, Sym);
  2253. }
  2254. }
  2255. // 3. Remove dead members from classes and remove dead non-trivial classes
  2256. // and their constraints.
  2257. for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
  2258. ClassMembersMap) {
  2259. EquivalenceClass Class = ClassMembersPair.first;
  2260. SymbolSet LiveMembers = ClassMembersPair.second;
  2261. bool MembersChanged = false;
  2262. for (SymbolRef Member : ClassMembersPair.second) {
  2263. if (SymReaper.isDead(Member)) {
  2264. MembersChanged = true;
  2265. LiveMembers = SetFactory.remove(LiveMembers, Member);
  2266. }
  2267. }
  2268. // Check if the class changed.
  2269. if (!MembersChanged)
  2270. continue;
  2271. MembersMapChanged = true;
  2272. if (LiveMembers.isEmpty()) {
  2273. // The class is dead now, we need to wipe it out of the members map...
  2274. NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
  2275. // ...and remove all of its constraints.
  2276. removeDeadClass(Class);
  2277. } else {
  2278. // We need to change the members associated with the class.
  2279. NewClassMembersMap =
  2280. EMFactory.add(NewClassMembersMap, Class, LiveMembers);
  2281. }
  2282. }
  2283. // 4. Update the state with new maps.
  2284. //
  2285. // Here we try to be humble and update a map only if it really changed.
  2286. if (ClassMapChanged)
  2287. State = State->set<ClassMap>(NewMap);
  2288. if (MembersMapChanged)
  2289. State = State->set<ClassMembers>(NewClassMembersMap);
  2290. if (ConstraintMapChanged)
  2291. State = State->set<ConstraintRange>(Constraints);
  2292. if (DisequalitiesChanged)
  2293. State = State->set<DisequalityMap>(Disequalities);
  2294. assert(EquivalenceClass::isClassDataConsistent(State));
  2295. return State;
  2296. }
  2297. RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
  2298. SymbolRef Sym) {
  2299. return SymbolicRangeInferrer::inferRange(F, State, Sym);
  2300. }
  2301. ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
  2302. SymbolRef Sym,
  2303. RangeSet Range) {
  2304. return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
  2305. }
//===----------------------------------------------------------------------===//
// assumeSymX methods: protected interface for RangeConstraintManager.
//===----------------------------------------------------------------------===//
  2309. // The syntax for ranges below is mathematical, using [x, y] for closed ranges
  2310. // and (x, y) for open ranges. These ranges are modular, corresponding with
  2311. // a common treatment of C integer overflow. This means that these methods
  2312. // do not have to worry about overflow; RangeSet::Intersect can handle such a
  2313. // "wraparound" range.
  2314. // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
  2315. // UINT_MAX, 0, 1, and 2.
  2316. ProgramStateRef
  2317. RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
  2318. const llvm::APSInt &Int,
  2319. const llvm::APSInt &Adjustment) {
  2320. // Before we do any real work, see if the value can even show up.
  2321. APSIntType AdjustmentType(Adjustment);
  2322. if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
  2323. return St;
  2324. llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
  2325. RangeSet New = getRange(St, Sym);
  2326. New = F.deletePoint(New, Point);
  2327. return setRange(St, Sym, New);
  2328. }
  2329. ProgramStateRef
  2330. RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
  2331. const llvm::APSInt &Int,
  2332. const llvm::APSInt &Adjustment) {
  2333. // Before we do any real work, see if the value can even show up.
  2334. APSIntType AdjustmentType(Adjustment);
  2335. if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
  2336. return nullptr;
  2337. // [Int-Adjustment, Int-Adjustment]
  2338. llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
  2339. RangeSet New = getRange(St, Sym);
  2340. New = F.intersect(New, AdjInt);
  2341. return setRange(St, Sym, New);
  2342. }
  2343. RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
  2344. SymbolRef Sym,
  2345. const llvm::APSInt &Int,
  2346. const llvm::APSInt &Adjustment) {
  2347. // Before we do any real work, see if the value can even show up.
  2348. APSIntType AdjustmentType(Adjustment);
  2349. switch (AdjustmentType.testInRange(Int, true)) {
  2350. case APSIntType::RTR_Below:
  2351. return F.getEmptySet();
  2352. case APSIntType::RTR_Within:
  2353. break;
  2354. case APSIntType::RTR_Above:
  2355. return getRange(St, Sym);
  2356. }
  2357. // Special case for Int == Min. This is always false.
  2358. llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  2359. llvm::APSInt Min = AdjustmentType.getMinValue();
  2360. if (ComparisonVal == Min)
  2361. return F.getEmptySet();
  2362. llvm::APSInt Lower = Min - Adjustment;
  2363. llvm::APSInt Upper = ComparisonVal - Adjustment;
  2364. --Upper;
  2365. RangeSet Result = getRange(St, Sym);
  2366. return F.intersect(Result, Lower, Upper);
  2367. }
  2368. ProgramStateRef
  2369. RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
  2370. const llvm::APSInt &Int,
  2371. const llvm::APSInt &Adjustment) {
  2372. RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
  2373. return setRange(St, Sym, New);
  2374. }
  2375. RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
  2376. SymbolRef Sym,
  2377. const llvm::APSInt &Int,
  2378. const llvm::APSInt &Adjustment) {
  2379. // Before we do any real work, see if the value can even show up.
  2380. APSIntType AdjustmentType(Adjustment);
  2381. switch (AdjustmentType.testInRange(Int, true)) {
  2382. case APSIntType::RTR_Below:
  2383. return getRange(St, Sym);
  2384. case APSIntType::RTR_Within:
  2385. break;
  2386. case APSIntType::RTR_Above:
  2387. return F.getEmptySet();
  2388. }
  2389. // Special case for Int == Max. This is always false.
  2390. llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  2391. llvm::APSInt Max = AdjustmentType.getMaxValue();
  2392. if (ComparisonVal == Max)
  2393. return F.getEmptySet();
  2394. llvm::APSInt Lower = ComparisonVal - Adjustment;
  2395. llvm::APSInt Upper = Max - Adjustment;
  2396. ++Lower;
  2397. RangeSet SymRange = getRange(St, Sym);
  2398. return F.intersect(SymRange, Lower, Upper);
  2399. }
  2400. ProgramStateRef
  2401. RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
  2402. const llvm::APSInt &Int,
  2403. const llvm::APSInt &Adjustment) {
  2404. RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
  2405. return setRange(St, Sym, New);
  2406. }
  2407. RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
  2408. SymbolRef Sym,
  2409. const llvm::APSInt &Int,
  2410. const llvm::APSInt &Adjustment) {
  2411. // Before we do any real work, see if the value can even show up.
  2412. APSIntType AdjustmentType(Adjustment);
  2413. switch (AdjustmentType.testInRange(Int, true)) {
  2414. case APSIntType::RTR_Below:
  2415. return getRange(St, Sym);
  2416. case APSIntType::RTR_Within:
  2417. break;
  2418. case APSIntType::RTR_Above:
  2419. return F.getEmptySet();
  2420. }
  2421. // Special case for Int == Min. This is always feasible.
  2422. llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  2423. llvm::APSInt Min = AdjustmentType.getMinValue();
  2424. if (ComparisonVal == Min)
  2425. return getRange(St, Sym);
  2426. llvm::APSInt Max = AdjustmentType.getMaxValue();
  2427. llvm::APSInt Lower = ComparisonVal - Adjustment;
  2428. llvm::APSInt Upper = Max - Adjustment;
  2429. RangeSet SymRange = getRange(St, Sym);
  2430. return F.intersect(SymRange, Lower, Upper);
  2431. }
  2432. ProgramStateRef
  2433. RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
  2434. const llvm::APSInt &Int,
  2435. const llvm::APSInt &Adjustment) {
  2436. RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
  2437. return setRange(St, Sym, New);
  2438. }
  2439. RangeSet
  2440. RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
  2441. const llvm::APSInt &Int,
  2442. const llvm::APSInt &Adjustment) {
  2443. // Before we do any real work, see if the value can even show up.
  2444. APSIntType AdjustmentType(Adjustment);
  2445. switch (AdjustmentType.testInRange(Int, true)) {
  2446. case APSIntType::RTR_Below:
  2447. return F.getEmptySet();
  2448. case APSIntType::RTR_Within:
  2449. break;
  2450. case APSIntType::RTR_Above:
  2451. return RS();
  2452. }
  2453. // Special case for Int == Max. This is always feasible.
  2454. llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  2455. llvm::APSInt Max = AdjustmentType.getMaxValue();
  2456. if (ComparisonVal == Max)
  2457. return RS();
  2458. llvm::APSInt Min = AdjustmentType.getMinValue();
  2459. llvm::APSInt Lower = Min - Adjustment;
  2460. llvm::APSInt Upper = ComparisonVal - Adjustment;
  2461. RangeSet Default = RS();
  2462. return F.intersect(Default, Lower, Upper);
  2463. }
  2464. RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
  2465. SymbolRef Sym,
  2466. const llvm::APSInt &Int,
  2467. const llvm::APSInt &Adjustment) {
  2468. return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
  2469. }
  2470. ProgramStateRef
  2471. RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
  2472. const llvm::APSInt &Int,
  2473. const llvm::APSInt &Adjustment) {
  2474. RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
  2475. return setRange(St, Sym, New);
  2476. }
  2477. ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
  2478. ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
  2479. const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
  2480. RangeSet New = getSymGERange(State, Sym, From, Adjustment);
  2481. if (New.isEmpty())
  2482. return nullptr;
  2483. RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
  2484. return setRange(State, Sym, Out);
  2485. }
  2486. ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
  2487. ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
  2488. const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
  2489. RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
  2490. RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
  2491. RangeSet New(F.add(RangeLT, RangeGT));
  2492. return setRange(State, Sym, New);
  2493. }
  2494. //===----------------------------------------------------------------------===//
  2495. // Pretty-printing.
  2496. //===----------------------------------------------------------------------===//
  2497. void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
  2498. const char *NL, unsigned int Space,
  2499. bool IsDot) const {
  2500. printConstraints(Out, State, NL, Space, IsDot);
  2501. printEquivalenceClasses(Out, State, NL, Space, IsDot);
  2502. printDisequalities(Out, State, NL, Space, IsDot);
  2503. }
  2504. static std::string toString(const SymbolRef &Sym) {
  2505. std::string S;
  2506. llvm::raw_string_ostream O(S);
  2507. Sym->dumpToStream(O);
  2508. return O.str();
  2509. }
  2510. void RangeConstraintManager::printConstraints(raw_ostream &Out,
  2511. ProgramStateRef State,
  2512. const char *NL,
  2513. unsigned int Space,
  2514. bool IsDot) const {
  2515. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  2516. Indent(Out, Space, IsDot) << "\"constraints\": ";
  2517. if (Constraints.isEmpty()) {
  2518. Out << "null," << NL;
  2519. return;
  2520. }
  2521. std::map<std::string, RangeSet> OrderedConstraints;
  2522. for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
  2523. SymbolSet ClassMembers = P.first.getClassMembers(State);
  2524. for (const SymbolRef &ClassMember : ClassMembers) {
  2525. bool insertion_took_place;
  2526. std::tie(std::ignore, insertion_took_place) =
  2527. OrderedConstraints.insert({toString(ClassMember), P.second});
  2528. assert(insertion_took_place &&
  2529. "two symbols should not have the same dump");
  2530. }
  2531. }
  2532. ++Space;
  2533. Out << '[' << NL;
  2534. bool First = true;
  2535. for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
  2536. if (First) {
  2537. First = false;
  2538. } else {
  2539. Out << ',';
  2540. Out << NL;
  2541. }
  2542. Indent(Out, Space, IsDot)
  2543. << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
  2544. P.second.dump(Out);
  2545. Out << "\" }";
  2546. }
  2547. Out << NL;
  2548. --Space;
  2549. Indent(Out, Space, IsDot) << "]," << NL;
  2550. }
  2551. static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
  2552. SymbolSet ClassMembers = Class.getClassMembers(State);
  2553. llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
  2554. ClassMembers.end());
  2555. llvm::sort(ClassMembersSorted,
  2556. [](const SymbolRef &LHS, const SymbolRef &RHS) {
  2557. return toString(LHS) < toString(RHS);
  2558. });
  2559. bool FirstMember = true;
  2560. std::string Str;
  2561. llvm::raw_string_ostream Out(Str);
  2562. Out << "[ ";
  2563. for (SymbolRef ClassMember : ClassMembersSorted) {
  2564. if (FirstMember)
  2565. FirstMember = false;
  2566. else
  2567. Out << ", ";
  2568. Out << "\"" << ClassMember << "\"";
  2569. }
  2570. Out << " ]";
  2571. return Out.str();
  2572. }
  2573. void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
  2574. ProgramStateRef State,
  2575. const char *NL,
  2576. unsigned int Space,
  2577. bool IsDot) const {
  2578. ClassMembersTy Members = State->get<ClassMembers>();
  2579. Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
  2580. if (Members.isEmpty()) {
  2581. Out << "null," << NL;
  2582. return;
  2583. }
  2584. std::set<std::string> MembersStr;
  2585. for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
  2586. MembersStr.insert(toString(State, ClassToSymbolSet.first));
  2587. ++Space;
  2588. Out << '[' << NL;
  2589. bool FirstClass = true;
  2590. for (const std::string &Str : MembersStr) {
  2591. if (FirstClass) {
  2592. FirstClass = false;
  2593. } else {
  2594. Out << ',';
  2595. Out << NL;
  2596. }
  2597. Indent(Out, Space, IsDot);
  2598. Out << Str;
  2599. }
  2600. Out << NL;
  2601. --Space;
  2602. Indent(Out, Space, IsDot) << "]," << NL;
  2603. }
  2604. void RangeConstraintManager::printDisequalities(raw_ostream &Out,
  2605. ProgramStateRef State,
  2606. const char *NL,
  2607. unsigned int Space,
  2608. bool IsDot) const {
  2609. DisequalityMapTy Disequalities = State->get<DisequalityMap>();
  2610. Indent(Out, Space, IsDot) << "\"disequality_info\": ";
  2611. if (Disequalities.isEmpty()) {
  2612. Out << "null," << NL;
  2613. return;
  2614. }
  2615. // Transform the disequality info to an ordered map of
  2616. // [string -> (ordered set of strings)]
  2617. using EqClassesStrTy = std::set<std::string>;
  2618. using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
  2619. DisequalityInfoStrTy DisequalityInfoStr;
  2620. for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
  2621. EquivalenceClass Class = ClassToDisEqSet.first;
  2622. ClassSet DisequalClasses = ClassToDisEqSet.second;
  2623. EqClassesStrTy MembersStr;
  2624. for (EquivalenceClass DisEqClass : DisequalClasses)
  2625. MembersStr.insert(toString(State, DisEqClass));
  2626. DisequalityInfoStr.insert({toString(State, Class), MembersStr});
  2627. }
  2628. ++Space;
  2629. Out << '[' << NL;
  2630. bool FirstClass = true;
  2631. for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
  2632. DisequalityInfoStr) {
  2633. const std::string &Class = ClassToDisEqSet.first;
  2634. if (FirstClass) {
  2635. FirstClass = false;
  2636. } else {
  2637. Out << ',';
  2638. Out << NL;
  2639. }
  2640. Indent(Out, Space, IsDot) << "{" << NL;
  2641. unsigned int DisEqSpace = Space + 1;
  2642. Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
  2643. Out << Class;
  2644. const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
  2645. if (!DisequalClasses.empty()) {
  2646. Out << "," << NL;
  2647. Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
  2648. unsigned int DisEqClassSpace = DisEqSpace + 1;
  2649. Indent(Out, DisEqClassSpace, IsDot);
  2650. bool FirstDisEqClass = true;
  2651. for (const std::string &DisEqClass : DisequalClasses) {
  2652. if (FirstDisEqClass) {
  2653. FirstDisEqClass = false;
  2654. } else {
  2655. Out << ',' << NL;
  2656. Indent(Out, DisEqClassSpace, IsDot);
  2657. }
  2658. Out << DisEqClass;
  2659. }
  2660. Out << "]" << NL;
  2661. }
  2662. Indent(Out, Space, IsDot) << "}";
  2663. }
  2664. Out << NL;
  2665. --Space;
  2666. Indent(Out, Space, IsDot) << "]," << NL;
  2667. }