RangeConstraintManager.cpp 130 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436
  1. //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines RangeConstraintManager, a class that tracks simple
  10. // equality and inequality constraints on symbolic values of ProgramState.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "clang/Basic/JsonSupport.h"
  14. #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
  15. #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
  16. #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
  17. #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
  18. #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
  19. #include "llvm/ADT/FoldingSet.h"
  20. #include "llvm/ADT/ImmutableSet.h"
  21. #include "llvm/ADT/STLExtras.h"
  22. #include "llvm/ADT/SmallSet.h"
  23. #include "llvm/ADT/StringExtras.h"
  24. #include "llvm/Support/Compiler.h"
  25. #include "llvm/Support/raw_ostream.h"
  26. #include <algorithm>
  27. #include <iterator>
  28. #include <optional>
  29. using namespace clang;
  30. using namespace ento;
  31. // This class can be extended with other tables which will help to reason
  32. // about ranges more precisely.
  33. class OperatorRelationsTable {
  34. static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
  35. BO_GE < BO_EQ && BO_EQ < BO_NE,
  36. "This class relies on operators order. Rework it otherwise.");
  37. public:
  38. enum TriStateKind {
  39. False = 0,
  40. True,
  41. Unknown,
  42. };
  43. private:
  44. // CmpOpTable holds states which represent the corresponding range for
  45. // branching an exploded graph. We can reason about the branch if there is
  46. // a previously known fact of the existence of a comparison expression with
  47. // operands used in the current expression.
  48. // E.g. assuming (x < y) is true that means (x != y) is surely true.
  49. // if (x previous_operation y) // < | != | >
  50. // if (x operation y) // != | > | <
  51. // tristate // True | Unknown | False
  52. //
  53. // CmpOpTable represents next:
  54. // __|< |> |<=|>=|==|!=|UnknownX2|
  55. // < |1 |0 |* |0 |0 |* |1 |
  56. // > |0 |1 |0 |* |0 |* |1 |
  57. // <=|1 |0 |1 |* |1 |* |0 |
  58. // >=|0 |1 |* |1 |1 |* |0 |
  59. // ==|0 |0 |* |* |1 |0 |1 |
  60. // !=|1 |1 |* |* |0 |1 |0 |
  61. //
  62. // Columns stands for a previous operator.
  63. // Rows stands for a current operator.
  64. // Each row has exactly two `Unknown` cases.
  65. // UnknownX2 means that both `Unknown` previous operators are met in code,
  66. // and there is a special column for that, for example:
  67. // if (x >= y)
  68. // if (x != y)
  69. // if (x <= y)
  70. // False only
  71. static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
  72. const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
  73. // < > <= >= == != UnknownX2
  74. {True, False, Unknown, False, False, Unknown, True}, // <
  75. {False, True, False, Unknown, False, Unknown, True}, // >
  76. {True, False, True, Unknown, True, Unknown, False}, // <=
  77. {False, True, Unknown, True, True, Unknown, False}, // >=
  78. {False, False, Unknown, Unknown, True, False, True}, // ==
  79. {True, True, Unknown, Unknown, False, True, False}, // !=
  80. };
  81. static size_t getIndexFromOp(BinaryOperatorKind OP) {
  82. return static_cast<size_t>(OP - BO_LT);
  83. }
  84. public:
  85. constexpr size_t getCmpOpCount() const { return CmpOpCount; }
  86. static BinaryOperatorKind getOpFromIndex(size_t Index) {
  87. return static_cast<BinaryOperatorKind>(Index + BO_LT);
  88. }
  89. TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP,
  90. BinaryOperatorKind QueriedOP) const {
  91. return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
  92. }
  93. TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const {
  94. return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
  95. }
  96. };
  97. //===----------------------------------------------------------------------===//
  98. // RangeSet implementation
  99. //===----------------------------------------------------------------------===//
  100. RangeSet::ContainerType RangeSet::Factory::EmptySet{};
  101. RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) {
  102. ContainerType Result;
  103. Result.reserve(LHS.size() + RHS.size());
  104. std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
  105. std::back_inserter(Result));
  106. return makePersistent(std::move(Result));
  107. }
  108. RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) {
  109. ContainerType Result;
  110. Result.reserve(Original.size() + 1);
  111. const_iterator Lower = llvm::lower_bound(Original, Element);
  112. Result.insert(Result.end(), Original.begin(), Lower);
  113. Result.push_back(Element);
  114. Result.insert(Result.end(), Lower, Original.end());
  115. return makePersistent(std::move(Result));
  116. }
  117. RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
  118. return add(Original, Range(Point));
  119. }
  120. RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) {
  121. ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
  122. return makePersistent(std::move(Result));
  123. }
  124. RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) {
  125. ContainerType Result;
  126. Result.push_back(R);
  127. Result = unite(*Original.Impl, Result);
  128. return makePersistent(std::move(Result));
  129. }
  130. RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
  131. return unite(Original, Range(ValueFactory.getValue(Point)));
  132. }
  133. RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
  134. llvm::APSInt To) {
  135. return unite(Original,
  136. Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
  137. }
  138. template <typename T>
  139. void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
  140. std::swap(First, Second);
  141. std::swap(FirstEnd, SecondEnd);
  142. }
  143. RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
  144. const ContainerType &RHS) {
  145. if (LHS.empty())
  146. return RHS;
  147. if (RHS.empty())
  148. return LHS;
  149. using llvm::APSInt;
  150. using iterator = ContainerType::const_iterator;
  151. iterator First = LHS.begin();
  152. iterator FirstEnd = LHS.end();
  153. iterator Second = RHS.begin();
  154. iterator SecondEnd = RHS.end();
  155. APSIntType Ty = APSIntType(First->From());
  156. const APSInt Min = Ty.getMinValue();
  157. // Handle a corner case first when both range sets start from MIN.
  158. // This helps to avoid complicated conditions below. Specifically, this
  159. // particular check for `MIN` is not needed in the loop below every time
  160. // when we do `Second->From() - One` operation.
  161. if (Min == First->From() && Min == Second->From()) {
  162. if (First->To() > Second->To()) {
  163. // [ First ]--->
  164. // [ Second ]----->
  165. // MIN^
  166. // The Second range is entirely inside the First one.
  167. // Check if Second is the last in its RangeSet.
  168. if (++Second == SecondEnd)
  169. // [ First ]--[ First + 1 ]--->
  170. // [ Second ]--------------------->
  171. // MIN^
  172. // The Union is equal to First's RangeSet.
  173. return LHS;
  174. } else {
  175. // case 1: [ First ]----->
  176. // case 2: [ First ]--->
  177. // [ Second ]--->
  178. // MIN^
  179. // The First range is entirely inside or equal to the Second one.
  180. // Check if First is the last in its RangeSet.
  181. if (++First == FirstEnd)
  182. // [ First ]----------------------->
  183. // [ Second ]--[ Second + 1 ]---->
  184. // MIN^
  185. // The Union is equal to Second's RangeSet.
  186. return RHS;
  187. }
  188. }
  189. const APSInt One = Ty.getValue(1);
  190. ContainerType Result;
  191. // This is called when there are no ranges left in one of the ranges.
  192. // Append the rest of the ranges from another range set to the Result
  193. // and return with that.
  194. const auto AppendTheRest = [&Result](iterator I, iterator E) {
  195. Result.append(I, E);
  196. return Result;
  197. };
  198. while (true) {
  199. // We want to keep the following invariant at all times:
  200. // ---[ First ------>
  201. // -----[ Second --->
  202. if (First->From() > Second->From())
  203. swapIterators(First, FirstEnd, Second, SecondEnd);
  204. // The Union definitely starts with First->From().
  205. // ----------[ First ------>
  206. // ------------[ Second --->
  207. // ----------[ Union ------>
  208. // UnionStart^
  209. const llvm::APSInt &UnionStart = First->From();
  210. // Loop where the invariant holds.
  211. while (true) {
  212. // Skip all enclosed ranges.
  213. // ---[ First ]--->
  214. // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
  215. while (First->To() >= Second->To()) {
  216. // Check if Second is the last in its RangeSet.
  217. if (++Second == SecondEnd) {
  218. // Append the Union.
  219. // ---[ Union ]--->
  220. // -----[ Second ]----->
  221. // --------[ First ]--->
  222. // UnionEnd^
  223. Result.emplace_back(UnionStart, First->To());
  224. // ---[ Union ]----------------->
  225. // --------------[ First + 1]--->
  226. // Append all remaining ranges from the First's RangeSet.
  227. return AppendTheRest(++First, FirstEnd);
  228. }
  229. }
  230. // Check if First and Second are disjoint. It means that we find
  231. // the end of the Union. Exit the loop and append the Union.
  232. // ---[ First ]=------------->
  233. // ------------=[ Second ]--->
  234. // ----MinusOne^
  235. if (First->To() < Second->From() - One)
  236. break;
  237. // First is entirely inside the Union. Go next.
  238. // ---[ Union ----------->
  239. // ---- [ First ]-------->
  240. // -------[ Second ]----->
  241. // Check if First is the last in its RangeSet.
  242. if (++First == FirstEnd) {
  243. // Append the Union.
  244. // ---[ Union ]--->
  245. // -----[ First ]------->
  246. // --------[ Second ]--->
  247. // UnionEnd^
  248. Result.emplace_back(UnionStart, Second->To());
  249. // ---[ Union ]------------------>
  250. // --------------[ Second + 1]--->
  251. // Append all remaining ranges from the Second's RangeSet.
  252. return AppendTheRest(++Second, SecondEnd);
  253. }
  254. // We know that we are at one of the two cases:
  255. // case 1: --[ First ]--------->
  256. // case 2: ----[ First ]------->
  257. // --------[ Second ]---------->
  258. // In both cases First starts after Second->From().
  259. // Make sure that the loop invariant holds.
  260. swapIterators(First, FirstEnd, Second, SecondEnd);
  261. }
  262. // Here First and Second are disjoint.
  263. // Append the Union.
  264. // ---[ Union ]--------------->
  265. // -----------------[ Second ]--->
  266. // ------[ First ]--------------->
  267. // UnionEnd^
  268. Result.emplace_back(UnionStart, First->To());
  269. // Check if First is the last in its RangeSet.
  270. if (++First == FirstEnd)
  271. // ---[ Union ]--------------->
  272. // --------------[ Second ]--->
  273. // Append all remaining ranges from the Second's RangeSet.
  274. return AppendTheRest(Second, SecondEnd);
  275. }
  276. llvm_unreachable("Normally, we should not reach here");
  277. }
  278. RangeSet RangeSet::Factory::getRangeSet(Range From) {
  279. ContainerType Result;
  280. Result.push_back(From);
  281. return makePersistent(std::move(Result));
  282. }
  283. RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
  284. llvm::FoldingSetNodeID ID;
  285. void *InsertPos;
  286. From.Profile(ID);
  287. ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
  288. if (!Result) {
  289. // It is cheaper to fully construct the resulting range on stack
  290. // and move it to the freshly allocated buffer if we don't have
  291. // a set like this already.
  292. Result = construct(std::move(From));
  293. Cache.InsertNode(Result, InsertPos);
  294. }
  295. return Result;
  296. }
  297. RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
  298. void *Buffer = Arena.Allocate();
  299. return new (Buffer) ContainerType(std::move(From));
  300. }
  301. const llvm::APSInt &RangeSet::getMinValue() const {
  302. assert(!isEmpty());
  303. return begin()->From();
  304. }
  305. const llvm::APSInt &RangeSet::getMaxValue() const {
  306. assert(!isEmpty());
  307. return std::prev(end())->To();
  308. }
  309. bool clang::ento::RangeSet::isUnsigned() const {
  310. assert(!isEmpty());
  311. return begin()->From().isUnsigned();
  312. }
  313. uint32_t clang::ento::RangeSet::getBitWidth() const {
  314. assert(!isEmpty());
  315. return begin()->From().getBitWidth();
  316. }
  317. APSIntType clang::ento::RangeSet::getAPSIntType() const {
  318. assert(!isEmpty());
  319. return APSIntType(begin()->From());
  320. }
  321. bool RangeSet::containsImpl(llvm::APSInt &Point) const {
  322. if (isEmpty() || !pin(Point))
  323. return false;
  324. Range Dummy(Point);
  325. const_iterator It = llvm::upper_bound(*this, Dummy);
  326. if (It == begin())
  327. return false;
  328. return std::prev(It)->Includes(Point);
  329. }
  330. bool RangeSet::pin(llvm::APSInt &Point) const {
  331. APSIntType Type(getMinValue());
  332. if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
  333. return false;
  334. Type.apply(Point);
  335. return true;
  336. }
  337. bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
  338. // This function has nine cases, the cartesian product of range-testing
  339. // both the upper and lower bounds against the symbol's type.
  340. // Each case requires a different pinning operation.
  341. // The function returns false if the described range is entirely outside
  342. // the range of values for the associated symbol.
  343. APSIntType Type(getMinValue());
  344. APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
  345. APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);
  346. switch (LowerTest) {
  347. case APSIntType::RTR_Below:
  348. switch (UpperTest) {
  349. case APSIntType::RTR_Below:
  350. // The entire range is outside the symbol's set of possible values.
  351. // If this is a conventionally-ordered range, the state is infeasible.
  352. if (Lower <= Upper)
  353. return false;
  354. // However, if the range wraps around, it spans all possible values.
  355. Lower = Type.getMinValue();
  356. Upper = Type.getMaxValue();
  357. break;
  358. case APSIntType::RTR_Within:
  359. // The range starts below what's possible but ends within it. Pin.
  360. Lower = Type.getMinValue();
  361. Type.apply(Upper);
  362. break;
  363. case APSIntType::RTR_Above:
  364. // The range spans all possible values for the symbol. Pin.
  365. Lower = Type.getMinValue();
  366. Upper = Type.getMaxValue();
  367. break;
  368. }
  369. break;
  370. case APSIntType::RTR_Within:
  371. switch (UpperTest) {
  372. case APSIntType::RTR_Below:
  373. // The range wraps around, but all lower values are not possible.
  374. Type.apply(Lower);
  375. Upper = Type.getMaxValue();
  376. break;
  377. case APSIntType::RTR_Within:
  378. // The range may or may not wrap around, but both limits are valid.
  379. Type.apply(Lower);
  380. Type.apply(Upper);
  381. break;
  382. case APSIntType::RTR_Above:
  383. // The range starts within what's possible but ends above it. Pin.
  384. Type.apply(Lower);
  385. Upper = Type.getMaxValue();
  386. break;
  387. }
  388. break;
  389. case APSIntType::RTR_Above:
  390. switch (UpperTest) {
  391. case APSIntType::RTR_Below:
  392. // The range wraps but is outside the symbol's set of possible values.
  393. return false;
  394. case APSIntType::RTR_Within:
  395. // The range starts above what's possible but ends within it (wrap).
  396. Lower = Type.getMinValue();
  397. Type.apply(Upper);
  398. break;
  399. case APSIntType::RTR_Above:
  400. // The entire range is outside the symbol's set of possible values.
  401. // If this is a conventionally-ordered range, the state is infeasible.
  402. if (Lower <= Upper)
  403. return false;
  404. // However, if the range wraps around, it spans all possible values.
  405. Lower = Type.getMinValue();
  406. Upper = Type.getMaxValue();
  407. break;
  408. }
  409. break;
  410. }
  411. return true;
  412. }
  413. RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower,
  414. llvm::APSInt Upper) {
  415. if (What.isEmpty() || !What.pin(Lower, Upper))
  416. return getEmptySet();
  417. ContainerType DummyContainer;
  418. if (Lower <= Upper) {
  419. // [Lower, Upper] is a regular range.
  420. //
  421. // Shortcut: check that there is even a possibility of the intersection
  422. // by checking the two following situations:
  423. //
  424. // <---[ What ]---[------]------>
  425. // Lower Upper
  426. // -or-
  427. // <----[------]----[ What ]---->
  428. // Lower Upper
  429. if (What.getMaxValue() < Lower || Upper < What.getMinValue())
  430. return getEmptySet();
  431. DummyContainer.push_back(
  432. Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
  433. } else {
  434. // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
  435. //
  436. // Shortcut: check that there is even a possibility of the intersection
  437. // by checking the following situation:
  438. //
  439. // <------]---[ What ]---[------>
  440. // Upper Lower
  441. if (What.getMaxValue() < Lower && Upper < What.getMinValue())
  442. return getEmptySet();
  443. DummyContainer.push_back(
  444. Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
  445. DummyContainer.push_back(
  446. Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
  447. }
  448. return intersect(*What.Impl, DummyContainer);
  449. }
  450. RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
  451. const RangeSet::ContainerType &RHS) {
  452. ContainerType Result;
  453. Result.reserve(std::max(LHS.size(), RHS.size()));
  454. const_iterator First = LHS.begin(), Second = RHS.begin(),
  455. FirstEnd = LHS.end(), SecondEnd = RHS.end();
  456. // If we ran out of ranges in one set, but not in the other,
  457. // it means that those elements are definitely not in the
  458. // intersection.
  459. while (First != FirstEnd && Second != SecondEnd) {
  460. // We want to keep the following invariant at all times:
  461. //
  462. // ----[ First ---------------------->
  463. // --------[ Second ----------------->
  464. if (Second->From() < First->From())
  465. swapIterators(First, FirstEnd, Second, SecondEnd);
  466. // Loop where the invariant holds:
  467. do {
  468. // Check for the following situation:
  469. //
  470. // ----[ First ]--------------------->
  471. // ---------------[ Second ]--------->
  472. //
  473. // which means that...
  474. if (Second->From() > First->To()) {
  475. // ...First is not in the intersection.
  476. //
  477. // We should move on to the next range after First and break out of the
  478. // loop because the invariant might not be true.
  479. ++First;
  480. break;
  481. }
  482. // We have a guaranteed intersection at this point!
  483. // And this is the current situation:
  484. //
  485. // ----[ First ]----------------->
  486. // -------[ Second ------------------>
  487. //
  488. // Additionally, it definitely starts with Second->From().
  489. const llvm::APSInt &IntersectionStart = Second->From();
  490. // It is important to know which of the two ranges' ends
  491. // is greater. That "longer" range might have some other
  492. // intersections, while the "shorter" range might not.
  493. if (Second->To() > First->To()) {
  494. // Here we make a decision to keep First as the "longer"
  495. // range.
  496. swapIterators(First, FirstEnd, Second, SecondEnd);
  497. }
  498. // At this point, we have the following situation:
  499. //
  500. // ---- First ]-------------------->
  501. // ---- Second ]--[ Second+1 ---------->
  502. //
  503. // We don't know the relationship between First->From and
  504. // Second->From and we don't know whether Second+1 intersects
  505. // with First.
  506. //
  507. // However, we know that [IntersectionStart, Second->To] is
  508. // a part of the intersection...
  509. Result.push_back(Range(IntersectionStart, Second->To()));
  510. ++Second;
  511. // ...and that the invariant will hold for a valid Second+1
  512. // because First->From <= Second->To < (Second+1)->From.
  513. } while (Second != SecondEnd);
  514. }
  515. if (Result.empty())
  516. return getEmptySet();
  517. return makePersistent(std::move(Result));
  518. }
  519. RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) {
  520. // Shortcut: let's see if the intersection is even possible.
  521. if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
  522. RHS.getMaxValue() < LHS.getMinValue())
  523. return getEmptySet();
  524. return intersect(*LHS.Impl, *RHS.Impl);
  525. }
  526. RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) {
  527. if (LHS.containsImpl(Point))
  528. return getRangeSet(ValueFactory.getValue(Point));
  529. return getEmptySet();
  530. }
  531. RangeSet RangeSet::Factory::negate(RangeSet What) {
  532. if (What.isEmpty())
  533. return getEmptySet();
  534. const llvm::APSInt SampleValue = What.getMinValue();
  535. const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
  536. const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);
  537. ContainerType Result;
  538. Result.reserve(What.size() + (SampleValue == MIN));
  539. // Handle a special case for MIN value.
  540. const_iterator It = What.begin();
  541. const_iterator End = What.end();
  542. const llvm::APSInt &From = It->From();
  543. const llvm::APSInt &To = It->To();
  544. if (From == MIN) {
  545. // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
  546. if (To == MAX) {
  547. return What;
  548. }
  549. const_iterator Last = std::prev(End);
  550. // Try to find and unite the following ranges:
  551. // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
  552. if (Last->To() == MAX) {
  553. // It means that in the original range we have ranges
  554. // [MIN, A], ... , [B, MAX]
  555. // And the result should be [MIN, -B], ..., [-A, MAX]
  556. Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
  557. // We already negated Last, so we can skip it.
  558. End = Last;
  559. } else {
  560. // Add a separate range for the lowest value.
  561. Result.emplace_back(MIN, MIN);
  562. }
  563. // Skip adding the second range in case when [From, To] are [MIN, MIN].
  564. if (To != MIN) {
  565. Result.emplace_back(ValueFactory.getValue(-To), MAX);
  566. }
  567. // Skip the first range in the loop.
  568. ++It;
  569. }
  570. // Negate all other ranges.
  571. for (; It != End; ++It) {
  572. // Negate int values.
  573. const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
  574. const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());
  575. // Add a negated range.
  576. Result.emplace_back(NewFrom, NewTo);
  577. }
  578. llvm::sort(Result);
  579. return makePersistent(std::move(Result));
  580. }
  581. // Convert range set to the given integral type using truncation and promotion.
  582. // This works similar to APSIntType::apply function but for the range set.
  583. RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) {
  584. // Set is empty or NOOP (aka cast to the same type).
  585. if (What.isEmpty() || What.getAPSIntType() == Ty)
  586. return What;
  587. const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
  588. const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
  589. const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
  590. if (IsTruncation)
  591. return makePersistent(truncateTo(What, Ty));
  592. // Here we handle 2 cases:
  593. // - IsConversion && !IsPromotion.
  594. // In this case we handle changing a sign with same bitwidth: char -> uchar,
  595. // uint -> int. Here we convert negatives to positives and positives which
  596. // is out of range to negatives. We use convertTo function for that.
  597. // - IsConversion && IsPromotion && !What.isUnsigned().
  598. // In this case we handle changing a sign from signeds to unsigneds with
  599. // higher bitwidth: char -> uint, int-> uint64. The point is that we also
  600. // need convert negatives to positives and use convertTo function as well.
  601. // For example, we don't need such a convertion when converting unsigned to
  602. // signed with higher bitwidth, because all the values of unsigned is valid
  603. // for the such signed.
  604. if (IsConversion && (!IsPromotion || !What.isUnsigned()))
  605. return makePersistent(convertTo(What, Ty));
  606. assert(IsPromotion && "Only promotion operation from unsigneds left.");
  607. return makePersistent(promoteTo(What, Ty));
  608. }
  609. RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) {
  610. assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
  611. return castTo(What, ValueFactory.getAPSIntType(T));
  612. }
  613. RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
  614. APSIntType Ty) {
  615. using llvm::APInt;
  616. using llvm::APSInt;
  617. ContainerType Result;
  618. ContainerType Dummy;
  619. // CastRangeSize is an amount of all possible values of cast type.
  620. // Example: `char` has 256 values; `short` has 65536 values.
  621. // But in fact we use `amount of values` - 1, because
  622. // we can't keep `amount of values of UINT64` inside uint64_t.
  623. // E.g. 256 is an amount of all possible values of `char` and we can't keep
  624. // it inside `char`.
  625. // And it's OK, it's enough to do correct calculations.
  626. uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
  627. for (const Range &R : What) {
  628. // Get bounds of the given range.
  629. APSInt FromInt = R.From();
  630. APSInt ToInt = R.To();
  631. // CurrentRangeSize is an amount of all possible values of the current
  632. // range minus one.
  633. uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
  634. // This is an optimization for a specific case when this Range covers
  635. // the whole range of the target type.
  636. Dummy.clear();
  637. if (CurrentRangeSize >= CastRangeSize) {
  638. Dummy.emplace_back(ValueFactory.getMinValue(Ty),
  639. ValueFactory.getMaxValue(Ty));
  640. Result = std::move(Dummy);
  641. break;
  642. }
  643. // Cast the bounds.
  644. Ty.apply(FromInt);
  645. Ty.apply(ToInt);
  646. const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
  647. const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
  648. if (FromInt > ToInt) {
  649. Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
  650. Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
  651. } else
  652. Dummy.emplace_back(PersistentFrom, PersistentTo);
  653. // Every range retrieved after truncation potentialy has garbage values.
  654. // So, we have to unite every next range with the previouses.
  655. Result = unite(Result, Dummy);
  656. }
  657. return Result;
  658. }
  659. // Divide the convertion into two phases (presented as loops here).
  660. // First phase(loop) works when casted values go in ascending order.
  661. // E.g. char{1,3,5,127} -> uint{1,3,5,127}
  662. // Interrupt the first phase and go to second one when casted values start
  663. // go in descending order. That means that we crossed over the middle of
  664. // the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
  665. // For instance:
  666. // 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
  667. // Here we put {1,3,5} to one array and {-128, -1} to another
  668. // 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,3}
  669. // Here we put {128,129,255} to one array and {0,1,3} to another.
  670. // After that we unite both arrays.
  671. // NOTE: We don't just concatenate the arrays, because they may have
  672. // adjacent ranges, e.g.:
  673. // 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
  674. // unite -> uchar(0, 255)
  675. // 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
  676. // unite -> uchar(-2, 1)
  677. RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
  678. APSIntType Ty) {
  679. using llvm::APInt;
  680. using llvm::APSInt;
  681. using Bounds = std::pair<const APSInt &, const APSInt &>;
  682. ContainerType AscendArray;
  683. ContainerType DescendArray;
  684. auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
  685. // Get bounds of the given range.
  686. APSInt FromInt = R.From();
  687. APSInt ToInt = R.To();
  688. // Cast the bounds.
  689. Ty.apply(FromInt);
  690. Ty.apply(ToInt);
  691. return {VF.getValue(FromInt), VF.getValue(ToInt)};
  692. };
  693. // Phase 1. Fill the first array.
  694. APSInt LastConvertedInt = Ty.getMinValue();
  695. const auto *It = What.begin();
  696. const auto *E = What.end();
  697. while (It != E) {
  698. Bounds NewBounds = CastRange(*(It++));
  699. // If values stop going acsending order, go to the second phase(loop).
  700. if (NewBounds.first < LastConvertedInt) {
  701. DescendArray.emplace_back(NewBounds.first, NewBounds.second);
  702. break;
  703. }
  704. // If the range contains a midpoint, then split the range.
  705. // E.g. char(-5, 5) -> uchar(251, 5)
  706. // Here we shall add a range (251, 255) to the first array and (0, 5) to the
  707. // second one.
  708. if (NewBounds.first > NewBounds.second) {
  709. DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
  710. AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
  711. } else
  712. // Values are going acsending order.
  713. AscendArray.emplace_back(NewBounds.first, NewBounds.second);
  714. LastConvertedInt = NewBounds.first;
  715. }
  716. // Phase 2. Fill the second array.
  717. while (It != E) {
  718. Bounds NewBounds = CastRange(*(It++));
  719. DescendArray.emplace_back(NewBounds.first, NewBounds.second);
  720. }
  721. // Unite both arrays.
  722. return unite(AscendArray, DescendArray);
  723. }
  724. /// Promotion from unsigneds to signeds/unsigneds left.
  725. RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
  726. APSIntType Ty) {
  727. ContainerType Result;
  728. // We definitely know the size of the result set.
  729. Result.reserve(What.size());
  730. // Each unsigned value fits every larger type without any changes,
  731. // whether the larger type is signed or unsigned. So just promote and push
  732. // back each range one by one.
  733. for (const Range &R : What) {
  734. // Get bounds of the given range.
  735. llvm::APSInt FromInt = R.From();
  736. llvm::APSInt ToInt = R.To();
  737. // Cast the bounds.
  738. Ty.apply(FromInt);
  739. Ty.apply(ToInt);
  740. Result.emplace_back(ValueFactory.getValue(FromInt),
  741. ValueFactory.getValue(ToInt));
  742. }
  743. return Result;
  744. }
  745. RangeSet RangeSet::Factory::deletePoint(RangeSet From,
  746. const llvm::APSInt &Point) {
  747. if (!From.contains(Point))
  748. return From;
  749. llvm::APSInt Upper = Point;
  750. llvm::APSInt Lower = Point;
  751. ++Upper;
  752. --Lower;
  753. // Notice that the lower bound is greater than the upper bound.
  754. return intersect(From, Upper, Lower);
  755. }
  756. LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
  757. OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
  758. }
  759. LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
  760. LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
  761. OS << "{ ";
  762. llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
  763. OS << " }";
  764. }
  765. LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
  766. REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)
  767. namespace {
  768. class EquivalenceClass;
  769. } // end anonymous namespace
  770. REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
  771. REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
  772. REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)
  773. REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
  774. REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
  775. namespace {
  776. /// This class encapsulates a set of symbols equal to each other.
  777. ///
  778. /// The main idea of the approach requiring such classes is in narrowing
  779. /// and sharing constraints between symbols within the class. Also we can
  780. /// conclude that there is no practical need in storing constraints for
  781. /// every member of the class separately.
  782. ///
  783. /// Main terminology:
  784. ///
  785. /// * "Equivalence class" is an object of this class, which can be efficiently
  786. /// compared to other classes. It represents the whole class without
  787. /// storing the actual in it. The members of the class however can be
  788. /// retrieved from the state.
  789. ///
  790. /// * "Class members" are the symbols corresponding to the class. This means
  791. /// that A == B for every member symbols A and B from the class. Members of
  792. /// each class are stored in the state.
  793. ///
  794. /// * "Trivial class" is a class that has and ever had only one same symbol.
  795. ///
  796. /// * "Merge operation" merges two classes into one. It is the main operation
  797. /// to produce non-trivial classes.
  798. /// If, at some point, we can assume that two symbols from two distinct
  799. /// classes are equal, we can merge these classes.
  800. class EquivalenceClass : public llvm::FoldingSetNode {
  801. public:
  802. /// Find equivalence class for the given symbol in the given state.
  803. [[nodiscard]] static inline EquivalenceClass find(ProgramStateRef State,
  804. SymbolRef Sym);
  805. /// Merge classes for the given symbols and return a new state.
  806. [[nodiscard]] static inline ProgramStateRef merge(RangeSet::Factory &F,
  807. ProgramStateRef State,
  808. SymbolRef First,
  809. SymbolRef Second);
  810. // Merge this class with the given class and return a new state.
  811. [[nodiscard]] inline ProgramStateRef
  812. merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);
  813. /// Return a set of class members for the given state.
  814. [[nodiscard]] inline SymbolSet getClassMembers(ProgramStateRef State) const;
  815. /// Return true if the current class is trivial in the given state.
  816. /// A class is trivial if and only if there is not any member relations stored
  817. /// to it in State/ClassMembers.
  818. /// An equivalence class with one member might seem as it does not hold any
  819. /// meaningful information, i.e. that is a tautology. However, during the
  820. /// removal of dead symbols we do not remove classes with one member for
  821. /// resource and performance reasons. Consequently, a class with one member is
  822. /// not necessarily trivial. It could happen that we have a class with two
  823. /// members and then during the removal of dead symbols we remove one of its
  824. /// members. In this case, the class is still non-trivial (it still has the
  825. /// mappings in ClassMembers), even though it has only one member.
  826. [[nodiscard]] inline bool isTrivial(ProgramStateRef State) const;
  827. /// Return true if the current class is trivial and its only member is dead.
  828. [[nodiscard]] inline bool isTriviallyDead(ProgramStateRef State,
  829. SymbolReaper &Reaper) const;
  830. [[nodiscard]] static inline ProgramStateRef
  831. markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
  832. SymbolRef Second);
  833. [[nodiscard]] static inline ProgramStateRef
  834. markDisequal(RangeSet::Factory &F, ProgramStateRef State,
  835. EquivalenceClass First, EquivalenceClass Second);
  836. [[nodiscard]] inline ProgramStateRef
  837. markDisequal(RangeSet::Factory &F, ProgramStateRef State,
  838. EquivalenceClass Other) const;
  839. [[nodiscard]] static inline ClassSet getDisequalClasses(ProgramStateRef State,
  840. SymbolRef Sym);
  841. [[nodiscard]] inline ClassSet getDisequalClasses(ProgramStateRef State) const;
  842. [[nodiscard]] inline ClassSet
  843. getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;
  844. [[nodiscard]] static inline std::optional<bool>
  845. areEqual(ProgramStateRef State, EquivalenceClass First,
  846. EquivalenceClass Second);
  847. [[nodiscard]] static inline std::optional<bool>
  848. areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
  849. /// Remove one member from the class.
  850. [[nodiscard]] ProgramStateRef removeMember(ProgramStateRef State,
  851. const SymbolRef Old);
  852. /// Iterate over all symbols and try to simplify them.
  853. [[nodiscard]] static inline ProgramStateRef simplify(SValBuilder &SVB,
  854. RangeSet::Factory &F,
  855. ProgramStateRef State,
  856. EquivalenceClass Class);
  857. void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
  858. LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
  859. dumpToStream(State, llvm::errs());
  860. }
  861. /// Check equivalence data for consistency.
  862. [[nodiscard]] LLVM_ATTRIBUTE_UNUSED static bool
  863. isClassDataConsistent(ProgramStateRef State);
  864. [[nodiscard]] QualType getType() const {
  865. return getRepresentativeSymbol()->getType();
  866. }
  867. EquivalenceClass() = delete;
  868. EquivalenceClass(const EquivalenceClass &) = default;
  869. EquivalenceClass &operator=(const EquivalenceClass &) = delete;
  870. EquivalenceClass(EquivalenceClass &&) = default;
  871. EquivalenceClass &operator=(EquivalenceClass &&) = delete;
  872. bool operator==(const EquivalenceClass &Other) const {
  873. return ID == Other.ID;
  874. }
  875. bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
  876. bool operator!=(const EquivalenceClass &Other) const {
  877. return !operator==(Other);
  878. }
  879. static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
  880. ID.AddInteger(CID);
  881. }
  882. void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }
  883. private:
  884. /* implicit */ EquivalenceClass(SymbolRef Sym)
  885. : ID(reinterpret_cast<uintptr_t>(Sym)) {}
  886. /// This function is intended to be used ONLY within the class.
  887. /// The fact that ID is a pointer to a symbol is an implementation detail
  888. /// and should stay that way.
  889. /// In the current implementation, we use it to retrieve the only member
  890. /// of the trivial class.
  891. SymbolRef getRepresentativeSymbol() const {
  892. return reinterpret_cast<SymbolRef>(ID);
  893. }
  894. static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);
  895. inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
  896. SymbolSet Members, EquivalenceClass Other,
  897. SymbolSet OtherMembers);
  898. static inline bool
  899. addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
  900. RangeSet::Factory &F, ProgramStateRef State,
  901. EquivalenceClass First, EquivalenceClass Second);
  902. /// This is a unique identifier of the class.
  903. uintptr_t ID;
  904. };
  905. //===----------------------------------------------------------------------===//
  906. // Constraint functions
  907. //===----------------------------------------------------------------------===//
  908. [[nodiscard]] LLVM_ATTRIBUTE_UNUSED bool
  909. areFeasible(ConstraintRangeTy Constraints) {
  910. return llvm::none_of(
  911. Constraints,
  912. [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
  913. return ClassConstraint.second.isEmpty();
  914. });
  915. }
  916. [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
  917. EquivalenceClass Class) {
  918. return State->get<ConstraintRange>(Class);
  919. }
  920. [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
  921. SymbolRef Sym) {
  922. return getConstraint(State, EquivalenceClass::find(State, Sym));
  923. }
  924. [[nodiscard]] ProgramStateRef setConstraint(ProgramStateRef State,
  925. EquivalenceClass Class,
  926. RangeSet Constraint) {
  927. return State->set<ConstraintRange>(Class, Constraint);
  928. }
  929. [[nodiscard]] ProgramStateRef setConstraints(ProgramStateRef State,
  930. ConstraintRangeTy Constraints) {
  931. return State->set<ConstraintRange>(Constraints);
  932. }
  933. //===----------------------------------------------------------------------===//
  934. // Equality/diseqiality abstraction
  935. //===----------------------------------------------------------------------===//
  936. /// A small helper function for detecting symbolic (dis)equality.
  937. ///
  938. /// Equality check can have different forms (like a == b or a - b) and this
  939. /// class encapsulates those away if the only thing the user wants to check -
  940. /// whether it's equality/diseqiality or not.
  941. ///
  942. /// \returns true if assuming this Sym to be true means equality of operands
  943. /// false if it means disequality of operands
  944. /// None otherwise
  945. std::optional<bool> meansEquality(const SymSymExpr *Sym) {
  946. switch (Sym->getOpcode()) {
  947. case BO_Sub:
  948. // This case is: A - B != 0 -> disequality check.
  949. return false;
  950. case BO_EQ:
  951. // This case is: A == B != 0 -> equality check.
  952. return true;
  953. case BO_NE:
  954. // This case is: A != B != 0 -> diseqiality check.
  955. return false;
  956. default:
  957. return std::nullopt;
  958. }
  959. }
  960. //===----------------------------------------------------------------------===//
  961. // Intersection functions
  962. //===----------------------------------------------------------------------===//
  963. template <class SecondTy, class... RestTy>
  964. [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
  965. SecondTy Second, RestTy... Tail);
  966. template <class... RangeTy> struct IntersectionTraits;
  967. template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
  968. // Found RangeSet, no need to check any further
  969. using Type = RangeSet;
  970. };
  971. template <> struct IntersectionTraits<> {
  972. // We ran out of types, and we didn't find any RangeSet, so the result should
  973. // be optional.
  974. using Type = std::optional<RangeSet>;
  975. };
  976. template <class OptionalOrPointer, class... TailTy>
  977. struct IntersectionTraits<OptionalOrPointer, TailTy...> {
  978. // If current type is Optional or a raw pointer, we should keep looking.
  979. using Type = typename IntersectionTraits<TailTy...>::Type;
  980. };
  981. template <class EndTy>
  982. [[nodiscard]] inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
  983. // If the list contains only RangeSet or std::optional<RangeSet>, simply
  984. // return that range set.
  985. return End;
  986. }
  987. [[nodiscard]] LLVM_ATTRIBUTE_UNUSED inline std::optional<RangeSet>
  988. intersect(RangeSet::Factory &F, const RangeSet *End) {
  989. // This is an extraneous conversion from a raw pointer into
  990. // std::optional<RangeSet>
  991. if (End) {
  992. return *End;
  993. }
  994. return std::nullopt;
  995. }
  996. template <class... RestTy>
  997. [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
  998. RangeSet Second, RestTy... Tail) {
  999. // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
  1000. // of the function and can be sure that the result is RangeSet.
  1001. return intersect(F, F.intersect(Head, Second), Tail...);
  1002. }
  1003. template <class SecondTy, class... RestTy>
  1004. [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
  1005. SecondTy Second, RestTy... Tail) {
  1006. if (Second) {
  1007. // Here we call the <RangeSet,RangeSet,...> version of the function...
  1008. return intersect(F, Head, *Second, Tail...);
  1009. }
  1010. // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
  1011. // means that the result is definitely RangeSet.
  1012. return intersect(F, Head, Tail...);
  1013. }
  1014. /// Main generic intersect function.
  1015. /// It intersects all of the given range sets. If some of the given arguments
  1016. /// don't hold a range set (nullptr or std::nullopt), the function will skip
  1017. /// them.
  1018. ///
  1019. /// Available representations for the arguments are:
  1020. /// * RangeSet
  1021. /// * std::optional<RangeSet>
  1022. /// * RangeSet *
  1023. /// Pointer to a RangeSet is automatically assumed to be nullable and will get
  1024. /// checked as well as the optional version. If this behaviour is undesired,
  1025. /// please dereference the pointer in the call.
  1026. ///
  1027. /// Return type depends on the arguments' types. If we can be sure in compile
  1028. /// time that there will be a range set as a result, the returning type is
  1029. /// simply RangeSet, in other cases we have to back off to
  1030. /// std::optional<RangeSet>.
  1031. ///
  1032. /// Please, prefer optional range sets to raw pointers. If the last argument is
  1033. /// a raw pointer and all previous arguments are std::nullopt, it will cost one
  1034. /// additional check to convert RangeSet * into std::optional<RangeSet>.
  1035. template <class HeadTy, class SecondTy, class... RestTy>
  1036. [[nodiscard]] inline
  1037. typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
  1038. intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
  1039. RestTy... Tail) {
  1040. if (Head) {
  1041. return intersect(F, *Head, Second, Tail...);
  1042. }
  1043. return intersect(F, Second, Tail...);
  1044. }
  1045. //===----------------------------------------------------------------------===//
  1046. // Symbolic reasoning logic
  1047. //===----------------------------------------------------------------------===//
  1048. /// A little component aggregating all of the reasoning we have about
  1049. /// the ranges of symbolic expressions.
  1050. ///
  1051. /// Even when we don't know the exact values of the operands, we still
  1052. /// can get a pretty good estimate of the result's range.
  1053. class SymbolicRangeInferrer
  1054. : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
  1055. public:
  1056. template <class SourceType>
  1057. static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
  1058. SourceType Origin) {
  1059. SymbolicRangeInferrer Inferrer(F, State);
  1060. return Inferrer.infer(Origin);
  1061. }
  1062. RangeSet VisitSymExpr(SymbolRef Sym) {
  1063. if (std::optional<RangeSet> RS = getRangeForNegatedSym(Sym))
  1064. return *RS;
  1065. // If we've reached this line, the actual type of the symbolic
  1066. // expression is not supported for advanced inference.
  1067. // In this case, we simply backoff to the default "let's simply
  1068. // infer the range from the expression's type".
  1069. return infer(Sym->getType());
  1070. }
  1071. RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) {
  1072. if (std::optional<RangeSet> RS = getRangeForNegatedUnarySym(USE))
  1073. return *RS;
  1074. return infer(USE->getType());
  1075. }
  1076. RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
  1077. return VisitBinaryOperator(Sym);
  1078. }
  1079. RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
  1080. return VisitBinaryOperator(Sym);
  1081. }
  1082. RangeSet VisitSymSymExpr(const SymSymExpr *SSE) {
  1083. return intersect(
  1084. RangeFactory,
  1085. // If Sym is a difference of symbols A - B, then maybe we have range
  1086. // set stored for B - A.
  1087. //
  1088. // If we have range set stored for both A - B and B - A then
  1089. // calculate the effective range set by intersecting the range set
  1090. // for A - B and the negated range set of B - A.
  1091. getRangeForNegatedSymSym(SSE),
  1092. // If Sym is a comparison expression (except <=>),
  1093. // find any other comparisons with the same operands.
  1094. // See function description.
  1095. getRangeForComparisonSymbol(SSE),
  1096. // If Sym is (dis)equality, we might have some information
  1097. // on that in our equality classes data structure.
  1098. getRangeForEqualities(SSE),
  1099. // And we should always check what we can get from the operands.
  1100. VisitBinaryOperator(SSE));
  1101. }
  1102. private:
  1103. SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
  1104. : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
  1105. /// Infer range information from the given integer constant.
  1106. ///
  1107. /// It's not a real "inference", but is here for operating with
  1108. /// sub-expressions in a more polymorphic manner.
  1109. RangeSet inferAs(const llvm::APSInt &Val, QualType) {
  1110. return {RangeFactory, Val};
  1111. }
  1112. /// Infer range information from symbol in the context of the given type.
  1113. RangeSet inferAs(SymbolRef Sym, QualType DestType) {
  1114. QualType ActualType = Sym->getType();
  1115. // Check that we can reason about the symbol at all.
  1116. if (ActualType->isIntegralOrEnumerationType() ||
  1117. Loc::isLocType(ActualType)) {
  1118. return infer(Sym);
  1119. }
  1120. // Otherwise, let's simply infer from the destination type.
  1121. // We couldn't figure out nothing else about that expression.
  1122. return infer(DestType);
  1123. }
  1124. RangeSet infer(SymbolRef Sym) {
  1125. return intersect(RangeFactory,
  1126. // Of course, we should take the constraint directly
  1127. // associated with this symbol into consideration.
  1128. getConstraint(State, Sym),
  1129. // Apart from the Sym itself, we can infer quite a lot if
  1130. // we look into subexpressions of Sym.
  1131. Visit(Sym));
  1132. }
  1133. RangeSet infer(EquivalenceClass Class) {
  1134. if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
  1135. return *AssociatedConstraint;
  1136. return infer(Class.getType());
  1137. }
  1138. /// Infer range information solely from the type.
  1139. RangeSet infer(QualType T) {
  1140. // Lazily generate a new RangeSet representing all possible values for the
  1141. // given symbol type.
  1142. RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
  1143. ValueFactory.getMaxValue(T));
  1144. // References are known to be non-zero.
  1145. if (T->isReferenceType())
  1146. return assumeNonZero(Result, T);
  1147. return Result;
  1148. }
  1149. template <class BinarySymExprTy>
  1150. RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
  1151. // TODO #1: VisitBinaryOperator implementation might not make a good
  1152. // use of the inferred ranges. In this case, we might be calculating
  1153. // everything for nothing. This being said, we should introduce some
  1154. // sort of laziness mechanism here.
  1155. //
  1156. // TODO #2: We didn't go into the nested expressions before, so it
  1157. // might cause us spending much more time doing the inference.
  1158. // This can be a problem for deeply nested expressions that are
  1159. // involved in conditions and get tested continuously. We definitely
  1160. // need to address this issue and introduce some sort of caching
  1161. // in here.
  1162. QualType ResultType = Sym->getType();
  1163. return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
  1164. Sym->getOpcode(),
  1165. inferAs(Sym->getRHS(), ResultType), ResultType);
  1166. }
  1167. RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
  1168. RangeSet RHS, QualType T);
  1169. //===----------------------------------------------------------------------===//
  1170. // Ranges and operators
  1171. //===----------------------------------------------------------------------===//
  1172. /// Return a rough approximation of the given range set.
  1173. ///
  1174. /// For the range set:
  1175. /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
  1176. /// it will return the range [x_0, y_N].
  1177. static Range fillGaps(RangeSet Origin) {
  1178. assert(!Origin.isEmpty());
  1179. return {Origin.getMinValue(), Origin.getMaxValue()};
  1180. }
  1181. /// Try to convert given range into the given type.
  1182. ///
  1183. /// It will return std::nullopt only when the trivial conversion is possible.
  1184. std::optional<Range> convert(const Range &Origin, APSIntType To) {
  1185. if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
  1186. To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
  1187. return std::nullopt;
  1188. }
  1189. return Range(ValueFactory.Convert(To, Origin.From()),
  1190. ValueFactory.Convert(To, Origin.To()));
  1191. }
  1192. template <BinaryOperator::Opcode Op>
  1193. RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
  1194. assert(!LHS.isEmpty() && !RHS.isEmpty());
  1195. Range CoarseLHS = fillGaps(LHS);
  1196. Range CoarseRHS = fillGaps(RHS);
  1197. APSIntType ResultType = ValueFactory.getAPSIntType(T);
  1198. // We need to convert ranges to the resulting type, so we can compare values
  1199. // and combine them in a meaningful (in terms of the given operation) way.
  1200. auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
  1201. auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
  1202. // It is hard to reason about ranges when conversion changes
  1203. // borders of the ranges.
  1204. if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
  1205. return infer(T);
  1206. }
  1207. return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
  1208. }
  1209. template <BinaryOperator::Opcode Op>
  1210. RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
  1211. return infer(T);
  1212. }
  1213. /// Return a symmetrical range for the given range and type.
  1214. ///
  1215. /// If T is signed, return the smallest range [-x..x] that covers the original
  1216. /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
  1217. /// exist due to original range covering min(T)).
  1218. ///
  1219. /// If T is unsigned, return the smallest range [0..x] that covers the
  1220. /// original range.
  1221. Range getSymmetricalRange(Range Origin, QualType T) {
  1222. APSIntType RangeType = ValueFactory.getAPSIntType(T);
  1223. if (RangeType.isUnsigned()) {
  1224. return Range(ValueFactory.getMinValue(RangeType), Origin.To());
  1225. }
  1226. if (Origin.From().isMinSignedValue()) {
  1227. // If mini is a minimal signed value, absolute value of it is greater
  1228. // than the maximal signed value. In order to avoid these
  1229. // complications, we simply return the whole range.
  1230. return {ValueFactory.getMinValue(RangeType),
  1231. ValueFactory.getMaxValue(RangeType)};
  1232. }
  1233. // At this point, we are sure that the type is signed and we can safely
  1234. // use unary - operator.
  1235. //
  1236. // While calculating absolute maximum, we can use the following formula
  1237. // because of these reasons:
  1238. // * If From >= 0 then To >= From and To >= -From.
  1239. // AbsMax == To == max(To, -From)
  1240. // * If To <= 0 then -From >= -To and -From >= From.
  1241. // AbsMax == -From == max(-From, To)
  1242. // * Otherwise, From <= 0, To >= 0, and
  1243. // AbsMax == max(abs(From), abs(To))
  1244. llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());
  1245. // Intersection is guaranteed to be non-empty.
  1246. return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
  1247. }
  1248. /// Return a range set subtracting zero from \p Domain.
  1249. RangeSet assumeNonZero(RangeSet Domain, QualType T) {
  1250. APSIntType IntType = ValueFactory.getAPSIntType(T);
  1251. return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
  1252. }
  1253. template <typename ProduceNegatedSymFunc>
  1254. std::optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F,
  1255. QualType T) {
  1256. // Do not negate if the type cannot be meaningfully negated.
  1257. if (!T->isUnsignedIntegerOrEnumerationType() &&
  1258. !T->isSignedIntegerOrEnumerationType())
  1259. return std::nullopt;
  1260. if (SymbolRef NegatedSym = F())
  1261. if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym))
  1262. return RangeFactory.negate(*NegatedRange);
  1263. return std::nullopt;
  1264. }
  1265. std::optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) {
  1266. // Just get the operand when we negate a symbol that is already negated.
  1267. // -(-a) == a
  1268. return getRangeForNegatedExpr(
  1269. [USE]() -> SymbolRef {
  1270. if (USE->getOpcode() == UO_Minus)
  1271. return USE->getOperand();
  1272. return nullptr;
  1273. },
  1274. USE->getType());
  1275. }
  1276. std::optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) {
  1277. return getRangeForNegatedExpr(
  1278. [SSE, State = this->State]() -> SymbolRef {
  1279. if (SSE->getOpcode() == BO_Sub)
  1280. return State->getSymbolManager().getSymSymExpr(
  1281. SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType());
  1282. return nullptr;
  1283. },
  1284. SSE->getType());
  1285. }
  1286. std::optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) {
  1287. return getRangeForNegatedExpr(
  1288. [Sym, State = this->State]() {
  1289. return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus,
  1290. Sym->getType());
  1291. },
  1292. Sym->getType());
  1293. }
  1294. // Returns ranges only for binary comparison operators (except <=>)
  1295. // when left and right operands are symbolic values.
  1296. // Finds any other comparisons with the same operands.
  1297. // Then do logical calculations and refuse impossible branches.
  1298. // E.g. (x < y) and (x > y) at the same time are impossible.
  1299. // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
  1300. // E.g. (x == y) and (y == x) are just reversed but the same.
  1301. // It covers all possible combinations (see CmpOpTable description).
  1302. // Note that `x` and `y` can also stand for subexpressions,
  1303. // not only for actual symbols.
  1304. std::optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) {
  1305. const BinaryOperatorKind CurrentOP = SSE->getOpcode();
  1306. // We currently do not support <=> (C++20).
  1307. if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
  1308. return std::nullopt;
  1309. static const OperatorRelationsTable CmpOpTable{};
  1310. const SymExpr *LHS = SSE->getLHS();
  1311. const SymExpr *RHS = SSE->getRHS();
  1312. QualType T = SSE->getType();
  1313. SymbolManager &SymMgr = State->getSymbolManager();
  1314. // We use this variable to store the last queried operator (`QueriedOP`)
  1315. // for which the `getCmpOpState` returned with `Unknown`. If there are two
  1316. // different OPs that returned `Unknown` then we have to query the special
  1317. // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
  1318. // never returns `Unknown`, so `CurrentOP` is a good initial value.
  1319. BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;
  1320. // Loop goes through all of the columns exept the last one ('UnknownX2').
  1321. // We treat `UnknownX2` column separately at the end of the loop body.
  1322. for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {
  1323. // Let's find an expression e.g. (x < y).
  1324. BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
  1325. const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
  1326. const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);
  1327. // If ranges were not previously found,
  1328. // try to find a reversed expression (y > x).
  1329. if (!QueriedRangeSet) {
  1330. const BinaryOperatorKind ROP =
  1331. BinaryOperator::reverseComparisonOp(QueriedOP);
  1332. SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
  1333. QueriedRangeSet = getConstraint(State, SymSym);
  1334. }
  1335. if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
  1336. continue;
  1337. const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
  1338. const bool isInFalseBranch =
  1339. ConcreteValue ? (*ConcreteValue == 0) : false;
  1340. // If it is a false branch, we shall be guided by opposite operator,
  1341. // because the table is made assuming we are in the true branch.
  1342. // E.g. when (x <= y) is false, then (x > y) is true.
  1343. if (isInFalseBranch)
  1344. QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);
  1345. OperatorRelationsTable::TriStateKind BranchState =
  1346. CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);
  1347. if (BranchState == OperatorRelationsTable::Unknown) {
  1348. if (LastQueriedOpToUnknown != CurrentOP &&
  1349. LastQueriedOpToUnknown != QueriedOP) {
  1350. // If we got the Unknown state for both different operators.
  1351. // if (x <= y) // assume true
  1352. // if (x != y) // assume true
  1353. // if (x < y) // would be also true
  1354. // Get a state from `UnknownX2` column.
  1355. BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
  1356. } else {
  1357. LastQueriedOpToUnknown = QueriedOP;
  1358. continue;
  1359. }
  1360. }
  1361. return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
  1362. : getFalseRange(T);
  1363. }
  1364. return std::nullopt;
  1365. }
  1366. std::optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
  1367. std::optional<bool> Equality = meansEquality(Sym);
  1368. if (!Equality)
  1369. return std::nullopt;
  1370. if (std::optional<bool> AreEqual =
  1371. EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
  1372. // Here we cover two cases at once:
  1373. // * if Sym is equality and its operands are known to be equal -> true
  1374. // * if Sym is disequality and its operands are disequal -> true
  1375. if (*AreEqual == *Equality) {
  1376. return getTrueRange(Sym->getType());
  1377. }
  1378. // Opposite combinations result in false.
  1379. return getFalseRange(Sym->getType());
  1380. }
  1381. return std::nullopt;
  1382. }
  1383. RangeSet getTrueRange(QualType T) {
  1384. RangeSet TypeRange = infer(T);
  1385. return assumeNonZero(TypeRange, T);
  1386. }
  1387. RangeSet getFalseRange(QualType T) {
  1388. const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
  1389. return RangeSet(RangeFactory, Zero);
  1390. }
  1391. BasicValueFactory &ValueFactory;
  1392. RangeSet::Factory &RangeFactory;
  1393. ProgramStateRef State;
  1394. };
  1395. //===----------------------------------------------------------------------===//
  1396. // Range-based reasoning about symbolic operations
  1397. //===----------------------------------------------------------------------===//
  1398. template <>
  1399. RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_NE>(RangeSet LHS,
  1400. RangeSet RHS,
  1401. QualType T) {
  1402. assert(!LHS.isEmpty() && !RHS.isEmpty());
  1403. if (LHS.getAPSIntType() == RHS.getAPSIntType()) {
  1404. if (intersect(RangeFactory, LHS, RHS).isEmpty())
  1405. return getTrueRange(T);
  1406. } else {
  1407. // We can only lose information if we are casting smaller signed type to
  1408. // bigger unsigned type. For e.g.,
  1409. // LHS (unsigned short): [2, USHRT_MAX]
  1410. // RHS (signed short): [SHRT_MIN, 0]
  1411. //
  1412. // Casting RHS to LHS type will leave us with overlapping values
  1413. // CastedRHS : [0, 0] U [SHRT_MAX + 1, USHRT_MAX]
  1414. //
  1415. // We can avoid this by checking if signed type's maximum value is lesser
  1416. // than unsigned type's minimum value.
  1417. // If both have different signs then only we can get more information.
  1418. if (LHS.isUnsigned() != RHS.isUnsigned()) {
  1419. if (LHS.isUnsigned() && (LHS.getBitWidth() >= RHS.getBitWidth())) {
  1420. if (RHS.getMaxValue().isNegative() ||
  1421. LHS.getAPSIntType().convert(RHS.getMaxValue()) < LHS.getMinValue())
  1422. return getTrueRange(T);
  1423. } else if (RHS.isUnsigned() && (LHS.getBitWidth() <= RHS.getBitWidth())) {
  1424. if (LHS.getMaxValue().isNegative() ||
  1425. RHS.getAPSIntType().convert(LHS.getMaxValue()) < RHS.getMinValue())
  1426. return getTrueRange(T);
  1427. }
  1428. }
  1429. // Both RangeSets should be casted to bigger unsigned type.
  1430. APSIntType CastingType(std::max(LHS.getBitWidth(), RHS.getBitWidth()),
  1431. LHS.isUnsigned() || RHS.isUnsigned());
  1432. RangeSet CastedLHS = RangeFactory.castTo(LHS, CastingType);
  1433. RangeSet CastedRHS = RangeFactory.castTo(RHS, CastingType);
  1434. if (intersect(RangeFactory, CastedLHS, CastedRHS).isEmpty())
  1435. return getTrueRange(T);
  1436. }
  1437. // In all other cases, the resulting range cannot be deduced.
  1438. return infer(T);
  1439. }
  1440. template <>
  1441. RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
  1442. QualType T) {
  1443. APSIntType ResultType = ValueFactory.getAPSIntType(T);
  1444. llvm::APSInt Zero = ResultType.getZeroValue();
  1445. bool IsLHSPositiveOrZero = LHS.From() >= Zero;
  1446. bool IsRHSPositiveOrZero = RHS.From() >= Zero;
  1447. bool IsLHSNegative = LHS.To() < Zero;
  1448. bool IsRHSNegative = RHS.To() < Zero;
  1449. // Check if both ranges have the same sign.
  1450. if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
  1451. (IsLHSNegative && IsRHSNegative)) {
  1452. // The result is definitely greater or equal than any of the operands.
  1453. const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
  1454. // We estimate maximal value for positives as the maximal value for the
  1455. // given type. For negatives, we estimate it with -1 (e.g. 0x11111111).
  1456. //
  1457. // TODO: We basically, limit the resulting range from below, but don't do
  1458. // anything with the upper bound.
  1459. //
  1460. // For positive operands, it can be done as follows: for the upper
  1461. // bound of LHS and RHS we calculate the most significant bit set.
  1462. // Let's call it the N-th bit. Then we can estimate the maximal
  1463. // number to be 2^(N+1)-1, i.e. the number with all the bits up to
  1464. // the N-th bit set.
  1465. const llvm::APSInt &Max = IsLHSNegative
  1466. ? ValueFactory.getValue(--Zero)
  1467. : ValueFactory.getMaxValue(ResultType);
  1468. return {RangeFactory, ValueFactory.getValue(Min), Max};
  1469. }
  1470. // Otherwise, let's check if at least one of the operands is negative.
  1471. if (IsLHSNegative || IsRHSNegative) {
  1472. // This means that the result is definitely negative as well.
  1473. return {RangeFactory, ValueFactory.getMinValue(ResultType),
  1474. ValueFactory.getValue(--Zero)};
  1475. }
  1476. RangeSet DefaultRange = infer(T);
  1477. // It is pretty hard to reason about operands with different signs
  1478. // (and especially with possibly different signs). We simply check if it
  1479. // can be zero. In order to conclude that the result could not be zero,
  1480. // at least one of the operands should be definitely not zero itself.
  1481. if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
  1482. return assumeNonZero(DefaultRange, T);
  1483. }
  1484. // Nothing much else to do here.
  1485. return DefaultRange;
  1486. }
  1487. template <>
  1488. RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
  1489. Range RHS,
  1490. QualType T) {
  1491. APSIntType ResultType = ValueFactory.getAPSIntType(T);
  1492. llvm::APSInt Zero = ResultType.getZeroValue();
  1493. bool IsLHSPositiveOrZero = LHS.From() >= Zero;
  1494. bool IsRHSPositiveOrZero = RHS.From() >= Zero;
  1495. bool IsLHSNegative = LHS.To() < Zero;
  1496. bool IsRHSNegative = RHS.To() < Zero;
  1497. // Check if both ranges have the same sign.
  1498. if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
  1499. (IsLHSNegative && IsRHSNegative)) {
  1500. // The result is definitely less or equal than any of the operands.
  1501. const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
  1502. // We conservatively estimate lower bound to be the smallest positive
  1503. // or negative value corresponding to the sign of the operands.
  1504. const llvm::APSInt &Min = IsLHSNegative
  1505. ? ValueFactory.getMinValue(ResultType)
  1506. : ValueFactory.getValue(Zero);
  1507. return {RangeFactory, Min, Max};
  1508. }
  1509. // Otherwise, let's check if at least one of the operands is positive.
  1510. if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
  1511. // This makes result definitely positive.
  1512. //
  1513. // We can also reason about a maximal value by finding the maximal
  1514. // value of the positive operand.
  1515. const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
  1516. // The minimal value on the other hand is much harder to reason about.
  1517. // The only thing we know for sure is that the result is positive.
  1518. return {RangeFactory, ValueFactory.getValue(Zero),
  1519. ValueFactory.getValue(Max)};
  1520. }
  1521. // Nothing much else to do here.
  1522. return infer(T);
  1523. }
  1524. template <>
  1525. RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
  1526. Range RHS,
  1527. QualType T) {
  1528. llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
  1529. Range ConservativeRange = getSymmetricalRange(RHS, T);
  1530. llvm::APSInt Max = ConservativeRange.To();
  1531. llvm::APSInt Min = ConservativeRange.From();
  1532. if (Max == Zero) {
  1533. // It's an undefined behaviour to divide by 0 and it seems like we know
  1534. // for sure that RHS is 0. Let's say that the resulting range is
  1535. // simply infeasible for that matter.
  1536. return RangeFactory.getEmptySet();
  1537. }
  1538. // At this point, our conservative range is closed. The result, however,
  1539. // couldn't be greater than the RHS' maximal absolute value. Because of
  1540. // this reason, we turn the range into open (or half-open in case of
  1541. // unsigned integers).
  1542. //
  1543. // While we operate on integer values, an open interval (a, b) can be easily
  1544. // represented by the closed interval [a + 1, b - 1]. And this is exactly
  1545. // what we do next.
  1546. //
  1547. // If we are dealing with unsigned case, we shouldn't move the lower bound.
  1548. if (Min.isSigned()) {
  1549. ++Min;
  1550. }
  1551. --Max;
  1552. bool IsLHSPositiveOrZero = LHS.From() >= Zero;
  1553. bool IsRHSPositiveOrZero = RHS.From() >= Zero;
  1554. // Remainder operator results with negative operands is implementation
  1555. // defined. Positive cases are much easier to reason about though.
  1556. if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
  1557. // If maximal value of LHS is less than maximal value of RHS,
  1558. // the result won't get greater than LHS.To().
  1559. Max = std::min(LHS.To(), Max);
  1560. // We want to check if it is a situation similar to the following:
  1561. //
  1562. // <------------|---[ LHS ]--------[ RHS ]----->
  1563. // -INF 0 +INF
  1564. //
  1565. // In this situation, we can conclude that (LHS / RHS) == 0 and
  1566. // (LHS % RHS) == LHS.
  1567. Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
  1568. }
  1569. // Nevertheless, the symmetrical range for RHS is a conservative estimate
  1570. // for any sign of either LHS, or RHS.
  1571. return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
  1572. }
  1573. RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
  1574. BinaryOperator::Opcode Op,
  1575. RangeSet RHS, QualType T) {
  1576. // We should propagate information about unfeasbility of one of the
  1577. // operands to the resulting range.
  1578. if (LHS.isEmpty() || RHS.isEmpty()) {
  1579. return RangeFactory.getEmptySet();
  1580. }
  1581. switch (Op) {
  1582. case BO_NE:
  1583. return VisitBinaryOperator<BO_NE>(LHS, RHS, T);
  1584. case BO_Or:
  1585. return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
  1586. case BO_And:
  1587. return VisitBinaryOperator<BO_And>(LHS, RHS, T);
  1588. case BO_Rem:
  1589. return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
  1590. default:
  1591. return infer(T);
  1592. }
  1593. }
  1594. //===----------------------------------------------------------------------===//
  1595. // Constraint manager implementation details
  1596. //===----------------------------------------------------------------------===//
  1597. class RangeConstraintManager : public RangedConstraintManager {
  1598. public:
  1599. RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
  1600. : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
  1601. //===------------------------------------------------------------------===//
  1602. // Implementation for interface from ConstraintManager.
  1603. //===------------------------------------------------------------------===//
  1604. bool haveEqualConstraints(ProgramStateRef S1,
  1605. ProgramStateRef S2) const override {
  1606. // NOTE: ClassMembers are as simple as back pointers for ClassMap,
  1607. // so comparing constraint ranges and class maps should be
  1608. // sufficient.
  1609. return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
  1610. S1->get<ClassMap>() == S2->get<ClassMap>();
  1611. }
  1612. bool canReasonAbout(SVal X) const override;
  1613. ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
  1614. const llvm::APSInt *getSymVal(ProgramStateRef State,
  1615. SymbolRef Sym) const override;
  1616. ProgramStateRef removeDeadBindings(ProgramStateRef State,
  1617. SymbolReaper &SymReaper) override;
  1618. void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
  1619. unsigned int Space = 0, bool IsDot = false) const override;
  1620. void printValue(raw_ostream &Out, ProgramStateRef State,
  1621. SymbolRef Sym) override;
  1622. void printConstraints(raw_ostream &Out, ProgramStateRef State,
  1623. const char *NL = "\n", unsigned int Space = 0,
  1624. bool IsDot = false) const;
  1625. void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
  1626. const char *NL = "\n", unsigned int Space = 0,
  1627. bool IsDot = false) const;
  1628. void printDisequalities(raw_ostream &Out, ProgramStateRef State,
  1629. const char *NL = "\n", unsigned int Space = 0,
  1630. bool IsDot = false) const;
  1631. //===------------------------------------------------------------------===//
  1632. // Implementation for interface from RangedConstraintManager.
  1633. //===------------------------------------------------------------------===//
  1634. ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
  1635. const llvm::APSInt &V,
  1636. const llvm::APSInt &Adjustment) override;
  1637. ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
  1638. const llvm::APSInt &V,
  1639. const llvm::APSInt &Adjustment) override;
  1640. ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
  1641. const llvm::APSInt &V,
  1642. const llvm::APSInt &Adjustment) override;
  1643. ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
  1644. const llvm::APSInt &V,
  1645. const llvm::APSInt &Adjustment) override;
  1646. ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
  1647. const llvm::APSInt &V,
  1648. const llvm::APSInt &Adjustment) override;
  1649. ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
  1650. const llvm::APSInt &V,
  1651. const llvm::APSInt &Adjustment) override;
  1652. ProgramStateRef assumeSymWithinInclusiveRange(
  1653. ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
  1654. const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
  1655. ProgramStateRef assumeSymOutsideInclusiveRange(
  1656. ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
  1657. const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
  1658. private:
  1659. RangeSet::Factory F;
  1660. RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
  1661. RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
  1662. ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
  1663. RangeSet Range);
  1664. ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
  1665. RangeSet Range);
  1666. RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
  1667. const llvm::APSInt &Int,
  1668. const llvm::APSInt &Adjustment);
  1669. RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
  1670. const llvm::APSInt &Int,
  1671. const llvm::APSInt &Adjustment);
  1672. RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
  1673. const llvm::APSInt &Int,
  1674. const llvm::APSInt &Adjustment);
  1675. RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
  1676. const llvm::APSInt &Int,
  1677. const llvm::APSInt &Adjustment);
  1678. RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
  1679. const llvm::APSInt &Int,
  1680. const llvm::APSInt &Adjustment);
  1681. };
  1682. //===----------------------------------------------------------------------===//
  1683. // Constraint assignment logic
  1684. //===----------------------------------------------------------------------===//
  1685. /// ConstraintAssignorBase is a small utility class that unifies visitor
  1686. /// for ranges with a visitor for constraints (rangeset/range/constant).
  1687. ///
  1688. /// It is designed to have one derived class, but generally it can have more.
  1689. /// Derived class can control which types we handle by defining methods of the
  1690. /// following form:
  1691. ///
  1692. /// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
  1693. /// CONSTRAINT Constraint);
  1694. ///
  1695. /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
  1696. /// CONSTRAINT is the type of constraint (RangeSet/Range/Const)
  1697. /// return value signifies whether we should try other handle methods
  1698. /// (i.e. false would mean to stop right after calling this method)
  1699. template <class Derived> class ConstraintAssignorBase {
  1700. public:
  1701. using Const = const llvm::APSInt &;
  1702. #define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
  1703. #define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \
  1704. if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \
  1705. return false
  1706. void assign(SymbolRef Sym, RangeSet Constraint) {
  1707. assignImpl(Sym, Constraint);
  1708. }
  1709. bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
  1710. switch (Sym->getKind()) {
  1711. #define SYMBOL(Id, Parent) \
  1712. case SymExpr::Id##Kind: \
  1713. DISPATCH(Id);
  1714. #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
  1715. }
  1716. llvm_unreachable("Unknown SymExpr kind!");
  1717. }
  1718. #define DEFAULT_ASSIGN(Id) \
  1719. bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \
  1720. return true; \
  1721. } \
  1722. bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
  1723. bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
  1724. // When we dispatch for constraint types, we first try to check
  1725. // if the new constraint is the constant and try the corresponding
  1726. // assignor methods. If it didn't interrupt, we can proceed to the
  1727. // range, and finally to the range set.
  1728. #define CONSTRAINT_DISPATCH(Id) \
  1729. if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \
  1730. ASSIGN(Id, Const, Sym, *Const); \
  1731. } \
  1732. if (Constraint.size() == 1) { \
  1733. ASSIGN(Id, Range, Sym, *Constraint.begin()); \
  1734. } \
  1735. ASSIGN(Id, RangeSet, Sym, Constraint)
  1736. // Our internal assign method first tries to call assignor methods for all
  1737. // constraint types that apply. And if not interrupted, continues with its
  1738. // parent class.
  1739. #define SYMBOL(Id, Parent) \
  1740. bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \
  1741. CONSTRAINT_DISPATCH(Id); \
  1742. DISPATCH(Parent); \
  1743. } \
  1744. DEFAULT_ASSIGN(Id)
  1745. #define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
  1746. #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
  1747. // Default implementations for the top class that doesn't have parents.
  1748. bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
  1749. CONSTRAINT_DISPATCH(SymExpr);
  1750. return true;
  1751. }
  1752. DEFAULT_ASSIGN(SymExpr);
  1753. #undef DISPATCH
  1754. #undef CONSTRAINT_DISPATCH
  1755. #undef DEFAULT_ASSIGN
  1756. #undef ASSIGN
  1757. };
  1758. /// A little component aggregating all of the reasoning we have about
  1759. /// assigning new constraints to symbols.
  1760. ///
  1761. /// The main purpose of this class is to associate constraints to symbols,
  1762. /// and impose additional constraints on other symbols, when we can imply
  1763. /// them.
  1764. ///
  1765. /// It has a nice symmetry with SymbolicRangeInferrer. When the latter
  1766. /// can provide more precise ranges by looking into the operands of the
  1767. /// expression in question, ConstraintAssignor looks into the operands
  1768. /// to see if we can imply more from the new constraint.
  1769. class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
  1770. public:
  1771. template <class ClassOrSymbol>
  1772. [[nodiscard]] static ProgramStateRef
  1773. assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
  1774. ClassOrSymbol CoS, RangeSet NewConstraint) {
  1775. if (!State || NewConstraint.isEmpty())
  1776. return nullptr;
  1777. ConstraintAssignor Assignor{State, Builder, F};
  1778. return Assignor.assign(CoS, NewConstraint);
  1779. }
  1780. /// Handle expressions like: a % b != 0.
  1781. template <typename SymT>
  1782. bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
  1783. if (Sym->getOpcode() != BO_Rem)
  1784. return true;
  1785. // a % b != 0 implies that a != 0.
  1786. if (!Constraint.containsZero()) {
  1787. SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
  1788. if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
  1789. State = State->assume(*NonLocSymSVal, true);
  1790. if (!State)
  1791. return false;
  1792. }
  1793. }
  1794. return true;
  1795. }
  1796. inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
  1797. inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
  1798. RangeSet Constraint) {
  1799. return handleRemainderOp(Sym, Constraint);
  1800. }
  1801. inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
  1802. RangeSet Constraint);
  1803. private:
  1804. ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
  1805. RangeSet::Factory &F)
  1806. : State(State), Builder(Builder), RangeFactory(F) {}
  1807. using Base = ConstraintAssignorBase<ConstraintAssignor>;
  1808. /// Base method for handling new constraints for symbols.
  1809. [[nodiscard]] ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
  1810. // All constraints are actually associated with equivalence classes, and
  1811. // that's what we are going to do first.
  1812. State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
  1813. if (!State)
  1814. return nullptr;
  1815. // And after that we can check what other things we can get from this
  1816. // constraint.
  1817. Base::assign(Sym, NewConstraint);
  1818. return State;
  1819. }
  1820. /// Base method for handling new constraints for classes.
  1821. [[nodiscard]] ProgramStateRef assign(EquivalenceClass Class,
  1822. RangeSet NewConstraint) {
  1823. // There is a chance that we might need to update constraints for the
  1824. // classes that are known to be disequal to Class.
  1825. //
  1826. // In order for this to be even possible, the new constraint should
  1827. // be simply a constant because we can't reason about range disequalities.
  1828. if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
  1829. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  1830. ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
  1831. // Add new constraint.
  1832. Constraints = CF.add(Constraints, Class, NewConstraint);
  1833. for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
  1834. RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
  1835. RangeFactory, State, DisequalClass);
  1836. UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
  1837. // If we end up with at least one of the disequal classes to be
  1838. // constrained with an empty range-set, the state is infeasible.
  1839. if (UpdatedConstraint.isEmpty())
  1840. return nullptr;
  1841. Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
  1842. }
  1843. assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
  1844. "a state with infeasible constraints");
  1845. return setConstraints(State, Constraints);
  1846. }
  1847. return setConstraint(State, Class, NewConstraint);
  1848. }
  1849. ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
  1850. SymbolRef RHS) {
  1851. return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
  1852. }
  1853. ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
  1854. SymbolRef RHS) {
  1855. return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
  1856. }
  1857. [[nodiscard]] std::optional<bool> interpreteAsBool(RangeSet Constraint) {
  1858. assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
  1859. if (Constraint.getConcreteValue())
  1860. return !Constraint.getConcreteValue()->isZero();
  1861. if (!Constraint.containsZero())
  1862. return true;
  1863. return std::nullopt;
  1864. }
  1865. ProgramStateRef State;
  1866. SValBuilder &Builder;
  1867. RangeSet::Factory &RangeFactory;
  1868. };
  1869. bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
  1870. const llvm::APSInt &Constraint) {
  1871. llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
  1872. // Iterate over all equivalence classes and try to simplify them.
  1873. ClassMembersTy Members = State->get<ClassMembers>();
  1874. for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
  1875. EquivalenceClass Class = ClassToSymbolSet.first;
  1876. State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
  1877. if (!State)
  1878. return false;
  1879. SimplifiedClasses.insert(Class);
  1880. }
  1881. // Trivial equivalence classes (those that have only one symbol member) are
  1882. // not stored in the State. Thus, we must skim through the constraints as
  1883. // well. And we try to simplify symbols in the constraints.
  1884. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  1885. for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
  1886. EquivalenceClass Class = ClassConstraint.first;
  1887. if (SimplifiedClasses.count(Class)) // Already simplified.
  1888. continue;
  1889. State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
  1890. if (!State)
  1891. return false;
  1892. }
  1893. // We may have trivial equivalence classes in the disequality info as
  1894. // well, and we need to simplify them.
  1895. DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
  1896. for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
  1897. DisequalityInfo) {
  1898. EquivalenceClass Class = DisequalityEntry.first;
  1899. ClassSet DisequalClasses = DisequalityEntry.second;
  1900. State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
  1901. if (!State)
  1902. return false;
  1903. }
  1904. return true;
  1905. }
  1906. bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
  1907. RangeSet Constraint) {
  1908. if (!handleRemainderOp(Sym, Constraint))
  1909. return false;
  1910. std::optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
  1911. if (!ConstraintAsBool)
  1912. return true;
  1913. if (std::optional<bool> Equality = meansEquality(Sym)) {
  1914. // Here we cover two cases:
  1915. // * if Sym is equality and the new constraint is true -> Sym's operands
  1916. // should be marked as equal
  1917. // * if Sym is disequality and the new constraint is false -> Sym's
  1918. // operands should be also marked as equal
  1919. if (*Equality == *ConstraintAsBool) {
  1920. State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
  1921. } else {
  1922. // Other combinations leave as with disequal operands.
  1923. State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
  1924. }
  1925. if (!State)
  1926. return false;
  1927. }
  1928. return true;
  1929. }
  1930. } // end anonymous namespace
  1931. std::unique_ptr<ConstraintManager>
  1932. ento::CreateRangeConstraintManager(ProgramStateManager &StMgr,
  1933. ExprEngine *Eng) {
  1934. return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
  1935. }
  1936. ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
  1937. ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
  1938. ConstraintMap Result = F.getEmptyMap();
  1939. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  1940. for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
  1941. EquivalenceClass Class = ClassConstraint.first;
  1942. SymbolSet ClassMembers = Class.getClassMembers(State);
  1943. assert(!ClassMembers.isEmpty() &&
  1944. "Class must always have at least one member!");
  1945. SymbolRef Representative = *ClassMembers.begin();
  1946. Result = F.add(Result, Representative, ClassConstraint.second);
  1947. }
  1948. return Result;
  1949. }
  1950. //===----------------------------------------------------------------------===//
  1951. // EqualityClass implementation details
  1952. //===----------------------------------------------------------------------===//
  1953. LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
  1954. raw_ostream &os) const {
  1955. SymbolSet ClassMembers = getClassMembers(State);
  1956. for (const SymbolRef &MemberSym : ClassMembers) {
  1957. MemberSym->dump();
  1958. os << "\n";
  1959. }
  1960. }
  1961. inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
  1962. SymbolRef Sym) {
  1963. assert(State && "State should not be null");
  1964. assert(Sym && "Symbol should not be null");
  1965. // We store far from all Symbol -> Class mappings
  1966. if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
  1967. return *NontrivialClass;
  1968. // This is a trivial class of Sym.
  1969. return Sym;
  1970. }
  1971. inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
  1972. ProgramStateRef State,
  1973. SymbolRef First,
  1974. SymbolRef Second) {
  1975. EquivalenceClass FirstClass = find(State, First);
  1976. EquivalenceClass SecondClass = find(State, Second);
  1977. return FirstClass.merge(F, State, SecondClass);
  1978. }
  1979. inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
  1980. ProgramStateRef State,
  1981. EquivalenceClass Other) {
  1982. // It is already the same class.
  1983. if (*this == Other)
  1984. return State;
  1985. // FIXME: As of now, we support only equivalence classes of the same type.
  1986. // This limitation is connected to the lack of explicit casts in
  1987. // our symbolic expression model.
  1988. //
  1989. // That means that for `int x` and `char y` we don't distinguish
  1990. // between these two very different cases:
  1991. // * `x == y`
  1992. // * `(char)x == y`
  1993. //
  1994. // The moment we introduce symbolic casts, this restriction can be
  1995. // lifted.
  1996. if (getType() != Other.getType())
  1997. return State;
  1998. SymbolSet Members = getClassMembers(State);
  1999. SymbolSet OtherMembers = Other.getClassMembers(State);
  2000. // We estimate the size of the class by the height of tree containing
  2001. // its members. Merging is not a trivial operation, so it's easier to
  2002. // merge the smaller class into the bigger one.
  2003. if (Members.getHeight() >= OtherMembers.getHeight()) {
  2004. return mergeImpl(F, State, Members, Other, OtherMembers);
  2005. } else {
  2006. return Other.mergeImpl(F, State, OtherMembers, *this, Members);
  2007. }
  2008. }
  2009. inline ProgramStateRef
  2010. EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
  2011. ProgramStateRef State, SymbolSet MyMembers,
  2012. EquivalenceClass Other, SymbolSet OtherMembers) {
  2013. // Essentially what we try to recreate here is some kind of union-find
  2014. // data structure. It does have certain limitations due to persistence
  2015. // and the need to remove elements from classes.
  2016. //
  2017. // In this setting, EquialityClass object is the representative of the class
  2018. // or the parent element. ClassMap is a mapping of class members to their
  2019. // parent. Unlike the union-find structure, they all point directly to the
  2020. // class representative because we don't have an opportunity to actually do
  2021. // path compression when dealing with immutability. This means that we
  2022. // compress paths every time we do merges. It also means that we lose
  2023. // the main amortized complexity benefit from the original data structure.
  2024. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  2025. ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
  2026. // 1. If the merged classes have any constraints associated with them, we
  2027. // need to transfer them to the class we have left.
  2028. //
  2029. // Intersection here makes perfect sense because both of these constraints
  2030. // must hold for the whole new class.
  2031. if (std::optional<RangeSet> NewClassConstraint =
  2032. intersect(RangeFactory, getConstraint(State, *this),
  2033. getConstraint(State, Other))) {
  2034. // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
  2035. // range inferrer shouldn't generate ranges incompatible with
  2036. // equivalence classes. However, at the moment, due to imperfections
  2037. // in the solver, it is possible and the merge function can also
  2038. // return infeasible states aka null states.
  2039. if (NewClassConstraint->isEmpty())
  2040. // Infeasible state
  2041. return nullptr;
  2042. // No need in tracking constraints of a now-dissolved class.
  2043. Constraints = CRF.remove(Constraints, Other);
  2044. // Assign new constraints for this class.
  2045. Constraints = CRF.add(Constraints, *this, *NewClassConstraint);
  2046. assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
  2047. "a state with infeasible constraints");
  2048. State = State->set<ConstraintRange>(Constraints);
  2049. }
  2050. // 2. Get ALL equivalence-related maps
  2051. ClassMapTy Classes = State->get<ClassMap>();
  2052. ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
  2053. ClassMembersTy Members = State->get<ClassMembers>();
  2054. ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();
  2055. DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
  2056. DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();
  2057. ClassSet::Factory &CF = State->get_context<ClassSet>();
  2058. SymbolSet::Factory &F = getMembersFactory(State);
  2059. // 2. Merge members of the Other class into the current class.
  2060. SymbolSet NewClassMembers = MyMembers;
  2061. for (SymbolRef Sym : OtherMembers) {
  2062. NewClassMembers = F.add(NewClassMembers, Sym);
  2063. // *this is now the class for all these new symbols.
  2064. Classes = CMF.add(Classes, Sym, *this);
  2065. }
  2066. // 3. Adjust member mapping.
  2067. //
  2068. // No need in tracking members of a now-dissolved class.
  2069. Members = MF.remove(Members, Other);
  2070. // Now only the current class is mapped to all the symbols.
  2071. Members = MF.add(Members, *this, NewClassMembers);
  2072. // 4. Update disequality relations
  2073. ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
  2074. // We are about to merge two classes but they are already known to be
  2075. // non-equal. This is a contradiction.
  2076. if (DisequalToOther.contains(*this))
  2077. return nullptr;
  2078. if (!DisequalToOther.isEmpty()) {
  2079. ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
  2080. DisequalityInfo = DF.remove(DisequalityInfo, Other);
  2081. for (EquivalenceClass DisequalClass : DisequalToOther) {
  2082. DisequalToThis = CF.add(DisequalToThis, DisequalClass);
  2083. // Disequality is a symmetric relation meaning that if
  2084. // DisequalToOther not null then the set for DisequalClass is not
  2085. // empty and has at least Other.
  2086. ClassSet OriginalSetLinkedToOther =
  2087. *DisequalityInfo.lookup(DisequalClass);
  2088. // Other will be eliminated and we should replace it with the bigger
  2089. // united class.
  2090. ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
  2091. NewSet = CF.add(NewSet, *this);
  2092. DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
  2093. }
  2094. DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
  2095. State = State->set<DisequalityMap>(DisequalityInfo);
  2096. }
  2097. // 5. Update the state
  2098. State = State->set<ClassMap>(Classes);
  2099. State = State->set<ClassMembers>(Members);
  2100. return State;
  2101. }
  2102. inline SymbolSet::Factory &
  2103. EquivalenceClass::getMembersFactory(ProgramStateRef State) {
  2104. return State->get_context<SymbolSet>();
  2105. }
  2106. SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
  2107. if (const SymbolSet *Members = State->get<ClassMembers>(*this))
  2108. return *Members;
  2109. // This class is trivial, so we need to construct a set
  2110. // with just that one symbol from the class.
  2111. SymbolSet::Factory &F = getMembersFactory(State);
  2112. return F.add(F.getEmptySet(), getRepresentativeSymbol());
  2113. }
  2114. bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
  2115. return State->get<ClassMembers>(*this) == nullptr;
  2116. }
  2117. bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
  2118. SymbolReaper &Reaper) const {
  2119. return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
  2120. }
  2121. inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
  2122. ProgramStateRef State,
  2123. SymbolRef First,
  2124. SymbolRef Second) {
  2125. return markDisequal(RF, State, find(State, First), find(State, Second));
  2126. }
  2127. inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
  2128. ProgramStateRef State,
  2129. EquivalenceClass First,
  2130. EquivalenceClass Second) {
  2131. return First.markDisequal(RF, State, Second);
  2132. }
  2133. inline ProgramStateRef
  2134. EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
  2135. EquivalenceClass Other) const {
  2136. // If we know that two classes are equal, we can only produce an infeasible
  2137. // state.
  2138. if (*this == Other) {
  2139. return nullptr;
  2140. }
  2141. DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
  2142. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  2143. // Disequality is a symmetric relation, so if we mark A as disequal to B,
  2144. // we should also mark B as disequalt to A.
  2145. if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
  2146. Other) ||
  2147. !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
  2148. *this))
  2149. return nullptr;
  2150. assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
  2151. "a state with infeasible constraints");
  2152. State = State->set<DisequalityMap>(DisequalityInfo);
  2153. State = State->set<ConstraintRange>(Constraints);
  2154. return State;
  2155. }
  2156. inline bool EquivalenceClass::addToDisequalityInfo(
  2157. DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
  2158. RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
  2159. EquivalenceClass Second) {
  2160. // 1. Get all of the required factories.
  2161. DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
  2162. ClassSet::Factory &CF = State->get_context<ClassSet>();
  2163. ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
  2164. // 2. Add Second to the set of classes disequal to First.
  2165. const ClassSet *CurrentSet = Info.lookup(First);
  2166. ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
  2167. NewSet = CF.add(NewSet, Second);
  2168. Info = F.add(Info, First, NewSet);
  2169. // 3. If Second is known to be a constant, we can delete this point
  2170. // from the constraint asociated with First.
  2171. //
  2172. // So, if Second == 10, it means that First != 10.
  2173. // At the same time, the same logic does not apply to ranges.
  2174. if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
  2175. if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
  2176. RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
  2177. RF, State, First.getRepresentativeSymbol());
  2178. FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
  2179. // If the First class is about to be constrained with an empty
  2180. // range-set, the state is infeasible.
  2181. if (FirstConstraint.isEmpty())
  2182. return false;
  2183. Constraints = CRF.add(Constraints, First, FirstConstraint);
  2184. }
  2185. return true;
  2186. }
  2187. inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
  2188. SymbolRef FirstSym,
  2189. SymbolRef SecondSym) {
  2190. return EquivalenceClass::areEqual(State, find(State, FirstSym),
  2191. find(State, SecondSym));
  2192. }
  2193. inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
  2194. EquivalenceClass First,
  2195. EquivalenceClass Second) {
  2196. // The same equivalence class => symbols are equal.
  2197. if (First == Second)
  2198. return true;
  2199. // Let's check if we know anything about these two classes being not equal to
  2200. // each other.
  2201. ClassSet DisequalToFirst = First.getDisequalClasses(State);
  2202. if (DisequalToFirst.contains(Second))
  2203. return false;
  2204. // It is not clear.
  2205. return std::nullopt;
  2206. }
  2207. [[nodiscard]] ProgramStateRef
  2208. EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
  2209. SymbolSet ClsMembers = getClassMembers(State);
  2210. assert(ClsMembers.contains(Old));
  2211. // Remove `Old`'s Class->Sym relation.
  2212. SymbolSet::Factory &F = getMembersFactory(State);
  2213. ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
  2214. ClsMembers = F.remove(ClsMembers, Old);
  2215. // Ensure another precondition of the removeMember function (we can check
  2216. // this only with isEmpty, thus we have to do the remove first).
  2217. assert(!ClsMembers.isEmpty() &&
  2218. "Class should have had at least two members before member removal");
  2219. // Overwrite the existing members assigned to this class.
  2220. ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
  2221. ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
  2222. State = State->set<ClassMembers>(ClassMembersMap);
  2223. // Remove `Old`'s Sym->Class relation.
  2224. ClassMapTy Classes = State->get<ClassMap>();
  2225. ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
  2226. Classes = CMF.remove(Classes, Old);
  2227. State = State->set<ClassMap>(Classes);
  2228. return State;
  2229. }
  2230. // Re-evaluate an SVal with top-level `State->assume` logic.
  2231. [[nodiscard]] ProgramStateRef
  2232. reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue) {
  2233. if (!Constraint)
  2234. return State;
  2235. const auto DefinedVal = TheValue.castAs<DefinedSVal>();
  2236. // If the SVal is 0, we can simply interpret that as `false`.
  2237. if (Constraint->encodesFalseRange())
  2238. return State->assume(DefinedVal, false);
  2239. // If the constraint does not encode 0 then we can interpret that as `true`
  2240. // AND as a Range(Set).
  2241. if (Constraint->encodesTrueRange()) {
  2242. State = State->assume(DefinedVal, true);
  2243. if (!State)
  2244. return nullptr;
  2245. // Fall through, re-assume based on the range values as well.
  2246. }
  2247. // Overestimate the individual Ranges with the RangeSet' lowest and
  2248. // highest values.
  2249. return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
  2250. Constraint->getMaxValue(), true);
  2251. }
  2252. // Iterate over all symbols and try to simplify them. Once a symbol is
  2253. // simplified then we check if we can merge the simplified symbol's equivalence
  2254. // class to this class. This way, we simplify not just the symbols but the
  2255. // classes as well: we strive to keep the number of the classes to be the
  2256. // absolute minimum.
  2257. [[nodiscard]] ProgramStateRef
  2258. EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
  2259. ProgramStateRef State, EquivalenceClass Class) {
  2260. SymbolSet ClassMembers = Class.getClassMembers(State);
  2261. for (const SymbolRef &MemberSym : ClassMembers) {
  2262. const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
  2263. const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
  2264. // The symbol is collapsed to a constant, check if the current State is
  2265. // still feasible.
  2266. if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
  2267. const llvm::APSInt &SV = CI->getValue();
  2268. const RangeSet *ClassConstraint = getConstraint(State, Class);
  2269. // We have found a contradiction.
  2270. if (ClassConstraint && !ClassConstraint->contains(SV))
  2271. return nullptr;
  2272. }
  2273. if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
  2274. // The simplified symbol should be the member of the original Class,
  2275. // however, it might be in another existing class at the moment. We
  2276. // have to merge these classes.
  2277. ProgramStateRef OldState = State;
  2278. State = merge(F, State, MemberSym, SimplifiedMemberSym);
  2279. if (!State)
  2280. return nullptr;
  2281. // No state change, no merge happened actually.
  2282. if (OldState == State)
  2283. continue;
  2284. // Be aware that `SimplifiedMemberSym` might refer to an already dead
  2285. // symbol. In that case, the eqclass of that might not be the same as the
  2286. // eqclass of `MemberSym`. This is because the dead symbols are not
  2287. // preserved in the `ClassMap`, hence
  2288. // `find(State, SimplifiedMemberSym)` will result in a trivial eqclass
  2289. // compared to the eqclass of `MemberSym`.
  2290. // These eqclasses should be the same if `SimplifiedMemberSym` is alive.
  2291. // --> assert(find(State, MemberSym) == find(State, SimplifiedMemberSym))
  2292. //
  2293. // Note that `MemberSym` must be alive here since that is from the
  2294. // `ClassMembers` where all the symbols are alive.
  2295. // Remove the old and more complex symbol.
  2296. State = find(State, MemberSym).removeMember(State, MemberSym);
  2297. // Query the class constraint again b/c that may have changed during the
  2298. // merge above.
  2299. const RangeSet *ClassConstraint = getConstraint(State, Class);
  2300. // Re-evaluate an SVal with top-level `State->assume`, this ignites
  2301. // a RECURSIVE algorithm that will reach a FIXPOINT.
  2302. //
  2303. // About performance and complexity: Let us assume that in a State we
  2304. // have N non-trivial equivalence classes and that all constraints and
  2305. // disequality info is related to non-trivial classes. In the worst case,
  2306. // we can simplify only one symbol of one class in each iteration. The
  2307. // number of symbols in one class cannot grow b/c we replace the old
  2308. // symbol with the simplified one. Also, the number of the equivalence
  2309. // classes can decrease only, b/c the algorithm does a merge operation
  2310. // optionally. We need N iterations in this case to reach the fixpoint.
  2311. // Thus, the steps needed to be done in the worst case is proportional to
  2312. // N*N.
  2313. //
  2314. // This worst case scenario can be extended to that case when we have
  2315. // trivial classes in the constraints and in the disequality map. This
  2316. // case can be reduced to the case with a State where there are only
  2317. // non-trivial classes. This is because a merge operation on two trivial
  2318. // classes results in one non-trivial class.
  2319. State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
  2320. if (!State)
  2321. return nullptr;
  2322. }
  2323. }
  2324. return State;
  2325. }
  2326. inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
  2327. SymbolRef Sym) {
  2328. return find(State, Sym).getDisequalClasses(State);
  2329. }
  2330. inline ClassSet
  2331. EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
  2332. return getDisequalClasses(State->get<DisequalityMap>(),
  2333. State->get_context<ClassSet>());
  2334. }
  2335. inline ClassSet
  2336. EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
  2337. ClassSet::Factory &Factory) const {
  2338. if (const ClassSet *DisequalClasses = Map.lookup(*this))
  2339. return *DisequalClasses;
  2340. return Factory.getEmptySet();
  2341. }
  2342. bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
  2343. ClassMembersTy Members = State->get<ClassMembers>();
  2344. for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
  2345. for (SymbolRef Member : ClassMembersPair.second) {
  2346. // Every member of the class should have a mapping back to the class.
  2347. if (find(State, Member) == ClassMembersPair.first) {
  2348. continue;
  2349. }
  2350. return false;
  2351. }
  2352. }
  2353. DisequalityMapTy Disequalities = State->get<DisequalityMap>();
  2354. for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
  2355. EquivalenceClass Class = DisequalityInfo.first;
  2356. ClassSet DisequalClasses = DisequalityInfo.second;
  2357. // There is no use in keeping empty sets in the map.
  2358. if (DisequalClasses.isEmpty())
  2359. return false;
  2360. // Disequality is symmetrical, i.e. for every Class A and B that A != B,
  2361. // B != A should also be true.
  2362. for (EquivalenceClass DisequalClass : DisequalClasses) {
  2363. const ClassSet *DisequalToDisequalClasses =
  2364. Disequalities.lookup(DisequalClass);
  2365. // It should be a set of at least one element: Class
  2366. if (!DisequalToDisequalClasses ||
  2367. !DisequalToDisequalClasses->contains(Class))
  2368. return false;
  2369. }
  2370. }
  2371. return true;
  2372. }
  2373. //===----------------------------------------------------------------------===//
  2374. // RangeConstraintManager implementation
  2375. //===----------------------------------------------------------------------===//
  2376. bool RangeConstraintManager::canReasonAbout(SVal X) const {
  2377. std::optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
  2378. if (SymVal && SymVal->isExpression()) {
  2379. const SymExpr *SE = SymVal->getSymbol();
  2380. if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
  2381. switch (SIE->getOpcode()) {
  2382. // We don't reason yet about bitwise-constraints on symbolic values.
  2383. case BO_And:
  2384. case BO_Or:
  2385. case BO_Xor:
  2386. return false;
  2387. // We don't reason yet about these arithmetic constraints on
  2388. // symbolic values.
  2389. case BO_Mul:
  2390. case BO_Div:
  2391. case BO_Rem:
  2392. case BO_Shl:
  2393. case BO_Shr:
  2394. return false;
  2395. // All other cases.
  2396. default:
  2397. return true;
  2398. }
  2399. }
  2400. if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
  2401. // FIXME: Handle <=> here.
  2402. if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
  2403. BinaryOperator::isRelationalOp(SSE->getOpcode())) {
  2404. // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
  2405. // We've recently started producing Loc <> NonLoc comparisons (that
  2406. // result from casts of one of the operands between eg. intptr_t and
  2407. // void *), but we can't reason about them yet.
  2408. if (Loc::isLocType(SSE->getLHS()->getType())) {
  2409. return Loc::isLocType(SSE->getRHS()->getType());
  2410. }
  2411. }
  2412. }
  2413. return false;
  2414. }
  2415. return true;
  2416. }
  2417. ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
  2418. SymbolRef Sym) {
  2419. const RangeSet *Ranges = getConstraint(State, Sym);
  2420. // If we don't have any information about this symbol, it's underconstrained.
  2421. if (!Ranges)
  2422. return ConditionTruthVal();
  2423. // If we have a concrete value, see if it's zero.
  2424. if (const llvm::APSInt *Value = Ranges->getConcreteValue())
  2425. return *Value == 0;
  2426. BasicValueFactory &BV = getBasicVals();
  2427. APSIntType IntType = BV.getAPSIntType(Sym->getType());
  2428. llvm::APSInt Zero = IntType.getZeroValue();
  2429. // Check if zero is in the set of possible values.
  2430. if (!Ranges->contains(Zero))
  2431. return false;
  2432. // Zero is a possible value, but it is not the /only/ possible value.
  2433. return ConditionTruthVal();
  2434. }
  2435. const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
  2436. SymbolRef Sym) const {
  2437. const RangeSet *T = getConstraint(St, Sym);
  2438. return T ? T->getConcreteValue() : nullptr;
  2439. }
  2440. //===----------------------------------------------------------------------===//
  2441. // Remove dead symbols from existing constraints
  2442. //===----------------------------------------------------------------------===//
  2443. /// Scan all symbols referenced by the constraints. If the symbol is not alive
  2444. /// as marked in LSymbols, mark it as dead in DSymbols.
  2445. ProgramStateRef
  2446. RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
  2447. SymbolReaper &SymReaper) {
  2448. ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
  2449. ClassMembersTy NewClassMembersMap = ClassMembersMap;
  2450. ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
  2451. SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
  2452. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  2453. ConstraintRangeTy NewConstraints = Constraints;
  2454. ConstraintRangeTy::Factory &ConstraintFactory =
  2455. State->get_context<ConstraintRange>();
  2456. ClassMapTy Map = State->get<ClassMap>();
  2457. ClassMapTy NewMap = Map;
  2458. ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
  2459. DisequalityMapTy Disequalities = State->get<DisequalityMap>();
  2460. DisequalityMapTy::Factory &DisequalityFactory =
  2461. State->get_context<DisequalityMap>();
  2462. ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
  2463. bool ClassMapChanged = false;
  2464. bool MembersMapChanged = false;
  2465. bool ConstraintMapChanged = false;
  2466. bool DisequalitiesChanged = false;
  2467. auto removeDeadClass = [&](EquivalenceClass Class) {
  2468. // Remove associated constraint ranges.
  2469. Constraints = ConstraintFactory.remove(Constraints, Class);
  2470. ConstraintMapChanged = true;
  2471. // Update disequality information to not hold any information on the
  2472. // removed class.
  2473. ClassSet DisequalClasses =
  2474. Class.getDisequalClasses(Disequalities, ClassSetFactory);
  2475. if (!DisequalClasses.isEmpty()) {
  2476. for (EquivalenceClass DisequalClass : DisequalClasses) {
  2477. ClassSet DisequalToDisequalSet =
  2478. DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
  2479. // DisequalToDisequalSet is guaranteed to be non-empty for consistent
  2480. // disequality info.
  2481. assert(!DisequalToDisequalSet.isEmpty());
  2482. ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
  2483. // No need in keeping an empty set.
  2484. if (NewSet.isEmpty()) {
  2485. Disequalities =
  2486. DisequalityFactory.remove(Disequalities, DisequalClass);
  2487. } else {
  2488. Disequalities =
  2489. DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
  2490. }
  2491. }
  2492. // Remove the data for the class
  2493. Disequalities = DisequalityFactory.remove(Disequalities, Class);
  2494. DisequalitiesChanged = true;
  2495. }
  2496. };
  2497. // 1. Let's see if dead symbols are trivial and have associated constraints.
  2498. for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
  2499. Constraints) {
  2500. EquivalenceClass Class = ClassConstraintPair.first;
  2501. if (Class.isTriviallyDead(State, SymReaper)) {
  2502. // If this class is trivial, we can remove its constraints right away.
  2503. removeDeadClass(Class);
  2504. }
  2505. }
  2506. // 2. We don't need to track classes for dead symbols.
  2507. for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
  2508. SymbolRef Sym = SymbolClassPair.first;
  2509. if (SymReaper.isDead(Sym)) {
  2510. ClassMapChanged = true;
  2511. NewMap = ClassFactory.remove(NewMap, Sym);
  2512. }
  2513. }
  2514. // 3. Remove dead members from classes and remove dead non-trivial classes
  2515. // and their constraints.
  2516. for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
  2517. ClassMembersMap) {
  2518. EquivalenceClass Class = ClassMembersPair.first;
  2519. SymbolSet LiveMembers = ClassMembersPair.second;
  2520. bool MembersChanged = false;
  2521. for (SymbolRef Member : ClassMembersPair.second) {
  2522. if (SymReaper.isDead(Member)) {
  2523. MembersChanged = true;
  2524. LiveMembers = SetFactory.remove(LiveMembers, Member);
  2525. }
  2526. }
  2527. // Check if the class changed.
  2528. if (!MembersChanged)
  2529. continue;
  2530. MembersMapChanged = true;
  2531. if (LiveMembers.isEmpty()) {
  2532. // The class is dead now, we need to wipe it out of the members map...
  2533. NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
  2534. // ...and remove all of its constraints.
  2535. removeDeadClass(Class);
  2536. } else {
  2537. // We need to change the members associated with the class.
  2538. NewClassMembersMap =
  2539. EMFactory.add(NewClassMembersMap, Class, LiveMembers);
  2540. }
  2541. }
  2542. // 4. Update the state with new maps.
  2543. //
  2544. // Here we try to be humble and update a map only if it really changed.
  2545. if (ClassMapChanged)
  2546. State = State->set<ClassMap>(NewMap);
  2547. if (MembersMapChanged)
  2548. State = State->set<ClassMembers>(NewClassMembersMap);
  2549. if (ConstraintMapChanged)
  2550. State = State->set<ConstraintRange>(Constraints);
  2551. if (DisequalitiesChanged)
  2552. State = State->set<DisequalityMap>(Disequalities);
  2553. assert(EquivalenceClass::isClassDataConsistent(State));
  2554. return State;
  2555. }
  2556. RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
  2557. SymbolRef Sym) {
  2558. return SymbolicRangeInferrer::inferRange(F, State, Sym);
  2559. }
  2560. ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
  2561. SymbolRef Sym,
  2562. RangeSet Range) {
  2563. return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
  2564. }
  2565. //===------------------------------------------------------------------------===
  2566. // assumeSymX methods: protected interface for RangeConstraintManager.
  2567. //===------------------------------------------------------------------------===/
  2568. // The syntax for ranges below is mathematical, using [x, y] for closed ranges
  2569. // and (x, y) for open ranges. These ranges are modular, corresponding with
  2570. // a common treatment of C integer overflow. This means that these methods
  2571. // do not have to worry about overflow; RangeSet::Intersect can handle such a
  2572. // "wraparound" range.
  2573. // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
  2574. // UINT_MAX, 0, 1, and 2.
  2575. ProgramStateRef
  2576. RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
  2577. const llvm::APSInt &Int,
  2578. const llvm::APSInt &Adjustment) {
  2579. // Before we do any real work, see if the value can even show up.
  2580. APSIntType AdjustmentType(Adjustment);
  2581. if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
  2582. return St;
  2583. llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
  2584. RangeSet New = getRange(St, Sym);
  2585. New = F.deletePoint(New, Point);
  2586. return setRange(St, Sym, New);
  2587. }
  2588. ProgramStateRef
  2589. RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
  2590. const llvm::APSInt &Int,
  2591. const llvm::APSInt &Adjustment) {
  2592. // Before we do any real work, see if the value can even show up.
  2593. APSIntType AdjustmentType(Adjustment);
  2594. if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
  2595. return nullptr;
  2596. // [Int-Adjustment, Int-Adjustment]
  2597. llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
  2598. RangeSet New = getRange(St, Sym);
  2599. New = F.intersect(New, AdjInt);
  2600. return setRange(St, Sym, New);
  2601. }
  2602. RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
  2603. SymbolRef Sym,
  2604. const llvm::APSInt &Int,
  2605. const llvm::APSInt &Adjustment) {
  2606. // Before we do any real work, see if the value can even show up.
  2607. APSIntType AdjustmentType(Adjustment);
  2608. switch (AdjustmentType.testInRange(Int, true)) {
  2609. case APSIntType::RTR_Below:
  2610. return F.getEmptySet();
  2611. case APSIntType::RTR_Within:
  2612. break;
  2613. case APSIntType::RTR_Above:
  2614. return getRange(St, Sym);
  2615. }
  2616. // Special case for Int == Min. This is always false.
  2617. llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  2618. llvm::APSInt Min = AdjustmentType.getMinValue();
  2619. if (ComparisonVal == Min)
  2620. return F.getEmptySet();
  2621. llvm::APSInt Lower = Min - Adjustment;
  2622. llvm::APSInt Upper = ComparisonVal - Adjustment;
  2623. --Upper;
  2624. RangeSet Result = getRange(St, Sym);
  2625. return F.intersect(Result, Lower, Upper);
  2626. }
  2627. ProgramStateRef
  2628. RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
  2629. const llvm::APSInt &Int,
  2630. const llvm::APSInt &Adjustment) {
  2631. RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
  2632. return setRange(St, Sym, New);
  2633. }
  2634. RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
  2635. SymbolRef Sym,
  2636. const llvm::APSInt &Int,
  2637. const llvm::APSInt &Adjustment) {
  2638. // Before we do any real work, see if the value can even show up.
  2639. APSIntType AdjustmentType(Adjustment);
  2640. switch (AdjustmentType.testInRange(Int, true)) {
  2641. case APSIntType::RTR_Below:
  2642. return getRange(St, Sym);
  2643. case APSIntType::RTR_Within:
  2644. break;
  2645. case APSIntType::RTR_Above:
  2646. return F.getEmptySet();
  2647. }
  2648. // Special case for Int == Max. This is always false.
  2649. llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  2650. llvm::APSInt Max = AdjustmentType.getMaxValue();
  2651. if (ComparisonVal == Max)
  2652. return F.getEmptySet();
  2653. llvm::APSInt Lower = ComparisonVal - Adjustment;
  2654. llvm::APSInt Upper = Max - Adjustment;
  2655. ++Lower;
  2656. RangeSet SymRange = getRange(St, Sym);
  2657. return F.intersect(SymRange, Lower, Upper);
  2658. }
  2659. ProgramStateRef
  2660. RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
  2661. const llvm::APSInt &Int,
  2662. const llvm::APSInt &Adjustment) {
  2663. RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
  2664. return setRange(St, Sym, New);
  2665. }
  2666. RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
  2667. SymbolRef Sym,
  2668. const llvm::APSInt &Int,
  2669. const llvm::APSInt &Adjustment) {
  2670. // Before we do any real work, see if the value can even show up.
  2671. APSIntType AdjustmentType(Adjustment);
  2672. switch (AdjustmentType.testInRange(Int, true)) {
  2673. case APSIntType::RTR_Below:
  2674. return getRange(St, Sym);
  2675. case APSIntType::RTR_Within:
  2676. break;
  2677. case APSIntType::RTR_Above:
  2678. return F.getEmptySet();
  2679. }
  2680. // Special case for Int == Min. This is always feasible.
  2681. llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  2682. llvm::APSInt Min = AdjustmentType.getMinValue();
  2683. if (ComparisonVal == Min)
  2684. return getRange(St, Sym);
  2685. llvm::APSInt Max = AdjustmentType.getMaxValue();
  2686. llvm::APSInt Lower = ComparisonVal - Adjustment;
  2687. llvm::APSInt Upper = Max - Adjustment;
  2688. RangeSet SymRange = getRange(St, Sym);
  2689. return F.intersect(SymRange, Lower, Upper);
  2690. }
  2691. ProgramStateRef
  2692. RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
  2693. const llvm::APSInt &Int,
  2694. const llvm::APSInt &Adjustment) {
  2695. RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
  2696. return setRange(St, Sym, New);
  2697. }
  2698. RangeSet
  2699. RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
  2700. const llvm::APSInt &Int,
  2701. const llvm::APSInt &Adjustment) {
  2702. // Before we do any real work, see if the value can even show up.
  2703. APSIntType AdjustmentType(Adjustment);
  2704. switch (AdjustmentType.testInRange(Int, true)) {
  2705. case APSIntType::RTR_Below:
  2706. return F.getEmptySet();
  2707. case APSIntType::RTR_Within:
  2708. break;
  2709. case APSIntType::RTR_Above:
  2710. return RS();
  2711. }
  2712. // Special case for Int == Max. This is always feasible.
  2713. llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  2714. llvm::APSInt Max = AdjustmentType.getMaxValue();
  2715. if (ComparisonVal == Max)
  2716. return RS();
  2717. llvm::APSInt Min = AdjustmentType.getMinValue();
  2718. llvm::APSInt Lower = Min - Adjustment;
  2719. llvm::APSInt Upper = ComparisonVal - Adjustment;
  2720. RangeSet Default = RS();
  2721. return F.intersect(Default, Lower, Upper);
  2722. }
  2723. RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
  2724. SymbolRef Sym,
  2725. const llvm::APSInt &Int,
  2726. const llvm::APSInt &Adjustment) {
  2727. return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
  2728. }
  2729. ProgramStateRef
  2730. RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
  2731. const llvm::APSInt &Int,
  2732. const llvm::APSInt &Adjustment) {
  2733. RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
  2734. return setRange(St, Sym, New);
  2735. }
  2736. ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
  2737. ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
  2738. const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
  2739. RangeSet New = getSymGERange(State, Sym, From, Adjustment);
  2740. if (New.isEmpty())
  2741. return nullptr;
  2742. RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
  2743. return setRange(State, Sym, Out);
  2744. }
  2745. ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
  2746. ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
  2747. const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
  2748. RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
  2749. RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
  2750. RangeSet New(F.add(RangeLT, RangeGT));
  2751. return setRange(State, Sym, New);
  2752. }
  2753. //===----------------------------------------------------------------------===//
  2754. // Pretty-printing.
  2755. //===----------------------------------------------------------------------===//
  2756. void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
  2757. const char *NL, unsigned int Space,
  2758. bool IsDot) const {
  2759. printConstraints(Out, State, NL, Space, IsDot);
  2760. printEquivalenceClasses(Out, State, NL, Space, IsDot);
  2761. printDisequalities(Out, State, NL, Space, IsDot);
  2762. }
  2763. void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State,
  2764. SymbolRef Sym) {
  2765. const RangeSet RS = getRange(State, Sym);
  2766. Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:");
  2767. RS.dump(Out);
  2768. }
  2769. static std::string toString(const SymbolRef &Sym) {
  2770. std::string S;
  2771. llvm::raw_string_ostream O(S);
  2772. Sym->dumpToStream(O);
  2773. return O.str();
  2774. }
  2775. void RangeConstraintManager::printConstraints(raw_ostream &Out,
  2776. ProgramStateRef State,
  2777. const char *NL,
  2778. unsigned int Space,
  2779. bool IsDot) const {
  2780. ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  2781. Indent(Out, Space, IsDot) << "\"constraints\": ";
  2782. if (Constraints.isEmpty()) {
  2783. Out << "null," << NL;
  2784. return;
  2785. }
  2786. std::map<std::string, RangeSet> OrderedConstraints;
  2787. for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
  2788. SymbolSet ClassMembers = P.first.getClassMembers(State);
  2789. for (const SymbolRef &ClassMember : ClassMembers) {
  2790. bool insertion_took_place;
  2791. std::tie(std::ignore, insertion_took_place) =
  2792. OrderedConstraints.insert({toString(ClassMember), P.second});
  2793. assert(insertion_took_place &&
  2794. "two symbols should not have the same dump");
  2795. }
  2796. }
  2797. ++Space;
  2798. Out << '[' << NL;
  2799. bool First = true;
  2800. for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
  2801. if (First) {
  2802. First = false;
  2803. } else {
  2804. Out << ',';
  2805. Out << NL;
  2806. }
  2807. Indent(Out, Space, IsDot)
  2808. << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
  2809. P.second.dump(Out);
  2810. Out << "\" }";
  2811. }
  2812. Out << NL;
  2813. --Space;
  2814. Indent(Out, Space, IsDot) << "]," << NL;
  2815. }
  2816. static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
  2817. SymbolSet ClassMembers = Class.getClassMembers(State);
  2818. llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
  2819. ClassMembers.end());
  2820. llvm::sort(ClassMembersSorted,
  2821. [](const SymbolRef &LHS, const SymbolRef &RHS) {
  2822. return toString(LHS) < toString(RHS);
  2823. });
  2824. bool FirstMember = true;
  2825. std::string Str;
  2826. llvm::raw_string_ostream Out(Str);
  2827. Out << "[ ";
  2828. for (SymbolRef ClassMember : ClassMembersSorted) {
  2829. if (FirstMember)
  2830. FirstMember = false;
  2831. else
  2832. Out << ", ";
  2833. Out << "\"" << ClassMember << "\"";
  2834. }
  2835. Out << " ]";
  2836. return Out.str();
  2837. }
  2838. void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
  2839. ProgramStateRef State,
  2840. const char *NL,
  2841. unsigned int Space,
  2842. bool IsDot) const {
  2843. ClassMembersTy Members = State->get<ClassMembers>();
  2844. Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
  2845. if (Members.isEmpty()) {
  2846. Out << "null," << NL;
  2847. return;
  2848. }
  2849. std::set<std::string> MembersStr;
  2850. for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
  2851. MembersStr.insert(toString(State, ClassToSymbolSet.first));
  2852. ++Space;
  2853. Out << '[' << NL;
  2854. bool FirstClass = true;
  2855. for (const std::string &Str : MembersStr) {
  2856. if (FirstClass) {
  2857. FirstClass = false;
  2858. } else {
  2859. Out << ',';
  2860. Out << NL;
  2861. }
  2862. Indent(Out, Space, IsDot);
  2863. Out << Str;
  2864. }
  2865. Out << NL;
  2866. --Space;
  2867. Indent(Out, Space, IsDot) << "]," << NL;
  2868. }
  2869. void RangeConstraintManager::printDisequalities(raw_ostream &Out,
  2870. ProgramStateRef State,
  2871. const char *NL,
  2872. unsigned int Space,
  2873. bool IsDot) const {
  2874. DisequalityMapTy Disequalities = State->get<DisequalityMap>();
  2875. Indent(Out, Space, IsDot) << "\"disequality_info\": ";
  2876. if (Disequalities.isEmpty()) {
  2877. Out << "null," << NL;
  2878. return;
  2879. }
  2880. // Transform the disequality info to an ordered map of
  2881. // [string -> (ordered set of strings)]
  2882. using EqClassesStrTy = std::set<std::string>;
  2883. using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
  2884. DisequalityInfoStrTy DisequalityInfoStr;
  2885. for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
  2886. EquivalenceClass Class = ClassToDisEqSet.first;
  2887. ClassSet DisequalClasses = ClassToDisEqSet.second;
  2888. EqClassesStrTy MembersStr;
  2889. for (EquivalenceClass DisEqClass : DisequalClasses)
  2890. MembersStr.insert(toString(State, DisEqClass));
  2891. DisequalityInfoStr.insert({toString(State, Class), MembersStr});
  2892. }
  2893. ++Space;
  2894. Out << '[' << NL;
  2895. bool FirstClass = true;
  2896. for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
  2897. DisequalityInfoStr) {
  2898. const std::string &Class = ClassToDisEqSet.first;
  2899. if (FirstClass) {
  2900. FirstClass = false;
  2901. } else {
  2902. Out << ',';
  2903. Out << NL;
  2904. }
  2905. Indent(Out, Space, IsDot) << "{" << NL;
  2906. unsigned int DisEqSpace = Space + 1;
  2907. Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
  2908. Out << Class;
  2909. const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
  2910. if (!DisequalClasses.empty()) {
  2911. Out << "," << NL;
  2912. Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
  2913. unsigned int DisEqClassSpace = DisEqSpace + 1;
  2914. Indent(Out, DisEqClassSpace, IsDot);
  2915. bool FirstDisEqClass = true;
  2916. for (const std::string &DisEqClass : DisequalClasses) {
  2917. if (FirstDisEqClass) {
  2918. FirstDisEqClass = false;
  2919. } else {
  2920. Out << ',' << NL;
  2921. Indent(Out, DisEqClassSpace, IsDot);
  2922. }
  2923. Out << DisEqClass;
  2924. }
  2925. Out << "]" << NL;
  2926. }
  2927. Indent(Out, Space, IsDot) << "}";
  2928. }
  2929. Out << NL;
  2930. --Space;
  2931. Indent(Out, Space, IsDot) << "]," << NL;
  2932. }