1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436 |
- //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines RangeConstraintManager, a class that tracks simple
- // equality and inequality constraints on symbolic values of ProgramState.
- //
- //===----------------------------------------------------------------------===//
- #include "clang/Basic/JsonSupport.h"
- #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
- #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
- #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
- #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
- #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
- #include "llvm/ADT/FoldingSet.h"
- #include "llvm/ADT/ImmutableSet.h"
- #include "llvm/ADT/STLExtras.h"
- #include "llvm/ADT/SmallSet.h"
- #include "llvm/ADT/StringExtras.h"
- #include "llvm/Support/Compiler.h"
- #include "llvm/Support/raw_ostream.h"
- #include <algorithm>
- #include <iterator>
- #include <optional>
- using namespace clang;
- using namespace ento;
- // This class can be extended with other tables which will help to reason
- // about ranges more precisely.
- class OperatorRelationsTable {
- static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
- BO_GE < BO_EQ && BO_EQ < BO_NE,
- "This class relies on operators order. Rework it otherwise.");
- public:
- enum TriStateKind {
- False = 0,
- True,
- Unknown,
- };
- private:
- // CmpOpTable holds states which represent the corresponding range for
- // branching an exploded graph. We can reason about the branch if there is
- // a previously known fact of the existence of a comparison expression with
- // operands used in the current expression.
- // E.g. assuming (x < y) is true that means (x != y) is surely true.
- // if (x previous_operation y) // < | != | >
- // if (x operation y) // != | > | <
- // tristate // True | Unknown | False
- //
- // CmpOpTable represents next:
- // __|< |> |<=|>=|==|!=|UnknownX2|
- // < |1 |0 |* |0 |0 |* |1 |
- // > |0 |1 |0 |* |0 |* |1 |
- // <=|1 |0 |1 |* |1 |* |0 |
- // >=|0 |1 |* |1 |1 |* |0 |
- // ==|0 |0 |* |* |1 |0 |1 |
- // !=|1 |1 |* |* |0 |1 |0 |
- //
- // Columns stands for a previous operator.
- // Rows stands for a current operator.
- // Each row has exactly two `Unknown` cases.
- // UnknownX2 means that both `Unknown` previous operators are met in code,
- // and there is a special column for that, for example:
- // if (x >= y)
- // if (x != y)
- // if (x <= y)
- // False only
- static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
- const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
- // < > <= >= == != UnknownX2
- {True, False, Unknown, False, False, Unknown, True}, // <
- {False, True, False, Unknown, False, Unknown, True}, // >
- {True, False, True, Unknown, True, Unknown, False}, // <=
- {False, True, Unknown, True, True, Unknown, False}, // >=
- {False, False, Unknown, Unknown, True, False, True}, // ==
- {True, True, Unknown, Unknown, False, True, False}, // !=
- };
- static size_t getIndexFromOp(BinaryOperatorKind OP) {
- return static_cast<size_t>(OP - BO_LT);
- }
- public:
- constexpr size_t getCmpOpCount() const { return CmpOpCount; }
- static BinaryOperatorKind getOpFromIndex(size_t Index) {
- return static_cast<BinaryOperatorKind>(Index + BO_LT);
- }
- TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP,
- BinaryOperatorKind QueriedOP) const {
- return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
- }
- TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const {
- return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
- }
- };
- //===----------------------------------------------------------------------===//
- // RangeSet implementation
- //===----------------------------------------------------------------------===//
- RangeSet::ContainerType RangeSet::Factory::EmptySet{};
- RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) {
- ContainerType Result;
- Result.reserve(LHS.size() + RHS.size());
- std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
- std::back_inserter(Result));
- return makePersistent(std::move(Result));
- }
- RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) {
- ContainerType Result;
- Result.reserve(Original.size() + 1);
- const_iterator Lower = llvm::lower_bound(Original, Element);
- Result.insert(Result.end(), Original.begin(), Lower);
- Result.push_back(Element);
- Result.insert(Result.end(), Lower, Original.end());
- return makePersistent(std::move(Result));
- }
- RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
- return add(Original, Range(Point));
- }
- RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) {
- ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
- return makePersistent(std::move(Result));
- }
- RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) {
- ContainerType Result;
- Result.push_back(R);
- Result = unite(*Original.Impl, Result);
- return makePersistent(std::move(Result));
- }
- RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
- return unite(Original, Range(ValueFactory.getValue(Point)));
- }
- RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
- llvm::APSInt To) {
- return unite(Original,
- Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
- }
- template <typename T>
- void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
- std::swap(First, Second);
- std::swap(FirstEnd, SecondEnd);
- }
- RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
- const ContainerType &RHS) {
- if (LHS.empty())
- return RHS;
- if (RHS.empty())
- return LHS;
- using llvm::APSInt;
- using iterator = ContainerType::const_iterator;
- iterator First = LHS.begin();
- iterator FirstEnd = LHS.end();
- iterator Second = RHS.begin();
- iterator SecondEnd = RHS.end();
- APSIntType Ty = APSIntType(First->From());
- const APSInt Min = Ty.getMinValue();
- // Handle a corner case first when both range sets start from MIN.
- // This helps to avoid complicated conditions below. Specifically, this
- // particular check for `MIN` is not needed in the loop below every time
- // when we do `Second->From() - One` operation.
- if (Min == First->From() && Min == Second->From()) {
- if (First->To() > Second->To()) {
- // [ First ]--->
- // [ Second ]----->
- // MIN^
- // The Second range is entirely inside the First one.
- // Check if Second is the last in its RangeSet.
- if (++Second == SecondEnd)
- // [ First ]--[ First + 1 ]--->
- // [ Second ]--------------------->
- // MIN^
- // The Union is equal to First's RangeSet.
- return LHS;
- } else {
- // case 1: [ First ]----->
- // case 2: [ First ]--->
- // [ Second ]--->
- // MIN^
- // The First range is entirely inside or equal to the Second one.
- // Check if First is the last in its RangeSet.
- if (++First == FirstEnd)
- // [ First ]----------------------->
- // [ Second ]--[ Second + 1 ]---->
- // MIN^
- // The Union is equal to Second's RangeSet.
- return RHS;
- }
- }
- const APSInt One = Ty.getValue(1);
- ContainerType Result;
- // This is called when there are no ranges left in one of the ranges.
- // Append the rest of the ranges from another range set to the Result
- // and return with that.
- const auto AppendTheRest = [&Result](iterator I, iterator E) {
- Result.append(I, E);
- return Result;
- };
- while (true) {
- // We want to keep the following invariant at all times:
- // ---[ First ------>
- // -----[ Second --->
- if (First->From() > Second->From())
- swapIterators(First, FirstEnd, Second, SecondEnd);
- // The Union definitely starts with First->From().
- // ----------[ First ------>
- // ------------[ Second --->
- // ----------[ Union ------>
- // UnionStart^
- const llvm::APSInt &UnionStart = First->From();
- // Loop where the invariant holds.
- while (true) {
- // Skip all enclosed ranges.
- // ---[ First ]--->
- // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
- while (First->To() >= Second->To()) {
- // Check if Second is the last in its RangeSet.
- if (++Second == SecondEnd) {
- // Append the Union.
- // ---[ Union ]--->
- // -----[ Second ]----->
- // --------[ First ]--->
- // UnionEnd^
- Result.emplace_back(UnionStart, First->To());
- // ---[ Union ]----------------->
- // --------------[ First + 1]--->
- // Append all remaining ranges from the First's RangeSet.
- return AppendTheRest(++First, FirstEnd);
- }
- }
- // Check if First and Second are disjoint. It means that we find
- // the end of the Union. Exit the loop and append the Union.
- // ---[ First ]=------------->
- // ------------=[ Second ]--->
- // ----MinusOne^
- if (First->To() < Second->From() - One)
- break;
- // First is entirely inside the Union. Go next.
- // ---[ Union ----------->
- // ---- [ First ]-------->
- // -------[ Second ]----->
- // Check if First is the last in its RangeSet.
- if (++First == FirstEnd) {
- // Append the Union.
- // ---[ Union ]--->
- // -----[ First ]------->
- // --------[ Second ]--->
- // UnionEnd^
- Result.emplace_back(UnionStart, Second->To());
- // ---[ Union ]------------------>
- // --------------[ Second + 1]--->
- // Append all remaining ranges from the Second's RangeSet.
- return AppendTheRest(++Second, SecondEnd);
- }
- // We know that we are at one of the two cases:
- // case 1: --[ First ]--------->
- // case 2: ----[ First ]------->
- // --------[ Second ]---------->
- // In both cases First starts after Second->From().
- // Make sure that the loop invariant holds.
- swapIterators(First, FirstEnd, Second, SecondEnd);
- }
- // Here First and Second are disjoint.
- // Append the Union.
- // ---[ Union ]--------------->
- // -----------------[ Second ]--->
- // ------[ First ]--------------->
- // UnionEnd^
- Result.emplace_back(UnionStart, First->To());
- // Check if First is the last in its RangeSet.
- if (++First == FirstEnd)
- // ---[ Union ]--------------->
- // --------------[ Second ]--->
- // Append all remaining ranges from the Second's RangeSet.
- return AppendTheRest(Second, SecondEnd);
- }
- llvm_unreachable("Normally, we should not reach here");
- }
- RangeSet RangeSet::Factory::getRangeSet(Range From) {
- ContainerType Result;
- Result.push_back(From);
- return makePersistent(std::move(Result));
- }
- RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
- llvm::FoldingSetNodeID ID;
- void *InsertPos;
- From.Profile(ID);
- ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
- if (!Result) {
- // It is cheaper to fully construct the resulting range on stack
- // and move it to the freshly allocated buffer if we don't have
- // a set like this already.
- Result = construct(std::move(From));
- Cache.InsertNode(Result, InsertPos);
- }
- return Result;
- }
- RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
- void *Buffer = Arena.Allocate();
- return new (Buffer) ContainerType(std::move(From));
- }
- const llvm::APSInt &RangeSet::getMinValue() const {
- assert(!isEmpty());
- return begin()->From();
- }
- const llvm::APSInt &RangeSet::getMaxValue() const {
- assert(!isEmpty());
- return std::prev(end())->To();
- }
- bool clang::ento::RangeSet::isUnsigned() const {
- assert(!isEmpty());
- return begin()->From().isUnsigned();
- }
- uint32_t clang::ento::RangeSet::getBitWidth() const {
- assert(!isEmpty());
- return begin()->From().getBitWidth();
- }
- APSIntType clang::ento::RangeSet::getAPSIntType() const {
- assert(!isEmpty());
- return APSIntType(begin()->From());
- }
- bool RangeSet::containsImpl(llvm::APSInt &Point) const {
- if (isEmpty() || !pin(Point))
- return false;
- Range Dummy(Point);
- const_iterator It = llvm::upper_bound(*this, Dummy);
- if (It == begin())
- return false;
- return std::prev(It)->Includes(Point);
- }
- bool RangeSet::pin(llvm::APSInt &Point) const {
- APSIntType Type(getMinValue());
- if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
- return false;
- Type.apply(Point);
- return true;
- }
- bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
- // This function has nine cases, the cartesian product of range-testing
- // both the upper and lower bounds against the symbol's type.
- // Each case requires a different pinning operation.
- // The function returns false if the described range is entirely outside
- // the range of values for the associated symbol.
- APSIntType Type(getMinValue());
- APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
- APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);
- switch (LowerTest) {
- case APSIntType::RTR_Below:
- switch (UpperTest) {
- case APSIntType::RTR_Below:
- // The entire range is outside the symbol's set of possible values.
- // If this is a conventionally-ordered range, the state is infeasible.
- if (Lower <= Upper)
- return false;
- // However, if the range wraps around, it spans all possible values.
- Lower = Type.getMinValue();
- Upper = Type.getMaxValue();
- break;
- case APSIntType::RTR_Within:
- // The range starts below what's possible but ends within it. Pin.
- Lower = Type.getMinValue();
- Type.apply(Upper);
- break;
- case APSIntType::RTR_Above:
- // The range spans all possible values for the symbol. Pin.
- Lower = Type.getMinValue();
- Upper = Type.getMaxValue();
- break;
- }
- break;
- case APSIntType::RTR_Within:
- switch (UpperTest) {
- case APSIntType::RTR_Below:
- // The range wraps around, but all lower values are not possible.
- Type.apply(Lower);
- Upper = Type.getMaxValue();
- break;
- case APSIntType::RTR_Within:
- // The range may or may not wrap around, but both limits are valid.
- Type.apply(Lower);
- Type.apply(Upper);
- break;
- case APSIntType::RTR_Above:
- // The range starts within what's possible but ends above it. Pin.
- Type.apply(Lower);
- Upper = Type.getMaxValue();
- break;
- }
- break;
- case APSIntType::RTR_Above:
- switch (UpperTest) {
- case APSIntType::RTR_Below:
- // The range wraps but is outside the symbol's set of possible values.
- return false;
- case APSIntType::RTR_Within:
- // The range starts above what's possible but ends within it (wrap).
- Lower = Type.getMinValue();
- Type.apply(Upper);
- break;
- case APSIntType::RTR_Above:
- // The entire range is outside the symbol's set of possible values.
- // If this is a conventionally-ordered range, the state is infeasible.
- if (Lower <= Upper)
- return false;
- // However, if the range wraps around, it spans all possible values.
- Lower = Type.getMinValue();
- Upper = Type.getMaxValue();
- break;
- }
- break;
- }
- return true;
- }
- RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower,
- llvm::APSInt Upper) {
- if (What.isEmpty() || !What.pin(Lower, Upper))
- return getEmptySet();
- ContainerType DummyContainer;
- if (Lower <= Upper) {
- // [Lower, Upper] is a regular range.
- //
- // Shortcut: check that there is even a possibility of the intersection
- // by checking the two following situations:
- //
- // <---[ What ]---[------]------>
- // Lower Upper
- // -or-
- // <----[------]----[ What ]---->
- // Lower Upper
- if (What.getMaxValue() < Lower || Upper < What.getMinValue())
- return getEmptySet();
- DummyContainer.push_back(
- Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
- } else {
- // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
- //
- // Shortcut: check that there is even a possibility of the intersection
- // by checking the following situation:
- //
- // <------]---[ What ]---[------>
- // Upper Lower
- if (What.getMaxValue() < Lower && Upper < What.getMinValue())
- return getEmptySet();
- DummyContainer.push_back(
- Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
- DummyContainer.push_back(
- Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
- }
- return intersect(*What.Impl, DummyContainer);
- }
- RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
- const RangeSet::ContainerType &RHS) {
- ContainerType Result;
- Result.reserve(std::max(LHS.size(), RHS.size()));
- const_iterator First = LHS.begin(), Second = RHS.begin(),
- FirstEnd = LHS.end(), SecondEnd = RHS.end();
- // If we ran out of ranges in one set, but not in the other,
- // it means that those elements are definitely not in the
- // intersection.
- while (First != FirstEnd && Second != SecondEnd) {
- // We want to keep the following invariant at all times:
- //
- // ----[ First ---------------------->
- // --------[ Second ----------------->
- if (Second->From() < First->From())
- swapIterators(First, FirstEnd, Second, SecondEnd);
- // Loop where the invariant holds:
- do {
- // Check for the following situation:
- //
- // ----[ First ]--------------------->
- // ---------------[ Second ]--------->
- //
- // which means that...
- if (Second->From() > First->To()) {
- // ...First is not in the intersection.
- //
- // We should move on to the next range after First and break out of the
- // loop because the invariant might not be true.
- ++First;
- break;
- }
- // We have a guaranteed intersection at this point!
- // And this is the current situation:
- //
- // ----[ First ]----------------->
- // -------[ Second ------------------>
- //
- // Additionally, it definitely starts with Second->From().
- const llvm::APSInt &IntersectionStart = Second->From();
- // It is important to know which of the two ranges' ends
- // is greater. That "longer" range might have some other
- // intersections, while the "shorter" range might not.
- if (Second->To() > First->To()) {
- // Here we make a decision to keep First as the "longer"
- // range.
- swapIterators(First, FirstEnd, Second, SecondEnd);
- }
- // At this point, we have the following situation:
- //
- // ---- First ]-------------------->
- // ---- Second ]--[ Second+1 ---------->
- //
- // We don't know the relationship between First->From and
- // Second->From and we don't know whether Second+1 intersects
- // with First.
- //
- // However, we know that [IntersectionStart, Second->To] is
- // a part of the intersection...
- Result.push_back(Range(IntersectionStart, Second->To()));
- ++Second;
- // ...and that the invariant will hold for a valid Second+1
- // because First->From <= Second->To < (Second+1)->From.
- } while (Second != SecondEnd);
- }
- if (Result.empty())
- return getEmptySet();
- return makePersistent(std::move(Result));
- }
- RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) {
- // Shortcut: let's see if the intersection is even possible.
- if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
- RHS.getMaxValue() < LHS.getMinValue())
- return getEmptySet();
- return intersect(*LHS.Impl, *RHS.Impl);
- }
- RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) {
- if (LHS.containsImpl(Point))
- return getRangeSet(ValueFactory.getValue(Point));
- return getEmptySet();
- }
- RangeSet RangeSet::Factory::negate(RangeSet What) {
- if (What.isEmpty())
- return getEmptySet();
- const llvm::APSInt SampleValue = What.getMinValue();
- const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
- const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);
- ContainerType Result;
- Result.reserve(What.size() + (SampleValue == MIN));
- // Handle a special case for MIN value.
- const_iterator It = What.begin();
- const_iterator End = What.end();
- const llvm::APSInt &From = It->From();
- const llvm::APSInt &To = It->To();
- if (From == MIN) {
- // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
- if (To == MAX) {
- return What;
- }
- const_iterator Last = std::prev(End);
- // Try to find and unite the following ranges:
- // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
- if (Last->To() == MAX) {
- // It means that in the original range we have ranges
- // [MIN, A], ... , [B, MAX]
- // And the result should be [MIN, -B], ..., [-A, MAX]
- Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
- // We already negated Last, so we can skip it.
- End = Last;
- } else {
- // Add a separate range for the lowest value.
- Result.emplace_back(MIN, MIN);
- }
- // Skip adding the second range in case when [From, To] are [MIN, MIN].
- if (To != MIN) {
- Result.emplace_back(ValueFactory.getValue(-To), MAX);
- }
- // Skip the first range in the loop.
- ++It;
- }
- // Negate all other ranges.
- for (; It != End; ++It) {
- // Negate int values.
- const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
- const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());
- // Add a negated range.
- Result.emplace_back(NewFrom, NewTo);
- }
- llvm::sort(Result);
- return makePersistent(std::move(Result));
- }
- // Convert range set to the given integral type using truncation and promotion.
- // This works similar to APSIntType::apply function but for the range set.
- RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) {
- // Set is empty or NOOP (aka cast to the same type).
- if (What.isEmpty() || What.getAPSIntType() == Ty)
- return What;
- const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
- const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
- const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
- if (IsTruncation)
- return makePersistent(truncateTo(What, Ty));
- // Here we handle 2 cases:
- // - IsConversion && !IsPromotion.
- // In this case we handle changing a sign with same bitwidth: char -> uchar,
- // uint -> int. Here we convert negatives to positives and positives which
- // is out of range to negatives. We use convertTo function for that.
- // - IsConversion && IsPromotion && !What.isUnsigned().
- // In this case we handle changing a sign from signeds to unsigneds with
- // higher bitwidth: char -> uint, int-> uint64. The point is that we also
- // need convert negatives to positives and use convertTo function as well.
- // For example, we don't need such a convertion when converting unsigned to
- // signed with higher bitwidth, because all the values of unsigned is valid
- // for the such signed.
- if (IsConversion && (!IsPromotion || !What.isUnsigned()))
- return makePersistent(convertTo(What, Ty));
- assert(IsPromotion && "Only promotion operation from unsigneds left.");
- return makePersistent(promoteTo(What, Ty));
- }
- RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) {
- assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
- return castTo(What, ValueFactory.getAPSIntType(T));
- }
- RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
- APSIntType Ty) {
- using llvm::APInt;
- using llvm::APSInt;
- ContainerType Result;
- ContainerType Dummy;
- // CastRangeSize is an amount of all possible values of cast type.
- // Example: `char` has 256 values; `short` has 65536 values.
- // But in fact we use `amount of values` - 1, because
- // we can't keep `amount of values of UINT64` inside uint64_t.
- // E.g. 256 is an amount of all possible values of `char` and we can't keep
- // it inside `char`.
- // And it's OK, it's enough to do correct calculations.
- uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
- for (const Range &R : What) {
- // Get bounds of the given range.
- APSInt FromInt = R.From();
- APSInt ToInt = R.To();
- // CurrentRangeSize is an amount of all possible values of the current
- // range minus one.
- uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
- // This is an optimization for a specific case when this Range covers
- // the whole range of the target type.
- Dummy.clear();
- if (CurrentRangeSize >= CastRangeSize) {
- Dummy.emplace_back(ValueFactory.getMinValue(Ty),
- ValueFactory.getMaxValue(Ty));
- Result = std::move(Dummy);
- break;
- }
- // Cast the bounds.
- Ty.apply(FromInt);
- Ty.apply(ToInt);
- const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
- const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
- if (FromInt > ToInt) {
- Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
- Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
- } else
- Dummy.emplace_back(PersistentFrom, PersistentTo);
- // Every range retrieved after truncation potentialy has garbage values.
- // So, we have to unite every next range with the previouses.
- Result = unite(Result, Dummy);
- }
- return Result;
- }
- // Divide the convertion into two phases (presented as loops here).
- // First phase(loop) works when casted values go in ascending order.
- // E.g. char{1,3,5,127} -> uint{1,3,5,127}
- // Interrupt the first phase and go to second one when casted values start
- // go in descending order. That means that we crossed over the middle of
- // the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
- // For instance:
- // 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
- // Here we put {1,3,5} to one array and {-128, -1} to another
- // 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,3}
- // Here we put {128,129,255} to one array and {0,1,3} to another.
- // After that we unite both arrays.
- // NOTE: We don't just concatenate the arrays, because they may have
- // adjacent ranges, e.g.:
- // 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
- // unite -> uchar(0, 255)
- // 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
- // unite -> uchar(-2, 1)
- RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
- APSIntType Ty) {
- using llvm::APInt;
- using llvm::APSInt;
- using Bounds = std::pair<const APSInt &, const APSInt &>;
- ContainerType AscendArray;
- ContainerType DescendArray;
- auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
- // Get bounds of the given range.
- APSInt FromInt = R.From();
- APSInt ToInt = R.To();
- // Cast the bounds.
- Ty.apply(FromInt);
- Ty.apply(ToInt);
- return {VF.getValue(FromInt), VF.getValue(ToInt)};
- };
- // Phase 1. Fill the first array.
- APSInt LastConvertedInt = Ty.getMinValue();
- const auto *It = What.begin();
- const auto *E = What.end();
- while (It != E) {
- Bounds NewBounds = CastRange(*(It++));
- // If values stop going acsending order, go to the second phase(loop).
- if (NewBounds.first < LastConvertedInt) {
- DescendArray.emplace_back(NewBounds.first, NewBounds.second);
- break;
- }
- // If the range contains a midpoint, then split the range.
- // E.g. char(-5, 5) -> uchar(251, 5)
- // Here we shall add a range (251, 255) to the first array and (0, 5) to the
- // second one.
- if (NewBounds.first > NewBounds.second) {
- DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
- AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
- } else
- // Values are going acsending order.
- AscendArray.emplace_back(NewBounds.first, NewBounds.second);
- LastConvertedInt = NewBounds.first;
- }
- // Phase 2. Fill the second array.
- while (It != E) {
- Bounds NewBounds = CastRange(*(It++));
- DescendArray.emplace_back(NewBounds.first, NewBounds.second);
- }
- // Unite both arrays.
- return unite(AscendArray, DescendArray);
- }
- /// Promotion from unsigneds to signeds/unsigneds left.
- RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
- APSIntType Ty) {
- ContainerType Result;
- // We definitely know the size of the result set.
- Result.reserve(What.size());
- // Each unsigned value fits every larger type without any changes,
- // whether the larger type is signed or unsigned. So just promote and push
- // back each range one by one.
- for (const Range &R : What) {
- // Get bounds of the given range.
- llvm::APSInt FromInt = R.From();
- llvm::APSInt ToInt = R.To();
- // Cast the bounds.
- Ty.apply(FromInt);
- Ty.apply(ToInt);
- Result.emplace_back(ValueFactory.getValue(FromInt),
- ValueFactory.getValue(ToInt));
- }
- return Result;
- }
- RangeSet RangeSet::Factory::deletePoint(RangeSet From,
- const llvm::APSInt &Point) {
- if (!From.contains(Point))
- return From;
- llvm::APSInt Upper = Point;
- llvm::APSInt Lower = Point;
- ++Upper;
- --Lower;
- // Notice that the lower bound is greater than the upper bound.
- return intersect(From, Upper, Lower);
- }
- LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
- OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
- }
- LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
- LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
- OS << "{ ";
- llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
- OS << " }";
- }
- LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
- REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)
- namespace {
- class EquivalenceClass;
- } // end anonymous namespace
- REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
- REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
- REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)
- REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
- REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
- namespace {
- /// This class encapsulates a set of symbols equal to each other.
- ///
- /// The main idea of the approach requiring such classes is in narrowing
- /// and sharing constraints between symbols within the class. Also we can
- /// conclude that there is no practical need in storing constraints for
- /// every member of the class separately.
- ///
- /// Main terminology:
- ///
- /// * "Equivalence class" is an object of this class, which can be efficiently
- /// compared to other classes. It represents the whole class without
- /// storing the actual in it. The members of the class however can be
- /// retrieved from the state.
- ///
- /// * "Class members" are the symbols corresponding to the class. This means
- /// that A == B for every member symbols A and B from the class. Members of
- /// each class are stored in the state.
- ///
- /// * "Trivial class" is a class that has and ever had only one same symbol.
- ///
- /// * "Merge operation" merges two classes into one. It is the main operation
- /// to produce non-trivial classes.
- /// If, at some point, we can assume that two symbols from two distinct
- /// classes are equal, we can merge these classes.
- class EquivalenceClass : public llvm::FoldingSetNode {
- public:
- /// Find equivalence class for the given symbol in the given state.
- [[nodiscard]] static inline EquivalenceClass find(ProgramStateRef State,
- SymbolRef Sym);
- /// Merge classes for the given symbols and return a new state.
- [[nodiscard]] static inline ProgramStateRef merge(RangeSet::Factory &F,
- ProgramStateRef State,
- SymbolRef First,
- SymbolRef Second);
- // Merge this class with the given class and return a new state.
- [[nodiscard]] inline ProgramStateRef
- merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);
- /// Return a set of class members for the given state.
- [[nodiscard]] inline SymbolSet getClassMembers(ProgramStateRef State) const;
- /// Return true if the current class is trivial in the given state.
- /// A class is trivial if and only if there is not any member relations stored
- /// to it in State/ClassMembers.
- /// An equivalence class with one member might seem as it does not hold any
- /// meaningful information, i.e. that is a tautology. However, during the
- /// removal of dead symbols we do not remove classes with one member for
- /// resource and performance reasons. Consequently, a class with one member is
- /// not necessarily trivial. It could happen that we have a class with two
- /// members and then during the removal of dead symbols we remove one of its
- /// members. In this case, the class is still non-trivial (it still has the
- /// mappings in ClassMembers), even though it has only one member.
- [[nodiscard]] inline bool isTrivial(ProgramStateRef State) const;
- /// Return true if the current class is trivial and its only member is dead.
- [[nodiscard]] inline bool isTriviallyDead(ProgramStateRef State,
- SymbolReaper &Reaper) const;
- [[nodiscard]] static inline ProgramStateRef
- markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
- SymbolRef Second);
- [[nodiscard]] static inline ProgramStateRef
- markDisequal(RangeSet::Factory &F, ProgramStateRef State,
- EquivalenceClass First, EquivalenceClass Second);
- [[nodiscard]] inline ProgramStateRef
- markDisequal(RangeSet::Factory &F, ProgramStateRef State,
- EquivalenceClass Other) const;
- [[nodiscard]] static inline ClassSet getDisequalClasses(ProgramStateRef State,
- SymbolRef Sym);
- [[nodiscard]] inline ClassSet getDisequalClasses(ProgramStateRef State) const;
- [[nodiscard]] inline ClassSet
- getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;
- [[nodiscard]] static inline std::optional<bool>
- areEqual(ProgramStateRef State, EquivalenceClass First,
- EquivalenceClass Second);
- [[nodiscard]] static inline std::optional<bool>
- areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
- /// Remove one member from the class.
- [[nodiscard]] ProgramStateRef removeMember(ProgramStateRef State,
- const SymbolRef Old);
- /// Iterate over all symbols and try to simplify them.
- [[nodiscard]] static inline ProgramStateRef simplify(SValBuilder &SVB,
- RangeSet::Factory &F,
- ProgramStateRef State,
- EquivalenceClass Class);
- void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
- LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
- dumpToStream(State, llvm::errs());
- }
- /// Check equivalence data for consistency.
- [[nodiscard]] LLVM_ATTRIBUTE_UNUSED static bool
- isClassDataConsistent(ProgramStateRef State);
- [[nodiscard]] QualType getType() const {
- return getRepresentativeSymbol()->getType();
- }
- EquivalenceClass() = delete;
- EquivalenceClass(const EquivalenceClass &) = default;
- EquivalenceClass &operator=(const EquivalenceClass &) = delete;
- EquivalenceClass(EquivalenceClass &&) = default;
- EquivalenceClass &operator=(EquivalenceClass &&) = delete;
- bool operator==(const EquivalenceClass &Other) const {
- return ID == Other.ID;
- }
- bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
- bool operator!=(const EquivalenceClass &Other) const {
- return !operator==(Other);
- }
- static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
- ID.AddInteger(CID);
- }
- void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }
- private:
- /* implicit */ EquivalenceClass(SymbolRef Sym)
- : ID(reinterpret_cast<uintptr_t>(Sym)) {}
- /// This function is intended to be used ONLY within the class.
- /// The fact that ID is a pointer to a symbol is an implementation detail
- /// and should stay that way.
- /// In the current implementation, we use it to retrieve the only member
- /// of the trivial class.
- SymbolRef getRepresentativeSymbol() const {
- return reinterpret_cast<SymbolRef>(ID);
- }
- static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);
- inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
- SymbolSet Members, EquivalenceClass Other,
- SymbolSet OtherMembers);
- static inline bool
- addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
- RangeSet::Factory &F, ProgramStateRef State,
- EquivalenceClass First, EquivalenceClass Second);
- /// This is a unique identifier of the class.
- uintptr_t ID;
- };
- //===----------------------------------------------------------------------===//
- // Constraint functions
- //===----------------------------------------------------------------------===//
- [[nodiscard]] LLVM_ATTRIBUTE_UNUSED bool
- areFeasible(ConstraintRangeTy Constraints) {
- return llvm::none_of(
- Constraints,
- [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
- return ClassConstraint.second.isEmpty();
- });
- }
- [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
- EquivalenceClass Class) {
- return State->get<ConstraintRange>(Class);
- }
- [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
- SymbolRef Sym) {
- return getConstraint(State, EquivalenceClass::find(State, Sym));
- }
- [[nodiscard]] ProgramStateRef setConstraint(ProgramStateRef State,
- EquivalenceClass Class,
- RangeSet Constraint) {
- return State->set<ConstraintRange>(Class, Constraint);
- }
- [[nodiscard]] ProgramStateRef setConstraints(ProgramStateRef State,
- ConstraintRangeTy Constraints) {
- return State->set<ConstraintRange>(Constraints);
- }
- //===----------------------------------------------------------------------===//
- // Equality/diseqiality abstraction
- //===----------------------------------------------------------------------===//
- /// A small helper function for detecting symbolic (dis)equality.
- ///
- /// Equality check can have different forms (like a == b or a - b) and this
- /// class encapsulates those away if the only thing the user wants to check -
- /// whether it's equality/diseqiality or not.
- ///
- /// \returns true if assuming this Sym to be true means equality of operands
- /// false if it means disequality of operands
- /// None otherwise
- std::optional<bool> meansEquality(const SymSymExpr *Sym) {
- switch (Sym->getOpcode()) {
- case BO_Sub:
- // This case is: A - B != 0 -> disequality check.
- return false;
- case BO_EQ:
- // This case is: A == B != 0 -> equality check.
- return true;
- case BO_NE:
- // This case is: A != B != 0 -> diseqiality check.
- return false;
- default:
- return std::nullopt;
- }
- }
- //===----------------------------------------------------------------------===//
- // Intersection functions
- //===----------------------------------------------------------------------===//
- template <class SecondTy, class... RestTy>
- [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
- SecondTy Second, RestTy... Tail);
- template <class... RangeTy> struct IntersectionTraits;
- template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
- // Found RangeSet, no need to check any further
- using Type = RangeSet;
- };
- template <> struct IntersectionTraits<> {
- // We ran out of types, and we didn't find any RangeSet, so the result should
- // be optional.
- using Type = std::optional<RangeSet>;
- };
- template <class OptionalOrPointer, class... TailTy>
- struct IntersectionTraits<OptionalOrPointer, TailTy...> {
- // If current type is Optional or a raw pointer, we should keep looking.
- using Type = typename IntersectionTraits<TailTy...>::Type;
- };
- template <class EndTy>
- [[nodiscard]] inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
- // If the list contains only RangeSet or std::optional<RangeSet>, simply
- // return that range set.
- return End;
- }
- [[nodiscard]] LLVM_ATTRIBUTE_UNUSED inline std::optional<RangeSet>
- intersect(RangeSet::Factory &F, const RangeSet *End) {
- // This is an extraneous conversion from a raw pointer into
- // std::optional<RangeSet>
- if (End) {
- return *End;
- }
- return std::nullopt;
- }
- template <class... RestTy>
- [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
- RangeSet Second, RestTy... Tail) {
- // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
- // of the function and can be sure that the result is RangeSet.
- return intersect(F, F.intersect(Head, Second), Tail...);
- }
- template <class SecondTy, class... RestTy>
- [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
- SecondTy Second, RestTy... Tail) {
- if (Second) {
- // Here we call the <RangeSet,RangeSet,...> version of the function...
- return intersect(F, Head, *Second, Tail...);
- }
- // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
- // means that the result is definitely RangeSet.
- return intersect(F, Head, Tail...);
- }
- /// Main generic intersect function.
- /// It intersects all of the given range sets. If some of the given arguments
- /// don't hold a range set (nullptr or std::nullopt), the function will skip
- /// them.
- ///
- /// Available representations for the arguments are:
- /// * RangeSet
- /// * std::optional<RangeSet>
- /// * RangeSet *
- /// Pointer to a RangeSet is automatically assumed to be nullable and will get
- /// checked as well as the optional version. If this behaviour is undesired,
- /// please dereference the pointer in the call.
- ///
- /// Return type depends on the arguments' types. If we can be sure in compile
- /// time that there will be a range set as a result, the returning type is
- /// simply RangeSet, in other cases we have to back off to
- /// std::optional<RangeSet>.
- ///
- /// Please, prefer optional range sets to raw pointers. If the last argument is
- /// a raw pointer and all previous arguments are std::nullopt, it will cost one
- /// additional check to convert RangeSet * into std::optional<RangeSet>.
- template <class HeadTy, class SecondTy, class... RestTy>
- [[nodiscard]] inline
- typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
- intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
- RestTy... Tail) {
- if (Head) {
- return intersect(F, *Head, Second, Tail...);
- }
- return intersect(F, Second, Tail...);
- }
- //===----------------------------------------------------------------------===//
- // Symbolic reasoning logic
- //===----------------------------------------------------------------------===//
- /// A little component aggregating all of the reasoning we have about
- /// the ranges of symbolic expressions.
- ///
- /// Even when we don't know the exact values of the operands, we still
- /// can get a pretty good estimate of the result's range.
- class SymbolicRangeInferrer
- : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
- public:
- template <class SourceType>
- static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
- SourceType Origin) {
- SymbolicRangeInferrer Inferrer(F, State);
- return Inferrer.infer(Origin);
- }
- RangeSet VisitSymExpr(SymbolRef Sym) {
- if (std::optional<RangeSet> RS = getRangeForNegatedSym(Sym))
- return *RS;
- // If we've reached this line, the actual type of the symbolic
- // expression is not supported for advanced inference.
- // In this case, we simply backoff to the default "let's simply
- // infer the range from the expression's type".
- return infer(Sym->getType());
- }
- RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) {
- if (std::optional<RangeSet> RS = getRangeForNegatedUnarySym(USE))
- return *RS;
- return infer(USE->getType());
- }
- RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
- return VisitBinaryOperator(Sym);
- }
- RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
- return VisitBinaryOperator(Sym);
- }
- RangeSet VisitSymSymExpr(const SymSymExpr *SSE) {
- return intersect(
- RangeFactory,
- // If Sym is a difference of symbols A - B, then maybe we have range
- // set stored for B - A.
- //
- // If we have range set stored for both A - B and B - A then
- // calculate the effective range set by intersecting the range set
- // for A - B and the negated range set of B - A.
- getRangeForNegatedSymSym(SSE),
- // If Sym is a comparison expression (except <=>),
- // find any other comparisons with the same operands.
- // See function description.
- getRangeForComparisonSymbol(SSE),
- // If Sym is (dis)equality, we might have some information
- // on that in our equality classes data structure.
- getRangeForEqualities(SSE),
- // And we should always check what we can get from the operands.
- VisitBinaryOperator(SSE));
- }
- private:
- SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
- : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
- /// Infer range information from the given integer constant.
- ///
- /// It's not a real "inference", but is here for operating with
- /// sub-expressions in a more polymorphic manner.
- RangeSet inferAs(const llvm::APSInt &Val, QualType) {
- return {RangeFactory, Val};
- }
- /// Infer range information from symbol in the context of the given type.
- RangeSet inferAs(SymbolRef Sym, QualType DestType) {
- QualType ActualType = Sym->getType();
- // Check that we can reason about the symbol at all.
- if (ActualType->isIntegralOrEnumerationType() ||
- Loc::isLocType(ActualType)) {
- return infer(Sym);
- }
- // Otherwise, let's simply infer from the destination type.
- // We couldn't figure out nothing else about that expression.
- return infer(DestType);
- }
- RangeSet infer(SymbolRef Sym) {
- return intersect(RangeFactory,
- // Of course, we should take the constraint directly
- // associated with this symbol into consideration.
- getConstraint(State, Sym),
- // Apart from the Sym itself, we can infer quite a lot if
- // we look into subexpressions of Sym.
- Visit(Sym));
- }
- RangeSet infer(EquivalenceClass Class) {
- if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
- return *AssociatedConstraint;
- return infer(Class.getType());
- }
- /// Infer range information solely from the type.
- RangeSet infer(QualType T) {
- // Lazily generate a new RangeSet representing all possible values for the
- // given symbol type.
- RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
- ValueFactory.getMaxValue(T));
- // References are known to be non-zero.
- if (T->isReferenceType())
- return assumeNonZero(Result, T);
- return Result;
- }
- template <class BinarySymExprTy>
- RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
- // TODO #1: VisitBinaryOperator implementation might not make a good
- // use of the inferred ranges. In this case, we might be calculating
- // everything for nothing. This being said, we should introduce some
- // sort of laziness mechanism here.
- //
- // TODO #2: We didn't go into the nested expressions before, so it
- // might cause us spending much more time doing the inference.
- // This can be a problem for deeply nested expressions that are
- // involved in conditions and get tested continuously. We definitely
- // need to address this issue and introduce some sort of caching
- // in here.
- QualType ResultType = Sym->getType();
- return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
- Sym->getOpcode(),
- inferAs(Sym->getRHS(), ResultType), ResultType);
- }
- RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
- RangeSet RHS, QualType T);
- //===----------------------------------------------------------------------===//
- // Ranges and operators
- //===----------------------------------------------------------------------===//
- /// Return a rough approximation of the given range set.
- ///
- /// For the range set:
- /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
- /// it will return the range [x_0, y_N].
- static Range fillGaps(RangeSet Origin) {
- assert(!Origin.isEmpty());
- return {Origin.getMinValue(), Origin.getMaxValue()};
- }
- /// Try to convert given range into the given type.
- ///
- /// It will return std::nullopt only when the trivial conversion is possible.
- std::optional<Range> convert(const Range &Origin, APSIntType To) {
- if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
- To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
- return std::nullopt;
- }
- return Range(ValueFactory.Convert(To, Origin.From()),
- ValueFactory.Convert(To, Origin.To()));
- }
- template <BinaryOperator::Opcode Op>
- RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
- assert(!LHS.isEmpty() && !RHS.isEmpty());
- Range CoarseLHS = fillGaps(LHS);
- Range CoarseRHS = fillGaps(RHS);
- APSIntType ResultType = ValueFactory.getAPSIntType(T);
- // We need to convert ranges to the resulting type, so we can compare values
- // and combine them in a meaningful (in terms of the given operation) way.
- auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
- auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
- // It is hard to reason about ranges when conversion changes
- // borders of the ranges.
- if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
- return infer(T);
- }
- return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
- }
- template <BinaryOperator::Opcode Op>
- RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
- return infer(T);
- }
- /// Return a symmetrical range for the given range and type.
- ///
- /// If T is signed, return the smallest range [-x..x] that covers the original
- /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
- /// exist due to original range covering min(T)).
- ///
- /// If T is unsigned, return the smallest range [0..x] that covers the
- /// original range.
- Range getSymmetricalRange(Range Origin, QualType T) {
- APSIntType RangeType = ValueFactory.getAPSIntType(T);
- if (RangeType.isUnsigned()) {
- return Range(ValueFactory.getMinValue(RangeType), Origin.To());
- }
- if (Origin.From().isMinSignedValue()) {
- // If mini is a minimal signed value, absolute value of it is greater
- // than the maximal signed value. In order to avoid these
- // complications, we simply return the whole range.
- return {ValueFactory.getMinValue(RangeType),
- ValueFactory.getMaxValue(RangeType)};
- }
- // At this point, we are sure that the type is signed and we can safely
- // use unary - operator.
- //
- // While calculating absolute maximum, we can use the following formula
- // because of these reasons:
- // * If From >= 0 then To >= From and To >= -From.
- // AbsMax == To == max(To, -From)
- // * If To <= 0 then -From >= -To and -From >= From.
- // AbsMax == -From == max(-From, To)
- // * Otherwise, From <= 0, To >= 0, and
- // AbsMax == max(abs(From), abs(To))
- llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());
- // Intersection is guaranteed to be non-empty.
- return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
- }
- /// Return a range set subtracting zero from \p Domain.
- RangeSet assumeNonZero(RangeSet Domain, QualType T) {
- APSIntType IntType = ValueFactory.getAPSIntType(T);
- return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
- }
- template <typename ProduceNegatedSymFunc>
- std::optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F,
- QualType T) {
- // Do not negate if the type cannot be meaningfully negated.
- if (!T->isUnsignedIntegerOrEnumerationType() &&
- !T->isSignedIntegerOrEnumerationType())
- return std::nullopt;
- if (SymbolRef NegatedSym = F())
- if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym))
- return RangeFactory.negate(*NegatedRange);
- return std::nullopt;
- }
- std::optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) {
- // Just get the operand when we negate a symbol that is already negated.
- // -(-a) == a
- return getRangeForNegatedExpr(
- [USE]() -> SymbolRef {
- if (USE->getOpcode() == UO_Minus)
- return USE->getOperand();
- return nullptr;
- },
- USE->getType());
- }
- std::optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) {
- return getRangeForNegatedExpr(
- [SSE, State = this->State]() -> SymbolRef {
- if (SSE->getOpcode() == BO_Sub)
- return State->getSymbolManager().getSymSymExpr(
- SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType());
- return nullptr;
- },
- SSE->getType());
- }
- std::optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) {
- return getRangeForNegatedExpr(
- [Sym, State = this->State]() {
- return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus,
- Sym->getType());
- },
- Sym->getType());
- }
- // Returns ranges only for binary comparison operators (except <=>)
- // when left and right operands are symbolic values.
- // Finds any other comparisons with the same operands.
- // Then do logical calculations and refuse impossible branches.
- // E.g. (x < y) and (x > y) at the same time are impossible.
- // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
- // E.g. (x == y) and (y == x) are just reversed but the same.
- // It covers all possible combinations (see CmpOpTable description).
- // Note that `x` and `y` can also stand for subexpressions,
- // not only for actual symbols.
- std::optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) {
- const BinaryOperatorKind CurrentOP = SSE->getOpcode();
- // We currently do not support <=> (C++20).
- if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
- return std::nullopt;
- static const OperatorRelationsTable CmpOpTable{};
- const SymExpr *LHS = SSE->getLHS();
- const SymExpr *RHS = SSE->getRHS();
- QualType T = SSE->getType();
- SymbolManager &SymMgr = State->getSymbolManager();
- // We use this variable to store the last queried operator (`QueriedOP`)
- // for which the `getCmpOpState` returned with `Unknown`. If there are two
- // different OPs that returned `Unknown` then we have to query the special
- // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
- // never returns `Unknown`, so `CurrentOP` is a good initial value.
- BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;
- // Loop goes through all of the columns exept the last one ('UnknownX2').
- // We treat `UnknownX2` column separately at the end of the loop body.
- for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {
- // Let's find an expression e.g. (x < y).
- BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
- const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
- const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);
- // If ranges were not previously found,
- // try to find a reversed expression (y > x).
- if (!QueriedRangeSet) {
- const BinaryOperatorKind ROP =
- BinaryOperator::reverseComparisonOp(QueriedOP);
- SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
- QueriedRangeSet = getConstraint(State, SymSym);
- }
- if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
- continue;
- const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
- const bool isInFalseBranch =
- ConcreteValue ? (*ConcreteValue == 0) : false;
- // If it is a false branch, we shall be guided by opposite operator,
- // because the table is made assuming we are in the true branch.
- // E.g. when (x <= y) is false, then (x > y) is true.
- if (isInFalseBranch)
- QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);
- OperatorRelationsTable::TriStateKind BranchState =
- CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);
- if (BranchState == OperatorRelationsTable::Unknown) {
- if (LastQueriedOpToUnknown != CurrentOP &&
- LastQueriedOpToUnknown != QueriedOP) {
- // If we got the Unknown state for both different operators.
- // if (x <= y) // assume true
- // if (x != y) // assume true
- // if (x < y) // would be also true
- // Get a state from `UnknownX2` column.
- BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
- } else {
- LastQueriedOpToUnknown = QueriedOP;
- continue;
- }
- }
- return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
- : getFalseRange(T);
- }
- return std::nullopt;
- }
- std::optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
- std::optional<bool> Equality = meansEquality(Sym);
- if (!Equality)
- return std::nullopt;
- if (std::optional<bool> AreEqual =
- EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
- // Here we cover two cases at once:
- // * if Sym is equality and its operands are known to be equal -> true
- // * if Sym is disequality and its operands are disequal -> true
- if (*AreEqual == *Equality) {
- return getTrueRange(Sym->getType());
- }
- // Opposite combinations result in false.
- return getFalseRange(Sym->getType());
- }
- return std::nullopt;
- }
- RangeSet getTrueRange(QualType T) {
- RangeSet TypeRange = infer(T);
- return assumeNonZero(TypeRange, T);
- }
- RangeSet getFalseRange(QualType T) {
- const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
- return RangeSet(RangeFactory, Zero);
- }
- BasicValueFactory &ValueFactory;
- RangeSet::Factory &RangeFactory;
- ProgramStateRef State;
- };
- //===----------------------------------------------------------------------===//
- // Range-based reasoning about symbolic operations
- //===----------------------------------------------------------------------===//
- template <>
- RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_NE>(RangeSet LHS,
- RangeSet RHS,
- QualType T) {
- assert(!LHS.isEmpty() && !RHS.isEmpty());
- if (LHS.getAPSIntType() == RHS.getAPSIntType()) {
- if (intersect(RangeFactory, LHS, RHS).isEmpty())
- return getTrueRange(T);
- } else {
- // We can only lose information if we are casting smaller signed type to
- // bigger unsigned type. For e.g.,
- // LHS (unsigned short): [2, USHRT_MAX]
- // RHS (signed short): [SHRT_MIN, 0]
- //
- // Casting RHS to LHS type will leave us with overlapping values
- // CastedRHS : [0, 0] U [SHRT_MAX + 1, USHRT_MAX]
- //
- // We can avoid this by checking if signed type's maximum value is lesser
- // than unsigned type's minimum value.
- // If both have different signs then only we can get more information.
- if (LHS.isUnsigned() != RHS.isUnsigned()) {
- if (LHS.isUnsigned() && (LHS.getBitWidth() >= RHS.getBitWidth())) {
- if (RHS.getMaxValue().isNegative() ||
- LHS.getAPSIntType().convert(RHS.getMaxValue()) < LHS.getMinValue())
- return getTrueRange(T);
- } else if (RHS.isUnsigned() && (LHS.getBitWidth() <= RHS.getBitWidth())) {
- if (LHS.getMaxValue().isNegative() ||
- RHS.getAPSIntType().convert(LHS.getMaxValue()) < RHS.getMinValue())
- return getTrueRange(T);
- }
- }
- // Both RangeSets should be casted to bigger unsigned type.
- APSIntType CastingType(std::max(LHS.getBitWidth(), RHS.getBitWidth()),
- LHS.isUnsigned() || RHS.isUnsigned());
- RangeSet CastedLHS = RangeFactory.castTo(LHS, CastingType);
- RangeSet CastedRHS = RangeFactory.castTo(RHS, CastingType);
- if (intersect(RangeFactory, CastedLHS, CastedRHS).isEmpty())
- return getTrueRange(T);
- }
- // In all other cases, the resulting range cannot be deduced.
- return infer(T);
- }
- template <>
- RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
- QualType T) {
- APSIntType ResultType = ValueFactory.getAPSIntType(T);
- llvm::APSInt Zero = ResultType.getZeroValue();
- bool IsLHSPositiveOrZero = LHS.From() >= Zero;
- bool IsRHSPositiveOrZero = RHS.From() >= Zero;
- bool IsLHSNegative = LHS.To() < Zero;
- bool IsRHSNegative = RHS.To() < Zero;
- // Check if both ranges have the same sign.
- if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
- (IsLHSNegative && IsRHSNegative)) {
- // The result is definitely greater or equal than any of the operands.
- const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
- // We estimate maximal value for positives as the maximal value for the
- // given type. For negatives, we estimate it with -1 (e.g. 0x11111111).
- //
- // TODO: We basically, limit the resulting range from below, but don't do
- // anything with the upper bound.
- //
- // For positive operands, it can be done as follows: for the upper
- // bound of LHS and RHS we calculate the most significant bit set.
- // Let's call it the N-th bit. Then we can estimate the maximal
- // number to be 2^(N+1)-1, i.e. the number with all the bits up to
- // the N-th bit set.
- const llvm::APSInt &Max = IsLHSNegative
- ? ValueFactory.getValue(--Zero)
- : ValueFactory.getMaxValue(ResultType);
- return {RangeFactory, ValueFactory.getValue(Min), Max};
- }
- // Otherwise, let's check if at least one of the operands is negative.
- if (IsLHSNegative || IsRHSNegative) {
- // This means that the result is definitely negative as well.
- return {RangeFactory, ValueFactory.getMinValue(ResultType),
- ValueFactory.getValue(--Zero)};
- }
- RangeSet DefaultRange = infer(T);
- // It is pretty hard to reason about operands with different signs
- // (and especially with possibly different signs). We simply check if it
- // can be zero. In order to conclude that the result could not be zero,
- // at least one of the operands should be definitely not zero itself.
- if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
- return assumeNonZero(DefaultRange, T);
- }
- // Nothing much else to do here.
- return DefaultRange;
- }
- template <>
- RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
- Range RHS,
- QualType T) {
- APSIntType ResultType = ValueFactory.getAPSIntType(T);
- llvm::APSInt Zero = ResultType.getZeroValue();
- bool IsLHSPositiveOrZero = LHS.From() >= Zero;
- bool IsRHSPositiveOrZero = RHS.From() >= Zero;
- bool IsLHSNegative = LHS.To() < Zero;
- bool IsRHSNegative = RHS.To() < Zero;
- // Check if both ranges have the same sign.
- if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
- (IsLHSNegative && IsRHSNegative)) {
- // The result is definitely less or equal than any of the operands.
- const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
- // We conservatively estimate lower bound to be the smallest positive
- // or negative value corresponding to the sign of the operands.
- const llvm::APSInt &Min = IsLHSNegative
- ? ValueFactory.getMinValue(ResultType)
- : ValueFactory.getValue(Zero);
- return {RangeFactory, Min, Max};
- }
- // Otherwise, let's check if at least one of the operands is positive.
- if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
- // This makes result definitely positive.
- //
- // We can also reason about a maximal value by finding the maximal
- // value of the positive operand.
- const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
- // The minimal value on the other hand is much harder to reason about.
- // The only thing we know for sure is that the result is positive.
- return {RangeFactory, ValueFactory.getValue(Zero),
- ValueFactory.getValue(Max)};
- }
- // Nothing much else to do here.
- return infer(T);
- }
- template <>
- RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
- Range RHS,
- QualType T) {
- llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
- Range ConservativeRange = getSymmetricalRange(RHS, T);
- llvm::APSInt Max = ConservativeRange.To();
- llvm::APSInt Min = ConservativeRange.From();
- if (Max == Zero) {
- // It's an undefined behaviour to divide by 0 and it seems like we know
- // for sure that RHS is 0. Let's say that the resulting range is
- // simply infeasible for that matter.
- return RangeFactory.getEmptySet();
- }
- // At this point, our conservative range is closed. The result, however,
- // couldn't be greater than the RHS' maximal absolute value. Because of
- // this reason, we turn the range into open (or half-open in case of
- // unsigned integers).
- //
- // While we operate on integer values, an open interval (a, b) can be easily
- // represented by the closed interval [a + 1, b - 1]. And this is exactly
- // what we do next.
- //
- // If we are dealing with unsigned case, we shouldn't move the lower bound.
- if (Min.isSigned()) {
- ++Min;
- }
- --Max;
- bool IsLHSPositiveOrZero = LHS.From() >= Zero;
- bool IsRHSPositiveOrZero = RHS.From() >= Zero;
- // Remainder operator results with negative operands is implementation
- // defined. Positive cases are much easier to reason about though.
- if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
- // If maximal value of LHS is less than maximal value of RHS,
- // the result won't get greater than LHS.To().
- Max = std::min(LHS.To(), Max);
- // We want to check if it is a situation similar to the following:
- //
- // <------------|---[ LHS ]--------[ RHS ]----->
- // -INF 0 +INF
- //
- // In this situation, we can conclude that (LHS / RHS) == 0 and
- // (LHS % RHS) == LHS.
- Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
- }
- // Nevertheless, the symmetrical range for RHS is a conservative estimate
- // for any sign of either LHS, or RHS.
- return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
- }
- RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
- BinaryOperator::Opcode Op,
- RangeSet RHS, QualType T) {
- // We should propagate information about unfeasbility of one of the
- // operands to the resulting range.
- if (LHS.isEmpty() || RHS.isEmpty()) {
- return RangeFactory.getEmptySet();
- }
- switch (Op) {
- case BO_NE:
- return VisitBinaryOperator<BO_NE>(LHS, RHS, T);
- case BO_Or:
- return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
- case BO_And:
- return VisitBinaryOperator<BO_And>(LHS, RHS, T);
- case BO_Rem:
- return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
- default:
- return infer(T);
- }
- }
- //===----------------------------------------------------------------------===//
- // Constraint manager implementation details
- //===----------------------------------------------------------------------===//
- class RangeConstraintManager : public RangedConstraintManager {
- public:
- RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
- : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
- //===------------------------------------------------------------------===//
- // Implementation for interface from ConstraintManager.
- //===------------------------------------------------------------------===//
- bool haveEqualConstraints(ProgramStateRef S1,
- ProgramStateRef S2) const override {
- // NOTE: ClassMembers are as simple as back pointers for ClassMap,
- // so comparing constraint ranges and class maps should be
- // sufficient.
- return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
- S1->get<ClassMap>() == S2->get<ClassMap>();
- }
- bool canReasonAbout(SVal X) const override;
- ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
- const llvm::APSInt *getSymVal(ProgramStateRef State,
- SymbolRef Sym) const override;
- ProgramStateRef removeDeadBindings(ProgramStateRef State,
- SymbolReaper &SymReaper) override;
- void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
- unsigned int Space = 0, bool IsDot = false) const override;
- void printValue(raw_ostream &Out, ProgramStateRef State,
- SymbolRef Sym) override;
- void printConstraints(raw_ostream &Out, ProgramStateRef State,
- const char *NL = "\n", unsigned int Space = 0,
- bool IsDot = false) const;
- void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
- const char *NL = "\n", unsigned int Space = 0,
- bool IsDot = false) const;
- void printDisequalities(raw_ostream &Out, ProgramStateRef State,
- const char *NL = "\n", unsigned int Space = 0,
- bool IsDot = false) const;
- //===------------------------------------------------------------------===//
- // Implementation for interface from RangedConstraintManager.
- //===------------------------------------------------------------------===//
- ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
- ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
- ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
- ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
- ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
- ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
- ProgramStateRef assumeSymWithinInclusiveRange(
- ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
- const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
- ProgramStateRef assumeSymOutsideInclusiveRange(
- ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
- const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
- private:
- RangeSet::Factory F;
- RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
- RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
- ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
- RangeSet Range);
- ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
- RangeSet Range);
- RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
- RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
- RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
- RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
- RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
- };
- //===----------------------------------------------------------------------===//
- // Constraint assignment logic
- //===----------------------------------------------------------------------===//
- /// ConstraintAssignorBase is a small utility class that unifies visitor
- /// for ranges with a visitor for constraints (rangeset/range/constant).
- ///
- /// It is designed to have one derived class, but generally it can have more.
- /// Derived class can control which types we handle by defining methods of the
- /// following form:
- ///
- /// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
- /// CONSTRAINT Constraint);
- ///
- /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
- /// CONSTRAINT is the type of constraint (RangeSet/Range/Const)
- /// return value signifies whether we should try other handle methods
- /// (i.e. false would mean to stop right after calling this method)
- template <class Derived> class ConstraintAssignorBase {
- public:
- using Const = const llvm::APSInt &;
- #define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
- #define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \
- if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \
- return false
- void assign(SymbolRef Sym, RangeSet Constraint) {
- assignImpl(Sym, Constraint);
- }
- bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
- switch (Sym->getKind()) {
- #define SYMBOL(Id, Parent) \
- case SymExpr::Id##Kind: \
- DISPATCH(Id);
- #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
- }
- llvm_unreachable("Unknown SymExpr kind!");
- }
- #define DEFAULT_ASSIGN(Id) \
- bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \
- return true; \
- } \
- bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
- bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
- // When we dispatch for constraint types, we first try to check
- // if the new constraint is the constant and try the corresponding
- // assignor methods. If it didn't interrupt, we can proceed to the
- // range, and finally to the range set.
- #define CONSTRAINT_DISPATCH(Id) \
- if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \
- ASSIGN(Id, Const, Sym, *Const); \
- } \
- if (Constraint.size() == 1) { \
- ASSIGN(Id, Range, Sym, *Constraint.begin()); \
- } \
- ASSIGN(Id, RangeSet, Sym, Constraint)
- // Our internal assign method first tries to call assignor methods for all
- // constraint types that apply. And if not interrupted, continues with its
- // parent class.
- #define SYMBOL(Id, Parent) \
- bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \
- CONSTRAINT_DISPATCH(Id); \
- DISPATCH(Parent); \
- } \
- DEFAULT_ASSIGN(Id)
- #define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
- #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
- // Default implementations for the top class that doesn't have parents.
- bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
- CONSTRAINT_DISPATCH(SymExpr);
- return true;
- }
- DEFAULT_ASSIGN(SymExpr);
- #undef DISPATCH
- #undef CONSTRAINT_DISPATCH
- #undef DEFAULT_ASSIGN
- #undef ASSIGN
- };
- /// A little component aggregating all of the reasoning we have about
- /// assigning new constraints to symbols.
- ///
- /// The main purpose of this class is to associate constraints to symbols,
- /// and impose additional constraints on other symbols, when we can imply
- /// them.
- ///
- /// It has a nice symmetry with SymbolicRangeInferrer. When the latter
- /// can provide more precise ranges by looking into the operands of the
- /// expression in question, ConstraintAssignor looks into the operands
- /// to see if we can imply more from the new constraint.
- class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
- public:
- template <class ClassOrSymbol>
- [[nodiscard]] static ProgramStateRef
- assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
- ClassOrSymbol CoS, RangeSet NewConstraint) {
- if (!State || NewConstraint.isEmpty())
- return nullptr;
- ConstraintAssignor Assignor{State, Builder, F};
- return Assignor.assign(CoS, NewConstraint);
- }
- /// Handle expressions like: a % b != 0.
- template <typename SymT>
- bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
- if (Sym->getOpcode() != BO_Rem)
- return true;
- // a % b != 0 implies that a != 0.
- if (!Constraint.containsZero()) {
- SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
- if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
- State = State->assume(*NonLocSymSVal, true);
- if (!State)
- return false;
- }
- }
- return true;
- }
- inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
- inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
- RangeSet Constraint) {
- return handleRemainderOp(Sym, Constraint);
- }
- inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
- RangeSet Constraint);
- private:
- ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
- RangeSet::Factory &F)
- : State(State), Builder(Builder), RangeFactory(F) {}
- using Base = ConstraintAssignorBase<ConstraintAssignor>;
- /// Base method for handling new constraints for symbols.
- [[nodiscard]] ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
- // All constraints are actually associated with equivalence classes, and
- // that's what we are going to do first.
- State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
- if (!State)
- return nullptr;
- // And after that we can check what other things we can get from this
- // constraint.
- Base::assign(Sym, NewConstraint);
- return State;
- }
- /// Base method for handling new constraints for classes.
- [[nodiscard]] ProgramStateRef assign(EquivalenceClass Class,
- RangeSet NewConstraint) {
- // There is a chance that we might need to update constraints for the
- // classes that are known to be disequal to Class.
- //
- // In order for this to be even possible, the new constraint should
- // be simply a constant because we can't reason about range disequalities.
- if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
- ConstraintRangeTy Constraints = State->get<ConstraintRange>();
- ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
- // Add new constraint.
- Constraints = CF.add(Constraints, Class, NewConstraint);
- for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
- RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
- RangeFactory, State, DisequalClass);
- UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
- // If we end up with at least one of the disequal classes to be
- // constrained with an empty range-set, the state is infeasible.
- if (UpdatedConstraint.isEmpty())
- return nullptr;
- Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
- }
- assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
- "a state with infeasible constraints");
- return setConstraints(State, Constraints);
- }
- return setConstraint(State, Class, NewConstraint);
- }
- ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
- SymbolRef RHS) {
- return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
- }
- ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
- SymbolRef RHS) {
- return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
- }
- [[nodiscard]] std::optional<bool> interpreteAsBool(RangeSet Constraint) {
- assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
- if (Constraint.getConcreteValue())
- return !Constraint.getConcreteValue()->isZero();
- if (!Constraint.containsZero())
- return true;
- return std::nullopt;
- }
- ProgramStateRef State;
- SValBuilder &Builder;
- RangeSet::Factory &RangeFactory;
- };
- bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
- const llvm::APSInt &Constraint) {
- llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
- // Iterate over all equivalence classes and try to simplify them.
- ClassMembersTy Members = State->get<ClassMembers>();
- for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
- EquivalenceClass Class = ClassToSymbolSet.first;
- State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
- if (!State)
- return false;
- SimplifiedClasses.insert(Class);
- }
- // Trivial equivalence classes (those that have only one symbol member) are
- // not stored in the State. Thus, we must skim through the constraints as
- // well. And we try to simplify symbols in the constraints.
- ConstraintRangeTy Constraints = State->get<ConstraintRange>();
- for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
- EquivalenceClass Class = ClassConstraint.first;
- if (SimplifiedClasses.count(Class)) // Already simplified.
- continue;
- State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
- if (!State)
- return false;
- }
- // We may have trivial equivalence classes in the disequality info as
- // well, and we need to simplify them.
- DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
- for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
- DisequalityInfo) {
- EquivalenceClass Class = DisequalityEntry.first;
- ClassSet DisequalClasses = DisequalityEntry.second;
- State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
- if (!State)
- return false;
- }
- return true;
- }
- bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
- RangeSet Constraint) {
- if (!handleRemainderOp(Sym, Constraint))
- return false;
- std::optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
- if (!ConstraintAsBool)
- return true;
- if (std::optional<bool> Equality = meansEquality(Sym)) {
- // Here we cover two cases:
- // * if Sym is equality and the new constraint is true -> Sym's operands
- // should be marked as equal
- // * if Sym is disequality and the new constraint is false -> Sym's
- // operands should be also marked as equal
- if (*Equality == *ConstraintAsBool) {
- State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
- } else {
- // Other combinations leave as with disequal operands.
- State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
- }
- if (!State)
- return false;
- }
- return true;
- }
- } // end anonymous namespace
- std::unique_ptr<ConstraintManager>
- ento::CreateRangeConstraintManager(ProgramStateManager &StMgr,
- ExprEngine *Eng) {
- return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
- }
- ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
- ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
- ConstraintMap Result = F.getEmptyMap();
- ConstraintRangeTy Constraints = State->get<ConstraintRange>();
- for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
- EquivalenceClass Class = ClassConstraint.first;
- SymbolSet ClassMembers = Class.getClassMembers(State);
- assert(!ClassMembers.isEmpty() &&
- "Class must always have at least one member!");
- SymbolRef Representative = *ClassMembers.begin();
- Result = F.add(Result, Representative, ClassConstraint.second);
- }
- return Result;
- }
- //===----------------------------------------------------------------------===//
- // EqualityClass implementation details
- //===----------------------------------------------------------------------===//
- LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
- raw_ostream &os) const {
- SymbolSet ClassMembers = getClassMembers(State);
- for (const SymbolRef &MemberSym : ClassMembers) {
- MemberSym->dump();
- os << "\n";
- }
- }
- inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
- SymbolRef Sym) {
- assert(State && "State should not be null");
- assert(Sym && "Symbol should not be null");
- // We store far from all Symbol -> Class mappings
- if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
- return *NontrivialClass;
- // This is a trivial class of Sym.
- return Sym;
- }
- inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
- ProgramStateRef State,
- SymbolRef First,
- SymbolRef Second) {
- EquivalenceClass FirstClass = find(State, First);
- EquivalenceClass SecondClass = find(State, Second);
- return FirstClass.merge(F, State, SecondClass);
- }
- inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
- ProgramStateRef State,
- EquivalenceClass Other) {
- // It is already the same class.
- if (*this == Other)
- return State;
- // FIXME: As of now, we support only equivalence classes of the same type.
- // This limitation is connected to the lack of explicit casts in
- // our symbolic expression model.
- //
- // That means that for `int x` and `char y` we don't distinguish
- // between these two very different cases:
- // * `x == y`
- // * `(char)x == y`
- //
- // The moment we introduce symbolic casts, this restriction can be
- // lifted.
- if (getType() != Other.getType())
- return State;
- SymbolSet Members = getClassMembers(State);
- SymbolSet OtherMembers = Other.getClassMembers(State);
- // We estimate the size of the class by the height of tree containing
- // its members. Merging is not a trivial operation, so it's easier to
- // merge the smaller class into the bigger one.
- if (Members.getHeight() >= OtherMembers.getHeight()) {
- return mergeImpl(F, State, Members, Other, OtherMembers);
- } else {
- return Other.mergeImpl(F, State, OtherMembers, *this, Members);
- }
- }
- inline ProgramStateRef
- EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
- ProgramStateRef State, SymbolSet MyMembers,
- EquivalenceClass Other, SymbolSet OtherMembers) {
- // Essentially what we try to recreate here is some kind of union-find
- // data structure. It does have certain limitations due to persistence
- // and the need to remove elements from classes.
- //
- // In this setting, EquialityClass object is the representative of the class
- // or the parent element. ClassMap is a mapping of class members to their
- // parent. Unlike the union-find structure, they all point directly to the
- // class representative because we don't have an opportunity to actually do
- // path compression when dealing with immutability. This means that we
- // compress paths every time we do merges. It also means that we lose
- // the main amortized complexity benefit from the original data structure.
- ConstraintRangeTy Constraints = State->get<ConstraintRange>();
- ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
- // 1. If the merged classes have any constraints associated with them, we
- // need to transfer them to the class we have left.
- //
- // Intersection here makes perfect sense because both of these constraints
- // must hold for the whole new class.
- if (std::optional<RangeSet> NewClassConstraint =
- intersect(RangeFactory, getConstraint(State, *this),
- getConstraint(State, Other))) {
- // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
- // range inferrer shouldn't generate ranges incompatible with
- // equivalence classes. However, at the moment, due to imperfections
- // in the solver, it is possible and the merge function can also
- // return infeasible states aka null states.
- if (NewClassConstraint->isEmpty())
- // Infeasible state
- return nullptr;
- // No need in tracking constraints of a now-dissolved class.
- Constraints = CRF.remove(Constraints, Other);
- // Assign new constraints for this class.
- Constraints = CRF.add(Constraints, *this, *NewClassConstraint);
- assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
- "a state with infeasible constraints");
- State = State->set<ConstraintRange>(Constraints);
- }
- // 2. Get ALL equivalence-related maps
- ClassMapTy Classes = State->get<ClassMap>();
- ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
- ClassMembersTy Members = State->get<ClassMembers>();
- ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();
- DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
- DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();
- ClassSet::Factory &CF = State->get_context<ClassSet>();
- SymbolSet::Factory &F = getMembersFactory(State);
- // 2. Merge members of the Other class into the current class.
- SymbolSet NewClassMembers = MyMembers;
- for (SymbolRef Sym : OtherMembers) {
- NewClassMembers = F.add(NewClassMembers, Sym);
- // *this is now the class for all these new symbols.
- Classes = CMF.add(Classes, Sym, *this);
- }
- // 3. Adjust member mapping.
- //
- // No need in tracking members of a now-dissolved class.
- Members = MF.remove(Members, Other);
- // Now only the current class is mapped to all the symbols.
- Members = MF.add(Members, *this, NewClassMembers);
- // 4. Update disequality relations
- ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
- // We are about to merge two classes but they are already known to be
- // non-equal. This is a contradiction.
- if (DisequalToOther.contains(*this))
- return nullptr;
- if (!DisequalToOther.isEmpty()) {
- ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
- DisequalityInfo = DF.remove(DisequalityInfo, Other);
- for (EquivalenceClass DisequalClass : DisequalToOther) {
- DisequalToThis = CF.add(DisequalToThis, DisequalClass);
- // Disequality is a symmetric relation meaning that if
- // DisequalToOther not null then the set for DisequalClass is not
- // empty and has at least Other.
- ClassSet OriginalSetLinkedToOther =
- *DisequalityInfo.lookup(DisequalClass);
- // Other will be eliminated and we should replace it with the bigger
- // united class.
- ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
- NewSet = CF.add(NewSet, *this);
- DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
- }
- DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
- State = State->set<DisequalityMap>(DisequalityInfo);
- }
- // 5. Update the state
- State = State->set<ClassMap>(Classes);
- State = State->set<ClassMembers>(Members);
- return State;
- }
- inline SymbolSet::Factory &
- EquivalenceClass::getMembersFactory(ProgramStateRef State) {
- return State->get_context<SymbolSet>();
- }
- SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
- if (const SymbolSet *Members = State->get<ClassMembers>(*this))
- return *Members;
- // This class is trivial, so we need to construct a set
- // with just that one symbol from the class.
- SymbolSet::Factory &F = getMembersFactory(State);
- return F.add(F.getEmptySet(), getRepresentativeSymbol());
- }
- bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
- return State->get<ClassMembers>(*this) == nullptr;
- }
- bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
- SymbolReaper &Reaper) const {
- return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
- }
- inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
- ProgramStateRef State,
- SymbolRef First,
- SymbolRef Second) {
- return markDisequal(RF, State, find(State, First), find(State, Second));
- }
- inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
- ProgramStateRef State,
- EquivalenceClass First,
- EquivalenceClass Second) {
- return First.markDisequal(RF, State, Second);
- }
- inline ProgramStateRef
- EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
- EquivalenceClass Other) const {
- // If we know that two classes are equal, we can only produce an infeasible
- // state.
- if (*this == Other) {
- return nullptr;
- }
- DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
- ConstraintRangeTy Constraints = State->get<ConstraintRange>();
- // Disequality is a symmetric relation, so if we mark A as disequal to B,
- // we should also mark B as disequalt to A.
- if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
- Other) ||
- !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
- *this))
- return nullptr;
- assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
- "a state with infeasible constraints");
- State = State->set<DisequalityMap>(DisequalityInfo);
- State = State->set<ConstraintRange>(Constraints);
- return State;
- }
- inline bool EquivalenceClass::addToDisequalityInfo(
- DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
- RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
- EquivalenceClass Second) {
- // 1. Get all of the required factories.
- DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
- ClassSet::Factory &CF = State->get_context<ClassSet>();
- ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
- // 2. Add Second to the set of classes disequal to First.
- const ClassSet *CurrentSet = Info.lookup(First);
- ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
- NewSet = CF.add(NewSet, Second);
- Info = F.add(Info, First, NewSet);
- // 3. If Second is known to be a constant, we can delete this point
- // from the constraint asociated with First.
- //
- // So, if Second == 10, it means that First != 10.
- // At the same time, the same logic does not apply to ranges.
- if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
- if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
- RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
- RF, State, First.getRepresentativeSymbol());
- FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
- // If the First class is about to be constrained with an empty
- // range-set, the state is infeasible.
- if (FirstConstraint.isEmpty())
- return false;
- Constraints = CRF.add(Constraints, First, FirstConstraint);
- }
- return true;
- }
- inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
- SymbolRef FirstSym,
- SymbolRef SecondSym) {
- return EquivalenceClass::areEqual(State, find(State, FirstSym),
- find(State, SecondSym));
- }
- inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
- EquivalenceClass First,
- EquivalenceClass Second) {
- // The same equivalence class => symbols are equal.
- if (First == Second)
- return true;
- // Let's check if we know anything about these two classes being not equal to
- // each other.
- ClassSet DisequalToFirst = First.getDisequalClasses(State);
- if (DisequalToFirst.contains(Second))
- return false;
- // It is not clear.
- return std::nullopt;
- }
- [[nodiscard]] ProgramStateRef
- EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
- SymbolSet ClsMembers = getClassMembers(State);
- assert(ClsMembers.contains(Old));
- // Remove `Old`'s Class->Sym relation.
- SymbolSet::Factory &F = getMembersFactory(State);
- ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
- ClsMembers = F.remove(ClsMembers, Old);
- // Ensure another precondition of the removeMember function (we can check
- // this only with isEmpty, thus we have to do the remove first).
- assert(!ClsMembers.isEmpty() &&
- "Class should have had at least two members before member removal");
- // Overwrite the existing members assigned to this class.
- ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
- ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
- State = State->set<ClassMembers>(ClassMembersMap);
- // Remove `Old`'s Sym->Class relation.
- ClassMapTy Classes = State->get<ClassMap>();
- ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
- Classes = CMF.remove(Classes, Old);
- State = State->set<ClassMap>(Classes);
- return State;
- }
- // Re-evaluate an SVal with top-level `State->assume` logic.
- [[nodiscard]] ProgramStateRef
- reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue) {
- if (!Constraint)
- return State;
- const auto DefinedVal = TheValue.castAs<DefinedSVal>();
- // If the SVal is 0, we can simply interpret that as `false`.
- if (Constraint->encodesFalseRange())
- return State->assume(DefinedVal, false);
- // If the constraint does not encode 0 then we can interpret that as `true`
- // AND as a Range(Set).
- if (Constraint->encodesTrueRange()) {
- State = State->assume(DefinedVal, true);
- if (!State)
- return nullptr;
- // Fall through, re-assume based on the range values as well.
- }
- // Overestimate the individual Ranges with the RangeSet' lowest and
- // highest values.
- return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
- Constraint->getMaxValue(), true);
- }
- // Iterate over all symbols and try to simplify them. Once a symbol is
- // simplified then we check if we can merge the simplified symbol's equivalence
- // class to this class. This way, we simplify not just the symbols but the
- // classes as well: we strive to keep the number of the classes to be the
- // absolute minimum.
- [[nodiscard]] ProgramStateRef
- EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
- ProgramStateRef State, EquivalenceClass Class) {
- SymbolSet ClassMembers = Class.getClassMembers(State);
- for (const SymbolRef &MemberSym : ClassMembers) {
- const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
- const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
- // The symbol is collapsed to a constant, check if the current State is
- // still feasible.
- if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
- const llvm::APSInt &SV = CI->getValue();
- const RangeSet *ClassConstraint = getConstraint(State, Class);
- // We have found a contradiction.
- if (ClassConstraint && !ClassConstraint->contains(SV))
- return nullptr;
- }
- if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
- // The simplified symbol should be the member of the original Class,
- // however, it might be in another existing class at the moment. We
- // have to merge these classes.
- ProgramStateRef OldState = State;
- State = merge(F, State, MemberSym, SimplifiedMemberSym);
- if (!State)
- return nullptr;
- // No state change, no merge happened actually.
- if (OldState == State)
- continue;
- // Be aware that `SimplifiedMemberSym` might refer to an already dead
- // symbol. In that case, the eqclass of that might not be the same as the
- // eqclass of `MemberSym`. This is because the dead symbols are not
- // preserved in the `ClassMap`, hence
- // `find(State, SimplifiedMemberSym)` will result in a trivial eqclass
- // compared to the eqclass of `MemberSym`.
- // These eqclasses should be the same if `SimplifiedMemberSym` is alive.
- // --> assert(find(State, MemberSym) == find(State, SimplifiedMemberSym))
- //
- // Note that `MemberSym` must be alive here since that is from the
- // `ClassMembers` where all the symbols are alive.
- // Remove the old and more complex symbol.
- State = find(State, MemberSym).removeMember(State, MemberSym);
- // Query the class constraint again b/c that may have changed during the
- // merge above.
- const RangeSet *ClassConstraint = getConstraint(State, Class);
- // Re-evaluate an SVal with top-level `State->assume`, this ignites
- // a RECURSIVE algorithm that will reach a FIXPOINT.
- //
- // About performance and complexity: Let us assume that in a State we
- // have N non-trivial equivalence classes and that all constraints and
- // disequality info is related to non-trivial classes. In the worst case,
- // we can simplify only one symbol of one class in each iteration. The
- // number of symbols in one class cannot grow b/c we replace the old
- // symbol with the simplified one. Also, the number of the equivalence
- // classes can decrease only, b/c the algorithm does a merge operation
- // optionally. We need N iterations in this case to reach the fixpoint.
- // Thus, the steps needed to be done in the worst case is proportional to
- // N*N.
- //
- // This worst case scenario can be extended to that case when we have
- // trivial classes in the constraints and in the disequality map. This
- // case can be reduced to the case with a State where there are only
- // non-trivial classes. This is because a merge operation on two trivial
- // classes results in one non-trivial class.
- State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
- if (!State)
- return nullptr;
- }
- }
- return State;
- }
- inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
- SymbolRef Sym) {
- return find(State, Sym).getDisequalClasses(State);
- }
- inline ClassSet
- EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
- return getDisequalClasses(State->get<DisequalityMap>(),
- State->get_context<ClassSet>());
- }
- inline ClassSet
- EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
- ClassSet::Factory &Factory) const {
- if (const ClassSet *DisequalClasses = Map.lookup(*this))
- return *DisequalClasses;
- return Factory.getEmptySet();
- }
- bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
- ClassMembersTy Members = State->get<ClassMembers>();
- for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
- for (SymbolRef Member : ClassMembersPair.second) {
- // Every member of the class should have a mapping back to the class.
- if (find(State, Member) == ClassMembersPair.first) {
- continue;
- }
- return false;
- }
- }
- DisequalityMapTy Disequalities = State->get<DisequalityMap>();
- for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
- EquivalenceClass Class = DisequalityInfo.first;
- ClassSet DisequalClasses = DisequalityInfo.second;
- // There is no use in keeping empty sets in the map.
- if (DisequalClasses.isEmpty())
- return false;
- // Disequality is symmetrical, i.e. for every Class A and B that A != B,
- // B != A should also be true.
- for (EquivalenceClass DisequalClass : DisequalClasses) {
- const ClassSet *DisequalToDisequalClasses =
- Disequalities.lookup(DisequalClass);
- // It should be a set of at least one element: Class
- if (!DisequalToDisequalClasses ||
- !DisequalToDisequalClasses->contains(Class))
- return false;
- }
- }
- return true;
- }
- //===----------------------------------------------------------------------===//
- // RangeConstraintManager implementation
- //===----------------------------------------------------------------------===//
- bool RangeConstraintManager::canReasonAbout(SVal X) const {
- std::optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
- if (SymVal && SymVal->isExpression()) {
- const SymExpr *SE = SymVal->getSymbol();
- if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
- switch (SIE->getOpcode()) {
- // We don't reason yet about bitwise-constraints on symbolic values.
- case BO_And:
- case BO_Or:
- case BO_Xor:
- return false;
- // We don't reason yet about these arithmetic constraints on
- // symbolic values.
- case BO_Mul:
- case BO_Div:
- case BO_Rem:
- case BO_Shl:
- case BO_Shr:
- return false;
- // All other cases.
- default:
- return true;
- }
- }
- if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
- // FIXME: Handle <=> here.
- if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
- BinaryOperator::isRelationalOp(SSE->getOpcode())) {
- // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
- // We've recently started producing Loc <> NonLoc comparisons (that
- // result from casts of one of the operands between eg. intptr_t and
- // void *), but we can't reason about them yet.
- if (Loc::isLocType(SSE->getLHS()->getType())) {
- return Loc::isLocType(SSE->getRHS()->getType());
- }
- }
- }
- return false;
- }
- return true;
- }
- ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
- SymbolRef Sym) {
- const RangeSet *Ranges = getConstraint(State, Sym);
- // If we don't have any information about this symbol, it's underconstrained.
- if (!Ranges)
- return ConditionTruthVal();
- // If we have a concrete value, see if it's zero.
- if (const llvm::APSInt *Value = Ranges->getConcreteValue())
- return *Value == 0;
- BasicValueFactory &BV = getBasicVals();
- APSIntType IntType = BV.getAPSIntType(Sym->getType());
- llvm::APSInt Zero = IntType.getZeroValue();
- // Check if zero is in the set of possible values.
- if (!Ranges->contains(Zero))
- return false;
- // Zero is a possible value, but it is not the /only/ possible value.
- return ConditionTruthVal();
- }
- const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
- SymbolRef Sym) const {
- const RangeSet *T = getConstraint(St, Sym);
- return T ? T->getConcreteValue() : nullptr;
- }
- //===----------------------------------------------------------------------===//
- // Remove dead symbols from existing constraints
- //===----------------------------------------------------------------------===//
- /// Scan all symbols referenced by the constraints. If the symbol is not alive
- /// as marked in LSymbols, mark it as dead in DSymbols.
- ProgramStateRef
- RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
- SymbolReaper &SymReaper) {
- ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
- ClassMembersTy NewClassMembersMap = ClassMembersMap;
- ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
- SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
- ConstraintRangeTy Constraints = State->get<ConstraintRange>();
- ConstraintRangeTy NewConstraints = Constraints;
- ConstraintRangeTy::Factory &ConstraintFactory =
- State->get_context<ConstraintRange>();
- ClassMapTy Map = State->get<ClassMap>();
- ClassMapTy NewMap = Map;
- ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
- DisequalityMapTy Disequalities = State->get<DisequalityMap>();
- DisequalityMapTy::Factory &DisequalityFactory =
- State->get_context<DisequalityMap>();
- ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
- bool ClassMapChanged = false;
- bool MembersMapChanged = false;
- bool ConstraintMapChanged = false;
- bool DisequalitiesChanged = false;
- auto removeDeadClass = [&](EquivalenceClass Class) {
- // Remove associated constraint ranges.
- Constraints = ConstraintFactory.remove(Constraints, Class);
- ConstraintMapChanged = true;
- // Update disequality information to not hold any information on the
- // removed class.
- ClassSet DisequalClasses =
- Class.getDisequalClasses(Disequalities, ClassSetFactory);
- if (!DisequalClasses.isEmpty()) {
- for (EquivalenceClass DisequalClass : DisequalClasses) {
- ClassSet DisequalToDisequalSet =
- DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
- // DisequalToDisequalSet is guaranteed to be non-empty for consistent
- // disequality info.
- assert(!DisequalToDisequalSet.isEmpty());
- ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
- // No need in keeping an empty set.
- if (NewSet.isEmpty()) {
- Disequalities =
- DisequalityFactory.remove(Disequalities, DisequalClass);
- } else {
- Disequalities =
- DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
- }
- }
- // Remove the data for the class
- Disequalities = DisequalityFactory.remove(Disequalities, Class);
- DisequalitiesChanged = true;
- }
- };
- // 1. Let's see if dead symbols are trivial and have associated constraints.
- for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
- Constraints) {
- EquivalenceClass Class = ClassConstraintPair.first;
- if (Class.isTriviallyDead(State, SymReaper)) {
- // If this class is trivial, we can remove its constraints right away.
- removeDeadClass(Class);
- }
- }
- // 2. We don't need to track classes for dead symbols.
- for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
- SymbolRef Sym = SymbolClassPair.first;
- if (SymReaper.isDead(Sym)) {
- ClassMapChanged = true;
- NewMap = ClassFactory.remove(NewMap, Sym);
- }
- }
- // 3. Remove dead members from classes and remove dead non-trivial classes
- // and their constraints.
- for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
- ClassMembersMap) {
- EquivalenceClass Class = ClassMembersPair.first;
- SymbolSet LiveMembers = ClassMembersPair.second;
- bool MembersChanged = false;
- for (SymbolRef Member : ClassMembersPair.second) {
- if (SymReaper.isDead(Member)) {
- MembersChanged = true;
- LiveMembers = SetFactory.remove(LiveMembers, Member);
- }
- }
- // Check if the class changed.
- if (!MembersChanged)
- continue;
- MembersMapChanged = true;
- if (LiveMembers.isEmpty()) {
- // The class is dead now, we need to wipe it out of the members map...
- NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
- // ...and remove all of its constraints.
- removeDeadClass(Class);
- } else {
- // We need to change the members associated with the class.
- NewClassMembersMap =
- EMFactory.add(NewClassMembersMap, Class, LiveMembers);
- }
- }
- // 4. Update the state with new maps.
- //
- // Here we try to be humble and update a map only if it really changed.
- if (ClassMapChanged)
- State = State->set<ClassMap>(NewMap);
- if (MembersMapChanged)
- State = State->set<ClassMembers>(NewClassMembersMap);
- if (ConstraintMapChanged)
- State = State->set<ConstraintRange>(Constraints);
- if (DisequalitiesChanged)
- State = State->set<DisequalityMap>(Disequalities);
- assert(EquivalenceClass::isClassDataConsistent(State));
- return State;
- }
- RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
- SymbolRef Sym) {
- return SymbolicRangeInferrer::inferRange(F, State, Sym);
- }
- ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
- SymbolRef Sym,
- RangeSet Range) {
- return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
- }
- //===------------------------------------------------------------------------===
- // assumeSymX methods: protected interface for RangeConstraintManager.
- //===------------------------------------------------------------------------===/
- // The syntax for ranges below is mathematical, using [x, y] for closed ranges
- // and (x, y) for open ranges. These ranges are modular, corresponding with
- // a common treatment of C integer overflow. This means that these methods
- // do not have to worry about overflow; RangeSet::Intersect can handle such a
- // "wraparound" range.
- // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
- // UINT_MAX, 0, 1, and 2.
- ProgramStateRef
- RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- // Before we do any real work, see if the value can even show up.
- APSIntType AdjustmentType(Adjustment);
- if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
- return St;
- llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
- RangeSet New = getRange(St, Sym);
- New = F.deletePoint(New, Point);
- return setRange(St, Sym, New);
- }
- ProgramStateRef
- RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- // Before we do any real work, see if the value can even show up.
- APSIntType AdjustmentType(Adjustment);
- if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
- return nullptr;
- // [Int-Adjustment, Int-Adjustment]
- llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
- RangeSet New = getRange(St, Sym);
- New = F.intersect(New, AdjInt);
- return setRange(St, Sym, New);
- }
- RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
- SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- // Before we do any real work, see if the value can even show up.
- APSIntType AdjustmentType(Adjustment);
- switch (AdjustmentType.testInRange(Int, true)) {
- case APSIntType::RTR_Below:
- return F.getEmptySet();
- case APSIntType::RTR_Within:
- break;
- case APSIntType::RTR_Above:
- return getRange(St, Sym);
- }
- // Special case for Int == Min. This is always false.
- llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
- llvm::APSInt Min = AdjustmentType.getMinValue();
- if (ComparisonVal == Min)
- return F.getEmptySet();
- llvm::APSInt Lower = Min - Adjustment;
- llvm::APSInt Upper = ComparisonVal - Adjustment;
- --Upper;
- RangeSet Result = getRange(St, Sym);
- return F.intersect(Result, Lower, Upper);
- }
- ProgramStateRef
- RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
- return setRange(St, Sym, New);
- }
- RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
- SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- // Before we do any real work, see if the value can even show up.
- APSIntType AdjustmentType(Adjustment);
- switch (AdjustmentType.testInRange(Int, true)) {
- case APSIntType::RTR_Below:
- return getRange(St, Sym);
- case APSIntType::RTR_Within:
- break;
- case APSIntType::RTR_Above:
- return F.getEmptySet();
- }
- // Special case for Int == Max. This is always false.
- llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
- llvm::APSInt Max = AdjustmentType.getMaxValue();
- if (ComparisonVal == Max)
- return F.getEmptySet();
- llvm::APSInt Lower = ComparisonVal - Adjustment;
- llvm::APSInt Upper = Max - Adjustment;
- ++Lower;
- RangeSet SymRange = getRange(St, Sym);
- return F.intersect(SymRange, Lower, Upper);
- }
- ProgramStateRef
- RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
- return setRange(St, Sym, New);
- }
- RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
- SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- // Before we do any real work, see if the value can even show up.
- APSIntType AdjustmentType(Adjustment);
- switch (AdjustmentType.testInRange(Int, true)) {
- case APSIntType::RTR_Below:
- return getRange(St, Sym);
- case APSIntType::RTR_Within:
- break;
- case APSIntType::RTR_Above:
- return F.getEmptySet();
- }
- // Special case for Int == Min. This is always feasible.
- llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
- llvm::APSInt Min = AdjustmentType.getMinValue();
- if (ComparisonVal == Min)
- return getRange(St, Sym);
- llvm::APSInt Max = AdjustmentType.getMaxValue();
- llvm::APSInt Lower = ComparisonVal - Adjustment;
- llvm::APSInt Upper = Max - Adjustment;
- RangeSet SymRange = getRange(St, Sym);
- return F.intersect(SymRange, Lower, Upper);
- }
- ProgramStateRef
- RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
- return setRange(St, Sym, New);
- }
- RangeSet
- RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- // Before we do any real work, see if the value can even show up.
- APSIntType AdjustmentType(Adjustment);
- switch (AdjustmentType.testInRange(Int, true)) {
- case APSIntType::RTR_Below:
- return F.getEmptySet();
- case APSIntType::RTR_Within:
- break;
- case APSIntType::RTR_Above:
- return RS();
- }
- // Special case for Int == Max. This is always feasible.
- llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
- llvm::APSInt Max = AdjustmentType.getMaxValue();
- if (ComparisonVal == Max)
- return RS();
- llvm::APSInt Min = AdjustmentType.getMinValue();
- llvm::APSInt Lower = Min - Adjustment;
- llvm::APSInt Upper = ComparisonVal - Adjustment;
- RangeSet Default = RS();
- return F.intersect(Default, Lower, Upper);
- }
- RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
- SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
- }
- ProgramStateRef
- RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment) {
- RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
- return setRange(St, Sym, New);
- }
- ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
- ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
- const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
- RangeSet New = getSymGERange(State, Sym, From, Adjustment);
- if (New.isEmpty())
- return nullptr;
- RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
- return setRange(State, Sym, Out);
- }
- ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
- ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
- const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
- RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
- RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
- RangeSet New(F.add(RangeLT, RangeGT));
- return setRange(State, Sym, New);
- }
- //===----------------------------------------------------------------------===//
- // Pretty-printing.
- //===----------------------------------------------------------------------===//
- void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
- const char *NL, unsigned int Space,
- bool IsDot) const {
- printConstraints(Out, State, NL, Space, IsDot);
- printEquivalenceClasses(Out, State, NL, Space, IsDot);
- printDisequalities(Out, State, NL, Space, IsDot);
- }
- void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State,
- SymbolRef Sym) {
- const RangeSet RS = getRange(State, Sym);
- Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:");
- RS.dump(Out);
- }
- static std::string toString(const SymbolRef &Sym) {
- std::string S;
- llvm::raw_string_ostream O(S);
- Sym->dumpToStream(O);
- return O.str();
- }
- void RangeConstraintManager::printConstraints(raw_ostream &Out,
- ProgramStateRef State,
- const char *NL,
- unsigned int Space,
- bool IsDot) const {
- ConstraintRangeTy Constraints = State->get<ConstraintRange>();
- Indent(Out, Space, IsDot) << "\"constraints\": ";
- if (Constraints.isEmpty()) {
- Out << "null," << NL;
- return;
- }
- std::map<std::string, RangeSet> OrderedConstraints;
- for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
- SymbolSet ClassMembers = P.first.getClassMembers(State);
- for (const SymbolRef &ClassMember : ClassMembers) {
- bool insertion_took_place;
- std::tie(std::ignore, insertion_took_place) =
- OrderedConstraints.insert({toString(ClassMember), P.second});
- assert(insertion_took_place &&
- "two symbols should not have the same dump");
- }
- }
- ++Space;
- Out << '[' << NL;
- bool First = true;
- for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
- if (First) {
- First = false;
- } else {
- Out << ',';
- Out << NL;
- }
- Indent(Out, Space, IsDot)
- << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
- P.second.dump(Out);
- Out << "\" }";
- }
- Out << NL;
- --Space;
- Indent(Out, Space, IsDot) << "]," << NL;
- }
- static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
- SymbolSet ClassMembers = Class.getClassMembers(State);
- llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
- ClassMembers.end());
- llvm::sort(ClassMembersSorted,
- [](const SymbolRef &LHS, const SymbolRef &RHS) {
- return toString(LHS) < toString(RHS);
- });
- bool FirstMember = true;
- std::string Str;
- llvm::raw_string_ostream Out(Str);
- Out << "[ ";
- for (SymbolRef ClassMember : ClassMembersSorted) {
- if (FirstMember)
- FirstMember = false;
- else
- Out << ", ";
- Out << "\"" << ClassMember << "\"";
- }
- Out << " ]";
- return Out.str();
- }
- void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
- ProgramStateRef State,
- const char *NL,
- unsigned int Space,
- bool IsDot) const {
- ClassMembersTy Members = State->get<ClassMembers>();
- Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
- if (Members.isEmpty()) {
- Out << "null," << NL;
- return;
- }
- std::set<std::string> MembersStr;
- for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
- MembersStr.insert(toString(State, ClassToSymbolSet.first));
- ++Space;
- Out << '[' << NL;
- bool FirstClass = true;
- for (const std::string &Str : MembersStr) {
- if (FirstClass) {
- FirstClass = false;
- } else {
- Out << ',';
- Out << NL;
- }
- Indent(Out, Space, IsDot);
- Out << Str;
- }
- Out << NL;
- --Space;
- Indent(Out, Space, IsDot) << "]," << NL;
- }
- void RangeConstraintManager::printDisequalities(raw_ostream &Out,
- ProgramStateRef State,
- const char *NL,
- unsigned int Space,
- bool IsDot) const {
- DisequalityMapTy Disequalities = State->get<DisequalityMap>();
- Indent(Out, Space, IsDot) << "\"disequality_info\": ";
- if (Disequalities.isEmpty()) {
- Out << "null," << NL;
- return;
- }
- // Transform the disequality info to an ordered map of
- // [string -> (ordered set of strings)]
- using EqClassesStrTy = std::set<std::string>;
- using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
- DisequalityInfoStrTy DisequalityInfoStr;
- for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
- EquivalenceClass Class = ClassToDisEqSet.first;
- ClassSet DisequalClasses = ClassToDisEqSet.second;
- EqClassesStrTy MembersStr;
- for (EquivalenceClass DisEqClass : DisequalClasses)
- MembersStr.insert(toString(State, DisEqClass));
- DisequalityInfoStr.insert({toString(State, Class), MembersStr});
- }
- ++Space;
- Out << '[' << NL;
- bool FirstClass = true;
- for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
- DisequalityInfoStr) {
- const std::string &Class = ClassToDisEqSet.first;
- if (FirstClass) {
- FirstClass = false;
- } else {
- Out << ',';
- Out << NL;
- }
- Indent(Out, Space, IsDot) << "{" << NL;
- unsigned int DisEqSpace = Space + 1;
- Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
- Out << Class;
- const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
- if (!DisequalClasses.empty()) {
- Out << "," << NL;
- Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
- unsigned int DisEqClassSpace = DisEqSpace + 1;
- Indent(Out, DisEqClassSpace, IsDot);
- bool FirstDisEqClass = true;
- for (const std::string &DisEqClass : DisequalClasses) {
- if (FirstDisEqClass) {
- FirstDisEqClass = false;
- } else {
- Out << ',' << NL;
- Indent(Out, DisEqClassSpace, IsDot);
- }
- Out << DisEqClass;
- }
- Out << "]" << NL;
- }
- Indent(Out, Space, IsDot) << "}";
- }
- Out << NL;
- --Space;
- Indent(Out, Space, IsDot) << "]," << NL;
- }
|