xmlregexp.c 218 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061806280638064806580668067806880698070807180728073807480758076807780788079808080818082808380848085808680878088808980908091809280938094809580968097809880998100810181028103810481058106810781088109811081118112811381148115811681178118811981208121812281238124812581268127812881298130813181328133813481358136813781388139814081418142814381448145814681478148814981508151815281538154815581568157815881598160816181628163816481658166816781688169817081718172817381748175817681778178817981808181818281838184818581868187818881898190819181928193819481958196819781988199820082018202820382048205820682078208820982108211821282138214821582168217821882198220822182228223822482258226822782288229823082318232823382348235823682378238823982408241824282438244824582468247824882498250825182528253825482558256825782588259826082618262826382648265826682678268826982708271
  1. /*
  2. * regexp.c: generic and extensible Regular Expression engine
  3. *
  4. * Basically designed with the purpose of compiling regexps for
  5. * the variety of validation/schemas mechanisms now available in
  6. * XML related specifications these include:
  7. * - XML-1.0 DTD validation
  8. * - XML Schemas structure part 1
  9. * - XML Schemas Datatypes part 2 especially Appendix F
  10. * - RELAX-NG/TREX i.e. the counter proposal
  11. *
  12. * See Copyright for the status of this software.
  13. *
  14. * Daniel Veillard <veillard@redhat.com>
  15. */
  16. #define IN_LIBXML
  17. #include "libxml.h"
  18. #ifdef LIBXML_REGEXP_ENABLED
  19. /* #define DEBUG_ERR */
  20. #include <stdio.h>
  21. #include <string.h>
  22. #ifdef HAVE_LIMITS_H
  23. #include <limits.h>
  24. #endif
  25. #ifdef HAVE_STDINT_H
  26. #include <stdint.h>
  27. #endif
  28. #include <libxml/tree.h>
  29. #include <libxml/parserInternals.h>
  30. #include <libxml/xmlregexp.h>
  31. #include <libxml/xmlautomata.h>
  32. #include <libxml/xmlunicode.h>
  33. #ifndef INT_MAX
  34. #define INT_MAX 123456789 /* easy to flag and big enough for our needs */
  35. #endif
  36. #ifndef SIZE_MAX
  37. #define SIZE_MAX ((size_t) -1)
  38. #endif
  39. /* #define DEBUG_REGEXP_GRAPH */
  40. /* #define DEBUG_REGEXP_EXEC */
  41. /* #define DEBUG_PUSH */
  42. /* #define DEBUG_COMPACTION */
  /* NOTE(review): presumably the ceiling compared with xmlRegExecCtxt.nbPush; the use site is outside this section - confirm */
  43. #define MAX_PUSH 10000000
  44. #ifdef ERROR
  45. #undef ERROR
  46. #endif
  /*
   * ERROR(str): flag a compile error on the `ctxt` variable that must be
   * in scope at the use site, then report it via xmlRegexpErrCompile().
   * The expansion is two bare statements ending in ';' and is not wrapped
   * in do { } while (0): used as the body of an unbraced if/else only the
   * first statement would be conditional.
   */
  47. #define ERROR(str) \
  48. ctxt->error = XML_REGEXP_COMPILE_ERROR; \
  49. xmlRegexpErrCompile(ctxt, str);
  /*
   * Cursor helpers; all of them operate on the `ctxt` variable in scope.
   * NEXT advances the cursor by one, CUR reads the unit under the cursor,
   * NXT(index) reads at an offset from the cursor.
   */
  50. #define NEXT ctxt->cur++
  51. #define CUR (*(ctxt->cur))
  52. #define NXT(index) (ctxt->cur[index])
  /* Forwards to xmlStringCurrentChar(NULL, s, &l); presumably decodes the character at s and stores its length in l - confirm */
  53. #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
  /* Advance the cursor by l; note the expansion already ends with ';' */
  54. #define NEXTL(l) ctxt->cur += l;
  /* NOTE(review): separator character; its users are outside this section */
  55. #define XML_REG_STRING_SEPARATOR '|'
  56. /*
  57. * Need PREV to check on a '-' within a Character Group. May only be used
  58. * when it's guaranteed that cur is not at the beginning of ctxt->string!
  59. */
  60. #define PREV (ctxt->cur[-1])
  61. /**
  62. * TODO:
  63. *
  64. * macro to flag unimplemented blocks
  65. */
  /* Reports file and line through xmlGenericError(); the expansion ends with ';' */
  66. #define TODO \
  67. xmlGenericError(xmlGenericErrorContext, \
  68. "Unimplemented block at %s:%d\n", \
  69. __FILE__, __LINE__);
  70. /************************************************************************
  71. * *
  72. * Datatypes and structures *
  73. * *
  74. ************************************************************************/
  75. /*
  76. * Note: the order of the enums below is significant, do not shuffle
  77. */
  /*
   * Kind of an atom or of a range (the `type` field of xmlRegAtom and
   * xmlRegRange). The first group is numbered from 1; the second group
   * starts at 100 with XML_REGEXP_LETTER. The names of the second group
   * suggest Unicode character categories - confirm against the matching
   * code, which is outside this section.
   */
  78. typedef enum {
  79. XML_REGEXP_EPSILON = 1,
  80. XML_REGEXP_CHARVAL,
  81. XML_REGEXP_RANGES,
  82. XML_REGEXP_SUBREG, /* used for () sub regexps */
  83. XML_REGEXP_STRING,
  84. XML_REGEXP_ANYCHAR, /* . */
  85. XML_REGEXP_ANYSPACE, /* \s */
  86. XML_REGEXP_NOTSPACE, /* \S */
  87. XML_REGEXP_INITNAME, /* \l */
  88. XML_REGEXP_NOTINITNAME, /* \L */
  89. XML_REGEXP_NAMECHAR, /* \c */
  90. XML_REGEXP_NOTNAMECHAR, /* \C */
  91. XML_REGEXP_DECIMAL, /* \d */
  92. XML_REGEXP_NOTDECIMAL, /* \D */
  93. XML_REGEXP_REALCHAR, /* \w */
  94. XML_REGEXP_NOTREALCHAR, /* \W */
  95. XML_REGEXP_LETTER = 100,
  96. XML_REGEXP_LETTER_UPPERCASE,
  97. XML_REGEXP_LETTER_LOWERCASE,
  98. XML_REGEXP_LETTER_TITLECASE,
  99. XML_REGEXP_LETTER_MODIFIER,
  100. XML_REGEXP_LETTER_OTHERS,
  101. XML_REGEXP_MARK,
  102. XML_REGEXP_MARK_NONSPACING,
  103. XML_REGEXP_MARK_SPACECOMBINING,
  104. XML_REGEXP_MARK_ENCLOSING,
  105. XML_REGEXP_NUMBER,
  106. XML_REGEXP_NUMBER_DECIMAL,
  107. XML_REGEXP_NUMBER_LETTER,
  108. XML_REGEXP_NUMBER_OTHERS,
  109. XML_REGEXP_PUNCT,
  110. XML_REGEXP_PUNCT_CONNECTOR,
  111. XML_REGEXP_PUNCT_DASH,
  112. XML_REGEXP_PUNCT_OPEN,
  113. XML_REGEXP_PUNCT_CLOSE,
  114. XML_REGEXP_PUNCT_INITQUOTE,
  115. XML_REGEXP_PUNCT_FINQUOTE,
  116. XML_REGEXP_PUNCT_OTHERS,
  117. XML_REGEXP_SEPAR,
  118. XML_REGEXP_SEPAR_SPACE,
  119. XML_REGEXP_SEPAR_LINE,
  120. XML_REGEXP_SEPAR_PARA,
  121. XML_REGEXP_SYMBOL,
  122. XML_REGEXP_SYMBOL_MATH,
  123. XML_REGEXP_SYMBOL_CURRENCY,
  124. XML_REGEXP_SYMBOL_MODIFIER,
  125. XML_REGEXP_SYMBOL_OTHERS,
  126. XML_REGEXP_OTHER,
  127. XML_REGEXP_OTHER_CONTROL,
  128. XML_REGEXP_OTHER_FORMAT,
  129. XML_REGEXP_OTHER_PRIVATE,
  130. XML_REGEXP_OTHER_NA,
  131. XML_REGEXP_BLOCK_NAME /* atom whose valuep is released by xmlRegFreeAtom() */
  132. } xmlRegAtomType;
  /*
   * Quantifier attached to an atom (xmlRegAtom.quant). xmlRegNewAtom()
   * initialises new atoms with XML_REGEXP_QUANT_ONCE.
   */
  133. typedef enum {
  134. XML_REGEXP_QUANT_EPSILON = 1,
  135. XML_REGEXP_QUANT_ONCE,
  136. XML_REGEXP_QUANT_OPT, /* printed as "?" by xmlRegPrintQuantType() */
  137. XML_REGEXP_QUANT_MULT, /* printed as "*" */
  138. XML_REGEXP_QUANT_PLUS, /* printed as "+" */
  139. XML_REGEXP_QUANT_ONCEONLY,
  140. XML_REGEXP_QUANT_ALL,
  141. XML_REGEXP_QUANT_RANGE /* xmlRegPrintAtom() prints the atom's min-max for this one */
  142. } xmlRegQuantType;
  /*
   * Kind of an automaton state (xmlRegState.type). xmlRegNewState()
   * creates states as XML_REGEXP_TRANS_STATE, and xmlRegEpxFromParse()
   * stores this value in the first column of each compact-table row.
   */
  143. typedef enum {
  144. XML_REGEXP_START_STATE = 1,
  145. XML_REGEXP_FINAL_STATE,
  146. XML_REGEXP_TRANS_STATE,
  147. XML_REGEXP_SINK_STATE,
  148. XML_REGEXP_UNREACH_STATE
  149. } xmlRegStateType;
  /*
   * Marker value used by the mark, markd and reached fields of a state.
   * NOTE(review): the traversals that set these are outside this section.
   */
  150. typedef enum {
  151. XML_REGEXP_MARK_NORMAL = 0, /* value given to new states by xmlRegNewState() */
  152. XML_REGEXP_MARK_START,
  153. XML_REGEXP_MARK_VISITED
  154. } xmlRegMarkedType;
  /*
   * One range entry of an atom (see xmlRegAtom.ranges).
   */
  155. typedef struct _xmlRegRange xmlRegRange;
  156. typedef xmlRegRange *xmlRegRangePtr;
  157. struct _xmlRegRange {
  158. int neg; /* 0 normal, 1 not, 2 exclude */
  159. xmlRegAtomType type; /* the type of range */
  160. int start; /* the start codepoint */
  161. int end; /* the end codepoint */
  162. xmlChar *blockName; /* optional string owned by the range: freed by xmlRegFreeRange(), duplicated by xmlRegCopyRange() */
  163. };
  /*
   * An atom: the label carried by a transition (xmlRegTrans.atom).
   * Note that xmlRegState is an alias for struct _xmlAutomataState.
   */
  164. typedef struct _xmlRegAtom xmlRegAtom;
  165. typedef xmlRegAtom *xmlRegAtomPtr;
  166. typedef struct _xmlAutomataState xmlRegState;
  167. typedef xmlRegState *xmlRegStatePtr;
  168. struct _xmlRegAtom {
  169. int no; /* used by xmlRegEpxFromParse() as an index into a table sized by the atom count */
  170. xmlRegAtomType type;
  171. xmlRegQuantType quant; /* XML_REGEXP_QUANT_ONCE for atoms made by xmlRegNewAtom() */
  172. int min; /* printed as "min-max" when quant is XML_REGEXP_QUANT_RANGE */
  173. int max;
  174. void *valuep; /* string value of STRING atoms; freed by xmlRegFreeAtom() for STRING and BLOCK_NAME atoms */
  175. void *valuep2; /* freed by xmlRegFreeAtom() for STRING atoms */
  176. int neg; /* non-zero is printed as "not" by xmlRegPrintAtom() */
  177. int codepoint; /* printed for CHARVAL atoms */
  178. xmlRegStatePtr start; /* printed together with stop for SUBREG atoms */
  179. xmlRegStatePtr start0; /* NOTE(review): not used in this section */
  180. xmlRegStatePtr stop;
  181. int maxRanges; /* presumably the allocated size of ranges - confirm */
  182. int nbRanges; /* number of valid entries in ranges */
  183. xmlRegRangePtr *ranges; /* owned array of owned ranges */
  184. void *data; /* copied into the transdata table by xmlRegEpxFromParse(); never freed in this section */
  185. };
  /*
   * A counter with its two bounds.
   * NOTE(review): the code that creates and evaluates counters is outside
   * this section; the meaning of the bounds should be confirmed there.
   */
  186. typedef struct _xmlRegCounter xmlRegCounter;
  187. typedef xmlRegCounter *xmlRegCounterPtr;
  188. struct _xmlRegCounter {
  189. int min;
  190. int max;
  191. };
  /*
   * One outgoing transition of a state (element of xmlRegState.trans).
   */
  192. typedef struct _xmlRegTrans xmlRegTrans;
  193. typedef xmlRegTrans *xmlRegTransPtr;
  194. struct _xmlRegTrans {
  195. xmlRegAtomPtr atom; /* label; transitions with a NULL atom are skipped by xmlRegEpxFromParse() */
  196. int to; /* index of the target state; -1 entries are skipped by xmlRegEpxFromParse() */
  197. int counter; /* NOTE(review): not used in this section */
  198. int count; /* NOTE(review): not used in this section */
  199. int nd; /* NOTE(review): not used in this section */
  200. };
  /*
   * An automaton state (also known as xmlRegState). The trans and transTo
   * arrays are owned by the state and released by xmlRegFreeState().
   */
  201. struct _xmlAutomataState {
  202. xmlRegStateType type;
  203. xmlRegMarkedType mark;
  204. xmlRegMarkedType markd;
  205. xmlRegMarkedType reached;
  206. int no;
  207. int maxTrans; /* presumably the allocated size of trans - confirm */
  208. int nbTrans; /* number of valid entries in trans */
  209. xmlRegTrans *trans;
  210. /* knowing states pointing to us can speed things up */
  211. int maxTransTo;
  212. int nbTransTo;
  213. int *transTo;
  214. };
  /*
   * Parser/builder context (also known as xmlRegParserCtxt).
   * It owns string, states, atoms and counters until xmlRegEpxFromParse()
   * hands them over to the compiled regexp; whatever it still owns is
   * released by xmlRegFreeParserCtxt().
   */
  215. typedef struct _xmlAutomata xmlRegParserCtxt;
  216. typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
  /* NOTE(review): presumably a bit for the flags field below - confirm */
  217. #define AM_AUTOMATA_RNG 1
  218. struct _xmlAutomata {
  219. xmlChar *string; /* private copy of the pattern made by xmlRegNewParserCtxt() */
  220. xmlChar *cur; /* cursor into string, moved by the NEXT/NEXTL macros */
  221. int error; /* set by xmlRegexpErrMemory() and xmlRegexpErrCompile() */
  222. int neg;
  223. xmlRegStatePtr start;
  224. xmlRegStatePtr end;
  225. xmlRegStatePtr state;
  226. xmlRegAtomPtr atom;
  227. int maxAtoms;
  228. int nbAtoms; /* number of entries in atoms */
  229. xmlRegAtomPtr *atoms;
  230. int maxStates;
  231. int nbStates; /* number of entries in states; entries may be NULL */
  232. xmlRegStatePtr *states;
  233. int maxCounters;
  234. int nbCounters;
  235. xmlRegCounter *counters;
  236. int determinist; /* starts at -1 in xmlRegNewParserCtxt(); -1 makes xmlRegEpxFromParse() call xmlRegexpIsDeterminist() */
  237. int negs; /* a non-zero value disables the compact form in xmlRegEpxFromParse() */
  238. int flags; /* copied to the compiled regexp */
  239. int depth;
  240. };
  /*
   * A compiled regexp, filled in by xmlRegEpxFromParse(). It holds either
   * the states/atoms arrays taken over from the parser context, or, when
   * the compact form could be built, the tables at the end of the struct
   * (states and atoms are then freed and reset to NULL).
   */
  241. struct _xmlRegexp {
  242. xmlChar *string;
  243. int nbStates;
  244. xmlRegStatePtr *states;
  245. int nbAtoms;
  246. xmlRegAtomPtr *atoms;
  247. int nbCounters;
  248. xmlRegCounter *counters;
  249. int determinist;
  250. int flags;
  251. /*
  252. * That's the compact form for determinists automatas
  253. */
  254. int nbstates; /* number of rows used in compact */
  255. int *compact; /* rows of nbstrings + 1 ints: column 0 is the state type, other cells hold target state + 1, 0 meaning no transition */
  256. void **transdata; /* optional nbstates x nbstrings table of atom data pointers; may be NULL */
  257. int nbstrings; /* number of entries in stringMap */
  258. xmlChar **stringMap; /* the distinct atom strings; each entry is an owned copy */
  259. };
  /*
   * One saved position on the rollback stack of an execution context
   * (see xmlRegExecCtxt.rollbacks).
   */
  260. typedef struct _xmlRegExecRollback xmlRegExecRollback;
  261. typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
  262. struct _xmlRegExecRollback {
  263. xmlRegStatePtr state;/* the current state */
  264. int index; /* the index in the input stack */
  265. int nextbranch; /* the next transition to explore in that state */
  266. int *counts; /* save the automata state if it has some */
  267. };
  /*
   * One entry of the string input stack (see xmlRegExecCtxt.inputStack).
   */
  268. typedef struct _xmlRegInputToken xmlRegInputToken;
  269. typedef xmlRegInputToken *xmlRegInputTokenPtr;
  270. struct _xmlRegInputToken {
  271. xmlChar *value; /* the token string */
  272. void *data; /* NOTE(review): presumably user data travelling with the token - confirm */
  273. };
  /*
   * Execution context used while matching input against a compiled regexp.
   * NOTE(review): only the layout is visible in this section; the code
   * driving these fields is further down the file.
   */
  274. struct _xmlRegExecCtxt {
  275. int status; /* execution status, != 0 indicates an error */
  276. int determinist; /* did we find an indeterministic behaviour */
  277. xmlRegexpPtr comp; /* the compiled regexp */
  278. xmlRegExecCallbacks callback; /* presumably invoked on transitions - confirm */
  279. void *data; /* presumably the user data for callback - confirm */
  280. xmlRegStatePtr state;/* the current state */
  281. int transno; /* the current transition on that state */
  282. int transcount; /* the number of chars in char counted transitions */
  283. /*
  284. * A stack of rollback states
  285. */
  286. int maxRollbacks;
  287. int nbRollbacks;
  288. xmlRegExecRollback *rollbacks;
  289. /*
  290. * The state of the automata if any
  291. */
  292. int *counts;
  293. /*
  294. * The input stack
  295. */
  296. int inputStackMax;
  297. int inputStackNr;
  298. int index;
  299. int *charStack;
  300. const xmlChar *inputString; /* when operating on characters */
  301. xmlRegInputTokenPtr inputStack;/* when operating on strings */
  302. /*
  303. * error handling
  304. */
  305. int errStateNo; /* the error state number */
  306. xmlRegStatePtr errState; /* the error state */
  307. xmlChar *errString; /* the string raising the error */
  308. int *errCounts; /* counters at the error state */
  309. int nbPush; /* presumably compared with MAX_PUSH - confirm */
  310. };
  311. #define REGEXP_ALL_COUNTER 0x123456
  312. #define REGEXP_ALL_LAX_COUNTER 0x123457
  313. static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
  314. static void xmlRegFreeState(xmlRegStatePtr state);
  315. static void xmlRegFreeAtom(xmlRegAtomPtr atom);
  316. static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
  317. static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
  318. static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
  319. int neg, int start, int end, const xmlChar *blockName);
  320. void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
  321. /************************************************************************
  322. * *
  323. * Regexp memory error handler *
  324. * *
  325. ************************************************************************/
  326. /**
  327. * xmlRegexpErrMemory:
  328. * @extra: extra information
  329. *
  330. * Handle an out of memory condition
  331. */
  332. static void
  333. xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
  334. {
  335. const char *regexp = NULL;
  336. if (ctxt != NULL) {
  337. regexp = (const char *) ctxt->string;
  338. ctxt->error = XML_ERR_NO_MEMORY;
  339. }
  340. __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
  341. XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
  342. regexp, NULL, 0, 0,
  343. "Memory allocation failed : %s\n", extra);
  344. }
  345. /**
  346. * xmlRegexpErrCompile:
  347. * @extra: extra information
  348. *
  349. * Handle a compilation failure
  350. */
  351. static void
  352. xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
  353. {
  354. const char *regexp = NULL;
  355. int idx = 0;
  356. if (ctxt != NULL) {
  357. regexp = (const char *) ctxt->string;
  358. idx = ctxt->cur - ctxt->string;
  359. ctxt->error = XML_REGEXP_COMPILE_ERROR;
  360. }
  361. __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
  362. XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
  363. regexp, NULL, idx, 0,
  364. "failed to compile: %s\n", extra);
  365. }
  366. /************************************************************************
  367. * *
  368. * Allocation/Deallocation *
  369. * *
  370. ************************************************************************/
  371. static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
  /**
   * xmlRegCalloc2:
   * @dim1: size of first dimension
   * @dim2: size of second dimension
   * @elemSize: size of element
   *
   * Allocate a two-dimensional array and set all elements to zero.
   *
   * Returns the new array, or NULL if @dim2 or @elemSize is zero, if the
   * total size would overflow a size_t, or if the allocation fails.
   */
  static void*
  xmlRegCalloc2(size_t dim1, size_t dim2, size_t elemSize) {
      size_t totalSize;
      void *ret;

      /* The overflow check divides by these two, so reject zero first */
      if ((dim2 == 0) || (elemSize == 0))
          return (NULL);
      /* Check for overflow */
      if (dim1 > SIZE_MAX / dim2 / elemSize)
          return (NULL);
      totalSize = dim1 * dim2 * elemSize;
      ret = xmlMalloc(totalSize);
      if (ret != NULL)
          memset(ret, 0, totalSize);
      return (ret);
  }
  395. /**
  396. * xmlRegEpxFromParse:
  397. * @ctxt: the parser context used to build it
  398. *
  399. * Allocate a new regexp and fill it with the result from the parser
  400. *
  401. * Returns the new regexp or NULL in case of error
  402. */
  403. static xmlRegexpPtr
  404. xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
  405. xmlRegexpPtr ret;
  406. ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
  407. if (ret == NULL) {
  408. xmlRegexpErrMemory(ctxt, "compiling regexp");
  409. return(NULL);
  410. }
  411. memset(ret, 0, sizeof(xmlRegexp));
  412. ret->string = ctxt->string;
  413. ret->nbStates = ctxt->nbStates;
  414. ret->states = ctxt->states;
  415. ret->nbAtoms = ctxt->nbAtoms;
  416. ret->atoms = ctxt->atoms;
  417. ret->nbCounters = ctxt->nbCounters;
  418. ret->counters = ctxt->counters;
  419. ret->determinist = ctxt->determinist;
  420. ret->flags = ctxt->flags;
  421. if (ret->determinist == -1) {
  422. xmlRegexpIsDeterminist(ret);
  423. }
  424. if ((ret->determinist != 0) &&
  425. (ret->nbCounters == 0) &&
  426. (ctxt->negs == 0) &&
  427. (ret->atoms != NULL) &&
  428. (ret->atoms[0] != NULL) &&
  429. (ret->atoms[0]->type == XML_REGEXP_STRING)) {
  430. int i, j, nbstates = 0, nbatoms = 0;
  431. int *stateRemap;
  432. int *stringRemap;
  433. int *transitions;
  434. void **transdata;
  435. xmlChar **stringMap;
  436. xmlChar *value;
  437. /*
  438. * Switch to a compact representation
  439. * 1/ counting the effective number of states left
  440. * 2/ counting the unique number of atoms, and check that
  441. * they are all of the string type
  442. * 3/ build a table state x atom for the transitions
  443. */
  444. stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
  445. if (stateRemap == NULL) {
  446. xmlRegexpErrMemory(ctxt, "compiling regexp");
  447. xmlFree(ret);
  448. return(NULL);
  449. }
  450. for (i = 0;i < ret->nbStates;i++) {
  451. if (ret->states[i] != NULL) {
  452. stateRemap[i] = nbstates;
  453. nbstates++;
  454. } else {
  455. stateRemap[i] = -1;
  456. }
  457. }
  458. #ifdef DEBUG_COMPACTION
  459. printf("Final: %d states\n", nbstates);
  460. #endif
  461. stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
  462. if (stringMap == NULL) {
  463. xmlRegexpErrMemory(ctxt, "compiling regexp");
  464. xmlFree(stateRemap);
  465. xmlFree(ret);
  466. return(NULL);
  467. }
  468. stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
  469. if (stringRemap == NULL) {
  470. xmlRegexpErrMemory(ctxt, "compiling regexp");
  471. xmlFree(stringMap);
  472. xmlFree(stateRemap);
  473. xmlFree(ret);
  474. return(NULL);
  475. }
  476. for (i = 0;i < ret->nbAtoms;i++) {
  477. if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
  478. (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
  479. value = ret->atoms[i]->valuep;
  480. for (j = 0;j < nbatoms;j++) {
  481. if (xmlStrEqual(stringMap[j], value)) {
  482. stringRemap[i] = j;
  483. break;
  484. }
  485. }
  486. if (j >= nbatoms) {
  487. stringRemap[i] = nbatoms;
  488. stringMap[nbatoms] = xmlStrdup(value);
  489. if (stringMap[nbatoms] == NULL) {
  490. for (i = 0;i < nbatoms;i++)
  491. xmlFree(stringMap[i]);
  492. xmlFree(stringRemap);
  493. xmlFree(stringMap);
  494. xmlFree(stateRemap);
  495. xmlFree(ret);
  496. return(NULL);
  497. }
  498. nbatoms++;
  499. }
  500. } else {
  501. xmlFree(stateRemap);
  502. xmlFree(stringRemap);
  503. for (i = 0;i < nbatoms;i++)
  504. xmlFree(stringMap[i]);
  505. xmlFree(stringMap);
  506. xmlFree(ret);
  507. return(NULL);
  508. }
  509. }
  510. #ifdef DEBUG_COMPACTION
  511. printf("Final: %d atoms\n", nbatoms);
  512. #endif
  513. transitions = (int *) xmlRegCalloc2(nbstates + 1, nbatoms + 1,
  514. sizeof(int));
  515. if (transitions == NULL) {
  516. xmlFree(stateRemap);
  517. xmlFree(stringRemap);
  518. for (i = 0;i < nbatoms;i++)
  519. xmlFree(stringMap[i]);
  520. xmlFree(stringMap);
  521. xmlFree(ret);
  522. return(NULL);
  523. }
  524. /*
  525. * Allocate the transition table. The first entry for each
  526. * state corresponds to the state type.
  527. */
  528. transdata = NULL;
  529. for (i = 0;i < ret->nbStates;i++) {
  530. int stateno, atomno, targetno, prev;
  531. xmlRegStatePtr state;
  532. xmlRegTransPtr trans;
  533. stateno = stateRemap[i];
  534. if (stateno == -1)
  535. continue;
  536. state = ret->states[i];
  537. transitions[stateno * (nbatoms + 1)] = state->type;
  538. for (j = 0;j < state->nbTrans;j++) {
  539. trans = &(state->trans[j]);
  540. if ((trans->to == -1) || (trans->atom == NULL))
  541. continue;
  542. atomno = stringRemap[trans->atom->no];
  543. if ((trans->atom->data != NULL) && (transdata == NULL)) {
  544. transdata = (void **) xmlRegCalloc2(nbstates, nbatoms,
  545. sizeof(void *));
  546. if (transdata == NULL) {
  547. xmlRegexpErrMemory(ctxt, "compiling regexp");
  548. break;
  549. }
  550. }
  551. targetno = stateRemap[trans->to];
  552. /*
  553. * if the same atom can generate transitions to 2 different
  554. * states then it means the automata is not deterministic and
  555. * the compact form can't be used !
  556. */
  557. prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
  558. if (prev != 0) {
  559. if (prev != targetno + 1) {
  560. ret->determinist = 0;
  561. #ifdef DEBUG_COMPACTION
  562. printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
  563. i, j, trans->atom->no, trans->to, atomno, targetno);
  564. printf(" previous to is %d\n", prev);
  565. #endif
  566. if (transdata != NULL)
  567. xmlFree(transdata);
  568. xmlFree(transitions);
  569. xmlFree(stateRemap);
  570. xmlFree(stringRemap);
  571. for (i = 0;i < nbatoms;i++)
  572. xmlFree(stringMap[i]);
  573. xmlFree(stringMap);
  574. goto not_determ;
  575. }
  576. } else {
  577. #if 0
  578. printf("State %d trans %d: atom %d to %d : %d to %d\n",
  579. i, j, trans->atom->no, trans->to, atomno, targetno);
  580. #endif
  581. transitions[stateno * (nbatoms + 1) + atomno + 1] =
  582. targetno + 1; /* to avoid 0 */
  583. if (transdata != NULL)
  584. transdata[stateno * nbatoms + atomno] =
  585. trans->atom->data;
  586. }
  587. }
  588. }
  589. ret->determinist = 1;
  590. #ifdef DEBUG_COMPACTION
  591. /*
  592. * Debug
  593. */
  594. for (i = 0;i < nbstates;i++) {
  595. for (j = 0;j < nbatoms + 1;j++) {
  596. printf("%02d ", transitions[i * (nbatoms + 1) + j]);
  597. }
  598. printf("\n");
  599. }
  600. printf("\n");
  601. #endif
  602. /*
  603. * Cleanup of the old data
  604. */
  605. if (ret->states != NULL) {
  606. for (i = 0;i < ret->nbStates;i++)
  607. xmlRegFreeState(ret->states[i]);
  608. xmlFree(ret->states);
  609. }
  610. ret->states = NULL;
  611. ret->nbStates = 0;
  612. if (ret->atoms != NULL) {
  613. for (i = 0;i < ret->nbAtoms;i++)
  614. xmlRegFreeAtom(ret->atoms[i]);
  615. xmlFree(ret->atoms);
  616. }
  617. ret->atoms = NULL;
  618. ret->nbAtoms = 0;
  619. ret->compact = transitions;
  620. ret->transdata = transdata;
  621. ret->stringMap = stringMap;
  622. ret->nbstrings = nbatoms;
  623. ret->nbstates = nbstates;
  624. xmlFree(stateRemap);
  625. xmlFree(stringRemap);
  626. }
  627. not_determ:
  628. ctxt->string = NULL;
  629. ctxt->nbStates = 0;
  630. ctxt->states = NULL;
  631. ctxt->nbAtoms = 0;
  632. ctxt->atoms = NULL;
  633. ctxt->nbCounters = 0;
  634. ctxt->counters = NULL;
  635. return(ret);
  636. }
  637. /**
  638. * xmlRegNewParserCtxt:
  639. * @string: the string to parse
  640. *
  641. * Allocate a new regexp parser context
  642. *
  643. * Returns the new context or NULL in case of error
  644. */
  645. static xmlRegParserCtxtPtr
  646. xmlRegNewParserCtxt(const xmlChar *string) {
  647. xmlRegParserCtxtPtr ret;
  648. ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
  649. if (ret == NULL)
  650. return(NULL);
  651. memset(ret, 0, sizeof(xmlRegParserCtxt));
  652. if (string != NULL)
  653. ret->string = xmlStrdup(string);
  654. ret->cur = ret->string;
  655. ret->neg = 0;
  656. ret->negs = 0;
  657. ret->error = 0;
  658. ret->determinist = -1;
  659. return(ret);
  660. }
  661. /**
  662. * xmlRegNewRange:
  663. * @ctxt: the regexp parser context
  664. * @neg: is that negative
  665. * @type: the type of range
  666. * @start: the start codepoint
  667. * @end: the end codepoint
  668. *
  669. * Allocate a new regexp range
  670. *
  671. * Returns the new range or NULL in case of error
  672. */
  673. static xmlRegRangePtr
  674. xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
  675. int neg, xmlRegAtomType type, int start, int end) {
  676. xmlRegRangePtr ret;
  677. ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
  678. if (ret == NULL) {
  679. xmlRegexpErrMemory(ctxt, "allocating range");
  680. return(NULL);
  681. }
  682. ret->neg = neg;
  683. ret->type = type;
  684. ret->start = start;
  685. ret->end = end;
  686. return(ret);
  687. }
  688. /**
  689. * xmlRegFreeRange:
  690. * @range: the regexp range
  691. *
  692. * Free a regexp range
  693. */
  694. static void
  695. xmlRegFreeRange(xmlRegRangePtr range) {
  696. if (range == NULL)
  697. return;
  698. if (range->blockName != NULL)
  699. xmlFree(range->blockName);
  700. xmlFree(range);
  701. }
  702. /**
  703. * xmlRegCopyRange:
  704. * @range: the regexp range
  705. *
  706. * Copy a regexp range
  707. *
  708. * Returns the new copy or NULL in case of error.
  709. */
  710. static xmlRegRangePtr
  711. xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
  712. xmlRegRangePtr ret;
  713. if (range == NULL)
  714. return(NULL);
  715. ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
  716. range->end);
  717. if (ret == NULL)
  718. return(NULL);
  719. if (range->blockName != NULL) {
  720. ret->blockName = xmlStrdup(range->blockName);
  721. if (ret->blockName == NULL) {
  722. xmlRegexpErrMemory(ctxt, "allocating range");
  723. xmlRegFreeRange(ret);
  724. return(NULL);
  725. }
  726. }
  727. return(ret);
  728. }
  729. /**
  730. * xmlRegNewAtom:
  731. * @ctxt: the regexp parser context
  732. * @type: the type of atom
  733. *
  734. * Allocate a new atom
  735. *
  736. * Returns the new atom or NULL in case of error
  737. */
  738. static xmlRegAtomPtr
  739. xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
  740. xmlRegAtomPtr ret;
  741. ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
  742. if (ret == NULL) {
  743. xmlRegexpErrMemory(ctxt, "allocating atom");
  744. return(NULL);
  745. }
  746. memset(ret, 0, sizeof(xmlRegAtom));
  747. ret->type = type;
  748. ret->quant = XML_REGEXP_QUANT_ONCE;
  749. ret->min = 0;
  750. ret->max = 0;
  751. return(ret);
  752. }
  753. /**
  754. * xmlRegFreeAtom:
  755. * @atom: the regexp atom
  756. *
  757. * Free a regexp atom
  758. */
  759. static void
  760. xmlRegFreeAtom(xmlRegAtomPtr atom) {
  761. int i;
  762. if (atom == NULL)
  763. return;
  764. for (i = 0;i < atom->nbRanges;i++)
  765. xmlRegFreeRange(atom->ranges[i]);
  766. if (atom->ranges != NULL)
  767. xmlFree(atom->ranges);
  768. if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
  769. xmlFree(atom->valuep);
  770. if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
  771. xmlFree(atom->valuep2);
  772. if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
  773. xmlFree(atom->valuep);
  774. xmlFree(atom);
  775. }
  776. /**
  777. * xmlRegCopyAtom:
  778. * @ctxt: the regexp parser context
  779. * @atom: the original atom
  780. *
  781. * Allocate a new regexp range
  782. *
  783. * Returns the new atom or NULL in case of error
  784. */
  785. static xmlRegAtomPtr
  786. xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
  787. xmlRegAtomPtr ret;
  788. ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
  789. if (ret == NULL) {
  790. xmlRegexpErrMemory(ctxt, "copying atom");
  791. return(NULL);
  792. }
  793. memset(ret, 0, sizeof(xmlRegAtom));
  794. ret->type = atom->type;
  795. ret->quant = atom->quant;
  796. ret->min = atom->min;
  797. ret->max = atom->max;
  798. if (atom->nbRanges > 0) {
  799. int i;
  800. ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
  801. atom->nbRanges);
  802. if (ret->ranges == NULL) {
  803. xmlRegexpErrMemory(ctxt, "copying atom");
  804. goto error;
  805. }
  806. for (i = 0;i < atom->nbRanges;i++) {
  807. ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
  808. if (ret->ranges[i] == NULL)
  809. goto error;
  810. ret->nbRanges = i + 1;
  811. }
  812. }
  813. return(ret);
  814. error:
  815. xmlRegFreeAtom(ret);
  816. return(NULL);
  817. }
  818. static xmlRegStatePtr
  819. xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
  820. xmlRegStatePtr ret;
  821. ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
  822. if (ret == NULL) {
  823. xmlRegexpErrMemory(ctxt, "allocating state");
  824. return(NULL);
  825. }
  826. memset(ret, 0, sizeof(xmlRegState));
  827. ret->type = XML_REGEXP_TRANS_STATE;
  828. ret->mark = XML_REGEXP_MARK_NORMAL;
  829. return(ret);
  830. }
  831. /**
  832. * xmlRegFreeState:
  833. * @state: the regexp state
  834. *
  835. * Free a regexp state
  836. */
  837. static void
  838. xmlRegFreeState(xmlRegStatePtr state) {
  839. if (state == NULL)
  840. return;
  841. if (state->trans != NULL)
  842. xmlFree(state->trans);
  843. if (state->transTo != NULL)
  844. xmlFree(state->transTo);
  845. xmlFree(state);
  846. }
  847. /**
  848. * xmlRegFreeParserCtxt:
  849. * @ctxt: the regexp parser context
  850. *
  851. * Free a regexp parser context
  852. */
  853. static void
  854. xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
  855. int i;
  856. if (ctxt == NULL)
  857. return;
  858. if (ctxt->string != NULL)
  859. xmlFree(ctxt->string);
  860. if (ctxt->states != NULL) {
  861. for (i = 0;i < ctxt->nbStates;i++)
  862. xmlRegFreeState(ctxt->states[i]);
  863. xmlFree(ctxt->states);
  864. }
  865. if (ctxt->atoms != NULL) {
  866. for (i = 0;i < ctxt->nbAtoms;i++)
  867. xmlRegFreeAtom(ctxt->atoms[i]);
  868. xmlFree(ctxt->atoms);
  869. }
  870. if (ctxt->counters != NULL)
  871. xmlFree(ctxt->counters);
  872. xmlFree(ctxt);
  873. }
  874. /************************************************************************
  875. * *
  876. * Display of Data structures *
  877. * *
  878. ************************************************************************/
  879. static void
  880. xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
  881. switch (type) {
  882. case XML_REGEXP_EPSILON:
  883. fprintf(output, "epsilon "); break;
  884. case XML_REGEXP_CHARVAL:
  885. fprintf(output, "charval "); break;
  886. case XML_REGEXP_RANGES:
  887. fprintf(output, "ranges "); break;
  888. case XML_REGEXP_SUBREG:
  889. fprintf(output, "subexpr "); break;
  890. case XML_REGEXP_STRING:
  891. fprintf(output, "string "); break;
  892. case XML_REGEXP_ANYCHAR:
  893. fprintf(output, "anychar "); break;
  894. case XML_REGEXP_ANYSPACE:
  895. fprintf(output, "anyspace "); break;
  896. case XML_REGEXP_NOTSPACE:
  897. fprintf(output, "notspace "); break;
  898. case XML_REGEXP_INITNAME:
  899. fprintf(output, "initname "); break;
  900. case XML_REGEXP_NOTINITNAME:
  901. fprintf(output, "notinitname "); break;
  902. case XML_REGEXP_NAMECHAR:
  903. fprintf(output, "namechar "); break;
  904. case XML_REGEXP_NOTNAMECHAR:
  905. fprintf(output, "notnamechar "); break;
  906. case XML_REGEXP_DECIMAL:
  907. fprintf(output, "decimal "); break;
  908. case XML_REGEXP_NOTDECIMAL:
  909. fprintf(output, "notdecimal "); break;
  910. case XML_REGEXP_REALCHAR:
  911. fprintf(output, "realchar "); break;
  912. case XML_REGEXP_NOTREALCHAR:
  913. fprintf(output, "notrealchar "); break;
  914. case XML_REGEXP_LETTER:
  915. fprintf(output, "LETTER "); break;
  916. case XML_REGEXP_LETTER_UPPERCASE:
  917. fprintf(output, "LETTER_UPPERCASE "); break;
  918. case XML_REGEXP_LETTER_LOWERCASE:
  919. fprintf(output, "LETTER_LOWERCASE "); break;
  920. case XML_REGEXP_LETTER_TITLECASE:
  921. fprintf(output, "LETTER_TITLECASE "); break;
  922. case XML_REGEXP_LETTER_MODIFIER:
  923. fprintf(output, "LETTER_MODIFIER "); break;
  924. case XML_REGEXP_LETTER_OTHERS:
  925. fprintf(output, "LETTER_OTHERS "); break;
  926. case XML_REGEXP_MARK:
  927. fprintf(output, "MARK "); break;
  928. case XML_REGEXP_MARK_NONSPACING:
  929. fprintf(output, "MARK_NONSPACING "); break;
  930. case XML_REGEXP_MARK_SPACECOMBINING:
  931. fprintf(output, "MARK_SPACECOMBINING "); break;
  932. case XML_REGEXP_MARK_ENCLOSING:
  933. fprintf(output, "MARK_ENCLOSING "); break;
  934. case XML_REGEXP_NUMBER:
  935. fprintf(output, "NUMBER "); break;
  936. case XML_REGEXP_NUMBER_DECIMAL:
  937. fprintf(output, "NUMBER_DECIMAL "); break;
  938. case XML_REGEXP_NUMBER_LETTER:
  939. fprintf(output, "NUMBER_LETTER "); break;
  940. case XML_REGEXP_NUMBER_OTHERS:
  941. fprintf(output, "NUMBER_OTHERS "); break;
  942. case XML_REGEXP_PUNCT:
  943. fprintf(output, "PUNCT "); break;
  944. case XML_REGEXP_PUNCT_CONNECTOR:
  945. fprintf(output, "PUNCT_CONNECTOR "); break;
  946. case XML_REGEXP_PUNCT_DASH:
  947. fprintf(output, "PUNCT_DASH "); break;
  948. case XML_REGEXP_PUNCT_OPEN:
  949. fprintf(output, "PUNCT_OPEN "); break;
  950. case XML_REGEXP_PUNCT_CLOSE:
  951. fprintf(output, "PUNCT_CLOSE "); break;
  952. case XML_REGEXP_PUNCT_INITQUOTE:
  953. fprintf(output, "PUNCT_INITQUOTE "); break;
  954. case XML_REGEXP_PUNCT_FINQUOTE:
  955. fprintf(output, "PUNCT_FINQUOTE "); break;
  956. case XML_REGEXP_PUNCT_OTHERS:
  957. fprintf(output, "PUNCT_OTHERS "); break;
  958. case XML_REGEXP_SEPAR:
  959. fprintf(output, "SEPAR "); break;
  960. case XML_REGEXP_SEPAR_SPACE:
  961. fprintf(output, "SEPAR_SPACE "); break;
  962. case XML_REGEXP_SEPAR_LINE:
  963. fprintf(output, "SEPAR_LINE "); break;
  964. case XML_REGEXP_SEPAR_PARA:
  965. fprintf(output, "SEPAR_PARA "); break;
  966. case XML_REGEXP_SYMBOL:
  967. fprintf(output, "SYMBOL "); break;
  968. case XML_REGEXP_SYMBOL_MATH:
  969. fprintf(output, "SYMBOL_MATH "); break;
  970. case XML_REGEXP_SYMBOL_CURRENCY:
  971. fprintf(output, "SYMBOL_CURRENCY "); break;
  972. case XML_REGEXP_SYMBOL_MODIFIER:
  973. fprintf(output, "SYMBOL_MODIFIER "); break;
  974. case XML_REGEXP_SYMBOL_OTHERS:
  975. fprintf(output, "SYMBOL_OTHERS "); break;
  976. case XML_REGEXP_OTHER:
  977. fprintf(output, "OTHER "); break;
  978. case XML_REGEXP_OTHER_CONTROL:
  979. fprintf(output, "OTHER_CONTROL "); break;
  980. case XML_REGEXP_OTHER_FORMAT:
  981. fprintf(output, "OTHER_FORMAT "); break;
  982. case XML_REGEXP_OTHER_PRIVATE:
  983. fprintf(output, "OTHER_PRIVATE "); break;
  984. case XML_REGEXP_OTHER_NA:
  985. fprintf(output, "OTHER_NA "); break;
  986. case XML_REGEXP_BLOCK_NAME:
  987. fprintf(output, "BLOCK "); break;
  988. }
  989. }
  990. static void
  991. xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
  992. switch (type) {
  993. case XML_REGEXP_QUANT_EPSILON:
  994. fprintf(output, "epsilon "); break;
  995. case XML_REGEXP_QUANT_ONCE:
  996. fprintf(output, "once "); break;
  997. case XML_REGEXP_QUANT_OPT:
  998. fprintf(output, "? "); break;
  999. case XML_REGEXP_QUANT_MULT:
  1000. fprintf(output, "* "); break;
  1001. case XML_REGEXP_QUANT_PLUS:
  1002. fprintf(output, "+ "); break;
  1003. case XML_REGEXP_QUANT_RANGE:
  1004. fprintf(output, "range "); break;
  1005. case XML_REGEXP_QUANT_ONCEONLY:
  1006. fprintf(output, "onceonly "); break;
  1007. case XML_REGEXP_QUANT_ALL:
  1008. fprintf(output, "all "); break;
  1009. }
  1010. }
  1011. static void
  1012. xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
  1013. fprintf(output, " range: ");
  1014. if (range->neg)
  1015. fprintf(output, "negative ");
  1016. xmlRegPrintAtomType(output, range->type);
  1017. fprintf(output, "%c - %c\n", range->start, range->end);
  1018. }
  1019. static void
  1020. xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
  1021. fprintf(output, " atom: ");
  1022. if (atom == NULL) {
  1023. fprintf(output, "NULL\n");
  1024. return;
  1025. }
  1026. if (atom->neg)
  1027. fprintf(output, "not ");
  1028. xmlRegPrintAtomType(output, atom->type);
  1029. xmlRegPrintQuantType(output, atom->quant);
  1030. if (atom->quant == XML_REGEXP_QUANT_RANGE)
  1031. fprintf(output, "%d-%d ", atom->min, atom->max);
  1032. if (atom->type == XML_REGEXP_STRING)
  1033. fprintf(output, "'%s' ", (char *) atom->valuep);
  1034. if (atom->type == XML_REGEXP_CHARVAL)
  1035. fprintf(output, "char %c\n", atom->codepoint);
  1036. else if (atom->type == XML_REGEXP_RANGES) {
  1037. int i;
  1038. fprintf(output, "%d entries\n", atom->nbRanges);
  1039. for (i = 0; i < atom->nbRanges;i++)
  1040. xmlRegPrintRange(output, atom->ranges[i]);
  1041. } else if (atom->type == XML_REGEXP_SUBREG) {
  1042. fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
  1043. } else {
  1044. fprintf(output, "\n");
  1045. }
  1046. }
  1047. static void
  1048. xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
  1049. fprintf(output, " trans: ");
  1050. if (trans == NULL) {
  1051. fprintf(output, "NULL\n");
  1052. return;
  1053. }
  1054. if (trans->to < 0) {
  1055. fprintf(output, "removed\n");
  1056. return;
  1057. }
  1058. if (trans->nd != 0) {
  1059. if (trans->nd == 2)
  1060. fprintf(output, "last not determinist, ");
  1061. else
  1062. fprintf(output, "not determinist, ");
  1063. }
  1064. if (trans->counter >= 0) {
  1065. fprintf(output, "counted %d, ", trans->counter);
  1066. }
  1067. if (trans->count == REGEXP_ALL_COUNTER) {
  1068. fprintf(output, "all transition, ");
  1069. } else if (trans->count >= 0) {
  1070. fprintf(output, "count based %d, ", trans->count);
  1071. }
  1072. if (trans->atom == NULL) {
  1073. fprintf(output, "epsilon to %d\n", trans->to);
  1074. return;
  1075. }
  1076. if (trans->atom->type == XML_REGEXP_CHARVAL)
  1077. fprintf(output, "char %c ", trans->atom->codepoint);
  1078. fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
  1079. }
  1080. static void
  1081. xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
  1082. int i;
  1083. fprintf(output, " state: ");
  1084. if (state == NULL) {
  1085. fprintf(output, "NULL\n");
  1086. return;
  1087. }
  1088. if (state->type == XML_REGEXP_START_STATE)
  1089. fprintf(output, "START ");
  1090. if (state->type == XML_REGEXP_FINAL_STATE)
  1091. fprintf(output, "FINAL ");
  1092. fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
  1093. for (i = 0;i < state->nbTrans; i++) {
  1094. xmlRegPrintTrans(output, &(state->trans[i]));
  1095. }
  1096. }
  1097. #ifdef DEBUG_REGEXP_GRAPH
  1098. static void
  1099. xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
  1100. int i;
  1101. fprintf(output, " ctxt: ");
  1102. if (ctxt == NULL) {
  1103. fprintf(output, "NULL\n");
  1104. return;
  1105. }
  1106. fprintf(output, "'%s' ", ctxt->string);
  1107. if (ctxt->error)
  1108. fprintf(output, "error ");
  1109. if (ctxt->neg)
  1110. fprintf(output, "neg ");
  1111. fprintf(output, "\n");
  1112. fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
  1113. for (i = 0;i < ctxt->nbAtoms; i++) {
  1114. fprintf(output, " %02d ", i);
  1115. xmlRegPrintAtom(output, ctxt->atoms[i]);
  1116. }
  1117. if (ctxt->atom != NULL) {
  1118. fprintf(output, "current atom:\n");
  1119. xmlRegPrintAtom(output, ctxt->atom);
  1120. }
  1121. fprintf(output, "%d states:", ctxt->nbStates);
  1122. if (ctxt->start != NULL)
  1123. fprintf(output, " start: %d", ctxt->start->no);
  1124. if (ctxt->end != NULL)
  1125. fprintf(output, " end: %d", ctxt->end->no);
  1126. fprintf(output, "\n");
  1127. for (i = 0;i < ctxt->nbStates; i++) {
  1128. xmlRegPrintState(output, ctxt->states[i]);
  1129. }
  1130. fprintf(output, "%d counters:\n", ctxt->nbCounters);
  1131. for (i = 0;i < ctxt->nbCounters; i++) {
  1132. fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
  1133. ctxt->counters[i].max);
  1134. }
  1135. }
  1136. #endif
  1137. /************************************************************************
  1138. * *
  1139. * Finite Automata structures manipulations *
  1140. * *
  1141. ************************************************************************/
  1142. static void
  1143. xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
  1144. int neg, xmlRegAtomType type, int start, int end,
  1145. xmlChar *blockName) {
  1146. xmlRegRangePtr range;
  1147. if (atom == NULL) {
  1148. ERROR("add range: atom is NULL");
  1149. return;
  1150. }
  1151. if (atom->type != XML_REGEXP_RANGES) {
  1152. ERROR("add range: atom is not ranges");
  1153. return;
  1154. }
  1155. if (atom->maxRanges == 0) {
  1156. atom->maxRanges = 4;
  1157. atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
  1158. sizeof(xmlRegRangePtr));
  1159. if (atom->ranges == NULL) {
  1160. xmlRegexpErrMemory(ctxt, "adding ranges");
  1161. atom->maxRanges = 0;
  1162. return;
  1163. }
  1164. } else if (atom->nbRanges >= atom->maxRanges) {
  1165. xmlRegRangePtr *tmp;
  1166. atom->maxRanges *= 2;
  1167. tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
  1168. sizeof(xmlRegRangePtr));
  1169. if (tmp == NULL) {
  1170. xmlRegexpErrMemory(ctxt, "adding ranges");
  1171. atom->maxRanges /= 2;
  1172. return;
  1173. }
  1174. atom->ranges = tmp;
  1175. }
  1176. range = xmlRegNewRange(ctxt, neg, type, start, end);
  1177. if (range == NULL)
  1178. return;
  1179. range->blockName = blockName;
  1180. atom->ranges[atom->nbRanges++] = range;
  1181. }
  1182. static int
  1183. xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
  1184. if (ctxt->maxCounters == 0) {
  1185. ctxt->maxCounters = 4;
  1186. ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
  1187. sizeof(xmlRegCounter));
  1188. if (ctxt->counters == NULL) {
  1189. xmlRegexpErrMemory(ctxt, "allocating counter");
  1190. ctxt->maxCounters = 0;
  1191. return(-1);
  1192. }
  1193. } else if (ctxt->nbCounters >= ctxt->maxCounters) {
  1194. xmlRegCounter *tmp;
  1195. ctxt->maxCounters *= 2;
  1196. tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
  1197. sizeof(xmlRegCounter));
  1198. if (tmp == NULL) {
  1199. xmlRegexpErrMemory(ctxt, "allocating counter");
  1200. ctxt->maxCounters /= 2;
  1201. return(-1);
  1202. }
  1203. ctxt->counters = tmp;
  1204. }
  1205. ctxt->counters[ctxt->nbCounters].min = -1;
  1206. ctxt->counters[ctxt->nbCounters].max = -1;
  1207. return(ctxt->nbCounters++);
  1208. }
  1209. static int
  1210. xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
  1211. if (atom == NULL) {
  1212. ERROR("atom push: atom is NULL");
  1213. return(-1);
  1214. }
  1215. if (ctxt->maxAtoms == 0) {
  1216. ctxt->maxAtoms = 4;
  1217. ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
  1218. sizeof(xmlRegAtomPtr));
  1219. if (ctxt->atoms == NULL) {
  1220. xmlRegexpErrMemory(ctxt, "pushing atom");
  1221. ctxt->maxAtoms = 0;
  1222. return(-1);
  1223. }
  1224. } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
  1225. xmlRegAtomPtr *tmp;
  1226. ctxt->maxAtoms *= 2;
  1227. tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
  1228. sizeof(xmlRegAtomPtr));
  1229. if (tmp == NULL) {
  1230. xmlRegexpErrMemory(ctxt, "allocating counter");
  1231. ctxt->maxAtoms /= 2;
  1232. return(-1);
  1233. }
  1234. ctxt->atoms = tmp;
  1235. }
  1236. atom->no = ctxt->nbAtoms;
  1237. ctxt->atoms[ctxt->nbAtoms++] = atom;
  1238. return(0);
  1239. }
  1240. static void
  1241. xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
  1242. int from) {
  1243. if (target->maxTransTo == 0) {
  1244. target->maxTransTo = 8;
  1245. target->transTo = (int *) xmlMalloc(target->maxTransTo *
  1246. sizeof(int));
  1247. if (target->transTo == NULL) {
  1248. xmlRegexpErrMemory(ctxt, "adding transition");
  1249. target->maxTransTo = 0;
  1250. return;
  1251. }
  1252. } else if (target->nbTransTo >= target->maxTransTo) {
  1253. int *tmp;
  1254. target->maxTransTo *= 2;
  1255. tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
  1256. sizeof(int));
  1257. if (tmp == NULL) {
  1258. xmlRegexpErrMemory(ctxt, "adding transition");
  1259. target->maxTransTo /= 2;
  1260. return;
  1261. }
  1262. target->transTo = tmp;
  1263. }
  1264. target->transTo[target->nbTransTo] = from;
  1265. target->nbTransTo++;
  1266. }
  1267. static void
  1268. xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
  1269. xmlRegAtomPtr atom, xmlRegStatePtr target,
  1270. int counter, int count) {
  1271. int nrtrans;
  1272. if (state == NULL) {
  1273. ERROR("add state: state is NULL");
  1274. return;
  1275. }
  1276. if (target == NULL) {
  1277. ERROR("add state: target is NULL");
  1278. return;
  1279. }
  1280. /*
  1281. * Other routines follow the philosophy 'When in doubt, add a transition'
  1282. * so we check here whether such a transition is already present and, if
  1283. * so, silently ignore this request.
  1284. */
  1285. for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
  1286. xmlRegTransPtr trans = &(state->trans[nrtrans]);
  1287. if ((trans->atom == atom) &&
  1288. (trans->to == target->no) &&
  1289. (trans->counter == counter) &&
  1290. (trans->count == count)) {
  1291. #ifdef DEBUG_REGEXP_GRAPH
  1292. printf("Ignoring duplicate transition from %d to %d\n",
  1293. state->no, target->no);
  1294. #endif
  1295. return;
  1296. }
  1297. }
  1298. if (state->maxTrans == 0) {
  1299. state->maxTrans = 8;
  1300. state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
  1301. sizeof(xmlRegTrans));
  1302. if (state->trans == NULL) {
  1303. xmlRegexpErrMemory(ctxt, "adding transition");
  1304. state->maxTrans = 0;
  1305. return;
  1306. }
  1307. } else if (state->nbTrans >= state->maxTrans) {
  1308. xmlRegTrans *tmp;
  1309. state->maxTrans *= 2;
  1310. tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
  1311. sizeof(xmlRegTrans));
  1312. if (tmp == NULL) {
  1313. xmlRegexpErrMemory(ctxt, "adding transition");
  1314. state->maxTrans /= 2;
  1315. return;
  1316. }
  1317. state->trans = tmp;
  1318. }
  1319. #ifdef DEBUG_REGEXP_GRAPH
  1320. printf("Add trans from %d to %d ", state->no, target->no);
  1321. if (count == REGEXP_ALL_COUNTER)
  1322. printf("all transition\n");
  1323. else if (count >= 0)
  1324. printf("count based %d\n", count);
  1325. else if (counter >= 0)
  1326. printf("counted %d\n", counter);
  1327. else if (atom == NULL)
  1328. printf("epsilon transition\n");
  1329. else if (atom != NULL)
  1330. xmlRegPrintAtom(stdout, atom);
  1331. #endif
  1332. state->trans[state->nbTrans].atom = atom;
  1333. state->trans[state->nbTrans].to = target->no;
  1334. state->trans[state->nbTrans].counter = counter;
  1335. state->trans[state->nbTrans].count = count;
  1336. state->trans[state->nbTrans].nd = 0;
  1337. state->nbTrans++;
  1338. xmlRegStateAddTransTo(ctxt, target, state->no);
  1339. }
  1340. static int
  1341. xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
  1342. if (state == NULL) return(-1);
  1343. if (ctxt->maxStates == 0) {
  1344. ctxt->maxStates = 4;
  1345. ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
  1346. sizeof(xmlRegStatePtr));
  1347. if (ctxt->states == NULL) {
  1348. xmlRegexpErrMemory(ctxt, "adding state");
  1349. ctxt->maxStates = 0;
  1350. return(-1);
  1351. }
  1352. } else if (ctxt->nbStates >= ctxt->maxStates) {
  1353. xmlRegStatePtr *tmp;
  1354. ctxt->maxStates *= 2;
  1355. tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
  1356. sizeof(xmlRegStatePtr));
  1357. if (tmp == NULL) {
  1358. xmlRegexpErrMemory(ctxt, "adding state");
  1359. ctxt->maxStates /= 2;
  1360. return(-1);
  1361. }
  1362. ctxt->states = tmp;
  1363. }
  1364. state->no = ctxt->nbStates;
  1365. ctxt->states[ctxt->nbStates++] = state;
  1366. return(0);
  1367. }
  1368. /**
  1369. * xmlFAGenerateAllTransition:
  1370. * @ctxt: a regexp parser context
  1371. * @from: the from state
  1372. * @to: the target state or NULL for building a new one
  1373. * @lax:
  1374. *
  1375. */
  1376. static void
  1377. xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
  1378. xmlRegStatePtr from, xmlRegStatePtr to,
  1379. int lax) {
  1380. if (to == NULL) {
  1381. to = xmlRegNewState(ctxt);
  1382. xmlRegStatePush(ctxt, to);
  1383. ctxt->state = to;
  1384. }
  1385. if (lax)
  1386. xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
  1387. else
  1388. xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
  1389. }
  1390. /**
  1391. * xmlFAGenerateEpsilonTransition:
  1392. * @ctxt: a regexp parser context
  1393. * @from: the from state
  1394. * @to: the target state or NULL for building a new one
  1395. *
  1396. */
  1397. static void
  1398. xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
  1399. xmlRegStatePtr from, xmlRegStatePtr to) {
  1400. if (to == NULL) {
  1401. to = xmlRegNewState(ctxt);
  1402. xmlRegStatePush(ctxt, to);
  1403. ctxt->state = to;
  1404. }
  1405. xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
  1406. }
  1407. /**
  1408. * xmlFAGenerateCountedEpsilonTransition:
  1409. * @ctxt: a regexp parser context
  1410. * @from: the from state
  1411. * @to: the target state or NULL for building a new one
  1412. * counter: the counter for that transition
  1413. *
  1414. */
  1415. static void
  1416. xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
  1417. xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
  1418. if (to == NULL) {
  1419. to = xmlRegNewState(ctxt);
  1420. xmlRegStatePush(ctxt, to);
  1421. ctxt->state = to;
  1422. }
  1423. xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
  1424. }
  1425. /**
  1426. * xmlFAGenerateCountedTransition:
  1427. * @ctxt: a regexp parser context
  1428. * @from: the from state
  1429. * @to: the target state or NULL for building a new one
  1430. * counter: the counter for that transition
  1431. *
  1432. */
  1433. static void
  1434. xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
  1435. xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
  1436. if (to == NULL) {
  1437. to = xmlRegNewState(ctxt);
  1438. xmlRegStatePush(ctxt, to);
  1439. ctxt->state = to;
  1440. }
  1441. xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
  1442. }
  1443. /**
  1444. * xmlFAGenerateTransitions:
  1445. * @ctxt: a regexp parser context
  1446. * @from: the from state
  1447. * @to: the target state or NULL for building a new one
  1448. * @atom: the atom generating the transition
  1449. *
  1450. * Returns 0 if success and -1 in case of error.
  1451. */
  1452. static int
  1453. xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
  1454. xmlRegStatePtr to, xmlRegAtomPtr atom) {
  1455. xmlRegStatePtr end;
  1456. int nullable = 0;
  1457. if (atom == NULL) {
  1458. ERROR("generate transition: atom == NULL");
  1459. return(-1);
  1460. }
  1461. if (atom->type == XML_REGEXP_SUBREG) {
  1462. /*
  1463. * this is a subexpression handling one should not need to
  1464. * create a new node except for XML_REGEXP_QUANT_RANGE.
  1465. */
  1466. if (xmlRegAtomPush(ctxt, atom) < 0) {
  1467. return(-1);
  1468. }
  1469. if ((to != NULL) && (atom->stop != to) &&
  1470. (atom->quant != XML_REGEXP_QUANT_RANGE)) {
  1471. /*
  1472. * Generate an epsilon transition to link to the target
  1473. */
  1474. xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
  1475. #ifdef DV
  1476. } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
  1477. (atom->quant != XML_REGEXP_QUANT_ONCE)) {
  1478. to = xmlRegNewState(ctxt);
  1479. xmlRegStatePush(ctxt, to);
  1480. ctxt->state = to;
  1481. xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
  1482. #endif
  1483. }
  1484. switch (atom->quant) {
  1485. case XML_REGEXP_QUANT_OPT:
  1486. atom->quant = XML_REGEXP_QUANT_ONCE;
  1487. /*
  1488. * transition done to the state after end of atom.
  1489. * 1. set transition from atom start to new state
  1490. * 2. set transition from atom end to this state.
  1491. */
  1492. if (to == NULL) {
  1493. xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
  1494. xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
  1495. ctxt->state);
  1496. } else {
  1497. xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
  1498. }
  1499. break;
  1500. case XML_REGEXP_QUANT_MULT:
  1501. atom->quant = XML_REGEXP_QUANT_ONCE;
  1502. xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
  1503. xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
  1504. break;
  1505. case XML_REGEXP_QUANT_PLUS:
  1506. atom->quant = XML_REGEXP_QUANT_ONCE;
  1507. xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
  1508. break;
  1509. case XML_REGEXP_QUANT_RANGE: {
  1510. int counter;
  1511. xmlRegStatePtr inter, newstate;
  1512. /*
  1513. * create the final state now if needed
  1514. */
  1515. if (to != NULL) {
  1516. newstate = to;
  1517. } else {
  1518. newstate = xmlRegNewState(ctxt);
  1519. xmlRegStatePush(ctxt, newstate);
  1520. }
  1521. /*
  1522. * The principle here is to use counted transition
  1523. * to avoid explosion in the number of states in the
  1524. * graph. This is clearly more complex but should not
  1525. * be exploitable at runtime.
  1526. */
  1527. if ((atom->min == 0) && (atom->start0 == NULL)) {
  1528. xmlRegAtomPtr copy;
  1529. /*
  1530. * duplicate a transition based on atom to count next
  1531. * occurrences after 1. We cannot loop to atom->start
  1532. * directly because we need an epsilon transition to
  1533. * newstate.
  1534. */
  1535. /* ???? For some reason it seems we never reach that
  1536. case, I suppose this got optimized out before when
  1537. building the automata */
  1538. copy = xmlRegCopyAtom(ctxt, atom);
  1539. if (copy == NULL)
  1540. return(-1);
  1541. copy->quant = XML_REGEXP_QUANT_ONCE;
  1542. copy->min = 0;
  1543. copy->max = 0;
  1544. if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
  1545. < 0)
  1546. return(-1);
  1547. inter = ctxt->state;
  1548. counter = xmlRegGetCounter(ctxt);
  1549. ctxt->counters[counter].min = atom->min - 1;
  1550. ctxt->counters[counter].max = atom->max - 1;
  1551. /* count the number of times we see it again */
  1552. xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
  1553. atom->stop, counter);
  1554. /* allow a way out based on the count */
  1555. xmlFAGenerateCountedTransition(ctxt, inter,
  1556. newstate, counter);
  1557. /* and also allow a direct exit for 0 */
  1558. xmlFAGenerateEpsilonTransition(ctxt, atom->start,
  1559. newstate);
  1560. } else {
  1561. /*
  1562. * either we need the atom at least once or there
  1563. * is an atom->start0 allowing to easily plug the
  1564. * epsilon transition.
  1565. */
  1566. counter = xmlRegGetCounter(ctxt);
  1567. ctxt->counters[counter].min = atom->min - 1;
  1568. ctxt->counters[counter].max = atom->max - 1;
  1569. /* count the number of times we see it again */
  1570. xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
  1571. atom->start, counter);
  1572. /* allow a way out based on the count */
  1573. xmlFAGenerateCountedTransition(ctxt, atom->stop,
  1574. newstate, counter);
  1575. /* and if needed allow a direct exit for 0 */
  1576. if (atom->min == 0)
  1577. xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
  1578. newstate);
  1579. }
  1580. atom->min = 0;
  1581. atom->max = 0;
  1582. atom->quant = XML_REGEXP_QUANT_ONCE;
  1583. ctxt->state = newstate;
  1584. }
  1585. default:
  1586. break;
  1587. }
  1588. return(0);
  1589. }
  1590. if ((atom->min == 0) && (atom->max == 0) &&
  1591. (atom->quant == XML_REGEXP_QUANT_RANGE)) {
  1592. /*
  1593. * we can discard the atom and generate an epsilon transition instead
  1594. */
  1595. if (to == NULL) {
  1596. to = xmlRegNewState(ctxt);
  1597. if (to != NULL)
  1598. xmlRegStatePush(ctxt, to);
  1599. else {
  1600. return(-1);
  1601. }
  1602. }
  1603. xmlFAGenerateEpsilonTransition(ctxt, from, to);
  1604. ctxt->state = to;
  1605. xmlRegFreeAtom(atom);
  1606. return(0);
  1607. }
  1608. if (to == NULL) {
  1609. to = xmlRegNewState(ctxt);
  1610. if (to != NULL)
  1611. xmlRegStatePush(ctxt, to);
  1612. else {
  1613. return(-1);
  1614. }
  1615. }
  1616. end = to;
  1617. if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
  1618. (atom->quant == XML_REGEXP_QUANT_PLUS)) {
  1619. /*
  1620. * Do not pollute the target state by adding transitions from
  1621. * it as it is likely to be the shared target of multiple branches.
  1622. * So isolate with an epsilon transition.
  1623. */
  1624. xmlRegStatePtr tmp;
  1625. tmp = xmlRegNewState(ctxt);
  1626. if (tmp != NULL)
  1627. xmlRegStatePush(ctxt, tmp);
  1628. else {
  1629. return(-1);
  1630. }
  1631. xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
  1632. to = tmp;
  1633. }
  1634. if (xmlRegAtomPush(ctxt, atom) < 0) {
  1635. return(-1);
  1636. }
  1637. if ((atom->quant == XML_REGEXP_QUANT_RANGE) &&
  1638. (atom->min == 0) && (atom->max > 0)) {
  1639. nullable = 1;
  1640. atom->min = 1;
  1641. if (atom->max == 1)
  1642. atom->quant = XML_REGEXP_QUANT_OPT;
  1643. }
  1644. xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
  1645. ctxt->state = end;
  1646. switch (atom->quant) {
  1647. case XML_REGEXP_QUANT_OPT:
  1648. atom->quant = XML_REGEXP_QUANT_ONCE;
  1649. xmlFAGenerateEpsilonTransition(ctxt, from, to);
  1650. break;
  1651. case XML_REGEXP_QUANT_MULT:
  1652. atom->quant = XML_REGEXP_QUANT_ONCE;
  1653. xmlFAGenerateEpsilonTransition(ctxt, from, to);
  1654. xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
  1655. break;
  1656. case XML_REGEXP_QUANT_PLUS:
  1657. atom->quant = XML_REGEXP_QUANT_ONCE;
  1658. xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
  1659. break;
  1660. case XML_REGEXP_QUANT_RANGE:
  1661. if (nullable)
  1662. xmlFAGenerateEpsilonTransition(ctxt, from, to);
  1663. break;
  1664. default:
  1665. break;
  1666. }
  1667. return(0);
  1668. }
  1669. /**
  1670. * xmlFAReduceEpsilonTransitions:
  1671. * @ctxt: a regexp parser context
  1672. * @fromnr: the from state
  1673. * @tonr: the to state
  1674. * @counter: should that transition be associated to a counted
  1675. *
  1676. */
  1677. static void
  1678. xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
  1679. int tonr, int counter) {
  1680. int transnr;
  1681. xmlRegStatePtr from;
  1682. xmlRegStatePtr to;
  1683. #ifdef DEBUG_REGEXP_GRAPH
  1684. printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
  1685. #endif
  1686. from = ctxt->states[fromnr];
  1687. if (from == NULL)
  1688. return;
  1689. to = ctxt->states[tonr];
  1690. if (to == NULL)
  1691. return;
  1692. if ((to->mark == XML_REGEXP_MARK_START) ||
  1693. (to->mark == XML_REGEXP_MARK_VISITED))
  1694. return;
  1695. to->mark = XML_REGEXP_MARK_VISITED;
  1696. if (to->type == XML_REGEXP_FINAL_STATE) {
  1697. #ifdef DEBUG_REGEXP_GRAPH
  1698. printf("State %d is final, so %d becomes final\n", tonr, fromnr);
  1699. #endif
  1700. from->type = XML_REGEXP_FINAL_STATE;
  1701. }
  1702. for (transnr = 0;transnr < to->nbTrans;transnr++) {
  1703. if (to->trans[transnr].to < 0)
  1704. continue;
  1705. if (to->trans[transnr].atom == NULL) {
  1706. /*
  1707. * Don't remove counted transitions
  1708. * Don't loop either
  1709. */
  1710. if (to->trans[transnr].to != fromnr) {
  1711. if (to->trans[transnr].count >= 0) {
  1712. int newto = to->trans[transnr].to;
  1713. xmlRegStateAddTrans(ctxt, from, NULL,
  1714. ctxt->states[newto],
  1715. -1, to->trans[transnr].count);
  1716. } else {
  1717. #ifdef DEBUG_REGEXP_GRAPH
  1718. printf("Found epsilon trans %d from %d to %d\n",
  1719. transnr, tonr, to->trans[transnr].to);
  1720. #endif
  1721. if (to->trans[transnr].counter >= 0) {
  1722. xmlFAReduceEpsilonTransitions(ctxt, fromnr,
  1723. to->trans[transnr].to,
  1724. to->trans[transnr].counter);
  1725. } else {
  1726. xmlFAReduceEpsilonTransitions(ctxt, fromnr,
  1727. to->trans[transnr].to,
  1728. counter);
  1729. }
  1730. }
  1731. }
  1732. } else {
  1733. int newto = to->trans[transnr].to;
  1734. if (to->trans[transnr].counter >= 0) {
  1735. xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
  1736. ctxt->states[newto],
  1737. to->trans[transnr].counter, -1);
  1738. } else {
  1739. xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
  1740. ctxt->states[newto], counter, -1);
  1741. }
  1742. }
  1743. }
  1744. to->mark = XML_REGEXP_MARK_NORMAL;
  1745. }
  1746. /**
  1747. * xmlFAEliminateSimpleEpsilonTransitions:
  1748. * @ctxt: a regexp parser context
  1749. *
  1750. * Eliminating general epsilon transitions can get costly in the general
  1751. * algorithm due to the large amount of generated new transitions and
  1752. * associated comparisons. However for simple epsilon transition used just
  1753. * to separate building blocks when generating the automata this can be
  1754. * reduced to state elimination:
  1755. * - if there exists an epsilon from X to Y
  1756. * - if there is no other transition from X
  1757. * then X and Y are semantically equivalent and X can be eliminated
  1758. * If X is the start state then make Y the start state, else replace the
  1759. * target of all transitions to X by transitions to Y.
  1760. *
  1761. * If X is a final state, skip it.
  1762. * Otherwise it would be necessary to manipulate counters for this case when
  1763. * eliminating state 2:
  1764. * State 1 has a transition with an atom to state 2.
  1765. * State 2 is final and has an epsilon transition to state 1.
  1766. */
  1767. static void
  1768. xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
  1769. int statenr, i, j, newto;
  1770. xmlRegStatePtr state, tmp;
  1771. for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
  1772. state = ctxt->states[statenr];
  1773. if (state == NULL)
  1774. continue;
  1775. if (state->nbTrans != 1)
  1776. continue;
  1777. if (state->type == XML_REGEXP_UNREACH_STATE ||
  1778. state->type == XML_REGEXP_FINAL_STATE)
  1779. continue;
  1780. /* is the only transition out a basic transition */
  1781. if ((state->trans[0].atom == NULL) &&
  1782. (state->trans[0].to >= 0) &&
  1783. (state->trans[0].to != statenr) &&
  1784. (state->trans[0].counter < 0) &&
  1785. (state->trans[0].count < 0)) {
  1786. newto = state->trans[0].to;
  1787. if (state->type == XML_REGEXP_START_STATE) {
  1788. #ifdef DEBUG_REGEXP_GRAPH
  1789. printf("Found simple epsilon trans from start %d to %d\n",
  1790. statenr, newto);
  1791. #endif
  1792. } else {
  1793. #ifdef DEBUG_REGEXP_GRAPH
  1794. printf("Found simple epsilon trans from %d to %d\n",
  1795. statenr, newto);
  1796. #endif
  1797. for (i = 0;i < state->nbTransTo;i++) {
  1798. tmp = ctxt->states[state->transTo[i]];
  1799. for (j = 0;j < tmp->nbTrans;j++) {
  1800. if (tmp->trans[j].to == statenr) {
  1801. #ifdef DEBUG_REGEXP_GRAPH
  1802. printf("Changed transition %d on %d to go to %d\n",
  1803. j, tmp->no, newto);
  1804. #endif
  1805. tmp->trans[j].to = -1;
  1806. xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
  1807. ctxt->states[newto],
  1808. tmp->trans[j].counter,
  1809. tmp->trans[j].count);
  1810. }
  1811. }
  1812. }
  1813. if (state->type == XML_REGEXP_FINAL_STATE)
  1814. ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
  1815. /* eliminate the transition completely */
  1816. state->nbTrans = 0;
  1817. state->type = XML_REGEXP_UNREACH_STATE;
  1818. }
  1819. }
  1820. }
  1821. }
  1822. /**
  1823. * xmlFAEliminateEpsilonTransitions:
  1824. * @ctxt: a regexp parser context
  1825. *
  1826. */
  1827. static void
  1828. xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
  1829. int statenr, transnr;
  1830. xmlRegStatePtr state;
  1831. int has_epsilon;
  1832. if (ctxt->states == NULL) return;
  1833. /*
  1834. * Eliminate simple epsilon transition and the associated unreachable
  1835. * states.
  1836. */
  1837. xmlFAEliminateSimpleEpsilonTransitions(ctxt);
  1838. for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
  1839. state = ctxt->states[statenr];
  1840. if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
  1841. #ifdef DEBUG_REGEXP_GRAPH
  1842. printf("Removed unreachable state %d\n", statenr);
  1843. #endif
  1844. xmlRegFreeState(state);
  1845. ctxt->states[statenr] = NULL;
  1846. }
  1847. }
  1848. has_epsilon = 0;
  1849. /*
  1850. * Build the completed transitions bypassing the epsilons
  1851. * Use a marking algorithm to avoid loops
  1852. * Mark sink states too.
  1853. * Process from the latest states backward to the start when
  1854. * there is long cascading epsilon chains this minimize the
  1855. * recursions and transition compares when adding the new ones
  1856. */
  1857. for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
  1858. state = ctxt->states[statenr];
  1859. if (state == NULL)
  1860. continue;
  1861. if ((state->nbTrans == 0) &&
  1862. (state->type != XML_REGEXP_FINAL_STATE)) {
  1863. state->type = XML_REGEXP_SINK_STATE;
  1864. }
  1865. for (transnr = 0;transnr < state->nbTrans;transnr++) {
  1866. if ((state->trans[transnr].atom == NULL) &&
  1867. (state->trans[transnr].to >= 0)) {
  1868. if (state->trans[transnr].to == statenr) {
  1869. state->trans[transnr].to = -1;
  1870. #ifdef DEBUG_REGEXP_GRAPH
  1871. printf("Removed loopback epsilon trans %d on %d\n",
  1872. transnr, statenr);
  1873. #endif
  1874. } else if (state->trans[transnr].count < 0) {
  1875. int newto = state->trans[transnr].to;
  1876. #ifdef DEBUG_REGEXP_GRAPH
  1877. printf("Found epsilon trans %d from %d to %d\n",
  1878. transnr, statenr, newto);
  1879. #endif
  1880. has_epsilon = 1;
  1881. state->trans[transnr].to = -2;
  1882. state->mark = XML_REGEXP_MARK_START;
  1883. xmlFAReduceEpsilonTransitions(ctxt, statenr,
  1884. newto, state->trans[transnr].counter);
  1885. state->mark = XML_REGEXP_MARK_NORMAL;
  1886. #ifdef DEBUG_REGEXP_GRAPH
  1887. } else {
  1888. printf("Found counted transition %d on %d\n",
  1889. transnr, statenr);
  1890. #endif
  1891. }
  1892. }
  1893. }
  1894. }
  1895. /*
  1896. * Eliminate the epsilon transitions
  1897. */
  1898. if (has_epsilon) {
  1899. for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
  1900. state = ctxt->states[statenr];
  1901. if (state == NULL)
  1902. continue;
  1903. for (transnr = 0;transnr < state->nbTrans;transnr++) {
  1904. xmlRegTransPtr trans = &(state->trans[transnr]);
  1905. if ((trans->atom == NULL) &&
  1906. (trans->count < 0) &&
  1907. (trans->to >= 0)) {
  1908. trans->to = -1;
  1909. }
  1910. }
  1911. }
  1912. }
  1913. /*
  1914. * Use this pass to detect unreachable states too
  1915. */
  1916. for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
  1917. state = ctxt->states[statenr];
  1918. if (state != NULL)
  1919. state->reached = XML_REGEXP_MARK_NORMAL;
  1920. }
  1921. state = ctxt->states[0];
  1922. if (state != NULL)
  1923. state->reached = XML_REGEXP_MARK_START;
  1924. while (state != NULL) {
  1925. xmlRegStatePtr target = NULL;
  1926. state->reached = XML_REGEXP_MARK_VISITED;
  1927. /*
  1928. * Mark all states reachable from the current reachable state
  1929. */
  1930. for (transnr = 0;transnr < state->nbTrans;transnr++) {
  1931. if ((state->trans[transnr].to >= 0) &&
  1932. ((state->trans[transnr].atom != NULL) ||
  1933. (state->trans[transnr].count >= 0))) {
  1934. int newto = state->trans[transnr].to;
  1935. if (ctxt->states[newto] == NULL)
  1936. continue;
  1937. if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
  1938. ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
  1939. target = ctxt->states[newto];
  1940. }
  1941. }
  1942. }
  1943. /*
  1944. * find the next accessible state not explored
  1945. */
  1946. if (target == NULL) {
  1947. for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
  1948. state = ctxt->states[statenr];
  1949. if ((state != NULL) && (state->reached ==
  1950. XML_REGEXP_MARK_START)) {
  1951. target = state;
  1952. break;
  1953. }
  1954. }
  1955. }
  1956. state = target;
  1957. }
  1958. for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
  1959. state = ctxt->states[statenr];
  1960. if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
  1961. #ifdef DEBUG_REGEXP_GRAPH
  1962. printf("Removed unreachable state %d\n", statenr);
  1963. #endif
  1964. xmlRegFreeState(state);
  1965. ctxt->states[statenr] = NULL;
  1966. }
  1967. }
  1968. }
  1969. static int
  1970. xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
  1971. int ret = 0;
  1972. if ((range1->type == XML_REGEXP_RANGES) ||
  1973. (range2->type == XML_REGEXP_RANGES) ||
  1974. (range2->type == XML_REGEXP_SUBREG) ||
  1975. (range1->type == XML_REGEXP_SUBREG) ||
  1976. (range1->type == XML_REGEXP_STRING) ||
  1977. (range2->type == XML_REGEXP_STRING))
  1978. return(-1);
  1979. /* put them in order */
  1980. if (range1->type > range2->type) {
  1981. xmlRegRangePtr tmp;
  1982. tmp = range1;
  1983. range1 = range2;
  1984. range2 = tmp;
  1985. }
  1986. if ((range1->type == XML_REGEXP_ANYCHAR) ||
  1987. (range2->type == XML_REGEXP_ANYCHAR)) {
  1988. ret = 1;
  1989. } else if ((range1->type == XML_REGEXP_EPSILON) ||
  1990. (range2->type == XML_REGEXP_EPSILON)) {
  1991. return(0);
  1992. } else if (range1->type == range2->type) {
  1993. if (range1->type != XML_REGEXP_CHARVAL)
  1994. ret = 1;
  1995. else if ((range1->end < range2->start) ||
  1996. (range2->end < range1->start))
  1997. ret = 0;
  1998. else
  1999. ret = 1;
  2000. } else if (range1->type == XML_REGEXP_CHARVAL) {
  2001. int codepoint;
  2002. int neg = 0;
  2003. /*
  2004. * just check all codepoints in the range for acceptance,
  2005. * this is usually way cheaper since done only once at
  2006. * compilation than testing over and over at runtime or
  2007. * pushing too many states when evaluating.
  2008. */
  2009. if (((range1->neg == 0) && (range2->neg != 0)) ||
  2010. ((range1->neg != 0) && (range2->neg == 0)))
  2011. neg = 1;
  2012. for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
  2013. ret = xmlRegCheckCharacterRange(range2->type, codepoint,
  2014. 0, range2->start, range2->end,
  2015. range2->blockName);
  2016. if (ret < 0)
  2017. return(-1);
  2018. if (((neg == 1) && (ret == 0)) ||
  2019. ((neg == 0) && (ret == 1)))
  2020. return(1);
  2021. }
  2022. return(0);
  2023. } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
  2024. (range2->type == XML_REGEXP_BLOCK_NAME)) {
  2025. if (range1->type == range2->type) {
  2026. ret = xmlStrEqual(range1->blockName, range2->blockName);
  2027. } else {
  2028. /*
  2029. * comparing a block range with anything else is way
  2030. * too costly, and maintaining the table is like too much
  2031. * memory too, so let's force the automata to save state
  2032. * here.
  2033. */
  2034. return(1);
  2035. }
  2036. } else if ((range1->type < XML_REGEXP_LETTER) ||
  2037. (range2->type < XML_REGEXP_LETTER)) {
  2038. if ((range1->type == XML_REGEXP_ANYSPACE) &&
  2039. (range2->type == XML_REGEXP_NOTSPACE))
  2040. ret = 0;
  2041. else if ((range1->type == XML_REGEXP_INITNAME) &&
  2042. (range2->type == XML_REGEXP_NOTINITNAME))
  2043. ret = 0;
  2044. else if ((range1->type == XML_REGEXP_NAMECHAR) &&
  2045. (range2->type == XML_REGEXP_NOTNAMECHAR))
  2046. ret = 0;
  2047. else if ((range1->type == XML_REGEXP_DECIMAL) &&
  2048. (range2->type == XML_REGEXP_NOTDECIMAL))
  2049. ret = 0;
  2050. else if ((range1->type == XML_REGEXP_REALCHAR) &&
  2051. (range2->type == XML_REGEXP_NOTREALCHAR))
  2052. ret = 0;
  2053. else {
  2054. /* same thing to limit complexity */
  2055. return(1);
  2056. }
  2057. } else {
  2058. ret = 0;
  2059. /* range1->type < range2->type here */
  2060. switch (range1->type) {
  2061. case XML_REGEXP_LETTER:
  2062. /* all disjoint except in the subgroups */
  2063. if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
  2064. (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
  2065. (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
  2066. (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
  2067. (range2->type == XML_REGEXP_LETTER_OTHERS))
  2068. ret = 1;
  2069. break;
  2070. case XML_REGEXP_MARK:
  2071. if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
  2072. (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
  2073. (range2->type == XML_REGEXP_MARK_ENCLOSING))
  2074. ret = 1;
  2075. break;
  2076. case XML_REGEXP_NUMBER:
  2077. if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
  2078. (range2->type == XML_REGEXP_NUMBER_LETTER) ||
  2079. (range2->type == XML_REGEXP_NUMBER_OTHERS))
  2080. ret = 1;
  2081. break;
  2082. case XML_REGEXP_PUNCT:
  2083. if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
  2084. (range2->type == XML_REGEXP_PUNCT_DASH) ||
  2085. (range2->type == XML_REGEXP_PUNCT_OPEN) ||
  2086. (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
  2087. (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
  2088. (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
  2089. (range2->type == XML_REGEXP_PUNCT_OTHERS))
  2090. ret = 1;
  2091. break;
  2092. case XML_REGEXP_SEPAR:
  2093. if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
  2094. (range2->type == XML_REGEXP_SEPAR_LINE) ||
  2095. (range2->type == XML_REGEXP_SEPAR_PARA))
  2096. ret = 1;
  2097. break;
  2098. case XML_REGEXP_SYMBOL:
  2099. if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
  2100. (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
  2101. (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
  2102. (range2->type == XML_REGEXP_SYMBOL_OTHERS))
  2103. ret = 1;
  2104. break;
  2105. case XML_REGEXP_OTHER:
  2106. if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
  2107. (range2->type == XML_REGEXP_OTHER_FORMAT) ||
  2108. (range2->type == XML_REGEXP_OTHER_PRIVATE))
  2109. ret = 1;
  2110. break;
  2111. default:
  2112. if ((range2->type >= XML_REGEXP_LETTER) &&
  2113. (range2->type < XML_REGEXP_BLOCK_NAME))
  2114. ret = 0;
  2115. else {
  2116. /* safety net ! */
  2117. return(1);
  2118. }
  2119. }
  2120. }
  2121. if (((range1->neg == 0) && (range2->neg != 0)) ||
  2122. ((range1->neg != 0) && (range2->neg == 0)))
  2123. ret = !ret;
  2124. return(ret);
  2125. }
  2126. /**
  2127. * xmlFACompareAtomTypes:
  2128. * @type1: an atom type
  2129. * @type2: an atom type
  2130. *
  2131. * Compares two atoms type to check whether they intersect in some ways,
  2132. * this is used by xmlFACompareAtoms only
  2133. *
  2134. * Returns 1 if they may intersect and 0 otherwise
  2135. */
  2136. static int
  2137. xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
  2138. if ((type1 == XML_REGEXP_EPSILON) ||
  2139. (type1 == XML_REGEXP_CHARVAL) ||
  2140. (type1 == XML_REGEXP_RANGES) ||
  2141. (type1 == XML_REGEXP_SUBREG) ||
  2142. (type1 == XML_REGEXP_STRING) ||
  2143. (type1 == XML_REGEXP_ANYCHAR))
  2144. return(1);
  2145. if ((type2 == XML_REGEXP_EPSILON) ||
  2146. (type2 == XML_REGEXP_CHARVAL) ||
  2147. (type2 == XML_REGEXP_RANGES) ||
  2148. (type2 == XML_REGEXP_SUBREG) ||
  2149. (type2 == XML_REGEXP_STRING) ||
  2150. (type2 == XML_REGEXP_ANYCHAR))
  2151. return(1);
  2152. if (type1 == type2) return(1);
  2153. /* simplify subsequent compares by making sure type1 < type2 */
  2154. if (type1 > type2) {
  2155. xmlRegAtomType tmp = type1;
  2156. type1 = type2;
  2157. type2 = tmp;
  2158. }
  2159. switch (type1) {
  2160. case XML_REGEXP_ANYSPACE: /* \s */
  2161. /* can't be a letter, number, mark, punctuation, symbol */
  2162. if ((type2 == XML_REGEXP_NOTSPACE) ||
  2163. ((type2 >= XML_REGEXP_LETTER) &&
  2164. (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
  2165. ((type2 >= XML_REGEXP_NUMBER) &&
  2166. (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
  2167. ((type2 >= XML_REGEXP_MARK) &&
  2168. (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
  2169. ((type2 >= XML_REGEXP_PUNCT) &&
  2170. (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
  2171. ((type2 >= XML_REGEXP_SYMBOL) &&
  2172. (type2 <= XML_REGEXP_SYMBOL_OTHERS))
  2173. ) return(0);
  2174. break;
  2175. case XML_REGEXP_NOTSPACE: /* \S */
  2176. break;
  2177. case XML_REGEXP_INITNAME: /* \l */
  2178. /* can't be a number, mark, separator, punctuation, symbol or other */
  2179. if ((type2 == XML_REGEXP_NOTINITNAME) ||
  2180. ((type2 >= XML_REGEXP_NUMBER) &&
  2181. (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
  2182. ((type2 >= XML_REGEXP_MARK) &&
  2183. (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
  2184. ((type2 >= XML_REGEXP_SEPAR) &&
  2185. (type2 <= XML_REGEXP_SEPAR_PARA)) ||
  2186. ((type2 >= XML_REGEXP_PUNCT) &&
  2187. (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
  2188. ((type2 >= XML_REGEXP_SYMBOL) &&
  2189. (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
  2190. ((type2 >= XML_REGEXP_OTHER) &&
  2191. (type2 <= XML_REGEXP_OTHER_NA))
  2192. ) return(0);
  2193. break;
  2194. case XML_REGEXP_NOTINITNAME: /* \L */
  2195. break;
  2196. case XML_REGEXP_NAMECHAR: /* \c */
  2197. /* can't be a mark, separator, punctuation, symbol or other */
  2198. if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
  2199. ((type2 >= XML_REGEXP_MARK) &&
  2200. (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
  2201. ((type2 >= XML_REGEXP_PUNCT) &&
  2202. (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
  2203. ((type2 >= XML_REGEXP_SEPAR) &&
  2204. (type2 <= XML_REGEXP_SEPAR_PARA)) ||
  2205. ((type2 >= XML_REGEXP_SYMBOL) &&
  2206. (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
  2207. ((type2 >= XML_REGEXP_OTHER) &&
  2208. (type2 <= XML_REGEXP_OTHER_NA))
  2209. ) return(0);
  2210. break;
  2211. case XML_REGEXP_NOTNAMECHAR: /* \C */
  2212. break;
  2213. case XML_REGEXP_DECIMAL: /* \d */
  2214. /* can't be a letter, mark, separator, punctuation, symbol or other */
  2215. if ((type2 == XML_REGEXP_NOTDECIMAL) ||
  2216. (type2 == XML_REGEXP_REALCHAR) ||
  2217. ((type2 >= XML_REGEXP_LETTER) &&
  2218. (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
  2219. ((type2 >= XML_REGEXP_MARK) &&
  2220. (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
  2221. ((type2 >= XML_REGEXP_PUNCT) &&
  2222. (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
  2223. ((type2 >= XML_REGEXP_SEPAR) &&
  2224. (type2 <= XML_REGEXP_SEPAR_PARA)) ||
  2225. ((type2 >= XML_REGEXP_SYMBOL) &&
  2226. (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
  2227. ((type2 >= XML_REGEXP_OTHER) &&
  2228. (type2 <= XML_REGEXP_OTHER_NA))
  2229. )return(0);
  2230. break;
  2231. case XML_REGEXP_NOTDECIMAL: /* \D */
  2232. break;
  2233. case XML_REGEXP_REALCHAR: /* \w */
  2234. /* can't be a mark, separator, punctuation, symbol or other */
  2235. if ((type2 == XML_REGEXP_NOTDECIMAL) ||
  2236. ((type2 >= XML_REGEXP_MARK) &&
  2237. (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
  2238. ((type2 >= XML_REGEXP_PUNCT) &&
  2239. (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
  2240. ((type2 >= XML_REGEXP_SEPAR) &&
  2241. (type2 <= XML_REGEXP_SEPAR_PARA)) ||
  2242. ((type2 >= XML_REGEXP_SYMBOL) &&
  2243. (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
  2244. ((type2 >= XML_REGEXP_OTHER) &&
  2245. (type2 <= XML_REGEXP_OTHER_NA))
  2246. )return(0);
  2247. break;
  2248. case XML_REGEXP_NOTREALCHAR: /* \W */
  2249. break;
  2250. /*
  2251. * at that point we know both type 1 and type2 are from
  2252. * character categories are ordered and are different,
  2253. * it becomes simple because this is a partition
  2254. */
  2255. case XML_REGEXP_LETTER:
  2256. if (type2 <= XML_REGEXP_LETTER_OTHERS)
  2257. return(1);
  2258. return(0);
  2259. case XML_REGEXP_LETTER_UPPERCASE:
  2260. case XML_REGEXP_LETTER_LOWERCASE:
  2261. case XML_REGEXP_LETTER_TITLECASE:
  2262. case XML_REGEXP_LETTER_MODIFIER:
  2263. case XML_REGEXP_LETTER_OTHERS:
  2264. return(0);
  2265. case XML_REGEXP_MARK:
  2266. if (type2 <= XML_REGEXP_MARK_ENCLOSING)
  2267. return(1);
  2268. return(0);
  2269. case XML_REGEXP_MARK_NONSPACING:
  2270. case XML_REGEXP_MARK_SPACECOMBINING:
  2271. case XML_REGEXP_MARK_ENCLOSING:
  2272. return(0);
  2273. case XML_REGEXP_NUMBER:
  2274. if (type2 <= XML_REGEXP_NUMBER_OTHERS)
  2275. return(1);
  2276. return(0);
  2277. case XML_REGEXP_NUMBER_DECIMAL:
  2278. case XML_REGEXP_NUMBER_LETTER:
  2279. case XML_REGEXP_NUMBER_OTHERS:
  2280. return(0);
  2281. case XML_REGEXP_PUNCT:
  2282. if (type2 <= XML_REGEXP_PUNCT_OTHERS)
  2283. return(1);
  2284. return(0);
  2285. case XML_REGEXP_PUNCT_CONNECTOR:
  2286. case XML_REGEXP_PUNCT_DASH:
  2287. case XML_REGEXP_PUNCT_OPEN:
  2288. case XML_REGEXP_PUNCT_CLOSE:
  2289. case XML_REGEXP_PUNCT_INITQUOTE:
  2290. case XML_REGEXP_PUNCT_FINQUOTE:
  2291. case XML_REGEXP_PUNCT_OTHERS:
  2292. return(0);
  2293. case XML_REGEXP_SEPAR:
  2294. if (type2 <= XML_REGEXP_SEPAR_PARA)
  2295. return(1);
  2296. return(0);
  2297. case XML_REGEXP_SEPAR_SPACE:
  2298. case XML_REGEXP_SEPAR_LINE:
  2299. case XML_REGEXP_SEPAR_PARA:
  2300. return(0);
  2301. case XML_REGEXP_SYMBOL:
  2302. if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
  2303. return(1);
  2304. return(0);
  2305. case XML_REGEXP_SYMBOL_MATH:
  2306. case XML_REGEXP_SYMBOL_CURRENCY:
  2307. case XML_REGEXP_SYMBOL_MODIFIER:
  2308. case XML_REGEXP_SYMBOL_OTHERS:
  2309. return(0);
  2310. case XML_REGEXP_OTHER:
  2311. if (type2 <= XML_REGEXP_OTHER_NA)
  2312. return(1);
  2313. return(0);
  2314. case XML_REGEXP_OTHER_CONTROL:
  2315. case XML_REGEXP_OTHER_FORMAT:
  2316. case XML_REGEXP_OTHER_PRIVATE:
  2317. case XML_REGEXP_OTHER_NA:
  2318. return(0);
  2319. default:
  2320. break;
  2321. }
  2322. return(1);
  2323. }
  2324. /**
  2325. * xmlFAEqualAtoms:
  2326. * @atom1: an atom
  2327. * @atom2: an atom
  2328. * @deep: if not set only compare string pointers
  2329. *
  2330. * Compares two atoms to check whether they are the same exactly
  2331. * this is used to remove equivalent transitions
  2332. *
  2333. * Returns 1 if same and 0 otherwise
  2334. */
  2335. static int
  2336. xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
  2337. int ret = 0;
  2338. if (atom1 == atom2)
  2339. return(1);
  2340. if ((atom1 == NULL) || (atom2 == NULL))
  2341. return(0);
  2342. if (atom1->type != atom2->type)
  2343. return(0);
  2344. switch (atom1->type) {
  2345. case XML_REGEXP_EPSILON:
  2346. ret = 0;
  2347. break;
  2348. case XML_REGEXP_STRING:
  2349. if (!deep)
  2350. ret = (atom1->valuep == atom2->valuep);
  2351. else
  2352. ret = xmlStrEqual((xmlChar *)atom1->valuep,
  2353. (xmlChar *)atom2->valuep);
  2354. break;
  2355. case XML_REGEXP_CHARVAL:
  2356. ret = (atom1->codepoint == atom2->codepoint);
  2357. break;
  2358. case XML_REGEXP_RANGES:
  2359. /* too hard to do in the general case */
  2360. ret = 0;
  2361. default:
  2362. break;
  2363. }
  2364. return(ret);
  2365. }
  2366. /**
  2367. * xmlFACompareAtoms:
  2368. * @atom1: an atom
  2369. * @atom2: an atom
  2370. * @deep: if not set only compare string pointers
  2371. *
  2372. * Compares two atoms to check whether they intersect in some ways,
  2373. * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
  2374. *
  2375. * Returns 1 if yes and 0 otherwise
  2376. */
  2377. static int
  2378. xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
  2379. int ret = 1;
  2380. if (atom1 == atom2)
  2381. return(1);
  2382. if ((atom1 == NULL) || (atom2 == NULL))
  2383. return(0);
  2384. if ((atom1->type == XML_REGEXP_ANYCHAR) ||
  2385. (atom2->type == XML_REGEXP_ANYCHAR))
  2386. return(1);
  2387. if (atom1->type > atom2->type) {
  2388. xmlRegAtomPtr tmp;
  2389. tmp = atom1;
  2390. atom1 = atom2;
  2391. atom2 = tmp;
  2392. }
  2393. if (atom1->type != atom2->type) {
  2394. ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
  2395. /* if they can't intersect at the type level break now */
  2396. if (ret == 0)
  2397. return(0);
  2398. }
  2399. switch (atom1->type) {
  2400. case XML_REGEXP_STRING:
  2401. if (!deep)
  2402. ret = (atom1->valuep != atom2->valuep);
  2403. else {
  2404. xmlChar *val1 = (xmlChar *)atom1->valuep;
  2405. xmlChar *val2 = (xmlChar *)atom2->valuep;
  2406. int compound1 = (xmlStrchr(val1, '|') != NULL);
  2407. int compound2 = (xmlStrchr(val2, '|') != NULL);
  2408. /* Ignore negative match flag for ##other namespaces */
  2409. if (compound1 != compound2)
  2410. return(0);
  2411. ret = xmlRegStrEqualWildcard(val1, val2);
  2412. }
  2413. break;
  2414. case XML_REGEXP_EPSILON:
  2415. goto not_determinist;
  2416. case XML_REGEXP_CHARVAL:
  2417. if (atom2->type == XML_REGEXP_CHARVAL) {
  2418. ret = (atom1->codepoint == atom2->codepoint);
  2419. } else {
  2420. ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
  2421. if (ret < 0)
  2422. ret = 1;
  2423. }
  2424. break;
  2425. case XML_REGEXP_RANGES:
  2426. if (atom2->type == XML_REGEXP_RANGES) {
  2427. int i, j, res;
  2428. xmlRegRangePtr r1, r2;
  2429. /*
  2430. * need to check that none of the ranges eventually matches
  2431. */
  2432. for (i = 0;i < atom1->nbRanges;i++) {
  2433. for (j = 0;j < atom2->nbRanges;j++) {
  2434. r1 = atom1->ranges[i];
  2435. r2 = atom2->ranges[j];
  2436. res = xmlFACompareRanges(r1, r2);
  2437. if (res == 1) {
  2438. ret = 1;
  2439. goto done;
  2440. }
  2441. }
  2442. }
  2443. ret = 0;
  2444. }
  2445. break;
  2446. default:
  2447. goto not_determinist;
  2448. }
  2449. done:
  2450. if (atom1->neg != atom2->neg) {
  2451. ret = !ret;
  2452. }
  2453. if (ret == 0)
  2454. return(0);
  2455. not_determinist:
  2456. return(1);
  2457. }
  2458. /**
  2459. * xmlFARecurseDeterminism:
  2460. * @ctxt: a regexp parser context
  2461. *
  2462. * Check whether the associated regexp is determinist,
  2463. * should be called after xmlFAEliminateEpsilonTransitions()
  2464. *
  2465. */
  2466. static int
  2467. xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
  2468. int to, xmlRegAtomPtr atom) {
  2469. int ret = 1;
  2470. int res;
  2471. int transnr, nbTrans;
  2472. xmlRegTransPtr t1;
  2473. int deep = 1;
  2474. if (state == NULL)
  2475. return(ret);
  2476. if (state->markd == XML_REGEXP_MARK_VISITED)
  2477. return(ret);
  2478. if (ctxt->flags & AM_AUTOMATA_RNG)
  2479. deep = 0;
  2480. /*
  2481. * don't recurse on transitions potentially added in the course of
  2482. * the elimination.
  2483. */
  2484. nbTrans = state->nbTrans;
  2485. for (transnr = 0;transnr < nbTrans;transnr++) {
  2486. t1 = &(state->trans[transnr]);
  2487. /*
  2488. * check transitions conflicting with the one looked at
  2489. */
  2490. if (t1->atom == NULL) {
  2491. if (t1->to < 0)
  2492. continue;
  2493. state->markd = XML_REGEXP_MARK_VISITED;
  2494. res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
  2495. to, atom);
  2496. if (res == 0) {
  2497. ret = 0;
  2498. /* t1->nd = 1; */
  2499. }
  2500. continue;
  2501. }
  2502. if (t1->to != to)
  2503. continue;
  2504. if (xmlFACompareAtoms(t1->atom, atom, deep)) {
  2505. ret = 0;
  2506. /* mark the transition as non-deterministic */
  2507. t1->nd = 1;
  2508. }
  2509. }
  2510. return(ret);
  2511. }
  2512. /**
  2513. * xmlFAFinishRecurseDeterminism:
  2514. * @ctxt: a regexp parser context
  2515. *
  2516. * Reset flags after checking determinism.
  2517. */
  2518. static void
  2519. xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
  2520. int transnr, nbTrans;
  2521. if (state == NULL)
  2522. return;
  2523. if (state->markd != XML_REGEXP_MARK_VISITED)
  2524. return;
  2525. state->markd = 0;
  2526. nbTrans = state->nbTrans;
  2527. for (transnr = 0; transnr < nbTrans; transnr++) {
  2528. xmlRegTransPtr t1 = &state->trans[transnr];
  2529. if ((t1->atom == NULL) && (t1->to >= 0))
  2530. xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
  2531. }
  2532. }
  2533. /**
  2534. * xmlFAComputesDeterminism:
  2535. * @ctxt: a regexp parser context
  2536. *
  2537. * Check whether the associated regexp is determinist,
  2538. * should be called after xmlFAEliminateEpsilonTransitions()
  2539. *
  2540. */
  2541. static int
  2542. xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
  2543. int statenr, transnr;
  2544. xmlRegStatePtr state;
  2545. xmlRegTransPtr t1, t2, last;
  2546. int i;
  2547. int ret = 1;
  2548. int deep = 1;
  2549. #ifdef DEBUG_REGEXP_GRAPH
  2550. printf("xmlFAComputesDeterminism\n");
  2551. xmlRegPrintCtxt(stdout, ctxt);
  2552. #endif
  2553. if (ctxt->determinist != -1)
  2554. return(ctxt->determinist);
  2555. if (ctxt->flags & AM_AUTOMATA_RNG)
  2556. deep = 0;
  2557. /*
  2558. * First cleanup the automata removing cancelled transitions
  2559. */
  2560. for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
  2561. state = ctxt->states[statenr];
  2562. if (state == NULL)
  2563. continue;
  2564. if (state->nbTrans < 2)
  2565. continue;
  2566. for (transnr = 0;transnr < state->nbTrans;transnr++) {
  2567. t1 = &(state->trans[transnr]);
  2568. /*
  2569. * Determinism checks in case of counted or all transitions
  2570. * will have to be handled separately
  2571. */
  2572. if (t1->atom == NULL) {
  2573. /* t1->nd = 1; */
  2574. continue;
  2575. }
  2576. if (t1->to == -1) /* eliminated */
  2577. continue;
  2578. for (i = 0;i < transnr;i++) {
  2579. t2 = &(state->trans[i]);
  2580. if (t2->to == -1) /* eliminated */
  2581. continue;
  2582. if (t2->atom != NULL) {
  2583. if (t1->to == t2->to) {
  2584. /*
  2585. * Here we use deep because we want to keep the
  2586. * transitions which indicate a conflict
  2587. */
  2588. if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
  2589. (t1->counter == t2->counter) &&
  2590. (t1->count == t2->count))
  2591. t2->to = -1; /* eliminated */
  2592. }
  2593. }
  2594. }
  2595. }
  2596. }
  2597. /*
  2598. * Check for all states that there aren't 2 transitions
  2599. * with the same atom and a different target.
  2600. */
  2601. for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
  2602. state = ctxt->states[statenr];
  2603. if (state == NULL)
  2604. continue;
  2605. if (state->nbTrans < 2)
  2606. continue;
  2607. last = NULL;
  2608. for (transnr = 0;transnr < state->nbTrans;transnr++) {
  2609. t1 = &(state->trans[transnr]);
  2610. /*
  2611. * Determinism checks in case of counted or all transitions
  2612. * will have to be handled separately
  2613. */
  2614. if (t1->atom == NULL) {
  2615. continue;
  2616. }
  2617. if (t1->to == -1) /* eliminated */
  2618. continue;
  2619. for (i = 0;i < transnr;i++) {
  2620. t2 = &(state->trans[i]);
  2621. if (t2->to == -1) /* eliminated */
  2622. continue;
  2623. if (t2->atom != NULL) {
  2624. /*
  2625. * But here we don't use deep because we want to
  2626. * find transitions which indicate a conflict
  2627. */
  2628. if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
  2629. ret = 0;
  2630. /* mark the transitions as non-deterministic ones */
  2631. t1->nd = 1;
  2632. t2->nd = 1;
  2633. last = t1;
  2634. }
  2635. } else if (t1->to != -1) {
  2636. /*
  2637. * do the closure in case of remaining specific
  2638. * epsilon transitions like choices or all
  2639. */
  2640. ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
  2641. t2->to, t2->atom);
  2642. xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
  2643. /* don't shortcut the computation so all non deterministic
  2644. transition get marked down
  2645. if (ret == 0)
  2646. return(0);
  2647. */
  2648. if (ret == 0) {
  2649. t1->nd = 1;
  2650. /* t2->nd = 1; */
  2651. last = t1;
  2652. }
  2653. }
  2654. }
  2655. /* don't shortcut the computation so all non deterministic
  2656. transition get marked down
  2657. if (ret == 0)
  2658. break; */
  2659. }
  2660. /*
  2661. * mark specifically the last non-deterministic transition
  2662. * from a state since there is no need to set-up rollback
  2663. * from it
  2664. */
  2665. if (last != NULL) {
  2666. last->nd = 2;
  2667. }
  2668. /* don't shortcut the computation so all non deterministic
  2669. transition get marked down
  2670. if (ret == 0)
  2671. break; */
  2672. }
  2673. ctxt->determinist = ret;
  2674. return(ret);
  2675. }
  2676. /************************************************************************
  2677. * *
  2678. * Routines to check input against transition atoms *
  2679. * *
  2680. ************************************************************************/
  2681. static int
  2682. xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
  2683. int start, int end, const xmlChar *blockName) {
  2684. int ret = 0;
  2685. switch (type) {
  2686. case XML_REGEXP_STRING:
  2687. case XML_REGEXP_SUBREG:
  2688. case XML_REGEXP_RANGES:
  2689. case XML_REGEXP_EPSILON:
  2690. return(-1);
  2691. case XML_REGEXP_ANYCHAR:
  2692. ret = ((codepoint != '\n') && (codepoint != '\r'));
  2693. break;
  2694. case XML_REGEXP_CHARVAL:
  2695. ret = ((codepoint >= start) && (codepoint <= end));
  2696. break;
  2697. case XML_REGEXP_NOTSPACE:
  2698. neg = !neg;
  2699. /* Falls through. */
  2700. case XML_REGEXP_ANYSPACE:
  2701. ret = ((codepoint == '\n') || (codepoint == '\r') ||
  2702. (codepoint == '\t') || (codepoint == ' '));
  2703. break;
  2704. case XML_REGEXP_NOTINITNAME:
  2705. neg = !neg;
  2706. /* Falls through. */
  2707. case XML_REGEXP_INITNAME:
  2708. ret = (IS_LETTER(codepoint) ||
  2709. (codepoint == '_') || (codepoint == ':'));
  2710. break;
  2711. case XML_REGEXP_NOTNAMECHAR:
  2712. neg = !neg;
  2713. /* Falls through. */
  2714. case XML_REGEXP_NAMECHAR:
  2715. ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
  2716. (codepoint == '.') || (codepoint == '-') ||
  2717. (codepoint == '_') || (codepoint == ':') ||
  2718. IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
  2719. break;
  2720. case XML_REGEXP_NOTDECIMAL:
  2721. neg = !neg;
  2722. /* Falls through. */
  2723. case XML_REGEXP_DECIMAL:
  2724. ret = xmlUCSIsCatNd(codepoint);
  2725. break;
  2726. case XML_REGEXP_REALCHAR:
  2727. neg = !neg;
  2728. /* Falls through. */
  2729. case XML_REGEXP_NOTREALCHAR:
  2730. ret = xmlUCSIsCatP(codepoint);
  2731. if (ret == 0)
  2732. ret = xmlUCSIsCatZ(codepoint);
  2733. if (ret == 0)
  2734. ret = xmlUCSIsCatC(codepoint);
  2735. break;
  2736. case XML_REGEXP_LETTER:
  2737. ret = xmlUCSIsCatL(codepoint);
  2738. break;
  2739. case XML_REGEXP_LETTER_UPPERCASE:
  2740. ret = xmlUCSIsCatLu(codepoint);
  2741. break;
  2742. case XML_REGEXP_LETTER_LOWERCASE:
  2743. ret = xmlUCSIsCatLl(codepoint);
  2744. break;
  2745. case XML_REGEXP_LETTER_TITLECASE:
  2746. ret = xmlUCSIsCatLt(codepoint);
  2747. break;
  2748. case XML_REGEXP_LETTER_MODIFIER:
  2749. ret = xmlUCSIsCatLm(codepoint);
  2750. break;
  2751. case XML_REGEXP_LETTER_OTHERS:
  2752. ret = xmlUCSIsCatLo(codepoint);
  2753. break;
  2754. case XML_REGEXP_MARK:
  2755. ret = xmlUCSIsCatM(codepoint);
  2756. break;
  2757. case XML_REGEXP_MARK_NONSPACING:
  2758. ret = xmlUCSIsCatMn(codepoint);
  2759. break;
  2760. case XML_REGEXP_MARK_SPACECOMBINING:
  2761. ret = xmlUCSIsCatMc(codepoint);
  2762. break;
  2763. case XML_REGEXP_MARK_ENCLOSING:
  2764. ret = xmlUCSIsCatMe(codepoint);
  2765. break;
  2766. case XML_REGEXP_NUMBER:
  2767. ret = xmlUCSIsCatN(codepoint);
  2768. break;
  2769. case XML_REGEXP_NUMBER_DECIMAL:
  2770. ret = xmlUCSIsCatNd(codepoint);
  2771. break;
  2772. case XML_REGEXP_NUMBER_LETTER:
  2773. ret = xmlUCSIsCatNl(codepoint);
  2774. break;
  2775. case XML_REGEXP_NUMBER_OTHERS:
  2776. ret = xmlUCSIsCatNo(codepoint);
  2777. break;
  2778. case XML_REGEXP_PUNCT:
  2779. ret = xmlUCSIsCatP(codepoint);
  2780. break;
  2781. case XML_REGEXP_PUNCT_CONNECTOR:
  2782. ret = xmlUCSIsCatPc(codepoint);
  2783. break;
  2784. case XML_REGEXP_PUNCT_DASH:
  2785. ret = xmlUCSIsCatPd(codepoint);
  2786. break;
  2787. case XML_REGEXP_PUNCT_OPEN:
  2788. ret = xmlUCSIsCatPs(codepoint);
  2789. break;
  2790. case XML_REGEXP_PUNCT_CLOSE:
  2791. ret = xmlUCSIsCatPe(codepoint);
  2792. break;
  2793. case XML_REGEXP_PUNCT_INITQUOTE:
  2794. ret = xmlUCSIsCatPi(codepoint);
  2795. break;
  2796. case XML_REGEXP_PUNCT_FINQUOTE:
  2797. ret = xmlUCSIsCatPf(codepoint);
  2798. break;
  2799. case XML_REGEXP_PUNCT_OTHERS:
  2800. ret = xmlUCSIsCatPo(codepoint);
  2801. break;
  2802. case XML_REGEXP_SEPAR:
  2803. ret = xmlUCSIsCatZ(codepoint);
  2804. break;
  2805. case XML_REGEXP_SEPAR_SPACE:
  2806. ret = xmlUCSIsCatZs(codepoint);
  2807. break;
  2808. case XML_REGEXP_SEPAR_LINE:
  2809. ret = xmlUCSIsCatZl(codepoint);
  2810. break;
  2811. case XML_REGEXP_SEPAR_PARA:
  2812. ret = xmlUCSIsCatZp(codepoint);
  2813. break;
  2814. case XML_REGEXP_SYMBOL:
  2815. ret = xmlUCSIsCatS(codepoint);
  2816. break;
  2817. case XML_REGEXP_SYMBOL_MATH:
  2818. ret = xmlUCSIsCatSm(codepoint);
  2819. break;
  2820. case XML_REGEXP_SYMBOL_CURRENCY:
  2821. ret = xmlUCSIsCatSc(codepoint);
  2822. break;
  2823. case XML_REGEXP_SYMBOL_MODIFIER:
  2824. ret = xmlUCSIsCatSk(codepoint);
  2825. break;
  2826. case XML_REGEXP_SYMBOL_OTHERS:
  2827. ret = xmlUCSIsCatSo(codepoint);
  2828. break;
  2829. case XML_REGEXP_OTHER:
  2830. ret = xmlUCSIsCatC(codepoint);
  2831. break;
  2832. case XML_REGEXP_OTHER_CONTROL:
  2833. ret = xmlUCSIsCatCc(codepoint);
  2834. break;
  2835. case XML_REGEXP_OTHER_FORMAT:
  2836. ret = xmlUCSIsCatCf(codepoint);
  2837. break;
  2838. case XML_REGEXP_OTHER_PRIVATE:
  2839. ret = xmlUCSIsCatCo(codepoint);
  2840. break;
  2841. case XML_REGEXP_OTHER_NA:
  2842. /* ret = xmlUCSIsCatCn(codepoint); */
  2843. /* Seems it doesn't exist anymore in recent Unicode releases */
  2844. ret = 0;
  2845. break;
  2846. case XML_REGEXP_BLOCK_NAME:
  2847. ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
  2848. break;
  2849. }
  2850. if (neg)
  2851. return(!ret);
  2852. return(ret);
  2853. }
  2854. static int
  2855. xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
  2856. int i, ret = 0;
  2857. xmlRegRangePtr range;
  2858. if ((atom == NULL) || (!IS_CHAR(codepoint)))
  2859. return(-1);
  2860. switch (atom->type) {
  2861. case XML_REGEXP_SUBREG:
  2862. case XML_REGEXP_EPSILON:
  2863. return(-1);
  2864. case XML_REGEXP_CHARVAL:
  2865. return(codepoint == atom->codepoint);
  2866. case XML_REGEXP_RANGES: {
  2867. int accept = 0;
  2868. for (i = 0;i < atom->nbRanges;i++) {
  2869. range = atom->ranges[i];
  2870. if (range->neg == 2) {
  2871. ret = xmlRegCheckCharacterRange(range->type, codepoint,
  2872. 0, range->start, range->end,
  2873. range->blockName);
  2874. if (ret != 0)
  2875. return(0); /* excluded char */
  2876. } else if (range->neg) {
  2877. ret = xmlRegCheckCharacterRange(range->type, codepoint,
  2878. 0, range->start, range->end,
  2879. range->blockName);
  2880. if (ret == 0)
  2881. accept = 1;
  2882. else
  2883. return(0);
  2884. } else {
  2885. ret = xmlRegCheckCharacterRange(range->type, codepoint,
  2886. 0, range->start, range->end,
  2887. range->blockName);
  2888. if (ret != 0)
  2889. accept = 1; /* might still be excluded */
  2890. }
  2891. }
  2892. return(accept);
  2893. }
  2894. case XML_REGEXP_STRING:
  2895. printf("TODO: XML_REGEXP_STRING\n");
  2896. return(-1);
  2897. case XML_REGEXP_ANYCHAR:
  2898. case XML_REGEXP_ANYSPACE:
  2899. case XML_REGEXP_NOTSPACE:
  2900. case XML_REGEXP_INITNAME:
  2901. case XML_REGEXP_NOTINITNAME:
  2902. case XML_REGEXP_NAMECHAR:
  2903. case XML_REGEXP_NOTNAMECHAR:
  2904. case XML_REGEXP_DECIMAL:
  2905. case XML_REGEXP_NOTDECIMAL:
  2906. case XML_REGEXP_REALCHAR:
  2907. case XML_REGEXP_NOTREALCHAR:
  2908. case XML_REGEXP_LETTER:
  2909. case XML_REGEXP_LETTER_UPPERCASE:
  2910. case XML_REGEXP_LETTER_LOWERCASE:
  2911. case XML_REGEXP_LETTER_TITLECASE:
  2912. case XML_REGEXP_LETTER_MODIFIER:
  2913. case XML_REGEXP_LETTER_OTHERS:
  2914. case XML_REGEXP_MARK:
  2915. case XML_REGEXP_MARK_NONSPACING:
  2916. case XML_REGEXP_MARK_SPACECOMBINING:
  2917. case XML_REGEXP_MARK_ENCLOSING:
  2918. case XML_REGEXP_NUMBER:
  2919. case XML_REGEXP_NUMBER_DECIMAL:
  2920. case XML_REGEXP_NUMBER_LETTER:
  2921. case XML_REGEXP_NUMBER_OTHERS:
  2922. case XML_REGEXP_PUNCT:
  2923. case XML_REGEXP_PUNCT_CONNECTOR:
  2924. case XML_REGEXP_PUNCT_DASH:
  2925. case XML_REGEXP_PUNCT_OPEN:
  2926. case XML_REGEXP_PUNCT_CLOSE:
  2927. case XML_REGEXP_PUNCT_INITQUOTE:
  2928. case XML_REGEXP_PUNCT_FINQUOTE:
  2929. case XML_REGEXP_PUNCT_OTHERS:
  2930. case XML_REGEXP_SEPAR:
  2931. case XML_REGEXP_SEPAR_SPACE:
  2932. case XML_REGEXP_SEPAR_LINE:
  2933. case XML_REGEXP_SEPAR_PARA:
  2934. case XML_REGEXP_SYMBOL:
  2935. case XML_REGEXP_SYMBOL_MATH:
  2936. case XML_REGEXP_SYMBOL_CURRENCY:
  2937. case XML_REGEXP_SYMBOL_MODIFIER:
  2938. case XML_REGEXP_SYMBOL_OTHERS:
  2939. case XML_REGEXP_OTHER:
  2940. case XML_REGEXP_OTHER_CONTROL:
  2941. case XML_REGEXP_OTHER_FORMAT:
  2942. case XML_REGEXP_OTHER_PRIVATE:
  2943. case XML_REGEXP_OTHER_NA:
  2944. case XML_REGEXP_BLOCK_NAME:
  2945. ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
  2946. (const xmlChar *)atom->valuep);
  2947. if (atom->neg)
  2948. ret = !ret;
  2949. break;
  2950. }
  2951. return(ret);
  2952. }
  2953. /************************************************************************
  2954. * *
  2955. * Saving and restoring state of an execution context *
  2956. * *
  2957. ************************************************************************/
  #ifdef DEBUG_REGEXP_EXEC
  /*
   * xmlFARegDebugExec:
   * @exec:  the regexp execution context
   *
   * Debug helper, only compiled with DEBUG_REGEXP_EXEC.  Prints the current
   * state number, transition number and input index, followed either by up
   * to three of the most recently stacked input tokens (when an input stack
   * exists) or by the remaining part of the input string.
   */
  static void
  xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
      printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
      if (exec->inputStack == NULL) {
          printf(": %s", &(exec->inputString[exec->index]));
      } else {
          int depth = 0;

          printf(": ");
          /* walk back from the top of the token stack, newest first */
          while ((depth < 3) && (depth < exec->inputStackNr)) {
              printf("%s ", (const char *)
                     exec->inputStack[exec->inputStackNr - (depth + 1)].value);
              depth++;
          }
      }
      printf("\n");
  }
  #endif
  2974. static void
  2975. xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
  2976. #ifdef DEBUG_REGEXP_EXEC
  2977. printf("saving ");
  2978. exec->transno++;
  2979. xmlFARegDebugExec(exec);
  2980. exec->transno--;
  2981. #endif
  2982. #ifdef MAX_PUSH
  2983. if (exec->nbPush > MAX_PUSH) {
  2984. return;
  2985. }
  2986. exec->nbPush++;
  2987. #endif
  2988. if (exec->maxRollbacks == 0) {
  2989. exec->maxRollbacks = 4;
  2990. exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
  2991. sizeof(xmlRegExecRollback));
  2992. if (exec->rollbacks == NULL) {
  2993. xmlRegexpErrMemory(NULL, "saving regexp");
  2994. exec->maxRollbacks = 0;
  2995. return;
  2996. }
  2997. memset(exec->rollbacks, 0,
  2998. exec->maxRollbacks * sizeof(xmlRegExecRollback));
  2999. } else if (exec->nbRollbacks >= exec->maxRollbacks) {
  3000. xmlRegExecRollback *tmp;
  3001. int len = exec->maxRollbacks;
  3002. exec->maxRollbacks *= 2;
  3003. tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
  3004. exec->maxRollbacks * sizeof(xmlRegExecRollback));
  3005. if (tmp == NULL) {
  3006. xmlRegexpErrMemory(NULL, "saving regexp");
  3007. exec->maxRollbacks /= 2;
  3008. return;
  3009. }
  3010. exec->rollbacks = tmp;
  3011. tmp = &exec->rollbacks[len];
  3012. memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
  3013. }
  3014. exec->rollbacks[exec->nbRollbacks].state = exec->state;
  3015. exec->rollbacks[exec->nbRollbacks].index = exec->index;
  3016. exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
  3017. if (exec->comp->nbCounters > 0) {
  3018. if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
  3019. exec->rollbacks[exec->nbRollbacks].counts = (int *)
  3020. xmlMalloc(exec->comp->nbCounters * sizeof(int));
  3021. if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
  3022. xmlRegexpErrMemory(NULL, "saving regexp");
  3023. exec->status = -5;
  3024. return;
  3025. }
  3026. }
  3027. memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
  3028. exec->comp->nbCounters * sizeof(int));
  3029. }
  3030. exec->nbRollbacks++;
  3031. }
  3032. static void
  3033. xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
  3034. if (exec->nbRollbacks <= 0) {
  3035. exec->status = -1;
  3036. #ifdef DEBUG_REGEXP_EXEC
  3037. printf("rollback failed on empty stack\n");
  3038. #endif
  3039. return;
  3040. }
  3041. exec->nbRollbacks--;
  3042. exec->state = exec->rollbacks[exec->nbRollbacks].state;
  3043. exec->index = exec->rollbacks[exec->nbRollbacks].index;
  3044. exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
  3045. if (exec->comp->nbCounters > 0) {
  3046. if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
  3047. fprintf(stderr, "exec save: allocation failed");
  3048. exec->status = -6;
  3049. return;
  3050. }
  3051. if (exec->counts) {
  3052. memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
  3053. exec->comp->nbCounters * sizeof(int));
  3054. }
  3055. }
  3056. #ifdef DEBUG_REGEXP_EXEC
  3057. printf("restored ");
  3058. xmlFARegDebugExec(exec);
  3059. #endif
  3060. }
  3061. /************************************************************************
  3062. * *
  3063. * Verifier, running an input against a compiled regexp *
  3064. * *
  3065. ************************************************************************/
  3066. static int
  3067. xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
  3068. xmlRegExecCtxt execval;
  3069. xmlRegExecCtxtPtr exec = &execval;
  3070. int ret, codepoint = 0, len, deter;
  3071. exec->inputString = content;
  3072. exec->index = 0;
  3073. exec->nbPush = 0;
  3074. exec->determinist = 1;
  3075. exec->maxRollbacks = 0;
  3076. exec->nbRollbacks = 0;
  3077. exec->rollbacks = NULL;
  3078. exec->status = 0;
  3079. exec->comp = comp;
  3080. exec->state = comp->states[0];
  3081. exec->transno = 0;
  3082. exec->transcount = 0;
  3083. exec->inputStack = NULL;
  3084. exec->inputStackMax = 0;
  3085. if (comp->nbCounters > 0) {
  3086. exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
  3087. if (exec->counts == NULL) {
  3088. xmlRegexpErrMemory(NULL, "running regexp");
  3089. return(-1);
  3090. }
  3091. memset(exec->counts, 0, comp->nbCounters * sizeof(int));
  3092. } else
  3093. exec->counts = NULL;
  3094. while ((exec->status == 0) && (exec->state != NULL) &&
  3095. ((exec->inputString[exec->index] != 0) ||
  3096. ((exec->state != NULL) &&
  3097. (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
  3098. xmlRegTransPtr trans;
  3099. xmlRegAtomPtr atom;
  3100. /*
  3101. * If end of input on non-terminal state, rollback, however we may
  3102. * still have epsilon like transition for counted transitions
  3103. * on counters, in that case don't break too early. Additionally,
  3104. * if we are working on a range like "AB{0,2}", where B is not present,
  3105. * we don't want to break.
  3106. */
  3107. len = 1;
  3108. if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
  3109. /*
  3110. * if there is a transition, we must check if
  3111. * atom allows minOccurs of 0
  3112. */
  3113. if (exec->transno < exec->state->nbTrans) {
  3114. trans = &exec->state->trans[exec->transno];
  3115. if (trans->to >=0) {
  3116. atom = trans->atom;
  3117. if (!((atom->min == 0) && (atom->max > 0)))
  3118. goto rollback;
  3119. }
  3120. } else
  3121. goto rollback;
  3122. }
  3123. exec->transcount = 0;
  3124. for (;exec->transno < exec->state->nbTrans;exec->transno++) {
  3125. trans = &exec->state->trans[exec->transno];
  3126. if (trans->to < 0)
  3127. continue;
  3128. atom = trans->atom;
  3129. ret = 0;
  3130. deter = 1;
  3131. if (trans->count >= 0) {
  3132. int count;
  3133. xmlRegCounterPtr counter;
  3134. if (exec->counts == NULL) {
  3135. exec->status = -1;
  3136. goto error;
  3137. }
  3138. /*
  3139. * A counted transition.
  3140. */
  3141. count = exec->counts[trans->count];
  3142. counter = &exec->comp->counters[trans->count];
  3143. #ifdef DEBUG_REGEXP_EXEC
  3144. printf("testing count %d: val %d, min %d, max %d\n",
  3145. trans->count, count, counter->min, counter->max);
  3146. #endif
  3147. ret = ((count >= counter->min) && (count <= counter->max));
  3148. if ((ret) && (counter->min != counter->max))
  3149. deter = 0;
  3150. } else if (atom == NULL) {
  3151. fprintf(stderr, "epsilon transition left at runtime\n");
  3152. exec->status = -2;
  3153. break;
  3154. } else if (exec->inputString[exec->index] != 0) {
  3155. codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
  3156. ret = xmlRegCheckCharacter(atom, codepoint);
  3157. if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
  3158. xmlRegStatePtr to = comp->states[trans->to];
  3159. /*
  3160. * this is a multiple input sequence
  3161. * If there is a counter associated increment it now.
  3162. * before potentially saving and rollback
  3163. * do not increment if the counter is already over the
  3164. * maximum limit in which case get to next transition
  3165. */
  3166. if (trans->counter >= 0) {
  3167. xmlRegCounterPtr counter;
  3168. if ((exec->counts == NULL) ||
  3169. (exec->comp == NULL) ||
  3170. (exec->comp->counters == NULL)) {
  3171. exec->status = -1;
  3172. goto error;
  3173. }
  3174. counter = &exec->comp->counters[trans->counter];
  3175. if (exec->counts[trans->counter] >= counter->max)
  3176. continue; /* for loop on transitions */
  3177. #ifdef DEBUG_REGEXP_EXEC
  3178. printf("Increasing count %d\n", trans->counter);
  3179. #endif
  3180. exec->counts[trans->counter]++;
  3181. }
  3182. if (exec->state->nbTrans > exec->transno + 1) {
  3183. xmlFARegExecSave(exec);
  3184. }
  3185. exec->transcount = 1;
  3186. do {
  3187. /*
  3188. * Try to progress as much as possible on the input
  3189. */
  3190. if (exec->transcount == atom->max) {
  3191. break;
  3192. }
  3193. exec->index += len;
  3194. /*
  3195. * End of input: stop here
  3196. */
  3197. if (exec->inputString[exec->index] == 0) {
  3198. exec->index -= len;
  3199. break;
  3200. }
  3201. if (exec->transcount >= atom->min) {
  3202. int transno = exec->transno;
  3203. xmlRegStatePtr state = exec->state;
  3204. /*
  3205. * The transition is acceptable save it
  3206. */
  3207. exec->transno = -1; /* trick */
  3208. exec->state = to;
  3209. xmlFARegExecSave(exec);
  3210. exec->transno = transno;
  3211. exec->state = state;
  3212. }
  3213. codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
  3214. len);
  3215. ret = xmlRegCheckCharacter(atom, codepoint);
  3216. exec->transcount++;
  3217. } while (ret == 1);
  3218. if (exec->transcount < atom->min)
  3219. ret = 0;
  3220. /*
  3221. * If the last check failed but one transition was found
  3222. * possible, rollback
  3223. */
  3224. if (ret < 0)
  3225. ret = 0;
  3226. if (ret == 0) {
  3227. goto rollback;
  3228. }
  3229. if (trans->counter >= 0) {
  3230. if (exec->counts == NULL) {
  3231. exec->status = -1;
  3232. goto error;
  3233. }
  3234. #ifdef DEBUG_REGEXP_EXEC
  3235. printf("Decreasing count %d\n", trans->counter);
  3236. #endif
  3237. exec->counts[trans->counter]--;
  3238. }
  3239. } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
  3240. /*
  3241. * we don't match on the codepoint, but minOccurs of 0
  3242. * says that's ok. Setting len to 0 inhibits stepping
  3243. * over the codepoint.
  3244. */
  3245. exec->transcount = 1;
  3246. len = 0;
  3247. ret = 1;
  3248. }
  3249. } else if ((atom->min == 0) && (atom->max > 0)) {
  3250. /* another spot to match when minOccurs is 0 */
  3251. exec->transcount = 1;
  3252. len = 0;
  3253. ret = 1;
  3254. }
  3255. if (ret == 1) {
  3256. if ((trans->nd == 1) ||
  3257. ((trans->count >= 0) && (deter == 0) &&
  3258. (exec->state->nbTrans > exec->transno + 1))) {
  3259. #ifdef DEBUG_REGEXP_EXEC
  3260. if (trans->nd == 1)
  3261. printf("Saving on nd transition atom %d for %c at %d\n",
  3262. trans->atom->no, codepoint, exec->index);
  3263. else
  3264. printf("Saving on counted transition count %d for %c at %d\n",
  3265. trans->count, codepoint, exec->index);
  3266. #endif
  3267. xmlFARegExecSave(exec);
  3268. }
  3269. if (trans->counter >= 0) {
  3270. xmlRegCounterPtr counter;
  3271. /* make sure we don't go over the counter maximum value */
  3272. if ((exec->counts == NULL) ||
  3273. (exec->comp == NULL) ||
  3274. (exec->comp->counters == NULL)) {
  3275. exec->status = -1;
  3276. goto error;
  3277. }
  3278. counter = &exec->comp->counters[trans->counter];
  3279. if (exec->counts[trans->counter] >= counter->max)
  3280. continue; /* for loop on transitions */
  3281. #ifdef DEBUG_REGEXP_EXEC
  3282. printf("Increasing count %d\n", trans->counter);
  3283. #endif
  3284. exec->counts[trans->counter]++;
  3285. }
  3286. if ((trans->count >= 0) &&
  3287. (trans->count < REGEXP_ALL_COUNTER)) {
  3288. if (exec->counts == NULL) {
  3289. exec->status = -1;
  3290. goto error;
  3291. }
  3292. #ifdef DEBUG_REGEXP_EXEC
  3293. printf("resetting count %d on transition\n",
  3294. trans->count);
  3295. #endif
  3296. exec->counts[trans->count] = 0;
  3297. }
  3298. #ifdef DEBUG_REGEXP_EXEC
  3299. printf("entering state %d\n", trans->to);
  3300. #endif
  3301. exec->state = comp->states[trans->to];
  3302. exec->transno = 0;
  3303. if (trans->atom != NULL) {
  3304. exec->index += len;
  3305. }
  3306. goto progress;
  3307. } else if (ret < 0) {
  3308. exec->status = -4;
  3309. break;
  3310. }
  3311. }
  3312. if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
  3313. rollback:
  3314. /*
  3315. * Failed to find a way out
  3316. */
  3317. exec->determinist = 0;
  3318. #ifdef DEBUG_REGEXP_EXEC
  3319. printf("rollback from state %d on %d:%c\n", exec->state->no,
  3320. codepoint,codepoint);
  3321. #endif
  3322. xmlFARegExecRollBack(exec);
  3323. }
  3324. progress:
  3325. continue;
  3326. }
  3327. error:
  3328. if (exec->rollbacks != NULL) {
  3329. if (exec->counts != NULL) {
  3330. int i;
  3331. for (i = 0;i < exec->maxRollbacks;i++)
  3332. if (exec->rollbacks[i].counts != NULL)
  3333. xmlFree(exec->rollbacks[i].counts);
  3334. }
  3335. xmlFree(exec->rollbacks);
  3336. }
  3337. if (exec->state == NULL)
  3338. return(-1);
  3339. if (exec->counts != NULL)
  3340. xmlFree(exec->counts);
  3341. if (exec->status == 0)
  3342. return(1);
  3343. if (exec->status == -1) {
  3344. if (exec->nbPush > MAX_PUSH)
  3345. return(-1);
  3346. return(0);
  3347. }
  3348. return(exec->status);
  3349. }
  3350. /************************************************************************
  3351. * *
  3352. * Progressive interface to the verifier one atom at a time *
  3353. * *
  3354. ************************************************************************/
  3355. #ifdef DEBUG_ERR
  3356. static void testerr(xmlRegExecCtxtPtr exec);
  3357. #endif
  3358. /**
  3359. * xmlRegNewExecCtxt:
  3360. * @comp: a precompiled regular expression
  3361. * @callback: a callback function used for handling progresses in the
  3362. * automata matching phase
  3363. * @data: the context data associated to the callback in this context
  3364. *
  3365. * Build a context used for progressive evaluation of a regexp.
  3366. *
  3367. * Returns the new context
  3368. */
  3369. xmlRegExecCtxtPtr
  3370. xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
  3371. xmlRegExecCtxtPtr exec;
  3372. if (comp == NULL)
  3373. return(NULL);
  3374. if ((comp->compact == NULL) && (comp->states == NULL))
  3375. return(NULL);
  3376. exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
  3377. if (exec == NULL) {
  3378. xmlRegexpErrMemory(NULL, "creating execution context");
  3379. return(NULL);
  3380. }
  3381. memset(exec, 0, sizeof(xmlRegExecCtxt));
  3382. exec->inputString = NULL;
  3383. exec->index = 0;
  3384. exec->determinist = 1;
  3385. exec->maxRollbacks = 0;
  3386. exec->nbRollbacks = 0;
  3387. exec->rollbacks = NULL;
  3388. exec->status = 0;
  3389. exec->comp = comp;
  3390. if (comp->compact == NULL)
  3391. exec->state = comp->states[0];
  3392. exec->transno = 0;
  3393. exec->transcount = 0;
  3394. exec->callback = callback;
  3395. exec->data = data;
  3396. if (comp->nbCounters > 0) {
  3397. /*
  3398. * For error handling, exec->counts is allocated twice the size
  3399. * the second half is used to store the data in case of rollback
  3400. */
  3401. exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
  3402. * 2);
  3403. if (exec->counts == NULL) {
  3404. xmlRegexpErrMemory(NULL, "creating execution context");
  3405. xmlFree(exec);
  3406. return(NULL);
  3407. }
  3408. memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
  3409. exec->errCounts = &exec->counts[comp->nbCounters];
  3410. } else {
  3411. exec->counts = NULL;
  3412. exec->errCounts = NULL;
  3413. }
  3414. exec->inputStackMax = 0;
  3415. exec->inputStackNr = 0;
  3416. exec->inputStack = NULL;
  3417. exec->errStateNo = -1;
  3418. exec->errString = NULL;
  3419. exec->nbPush = 0;
  3420. return(exec);
  3421. }
  3422. /**
  3423. * xmlRegFreeExecCtxt:
  3424. * @exec: a regular expression evaluation context
  3425. *
  3426. * Free the structures associated to a regular expression evaluation context.
  3427. */
  3428. void
  3429. xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
  3430. if (exec == NULL)
  3431. return;
  3432. if (exec->rollbacks != NULL) {
  3433. if (exec->counts != NULL) {
  3434. int i;
  3435. for (i = 0;i < exec->maxRollbacks;i++)
  3436. if (exec->rollbacks[i].counts != NULL)
  3437. xmlFree(exec->rollbacks[i].counts);
  3438. }
  3439. xmlFree(exec->rollbacks);
  3440. }
  3441. if (exec->counts != NULL)
  3442. xmlFree(exec->counts);
  3443. if (exec->inputStack != NULL) {
  3444. int i;
  3445. for (i = 0;i < exec->inputStackNr;i++) {
  3446. if (exec->inputStack[i].value != NULL)
  3447. xmlFree(exec->inputStack[i].value);
  3448. }
  3449. xmlFree(exec->inputStack);
  3450. }
  3451. if (exec->errString != NULL)
  3452. xmlFree(exec->errString);
  3453. xmlFree(exec);
  3454. }
  3455. static void
  3456. xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
  3457. void *data) {
  3458. #ifdef DEBUG_PUSH
  3459. printf("saving value: %d:%s\n", exec->inputStackNr, value);
  3460. #endif
  3461. if (exec->inputStackMax == 0) {
  3462. exec->inputStackMax = 4;
  3463. exec->inputStack = (xmlRegInputTokenPtr)
  3464. xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
  3465. if (exec->inputStack == NULL) {
  3466. xmlRegexpErrMemory(NULL, "pushing input string");
  3467. exec->inputStackMax = 0;
  3468. return;
  3469. }
  3470. } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
  3471. xmlRegInputTokenPtr tmp;
  3472. exec->inputStackMax *= 2;
  3473. tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
  3474. exec->inputStackMax * sizeof(xmlRegInputToken));
  3475. if (tmp == NULL) {
  3476. xmlRegexpErrMemory(NULL, "pushing input string");
  3477. exec->inputStackMax /= 2;
  3478. return;
  3479. }
  3480. exec->inputStack = tmp;
  3481. }
  3482. exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
  3483. exec->inputStack[exec->inputStackNr].data = data;
  3484. exec->inputStackNr++;
  3485. exec->inputStack[exec->inputStackNr].value = NULL;
  3486. exec->inputStack[exec->inputStackNr].data = NULL;
  3487. }
  3488. /**
  3489. * xmlRegStrEqualWildcard:
  3490. * @expStr: the string to be evaluated
  3491. * @valStr: the validation string
  3492. *
  3493. * Checks if both strings are equal or have the same content. "*"
  3494. * can be used as a wildcard in @valStr; "|" is used as a separator of
  3495. * substrings in both @expStr and @valStr.
  3496. *
  3497. * Returns 1 if the comparison is satisfied and the number of substrings
  3498. * is equal, 0 otherwise.
  3499. */
  3500. static int
  3501. xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
  3502. if (expStr == valStr) return(1);
  3503. if (expStr == NULL) return(0);
  3504. if (valStr == NULL) return(0);
  3505. do {
  3506. /*
  3507. * Eval if we have a wildcard for the current item.
  3508. */
  3509. if (*expStr != *valStr) {
  3510. /* if one of them starts with a wildcard make valStr be it */
  3511. if (*valStr == '*') {
  3512. const xmlChar *tmp;
  3513. tmp = valStr;
  3514. valStr = expStr;
  3515. expStr = tmp;
  3516. }
  3517. if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
  3518. do {
  3519. if (*valStr == XML_REG_STRING_SEPARATOR)
  3520. break;
  3521. valStr++;
  3522. } while (*valStr != 0);
  3523. continue;
  3524. } else
  3525. return(0);
  3526. }
  3527. expStr++;
  3528. valStr++;
  3529. } while (*valStr != 0);
  3530. if (*expStr != 0)
  3531. return (0);
  3532. else
  3533. return (1);
  3534. }
  3535. /**
  3536. * xmlRegCompactPushString:
  3537. * @exec: a regexp execution context
  3538. * @comp: the precompiled exec with a compact table
  3539. * @value: a string token input
  3540. * @data: data associated to the token to reuse in callbacks
  3541. *
  3542. * Push one input token in the execution context
  3543. *
  3544. * Returns: 1 if the regexp reached a final state, 0 if non-final, and
  3545. * a negative value in case of error.
  3546. */
  3547. static int
  3548. xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
  3549. xmlRegexpPtr comp,
  3550. const xmlChar *value,
  3551. void *data) {
  3552. int state = exec->index;
  3553. int i, target;
  3554. if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
  3555. return(-1);
  3556. if (value == NULL) {
  3557. /*
  3558. * are we at a final state ?
  3559. */
  3560. if (comp->compact[state * (comp->nbstrings + 1)] ==
  3561. XML_REGEXP_FINAL_STATE)
  3562. return(1);
  3563. return(0);
  3564. }
  3565. #ifdef DEBUG_PUSH
  3566. printf("value pushed: %s\n", value);
  3567. #endif
  3568. /*
  3569. * Examine all outside transitions from current state
  3570. */
  3571. for (i = 0;i < comp->nbstrings;i++) {
  3572. target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
  3573. if ((target > 0) && (target <= comp->nbstates)) {
  3574. target--; /* to avoid 0 */
  3575. if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
  3576. exec->index = target;
  3577. if ((exec->callback != NULL) && (comp->transdata != NULL)) {
  3578. exec->callback(exec->data, value,
  3579. comp->transdata[state * comp->nbstrings + i], data);
  3580. }
  3581. #ifdef DEBUG_PUSH
  3582. printf("entering state %d\n", target);
  3583. #endif
  3584. if (comp->compact[target * (comp->nbstrings + 1)] ==
  3585. XML_REGEXP_SINK_STATE)
  3586. goto error;
  3587. if (comp->compact[target * (comp->nbstrings + 1)] ==
  3588. XML_REGEXP_FINAL_STATE)
  3589. return(1);
  3590. return(0);
  3591. }
  3592. }
  3593. }
  3594. /*
  3595. * Failed to find an exit transition out from current state for the
  3596. * current token
  3597. */
  3598. #ifdef DEBUG_PUSH
  3599. printf("failed to find a transition for %s on state %d\n", value, state);
  3600. #endif
  3601. error:
  3602. if (exec->errString != NULL)
  3603. xmlFree(exec->errString);
  3604. exec->errString = xmlStrdup(value);
  3605. exec->errStateNo = state;
  3606. exec->status = -1;
  3607. #ifdef DEBUG_ERR
  3608. testerr(exec);
  3609. #endif
  3610. return(-1);
  3611. }
  3612. /**
  3613. * xmlRegExecPushStringInternal:
  3614. * @exec: a regexp execution context or NULL to indicate the end
  3615. * @value: a string token input
  3616. * @data: data associated to the token to reuse in callbacks
  3617. * @compound: value was assembled from 2 strings
  3618. *
  3619. * Push one input token in the execution context
  3620. *
  3621. * Returns: 1 if the regexp reached a final state, 0 if non-final, and
  3622. * a negative value in case of error.
  3623. */
  3624. static int
  3625. xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
  3626. void *data, int compound) {
  3627. xmlRegTransPtr trans;
  3628. xmlRegAtomPtr atom;
  3629. int ret;
  3630. int final = 0;
  3631. int progress = 1;
  3632. if (exec == NULL)
  3633. return(-1);
  3634. if (exec->comp == NULL)
  3635. return(-1);
  3636. if (exec->status != 0)
  3637. return(exec->status);
  3638. if (exec->comp->compact != NULL)
  3639. return(xmlRegCompactPushString(exec, exec->comp, value, data));
  3640. if (value == NULL) {
  3641. if (exec->state->type == XML_REGEXP_FINAL_STATE)
  3642. return(1);
  3643. final = 1;
  3644. }
  3645. #ifdef DEBUG_PUSH
  3646. printf("value pushed: %s\n", value);
  3647. #endif
  3648. /*
  3649. * If we have an active rollback stack push the new value there
  3650. * and get back to where we were left
  3651. */
  3652. if ((value != NULL) && (exec->inputStackNr > 0)) {
  3653. xmlFARegExecSaveInputString(exec, value, data);
  3654. value = exec->inputStack[exec->index].value;
  3655. data = exec->inputStack[exec->index].data;
  3656. #ifdef DEBUG_PUSH
  3657. printf("value loaded: %s\n", value);
  3658. #endif
  3659. }
  3660. while ((exec->status == 0) &&
  3661. ((value != NULL) ||
  3662. ((final == 1) &&
  3663. (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
  3664. /*
  3665. * End of input on non-terminal state, rollback, however we may
  3666. * still have epsilon like transition for counted transitions
  3667. * on counters, in that case don't break too early.
  3668. */
  3669. if ((value == NULL) && (exec->counts == NULL))
  3670. goto rollback;
  3671. exec->transcount = 0;
  3672. for (;exec->transno < exec->state->nbTrans;exec->transno++) {
  3673. trans = &exec->state->trans[exec->transno];
  3674. if (trans->to < 0)
  3675. continue;
  3676. atom = trans->atom;
  3677. ret = 0;
  3678. if (trans->count == REGEXP_ALL_LAX_COUNTER) {
  3679. int i;
  3680. int count;
  3681. xmlRegTransPtr t;
  3682. xmlRegCounterPtr counter;
  3683. ret = 0;
  3684. #ifdef DEBUG_PUSH
  3685. printf("testing all lax %d\n", trans->count);
  3686. #endif
  3687. /*
  3688. * Check all counted transitions from the current state
  3689. */
  3690. if ((value == NULL) && (final)) {
  3691. ret = 1;
  3692. } else if (value != NULL) {
  3693. for (i = 0;i < exec->state->nbTrans;i++) {
  3694. t = &exec->state->trans[i];
  3695. if ((t->counter < 0) || (t == trans))
  3696. continue;
  3697. counter = &exec->comp->counters[t->counter];
  3698. count = exec->counts[t->counter];
  3699. if ((count < counter->max) &&
  3700. (t->atom != NULL) &&
  3701. (xmlStrEqual(value, t->atom->valuep))) {
  3702. ret = 0;
  3703. break;
  3704. }
  3705. if ((count >= counter->min) &&
  3706. (count < counter->max) &&
  3707. (t->atom != NULL) &&
  3708. (xmlStrEqual(value, t->atom->valuep))) {
  3709. ret = 1;
  3710. break;
  3711. }
  3712. }
  3713. }
  3714. } else if (trans->count == REGEXP_ALL_COUNTER) {
  3715. int i;
  3716. int count;
  3717. xmlRegTransPtr t;
  3718. xmlRegCounterPtr counter;
  3719. ret = 1;
  3720. #ifdef DEBUG_PUSH
  3721. printf("testing all %d\n", trans->count);
  3722. #endif
  3723. /*
  3724. * Check all counted transitions from the current state
  3725. */
  3726. for (i = 0;i < exec->state->nbTrans;i++) {
  3727. t = &exec->state->trans[i];
  3728. if ((t->counter < 0) || (t == trans))
  3729. continue;
  3730. counter = &exec->comp->counters[t->counter];
  3731. count = exec->counts[t->counter];
  3732. if ((count < counter->min) || (count > counter->max)) {
  3733. ret = 0;
  3734. break;
  3735. }
  3736. }
  3737. } else if (trans->count >= 0) {
  3738. int count;
  3739. xmlRegCounterPtr counter;
  3740. /*
  3741. * A counted transition.
  3742. */
  3743. count = exec->counts[trans->count];
  3744. counter = &exec->comp->counters[trans->count];
  3745. #ifdef DEBUG_PUSH
  3746. printf("testing count %d: val %d, min %d, max %d\n",
  3747. trans->count, count, counter->min, counter->max);
  3748. #endif
  3749. ret = ((count >= counter->min) && (count <= counter->max));
  3750. } else if (atom == NULL) {
  3751. fprintf(stderr, "epsilon transition left at runtime\n");
  3752. exec->status = -2;
  3753. break;
  3754. } else if (value != NULL) {
  3755. ret = xmlRegStrEqualWildcard(atom->valuep, value);
  3756. if (atom->neg) {
  3757. ret = !ret;
  3758. if (!compound)
  3759. ret = 0;
  3760. }
  3761. if ((ret == 1) && (trans->counter >= 0)) {
  3762. xmlRegCounterPtr counter;
  3763. int count;
  3764. count = exec->counts[trans->counter];
  3765. counter = &exec->comp->counters[trans->counter];
  3766. if (count >= counter->max)
  3767. ret = 0;
  3768. }
  3769. if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
  3770. xmlRegStatePtr to = exec->comp->states[trans->to];
  3771. /*
  3772. * this is a multiple input sequence
  3773. */
  3774. if (exec->state->nbTrans > exec->transno + 1) {
  3775. if (exec->inputStackNr <= 0) {
  3776. xmlFARegExecSaveInputString(exec, value, data);
  3777. }
  3778. xmlFARegExecSave(exec);
  3779. }
  3780. exec->transcount = 1;
  3781. do {
  3782. /*
  3783. * Try to progress as much as possible on the input
  3784. */
  3785. if (exec->transcount == atom->max) {
  3786. break;
  3787. }
  3788. exec->index++;
  3789. value = exec->inputStack[exec->index].value;
  3790. data = exec->inputStack[exec->index].data;
  3791. #ifdef DEBUG_PUSH
  3792. printf("value loaded: %s\n", value);
  3793. #endif
  3794. /*
  3795. * End of input: stop here
  3796. */
  3797. if (value == NULL) {
  3798. exec->index --;
  3799. break;
  3800. }
  3801. if (exec->transcount >= atom->min) {
  3802. int transno = exec->transno;
  3803. xmlRegStatePtr state = exec->state;
  3804. /*
  3805. * The transition is acceptable save it
  3806. */
  3807. exec->transno = -1; /* trick */
  3808. exec->state = to;
  3809. if (exec->inputStackNr <= 0) {
  3810. xmlFARegExecSaveInputString(exec, value, data);
  3811. }
  3812. xmlFARegExecSave(exec);
  3813. exec->transno = transno;
  3814. exec->state = state;
  3815. }
  3816. ret = xmlStrEqual(value, atom->valuep);
  3817. exec->transcount++;
  3818. } while (ret == 1);
  3819. if (exec->transcount < atom->min)
  3820. ret = 0;
  3821. /*
  3822. * If the last check failed but one transition was found
  3823. * possible, rollback
  3824. */
  3825. if (ret < 0)
  3826. ret = 0;
  3827. if (ret == 0) {
  3828. goto rollback;
  3829. }
  3830. }
  3831. }
  3832. if (ret == 1) {
  3833. if ((exec->callback != NULL) && (atom != NULL) &&
  3834. (data != NULL)) {
  3835. exec->callback(exec->data, atom->valuep,
  3836. atom->data, data);
  3837. }
  3838. if (exec->state->nbTrans > exec->transno + 1) {
  3839. if (exec->inputStackNr <= 0) {
  3840. xmlFARegExecSaveInputString(exec, value, data);
  3841. }
  3842. xmlFARegExecSave(exec);
  3843. }
  3844. if (trans->counter >= 0) {
  3845. #ifdef DEBUG_PUSH
  3846. printf("Increasing count %d\n", trans->counter);
  3847. #endif
  3848. exec->counts[trans->counter]++;
  3849. }
  3850. if ((trans->count >= 0) &&
  3851. (trans->count < REGEXP_ALL_COUNTER)) {
  3852. #ifdef DEBUG_REGEXP_EXEC
  3853. printf("resetting count %d on transition\n",
  3854. trans->count);
  3855. #endif
  3856. exec->counts[trans->count] = 0;
  3857. }
  3858. #ifdef DEBUG_PUSH
  3859. printf("entering state %d\n", trans->to);
  3860. #endif
  3861. if ((exec->comp->states[trans->to] != NULL) &&
  3862. (exec->comp->states[trans->to]->type ==
  3863. XML_REGEXP_SINK_STATE)) {
  3864. /*
  3865. * entering a sink state, save the current state as error
  3866. * state.
  3867. */
  3868. if (exec->errString != NULL)
  3869. xmlFree(exec->errString);
  3870. exec->errString = xmlStrdup(value);
  3871. exec->errState = exec->state;
  3872. memcpy(exec->errCounts, exec->counts,
  3873. exec->comp->nbCounters * sizeof(int));
  3874. }
  3875. exec->state = exec->comp->states[trans->to];
  3876. exec->transno = 0;
  3877. if (trans->atom != NULL) {
  3878. if (exec->inputStack != NULL) {
  3879. exec->index++;
  3880. if (exec->index < exec->inputStackNr) {
  3881. value = exec->inputStack[exec->index].value;
  3882. data = exec->inputStack[exec->index].data;
  3883. #ifdef DEBUG_PUSH
  3884. printf("value loaded: %s\n", value);
  3885. #endif
  3886. } else {
  3887. value = NULL;
  3888. data = NULL;
  3889. #ifdef DEBUG_PUSH
  3890. printf("end of input\n");
  3891. #endif
  3892. }
  3893. } else {
  3894. value = NULL;
  3895. data = NULL;
  3896. #ifdef DEBUG_PUSH
  3897. printf("end of input\n");
  3898. #endif
  3899. }
  3900. }
  3901. goto progress;
  3902. } else if (ret < 0) {
  3903. exec->status = -4;
  3904. break;
  3905. }
  3906. }
  3907. if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
  3908. rollback:
  3909. /*
  3910. * if we didn't yet rollback on the current input
  3911. * store the current state as the error state.
  3912. */
  3913. if ((progress) && (exec->state != NULL) &&
  3914. (exec->state->type != XML_REGEXP_SINK_STATE)) {
  3915. progress = 0;
  3916. if (exec->errString != NULL)
  3917. xmlFree(exec->errString);
  3918. exec->errString = xmlStrdup(value);
  3919. exec->errState = exec->state;
  3920. if (exec->comp->nbCounters)
  3921. memcpy(exec->errCounts, exec->counts,
  3922. exec->comp->nbCounters * sizeof(int));
  3923. }
  3924. /*
  3925. * Failed to find a way out
  3926. */
  3927. exec->determinist = 0;
  3928. xmlFARegExecRollBack(exec);
  3929. if ((exec->inputStack != NULL ) && (exec->status == 0)) {
  3930. value = exec->inputStack[exec->index].value;
  3931. data = exec->inputStack[exec->index].data;
  3932. #ifdef DEBUG_PUSH
  3933. printf("value loaded: %s\n", value);
  3934. #endif
  3935. }
  3936. }
  3937. continue;
  3938. progress:
  3939. progress = 1;
  3940. continue;
  3941. }
  3942. if (exec->status == 0) {
  3943. return(exec->state->type == XML_REGEXP_FINAL_STATE);
  3944. }
  3945. #ifdef DEBUG_ERR
  3946. if (exec->status < 0) {
  3947. testerr(exec);
  3948. }
  3949. #endif
  3950. return(exec->status);
  3951. }
  3952. /**
  3953. * xmlRegExecPushString:
  3954. * @exec: a regexp execution context or NULL to indicate the end
  3955. * @value: a string token input
  3956. * @data: data associated to the token to reuse in callbacks
  3957. *
  3958. * Push one input token in the execution context
  3959. *
  3960. * Returns: 1 if the regexp reached a final state, 0 if non-final, and
  3961. * a negative value in case of error.
  3962. */
  3963. int
  3964. xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
  3965. void *data) {
  3966. return(xmlRegExecPushStringInternal(exec, value, data, 0));
  3967. }
  3968. /**
  3969. * xmlRegExecPushString2:
  3970. * @exec: a regexp execution context or NULL to indicate the end
  3971. * @value: the first string token input
  3972. * @value2: the second string token input
  3973. * @data: data associated to the token to reuse in callbacks
  3974. *
  3975. * Push one input token in the execution context
  3976. *
  3977. * Returns: 1 if the regexp reached a final state, 0 if non-final, and
  3978. * a negative value in case of error.
  3979. */
  3980. int
  3981. xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
  3982. const xmlChar *value2, void *data) {
  3983. xmlChar buf[150];
  3984. int lenn, lenp, ret;
  3985. xmlChar *str;
  3986. if (exec == NULL)
  3987. return(-1);
  3988. if (exec->comp == NULL)
  3989. return(-1);
  3990. if (exec->status != 0)
  3991. return(exec->status);
  3992. if (value2 == NULL)
  3993. return(xmlRegExecPushString(exec, value, data));
  3994. lenn = strlen((char *) value2);
  3995. lenp = strlen((char *) value);
  3996. if (150 < lenn + lenp + 2) {
  3997. str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
  3998. if (str == NULL) {
  3999. exec->status = -1;
  4000. return(-1);
  4001. }
  4002. } else {
  4003. str = buf;
  4004. }
  4005. memcpy(&str[0], value, lenp);
  4006. str[lenp] = XML_REG_STRING_SEPARATOR;
  4007. memcpy(&str[lenp + 1], value2, lenn);
  4008. str[lenn + lenp + 1] = 0;
  4009. if (exec->comp->compact != NULL)
  4010. ret = xmlRegCompactPushString(exec, exec->comp, str, data);
  4011. else
  4012. ret = xmlRegExecPushStringInternal(exec, str, data, 1);
  4013. if (str != buf)
  4014. xmlFree(str);
  4015. return(ret);
  4016. }
  4017. /**
  4018. * xmlRegExecGetValues:
  4019. * @exec: a regexp execution context
  4020. * @err: error extraction or normal one
  4021. * @nbval: pointer to the number of accepted values IN/OUT
  4022. * @nbneg: return number of negative transitions
  4023. * @values: pointer to the array of acceptable values
  4024. * @terminal: return value if this was a terminal state
  4025. *
  4026. * Extract information from the regexp execution, internal routine to
  4027. * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
  4028. *
  4029. * Returns: 0 in case of success or -1 in case of error.
  4030. */
  4031. static int
  4032. xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
  4033. int *nbval, int *nbneg,
  4034. xmlChar **values, int *terminal) {
  4035. int maxval;
  4036. int nb = 0;
  4037. if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
  4038. (values == NULL) || (*nbval <= 0))
  4039. return(-1);
  4040. maxval = *nbval;
  4041. *nbval = 0;
  4042. *nbneg = 0;
  4043. if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
  4044. xmlRegexpPtr comp;
  4045. int target, i, state;
  4046. comp = exec->comp;
  4047. if (err) {
  4048. if (exec->errStateNo == -1) return(-1);
  4049. state = exec->errStateNo;
  4050. } else {
  4051. state = exec->index;
  4052. }
  4053. if (terminal != NULL) {
  4054. if (comp->compact[state * (comp->nbstrings + 1)] ==
  4055. XML_REGEXP_FINAL_STATE)
  4056. *terminal = 1;
  4057. else
  4058. *terminal = 0;
  4059. }
  4060. for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
  4061. target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
  4062. if ((target > 0) && (target <= comp->nbstates) &&
  4063. (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
  4064. XML_REGEXP_SINK_STATE)) {
  4065. values[nb++] = comp->stringMap[i];
  4066. (*nbval)++;
  4067. }
  4068. }
  4069. for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
  4070. target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
  4071. if ((target > 0) && (target <= comp->nbstates) &&
  4072. (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
  4073. XML_REGEXP_SINK_STATE)) {
  4074. values[nb++] = comp->stringMap[i];
  4075. (*nbneg)++;
  4076. }
  4077. }
  4078. } else {
  4079. int transno;
  4080. xmlRegTransPtr trans;
  4081. xmlRegAtomPtr atom;
  4082. xmlRegStatePtr state;
  4083. if (terminal != NULL) {
  4084. if (exec->state->type == XML_REGEXP_FINAL_STATE)
  4085. *terminal = 1;
  4086. else
  4087. *terminal = 0;
  4088. }
  4089. if (err) {
  4090. if (exec->errState == NULL) return(-1);
  4091. state = exec->errState;
  4092. } else {
  4093. if (exec->state == NULL) return(-1);
  4094. state = exec->state;
  4095. }
  4096. for (transno = 0;
  4097. (transno < state->nbTrans) && (nb < maxval);
  4098. transno++) {
  4099. trans = &state->trans[transno];
  4100. if (trans->to < 0)
  4101. continue;
  4102. atom = trans->atom;
  4103. if ((atom == NULL) || (atom->valuep == NULL))
  4104. continue;
  4105. if (trans->count == REGEXP_ALL_LAX_COUNTER) {
  4106. /* this should not be reached but ... */
  4107. TODO;
  4108. } else if (trans->count == REGEXP_ALL_COUNTER) {
  4109. /* this should not be reached but ... */
  4110. TODO;
  4111. } else if (trans->counter >= 0) {
  4112. xmlRegCounterPtr counter = NULL;
  4113. int count;
  4114. if (err)
  4115. count = exec->errCounts[trans->counter];
  4116. else
  4117. count = exec->counts[trans->counter];
  4118. if (exec->comp != NULL)
  4119. counter = &exec->comp->counters[trans->counter];
  4120. if ((counter == NULL) || (count < counter->max)) {
  4121. if (atom->neg)
  4122. values[nb++] = (xmlChar *) atom->valuep2;
  4123. else
  4124. values[nb++] = (xmlChar *) atom->valuep;
  4125. (*nbval)++;
  4126. }
  4127. } else {
  4128. if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NULL) &&
  4129. (exec->comp->states[trans->to]->type !=
  4130. XML_REGEXP_SINK_STATE)) {
  4131. if (atom->neg)
  4132. values[nb++] = (xmlChar *) atom->valuep2;
  4133. else
  4134. values[nb++] = (xmlChar *) atom->valuep;
  4135. (*nbval)++;
  4136. }
  4137. }
  4138. }
  4139. for (transno = 0;
  4140. (transno < state->nbTrans) && (nb < maxval);
  4141. transno++) {
  4142. trans = &state->trans[transno];
  4143. if (trans->to < 0)
  4144. continue;
  4145. atom = trans->atom;
  4146. if ((atom == NULL) || (atom->valuep == NULL))
  4147. continue;
  4148. if (trans->count == REGEXP_ALL_LAX_COUNTER) {
  4149. continue;
  4150. } else if (trans->count == REGEXP_ALL_COUNTER) {
  4151. continue;
  4152. } else if (trans->counter >= 0) {
  4153. continue;
  4154. } else {
  4155. if ((exec->comp->states[trans->to] != NULL) &&
  4156. (exec->comp->states[trans->to]->type ==
  4157. XML_REGEXP_SINK_STATE)) {
  4158. if (atom->neg)
  4159. values[nb++] = (xmlChar *) atom->valuep2;
  4160. else
  4161. values[nb++] = (xmlChar *) atom->valuep;
  4162. (*nbneg)++;
  4163. }
  4164. }
  4165. }
  4166. }
  4167. return(0);
  4168. }
  4169. /**
  4170. * xmlRegExecNextValues:
  4171. * @exec: a regexp execution context
  4172. * @nbval: pointer to the number of accepted values IN/OUT
  4173. * @nbneg: return number of negative transitions
  4174. * @values: pointer to the array of acceptable values
  4175. * @terminal: return value if this was a terminal state
  4176. *
  4177. * Extract information from the regexp execution,
  4178. * the parameter @values must point to an array of @nbval string pointers
  4179. * on return nbval will contain the number of possible strings in that
  4180. * state and the @values array will be updated with them. The string values
  4181. * returned will be freed with the @exec context and don't need to be
  4182. * deallocated.
  4183. *
  4184. * Returns: 0 in case of success or -1 in case of error.
  4185. */
  4186. int
  4187. xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
  4188. xmlChar **values, int *terminal) {
  4189. return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
  4190. }
  4191. /**
  4192. * xmlRegExecErrInfo:
  4193. * @exec: a regexp execution context generating an error
  4194. * @string: return value for the error string
  4195. * @nbval: pointer to the number of accepted values IN/OUT
  4196. * @nbneg: return number of negative transitions
  4197. * @values: pointer to the array of acceptable values
  4198. * @terminal: return value if this was a terminal state
  4199. *
  4200. * Extract error information from the regexp execution, the parameter
  4201. * @string will be updated with the value pushed and not accepted,
  4202. * the parameter @values must point to an array of @nbval string pointers
  4203. * on return nbval will contain the number of possible strings in that
  4204. * state and the @values array will be updated with them. The string values
  4205. * returned will be freed with the @exec context and don't need to be
  4206. * deallocated.
  4207. *
  4208. * Returns: 0 in case of success or -1 in case of error.
  4209. */
  4210. int
  4211. xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
  4212. int *nbval, int *nbneg, xmlChar **values, int *terminal) {
  4213. if (exec == NULL)
  4214. return(-1);
  4215. if (string != NULL) {
  4216. if (exec->status != 0)
  4217. *string = exec->errString;
  4218. else
  4219. *string = NULL;
  4220. }
  4221. return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
  4222. }
  4223. #ifdef DEBUG_ERR
  4224. static void testerr(xmlRegExecCtxtPtr exec) {
  4225. const xmlChar *string;
  4226. xmlChar *values[5];
  4227. int nb = 5;
  4228. int nbneg;
  4229. int terminal;
  4230. xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
  4231. }
  4232. #endif
  4233. #if 0
  4234. static int
  4235. xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
  4236. xmlRegTransPtr trans;
  4237. xmlRegAtomPtr atom;
  4238. int ret;
  4239. int codepoint, len;
  4240. if (exec == NULL)
  4241. return(-1);
  4242. if (exec->status != 0)
  4243. return(exec->status);
  4244. while ((exec->status == 0) &&
  4245. ((exec->inputString[exec->index] != 0) ||
  4246. (exec->state->type != XML_REGEXP_FINAL_STATE))) {
  4247. /*
  4248. * End of input on non-terminal state, rollback, however we may
  4249. * still have epsilon like transition for counted transitions
  4250. * on counters, in that case don't break too early.
  4251. */
  4252. if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
  4253. goto rollback;
  4254. exec->transcount = 0;
  4255. for (;exec->transno < exec->state->nbTrans;exec->transno++) {
  4256. trans = &exec->state->trans[exec->transno];
  4257. if (trans->to < 0)
  4258. continue;
  4259. atom = trans->atom;
  4260. ret = 0;
  4261. if (trans->count >= 0) {
  4262. int count;
  4263. xmlRegCounterPtr counter;
  4264. /*
  4265. * A counted transition.
  4266. */
  4267. count = exec->counts[trans->count];
  4268. counter = &exec->comp->counters[trans->count];
  4269. #ifdef DEBUG_REGEXP_EXEC
  4270. printf("testing count %d: val %d, min %d, max %d\n",
  4271. trans->count, count, counter->min, counter->max);
  4272. #endif
  4273. ret = ((count >= counter->min) && (count <= counter->max));
  4274. } else if (atom == NULL) {
  4275. fprintf(stderr, "epsilon transition left at runtime\n");
  4276. exec->status = -2;
  4277. break;
  4278. } else if (exec->inputString[exec->index] != 0) {
  4279. codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
  4280. ret = xmlRegCheckCharacter(atom, codepoint);
  4281. if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
  4282. xmlRegStatePtr to = exec->comp->states[trans->to];
  4283. /*
  4284. * this is a multiple input sequence
  4285. */
  4286. if (exec->state->nbTrans > exec->transno + 1) {
  4287. xmlFARegExecSave(exec);
  4288. }
  4289. exec->transcount = 1;
  4290. do {
  4291. /*
  4292. * Try to progress as much as possible on the input
  4293. */
  4294. if (exec->transcount == atom->max) {
  4295. break;
  4296. }
  4297. exec->index += len;
  4298. /*
  4299. * End of input: stop here
  4300. */
  4301. if (exec->inputString[exec->index] == 0) {
  4302. exec->index -= len;
  4303. break;
  4304. }
  4305. if (exec->transcount >= atom->min) {
  4306. int transno = exec->transno;
  4307. xmlRegStatePtr state = exec->state;
  4308. /*
  4309. * The transition is acceptable save it
  4310. */
  4311. exec->transno = -1; /* trick */
  4312. exec->state = to;
  4313. xmlFARegExecSave(exec);
  4314. exec->transno = transno;
  4315. exec->state = state;
  4316. }
  4317. codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
  4318. len);
  4319. ret = xmlRegCheckCharacter(atom, codepoint);
  4320. exec->transcount++;
  4321. } while (ret == 1);
  4322. if (exec->transcount < atom->min)
  4323. ret = 0;
  4324. /*
  4325. * If the last check failed but one transition was found
  4326. * possible, rollback
  4327. */
  4328. if (ret < 0)
  4329. ret = 0;
  4330. if (ret == 0) {
  4331. goto rollback;
  4332. }
  4333. }
  4334. }
  4335. if (ret == 1) {
  4336. if (exec->state->nbTrans > exec->transno + 1) {
  4337. xmlFARegExecSave(exec);
  4338. }
  4339. /*
  4340. * restart count for expressions like this ((abc){2})*
  4341. */
  4342. if (trans->count >= 0) {
  4343. #ifdef DEBUG_REGEXP_EXEC
  4344. printf("Reset count %d\n", trans->count);
  4345. #endif
  4346. exec->counts[trans->count] = 0;
  4347. }
  4348. if (trans->counter >= 0) {
  4349. #ifdef DEBUG_REGEXP_EXEC
  4350. printf("Increasing count %d\n", trans->counter);
  4351. #endif
  4352. exec->counts[trans->counter]++;
  4353. }
  4354. #ifdef DEBUG_REGEXP_EXEC
  4355. printf("entering state %d\n", trans->to);
  4356. #endif
  4357. exec->state = exec->comp->states[trans->to];
  4358. exec->transno = 0;
  4359. if (trans->atom != NULL) {
  4360. exec->index += len;
  4361. }
  4362. goto progress;
  4363. } else if (ret < 0) {
  4364. exec->status = -4;
  4365. break;
  4366. }
  4367. }
  4368. if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
  4369. rollback:
  4370. /*
  4371. * Failed to find a way out
  4372. */
  4373. exec->determinist = 0;
  4374. xmlFARegExecRollBack(exec);
  4375. }
  4376. progress:
  4377. continue;
  4378. }
  4379. }
  4380. #endif
  4381. /************************************************************************
  4382. * *
  4383. * Parser for the Schemas Datatype Regular Expressions *
  4384. * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs *
  4385. * *
  4386. ************************************************************************/
  4387. /**
  4388. * xmlFAIsChar:
  4389. * @ctxt: a regexp parser context
  4390. *
  4391. * [10] Char ::= [^.\?*+()|#x5B#x5D]
  4392. */
  4393. static int
  4394. xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
  4395. int cur;
  4396. int len;
  4397. cur = CUR_SCHAR(ctxt->cur, len);
  4398. if ((cur == '.') || (cur == '\\') || (cur == '?') ||
  4399. (cur == '*') || (cur == '+') || (cur == '(') ||
  4400. (cur == ')') || (cur == '|') || (cur == 0x5B) ||
  4401. (cur == 0x5D) || (cur == 0))
  4402. return(-1);
  4403. return(cur);
  4404. }
  4405. /**
  4406. * xmlFAParseCharProp:
  4407. * @ctxt: a regexp parser context
  4408. *
  4409. * [27] charProp ::= IsCategory | IsBlock
  4410. * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation |
  4411. * Separators | Symbols | Others
  4412. * [29] Letters ::= 'L' [ultmo]?
  4413. * [30] Marks ::= 'M' [nce]?
  4414. * [31] Numbers ::= 'N' [dlo]?
  4415. * [32] Punctuation ::= 'P' [cdseifo]?
  4416. * [33] Separators ::= 'Z' [slp]?
  4417. * [34] Symbols ::= 'S' [mcko]?
  4418. * [35] Others ::= 'C' [cfon]?
  4419. * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+
  4420. */
  4421. static void
  4422. xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
  4423. int cur;
  4424. xmlRegAtomType type = (xmlRegAtomType) 0;
  4425. xmlChar *blockName = NULL;
  4426. cur = CUR;
  4427. if (cur == 'L') {
  4428. NEXT;
  4429. cur = CUR;
  4430. if (cur == 'u') {
  4431. NEXT;
  4432. type = XML_REGEXP_LETTER_UPPERCASE;
  4433. } else if (cur == 'l') {
  4434. NEXT;
  4435. type = XML_REGEXP_LETTER_LOWERCASE;
  4436. } else if (cur == 't') {
  4437. NEXT;
  4438. type = XML_REGEXP_LETTER_TITLECASE;
  4439. } else if (cur == 'm') {
  4440. NEXT;
  4441. type = XML_REGEXP_LETTER_MODIFIER;
  4442. } else if (cur == 'o') {
  4443. NEXT;
  4444. type = XML_REGEXP_LETTER_OTHERS;
  4445. } else {
  4446. type = XML_REGEXP_LETTER;
  4447. }
  4448. } else if (cur == 'M') {
  4449. NEXT;
  4450. cur = CUR;
  4451. if (cur == 'n') {
  4452. NEXT;
  4453. /* nonspacing */
  4454. type = XML_REGEXP_MARK_NONSPACING;
  4455. } else if (cur == 'c') {
  4456. NEXT;
  4457. /* spacing combining */
  4458. type = XML_REGEXP_MARK_SPACECOMBINING;
  4459. } else if (cur == 'e') {
  4460. NEXT;
  4461. /* enclosing */
  4462. type = XML_REGEXP_MARK_ENCLOSING;
  4463. } else {
  4464. /* all marks */
  4465. type = XML_REGEXP_MARK;
  4466. }
  4467. } else if (cur == 'N') {
  4468. NEXT;
  4469. cur = CUR;
  4470. if (cur == 'd') {
  4471. NEXT;
  4472. /* digital */
  4473. type = XML_REGEXP_NUMBER_DECIMAL;
  4474. } else if (cur == 'l') {
  4475. NEXT;
  4476. /* letter */
  4477. type = XML_REGEXP_NUMBER_LETTER;
  4478. } else if (cur == 'o') {
  4479. NEXT;
  4480. /* other */
  4481. type = XML_REGEXP_NUMBER_OTHERS;
  4482. } else {
  4483. /* all numbers */
  4484. type = XML_REGEXP_NUMBER;
  4485. }
  4486. } else if (cur == 'P') {
  4487. NEXT;
  4488. cur = CUR;
  4489. if (cur == 'c') {
  4490. NEXT;
  4491. /* connector */
  4492. type = XML_REGEXP_PUNCT_CONNECTOR;
  4493. } else if (cur == 'd') {
  4494. NEXT;
  4495. /* dash */
  4496. type = XML_REGEXP_PUNCT_DASH;
  4497. } else if (cur == 's') {
  4498. NEXT;
  4499. /* open */
  4500. type = XML_REGEXP_PUNCT_OPEN;
  4501. } else if (cur == 'e') {
  4502. NEXT;
  4503. /* close */
  4504. type = XML_REGEXP_PUNCT_CLOSE;
  4505. } else if (cur == 'i') {
  4506. NEXT;
  4507. /* initial quote */
  4508. type = XML_REGEXP_PUNCT_INITQUOTE;
  4509. } else if (cur == 'f') {
  4510. NEXT;
  4511. /* final quote */
  4512. type = XML_REGEXP_PUNCT_FINQUOTE;
  4513. } else if (cur == 'o') {
  4514. NEXT;
  4515. /* other */
  4516. type = XML_REGEXP_PUNCT_OTHERS;
  4517. } else {
  4518. /* all punctuation */
  4519. type = XML_REGEXP_PUNCT;
  4520. }
  4521. } else if (cur == 'Z') {
  4522. NEXT;
  4523. cur = CUR;
  4524. if (cur == 's') {
  4525. NEXT;
  4526. /* space */
  4527. type = XML_REGEXP_SEPAR_SPACE;
  4528. } else if (cur == 'l') {
  4529. NEXT;
  4530. /* line */
  4531. type = XML_REGEXP_SEPAR_LINE;
  4532. } else if (cur == 'p') {
  4533. NEXT;
  4534. /* paragraph */
  4535. type = XML_REGEXP_SEPAR_PARA;
  4536. } else {
  4537. /* all separators */
  4538. type = XML_REGEXP_SEPAR;
  4539. }
  4540. } else if (cur == 'S') {
  4541. NEXT;
  4542. cur = CUR;
  4543. if (cur == 'm') {
  4544. NEXT;
  4545. type = XML_REGEXP_SYMBOL_MATH;
  4546. /* math */
  4547. } else if (cur == 'c') {
  4548. NEXT;
  4549. type = XML_REGEXP_SYMBOL_CURRENCY;
  4550. /* currency */
  4551. } else if (cur == 'k') {
  4552. NEXT;
  4553. type = XML_REGEXP_SYMBOL_MODIFIER;
  4554. /* modifiers */
  4555. } else if (cur == 'o') {
  4556. NEXT;
  4557. type = XML_REGEXP_SYMBOL_OTHERS;
  4558. /* other */
  4559. } else {
  4560. /* all symbols */
  4561. type = XML_REGEXP_SYMBOL;
  4562. }
  4563. } else if (cur == 'C') {
  4564. NEXT;
  4565. cur = CUR;
  4566. if (cur == 'c') {
  4567. NEXT;
  4568. /* control */
  4569. type = XML_REGEXP_OTHER_CONTROL;
  4570. } else if (cur == 'f') {
  4571. NEXT;
  4572. /* format */
  4573. type = XML_REGEXP_OTHER_FORMAT;
  4574. } else if (cur == 'o') {
  4575. NEXT;
  4576. /* private use */
  4577. type = XML_REGEXP_OTHER_PRIVATE;
  4578. } else if (cur == 'n') {
  4579. NEXT;
  4580. /* not assigned */
  4581. type = XML_REGEXP_OTHER_NA;
  4582. } else {
  4583. /* all others */
  4584. type = XML_REGEXP_OTHER;
  4585. }
  4586. } else if (cur == 'I') {
  4587. const xmlChar *start;
  4588. NEXT;
  4589. cur = CUR;
  4590. if (cur != 's') {
  4591. ERROR("IsXXXX expected");
  4592. return;
  4593. }
  4594. NEXT;
  4595. start = ctxt->cur;
  4596. cur = CUR;
  4597. if (((cur >= 'a') && (cur <= 'z')) ||
  4598. ((cur >= 'A') && (cur <= 'Z')) ||
  4599. ((cur >= '0') && (cur <= '9')) ||
  4600. (cur == 0x2D)) {
  4601. NEXT;
  4602. cur = CUR;
  4603. while (((cur >= 'a') && (cur <= 'z')) ||
  4604. ((cur >= 'A') && (cur <= 'Z')) ||
  4605. ((cur >= '0') && (cur <= '9')) ||
  4606. (cur == 0x2D)) {
  4607. NEXT;
  4608. cur = CUR;
  4609. }
  4610. }
  4611. type = XML_REGEXP_BLOCK_NAME;
  4612. blockName = xmlStrndup(start, ctxt->cur - start);
  4613. } else {
  4614. ERROR("Unknown char property");
  4615. return;
  4616. }
  4617. if (ctxt->atom == NULL) {
  4618. ctxt->atom = xmlRegNewAtom(ctxt, type);
  4619. if (ctxt->atom != NULL)
  4620. ctxt->atom->valuep = blockName;
  4621. } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
  4622. xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
  4623. type, 0, 0, blockName);
  4624. }
  4625. }
  4626. /**
  4627. * xmlFAParseCharClassEsc:
  4628. * @ctxt: a regexp parser context
  4629. *
  4630. * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
  4631. * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
  4632. * [25] catEsc ::= '\p{' charProp '}'
  4633. * [26] complEsc ::= '\P{' charProp '}'
  4634. * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
  4635. */
  4636. static void
  4637. xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
  4638. int cur;
  4639. if (CUR == '.') {
  4640. if (ctxt->atom == NULL) {
  4641. ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
  4642. } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
  4643. xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
  4644. XML_REGEXP_ANYCHAR, 0, 0, NULL);
  4645. }
  4646. NEXT;
  4647. return;
  4648. }
  4649. if (CUR != '\\') {
  4650. ERROR("Escaped sequence: expecting \\");
  4651. return;
  4652. }
  4653. NEXT;
  4654. cur = CUR;
  4655. if (cur == 'p') {
  4656. NEXT;
  4657. if (CUR != '{') {
  4658. ERROR("Expecting '{'");
  4659. return;
  4660. }
  4661. NEXT;
  4662. xmlFAParseCharProp(ctxt);
  4663. if (CUR != '}') {
  4664. ERROR("Expecting '}'");
  4665. return;
  4666. }
  4667. NEXT;
  4668. } else if (cur == 'P') {
  4669. NEXT;
  4670. if (CUR != '{') {
  4671. ERROR("Expecting '{'");
  4672. return;
  4673. }
  4674. NEXT;
  4675. xmlFAParseCharProp(ctxt);
  4676. if (ctxt->atom != NULL)
  4677. ctxt->atom->neg = 1;
  4678. if (CUR != '}') {
  4679. ERROR("Expecting '}'");
  4680. return;
  4681. }
  4682. NEXT;
  4683. } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
  4684. (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
  4685. (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
  4686. (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
  4687. (cur == 0x5E)) {
  4688. if (ctxt->atom == NULL) {
  4689. ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
  4690. if (ctxt->atom != NULL) {
  4691. switch (cur) {
  4692. case 'n':
  4693. ctxt->atom->codepoint = '\n';
  4694. break;
  4695. case 'r':
  4696. ctxt->atom->codepoint = '\r';
  4697. break;
  4698. case 't':
  4699. ctxt->atom->codepoint = '\t';
  4700. break;
  4701. default:
  4702. ctxt->atom->codepoint = cur;
  4703. }
  4704. }
  4705. } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
  4706. switch (cur) {
  4707. case 'n':
  4708. cur = '\n';
  4709. break;
  4710. case 'r':
  4711. cur = '\r';
  4712. break;
  4713. case 't':
  4714. cur = '\t';
  4715. break;
  4716. }
  4717. xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
  4718. XML_REGEXP_CHARVAL, cur, cur, NULL);
  4719. }
  4720. NEXT;
  4721. } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
  4722. (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
  4723. (cur == 'w') || (cur == 'W')) {
  4724. xmlRegAtomType type = XML_REGEXP_ANYSPACE;
  4725. switch (cur) {
  4726. case 's':
  4727. type = XML_REGEXP_ANYSPACE;
  4728. break;
  4729. case 'S':
  4730. type = XML_REGEXP_NOTSPACE;
  4731. break;
  4732. case 'i':
  4733. type = XML_REGEXP_INITNAME;
  4734. break;
  4735. case 'I':
  4736. type = XML_REGEXP_NOTINITNAME;
  4737. break;
  4738. case 'c':
  4739. type = XML_REGEXP_NAMECHAR;
  4740. break;
  4741. case 'C':
  4742. type = XML_REGEXP_NOTNAMECHAR;
  4743. break;
  4744. case 'd':
  4745. type = XML_REGEXP_DECIMAL;
  4746. break;
  4747. case 'D':
  4748. type = XML_REGEXP_NOTDECIMAL;
  4749. break;
  4750. case 'w':
  4751. type = XML_REGEXP_REALCHAR;
  4752. break;
  4753. case 'W':
  4754. type = XML_REGEXP_NOTREALCHAR;
  4755. break;
  4756. }
  4757. NEXT;
  4758. if (ctxt->atom == NULL) {
  4759. ctxt->atom = xmlRegNewAtom(ctxt, type);
  4760. } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
  4761. xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
  4762. type, 0, 0, NULL);
  4763. }
  4764. } else {
  4765. ERROR("Wrong escape sequence, misuse of character '\\'");
  4766. }
  4767. }
  4768. /**
  4769. * xmlFAParseCharRange:
  4770. * @ctxt: a regexp parser context
  4771. *
  4772. * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash
  4773. * [18] seRange ::= charOrEsc '-' charOrEsc
  4774. * [20] charOrEsc ::= XmlChar | SingleCharEsc
  4775. * [21] XmlChar ::= [^\#x2D#x5B#x5D]
  4776. * [22] XmlCharIncDash ::= [^\#x5B#x5D]
  4777. */
  4778. static void
  4779. xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
  4780. int cur, len;
  4781. int start = -1;
  4782. int end = -1;
  4783. if (CUR == '\0') {
  4784. ERROR("Expecting ']'");
  4785. return;
  4786. }
  4787. cur = CUR;
  4788. if (cur == '\\') {
  4789. NEXT;
  4790. cur = CUR;
  4791. switch (cur) {
  4792. case 'n': start = 0xA; break;
  4793. case 'r': start = 0xD; break;
  4794. case 't': start = 0x9; break;
  4795. case '\\': case '|': case '.': case '-': case '^': case '?':
  4796. case '*': case '+': case '{': case '}': case '(': case ')':
  4797. case '[': case ']':
  4798. start = cur; break;
  4799. default:
  4800. ERROR("Invalid escape value");
  4801. return;
  4802. }
  4803. end = start;
  4804. len = 1;
  4805. } else if ((cur != 0x5B) && (cur != 0x5D)) {
  4806. end = start = CUR_SCHAR(ctxt->cur, len);
  4807. } else {
  4808. ERROR("Expecting a char range");
  4809. return;
  4810. }
  4811. /*
  4812. * Since we are "inside" a range, we can assume ctxt->cur is past
  4813. * the start of ctxt->string, and PREV should be safe
  4814. */
  4815. if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
  4816. NEXTL(len);
  4817. return;
  4818. }
  4819. NEXTL(len);
  4820. cur = CUR;
  4821. if ((cur != '-') || (NXT(1) == ']')) {
  4822. xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
  4823. XML_REGEXP_CHARVAL, start, end, NULL);
  4824. return;
  4825. }
  4826. NEXT;
  4827. cur = CUR;
  4828. if (cur == '\\') {
  4829. NEXT;
  4830. cur = CUR;
  4831. switch (cur) {
  4832. case 'n': end = 0xA; break;
  4833. case 'r': end = 0xD; break;
  4834. case 't': end = 0x9; break;
  4835. case '\\': case '|': case '.': case '-': case '^': case '?':
  4836. case '*': case '+': case '{': case '}': case '(': case ')':
  4837. case '[': case ']':
  4838. end = cur; break;
  4839. default:
  4840. ERROR("Invalid escape value");
  4841. return;
  4842. }
  4843. len = 1;
  4844. } else if ((cur != '\0') && (cur != 0x5B) && (cur != 0x5D)) {
  4845. end = CUR_SCHAR(ctxt->cur, len);
  4846. } else {
  4847. ERROR("Expecting the end of a char range");
  4848. return;
  4849. }
  4850. /* TODO check that the values are acceptable character ranges for XML */
  4851. if (end < start) {
  4852. ERROR("End of range is before start of range");
  4853. } else {
  4854. NEXTL(len);
  4855. xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
  4856. XML_REGEXP_CHARVAL, start, end, NULL);
  4857. }
  4858. return;
  4859. }
  4860. /**
  4861. * xmlFAParsePosCharGroup:
  4862. * @ctxt: a regexp parser context
  4863. *
  4864. * [14] posCharGroup ::= ( charRange | charClassEsc )+
  4865. */
  4866. static void
  4867. xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
  4868. do {
  4869. if (CUR == '\\') {
  4870. xmlFAParseCharClassEsc(ctxt);
  4871. } else {
  4872. xmlFAParseCharRange(ctxt);
  4873. }
  4874. } while ((CUR != ']') && (CUR != '-') &&
  4875. (CUR != 0) && (ctxt->error == 0));
  4876. }
  4877. /**
  4878. * xmlFAParseCharGroup:
  4879. * @ctxt: a regexp parser context
  4880. *
  4881. * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub
  4882. * [15] negCharGroup ::= '^' posCharGroup
  4883. * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
  4884. * [12] charClassExpr ::= '[' charGroup ']'
  4885. */
  4886. static void
  4887. xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
  4888. int neg = ctxt->neg;
  4889. if (CUR == '^') {
  4890. NEXT;
  4891. ctxt->neg = !ctxt->neg;
  4892. xmlFAParsePosCharGroup(ctxt);
  4893. ctxt->neg = neg;
  4894. }
  4895. while ((CUR != ']') && (ctxt->error == 0)) {
  4896. if ((CUR == '-') && (NXT(1) == '[')) {
  4897. NEXT; /* eat the '-' */
  4898. NEXT; /* eat the '[' */
  4899. ctxt->neg = 2;
  4900. xmlFAParseCharGroup(ctxt);
  4901. ctxt->neg = neg;
  4902. if (CUR == ']') {
  4903. NEXT;
  4904. } else {
  4905. ERROR("charClassExpr: ']' expected");
  4906. }
  4907. break;
  4908. } else {
  4909. xmlFAParsePosCharGroup(ctxt);
  4910. }
  4911. }
  4912. }
  4913. /**
  4914. * xmlFAParseCharClass:
  4915. * @ctxt: a regexp parser context
  4916. *
  4917. * [11] charClass ::= charClassEsc | charClassExpr
  4918. * [12] charClassExpr ::= '[' charGroup ']'
  4919. */
  4920. static void
  4921. xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
  4922. if (CUR == '[') {
  4923. NEXT;
  4924. ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
  4925. if (ctxt->atom == NULL)
  4926. return;
  4927. xmlFAParseCharGroup(ctxt);
  4928. if (CUR == ']') {
  4929. NEXT;
  4930. } else {
  4931. ERROR("xmlFAParseCharClass: ']' expected");
  4932. }
  4933. } else {
  4934. xmlFAParseCharClassEsc(ctxt);
  4935. }
  4936. }
  4937. /**
  4938. * xmlFAParseQuantExact:
  4939. * @ctxt: a regexp parser context
  4940. *
  4941. * [8] QuantExact ::= [0-9]+
  4942. *
  4943. * Returns 0 if success or -1 in case of error
  4944. */
  4945. static int
  4946. xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
  4947. int ret = 0;
  4948. int ok = 0;
  4949. int overflow = 0;
  4950. while ((CUR >= '0') && (CUR <= '9')) {
  4951. if (ret > INT_MAX / 10) {
  4952. overflow = 1;
  4953. } else {
  4954. int digit = CUR - '0';
  4955. ret *= 10;
  4956. if (ret > INT_MAX - digit)
  4957. overflow = 1;
  4958. else
  4959. ret += digit;
  4960. }
  4961. ok = 1;
  4962. NEXT;
  4963. }
  4964. if ((ok != 1) || (overflow == 1)) {
  4965. return(-1);
  4966. }
  4967. return(ret);
  4968. }
  4969. /**
  4970. * xmlFAParseQuantifier:
  4971. * @ctxt: a regexp parser context
  4972. *
  4973. * [4] quantifier ::= [?*+] | ( '{' quantity '}' )
  4974. * [5] quantity ::= quantRange | quantMin | QuantExact
  4975. * [6] quantRange ::= QuantExact ',' QuantExact
  4976. * [7] quantMin ::= QuantExact ','
  4977. * [8] QuantExact ::= [0-9]+
  4978. */
  4979. static int
  4980. xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
  4981. int cur;
  4982. cur = CUR;
  4983. if ((cur == '?') || (cur == '*') || (cur == '+')) {
  4984. if (ctxt->atom != NULL) {
  4985. if (cur == '?')
  4986. ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
  4987. else if (cur == '*')
  4988. ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
  4989. else if (cur == '+')
  4990. ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
  4991. }
  4992. NEXT;
  4993. return(1);
  4994. }
  4995. if (cur == '{') {
  4996. int min = 0, max = 0;
  4997. NEXT;
  4998. cur = xmlFAParseQuantExact(ctxt);
  4999. if (cur >= 0)
  5000. min = cur;
  5001. else {
  5002. ERROR("Improper quantifier");
  5003. }
  5004. if (CUR == ',') {
  5005. NEXT;
  5006. if (CUR == '}')
  5007. max = INT_MAX;
  5008. else {
  5009. cur = xmlFAParseQuantExact(ctxt);
  5010. if (cur >= 0)
  5011. max = cur;
  5012. else {
  5013. ERROR("Improper quantifier");
  5014. }
  5015. }
  5016. }
  5017. if (CUR == '}') {
  5018. NEXT;
  5019. } else {
  5020. ERROR("Unterminated quantifier");
  5021. }
  5022. if (max == 0)
  5023. max = min;
  5024. if (ctxt->atom != NULL) {
  5025. ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
  5026. ctxt->atom->min = min;
  5027. ctxt->atom->max = max;
  5028. }
  5029. return(1);
  5030. }
  5031. return(0);
  5032. }
  5033. /**
  5034. * xmlFAParseAtom:
  5035. * @ctxt: a regexp parser context
  5036. *
  5037. * [9] atom ::= Char | charClass | ( '(' regExp ')' )
  5038. */
  5039. static int
  5040. xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
  5041. int codepoint, len;
  5042. codepoint = xmlFAIsChar(ctxt);
  5043. if (codepoint > 0) {
  5044. ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
  5045. if (ctxt->atom == NULL)
  5046. return(-1);
  5047. codepoint = CUR_SCHAR(ctxt->cur, len);
  5048. ctxt->atom->codepoint = codepoint;
  5049. NEXTL(len);
  5050. return(1);
  5051. } else if (CUR == '|') {
  5052. return(0);
  5053. } else if (CUR == 0) {
  5054. return(0);
  5055. } else if (CUR == ')') {
  5056. return(0);
  5057. } else if (CUR == '(') {
  5058. xmlRegStatePtr start, oldend, start0;
  5059. NEXT;
  5060. if (ctxt->depth >= 50) {
  5061. ERROR("xmlFAParseAtom: maximum nesting depth exceeded");
  5062. return(-1);
  5063. }
  5064. /*
  5065. * this extra Epsilon transition is needed if we count with 0 allowed
  5066. * unfortunately this can't be known at that point
  5067. */
  5068. xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
  5069. start0 = ctxt->state;
  5070. xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
  5071. start = ctxt->state;
  5072. oldend = ctxt->end;
  5073. ctxt->end = NULL;
  5074. ctxt->atom = NULL;
  5075. ctxt->depth++;
  5076. xmlFAParseRegExp(ctxt, 0);
  5077. ctxt->depth--;
  5078. if (CUR == ')') {
  5079. NEXT;
  5080. } else {
  5081. ERROR("xmlFAParseAtom: expecting ')'");
  5082. }
  5083. ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
  5084. if (ctxt->atom == NULL)
  5085. return(-1);
  5086. ctxt->atom->start = start;
  5087. ctxt->atom->start0 = start0;
  5088. ctxt->atom->stop = ctxt->state;
  5089. ctxt->end = oldend;
  5090. return(1);
  5091. } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
  5092. xmlFAParseCharClass(ctxt);
  5093. return(1);
  5094. }
  5095. return(0);
  5096. }
  5097. /**
  5098. * xmlFAParsePiece:
  5099. * @ctxt: a regexp parser context
  5100. *
  5101. * [3] piece ::= atom quantifier?
  5102. */
  5103. static int
  5104. xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
  5105. int ret;
  5106. ctxt->atom = NULL;
  5107. ret = xmlFAParseAtom(ctxt);
  5108. if (ret == 0)
  5109. return(0);
  5110. if (ctxt->atom == NULL) {
  5111. ERROR("internal: no atom generated");
  5112. }
  5113. xmlFAParseQuantifier(ctxt);
  5114. return(1);
  5115. }
  5116. /**
  5117. * xmlFAParseBranch:
  5118. * @ctxt: a regexp parser context
  5119. * @to: optional target to the end of the branch
  5120. *
  5121. * @to is used to optimize by removing duplicate path in automata
  5122. * in expressions like (a|b)(c|d)
  5123. *
  5124. * [2] branch ::= piece*
  5125. */
  5126. static int
  5127. xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
  5128. xmlRegStatePtr previous;
  5129. int ret;
  5130. previous = ctxt->state;
  5131. ret = xmlFAParsePiece(ctxt);
  5132. if (ret == 0) {
  5133. /* Empty branch */
  5134. xmlFAGenerateEpsilonTransition(ctxt, previous, to);
  5135. } else {
  5136. if (xmlFAGenerateTransitions(ctxt, previous,
  5137. (CUR=='|' || CUR==')' || CUR==0) ? to : NULL, ctxt->atom) < 0)
  5138. return(-1);
  5139. previous = ctxt->state;
  5140. ctxt->atom = NULL;
  5141. }
  5142. while ((ret != 0) && (ctxt->error == 0)) {
  5143. ret = xmlFAParsePiece(ctxt);
  5144. if (ret != 0) {
  5145. if (xmlFAGenerateTransitions(ctxt, previous,
  5146. (CUR=='|' || CUR==')' || CUR==0) ? to : NULL,
  5147. ctxt->atom) < 0)
  5148. return(-1);
  5149. previous = ctxt->state;
  5150. ctxt->atom = NULL;
  5151. }
  5152. }
  5153. return(0);
  5154. }
  5155. /**
  5156. * xmlFAParseRegExp:
  5157. * @ctxt: a regexp parser context
  5158. * @top: is this the top-level expression ?
  5159. *
  5160. * [1] regExp ::= branch ( '|' branch )*
  5161. */
  5162. static void
  5163. xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
  5164. xmlRegStatePtr start, end;
  5165. /* if not top start should have been generated by an epsilon trans */
  5166. start = ctxt->state;
  5167. ctxt->end = NULL;
  5168. xmlFAParseBranch(ctxt, NULL);
  5169. if (top) {
  5170. #ifdef DEBUG_REGEXP_GRAPH
  5171. printf("State %d is final\n", ctxt->state->no);
  5172. #endif
  5173. ctxt->state->type = XML_REGEXP_FINAL_STATE;
  5174. }
  5175. if (CUR != '|') {
  5176. ctxt->end = ctxt->state;
  5177. return;
  5178. }
  5179. end = ctxt->state;
  5180. while ((CUR == '|') && (ctxt->error == 0)) {
  5181. NEXT;
  5182. ctxt->state = start;
  5183. ctxt->end = NULL;
  5184. xmlFAParseBranch(ctxt, end);
  5185. }
  5186. if (!top) {
  5187. ctxt->state = end;
  5188. ctxt->end = end;
  5189. }
  5190. }
  5191. /************************************************************************
  5192. * *
  5193. * The basic API *
  5194. * *
  5195. ************************************************************************/
  5196. /**
  5197. * xmlRegexpPrint:
  5198. * @output: the file for the output debug
  5199. * @regexp: the compiled regexp
  5200. *
  5201. * Print the content of the compiled regular expression
  5202. */
  5203. void
  5204. xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
  5205. int i;
  5206. if (output == NULL)
  5207. return;
  5208. fprintf(output, " regexp: ");
  5209. if (regexp == NULL) {
  5210. fprintf(output, "NULL\n");
  5211. return;
  5212. }
  5213. fprintf(output, "'%s' ", regexp->string);
  5214. fprintf(output, "\n");
  5215. fprintf(output, "%d atoms:\n", regexp->nbAtoms);
  5216. for (i = 0;i < regexp->nbAtoms; i++) {
  5217. fprintf(output, " %02d ", i);
  5218. xmlRegPrintAtom(output, regexp->atoms[i]);
  5219. }
  5220. fprintf(output, "%d states:", regexp->nbStates);
  5221. fprintf(output, "\n");
  5222. for (i = 0;i < regexp->nbStates; i++) {
  5223. xmlRegPrintState(output, regexp->states[i]);
  5224. }
  5225. fprintf(output, "%d counters:\n", regexp->nbCounters);
  5226. for (i = 0;i < regexp->nbCounters; i++) {
  5227. fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
  5228. regexp->counters[i].max);
  5229. }
  5230. }
  5231. /**
  5232. * xmlRegexpCompile:
  5233. * @regexp: a regular expression string
  5234. *
  5235. * Parses a regular expression conforming to XML Schemas Part 2 Datatype
  5236. * Appendix F and builds an automata suitable for testing strings against
  5237. * that regular expression
  5238. *
  5239. * Returns the compiled expression or NULL in case of error
  5240. */
  5241. xmlRegexpPtr
  5242. xmlRegexpCompile(const xmlChar *regexp) {
  5243. xmlRegexpPtr ret;
  5244. xmlRegParserCtxtPtr ctxt;
  5245. ctxt = xmlRegNewParserCtxt(regexp);
  5246. if (ctxt == NULL)
  5247. return(NULL);
  5248. /* initialize the parser */
  5249. ctxt->end = NULL;
  5250. ctxt->start = ctxt->state = xmlRegNewState(ctxt);
  5251. xmlRegStatePush(ctxt, ctxt->start);
  5252. /* parse the expression building an automata */
  5253. xmlFAParseRegExp(ctxt, 1);
  5254. if (CUR != 0) {
  5255. ERROR("xmlFAParseRegExp: extra characters");
  5256. }
  5257. if (ctxt->error != 0) {
  5258. xmlRegFreeParserCtxt(ctxt);
  5259. return(NULL);
  5260. }
  5261. ctxt->end = ctxt->state;
  5262. ctxt->start->type = XML_REGEXP_START_STATE;
  5263. ctxt->end->type = XML_REGEXP_FINAL_STATE;
  5264. /* remove the Epsilon except for counted transitions */
  5265. xmlFAEliminateEpsilonTransitions(ctxt);
  5266. if (ctxt->error != 0) {
  5267. xmlRegFreeParserCtxt(ctxt);
  5268. return(NULL);
  5269. }
  5270. ret = xmlRegEpxFromParse(ctxt);
  5271. xmlRegFreeParserCtxt(ctxt);
  5272. return(ret);
  5273. }
  5274. /**
  5275. * xmlRegexpExec:
  5276. * @comp: the compiled regular expression
  5277. * @content: the value to check against the regular expression
  5278. *
  5279. * Check if the regular expression generates the value
  5280. *
  5281. * Returns 1 if it matches, 0 if not and a negative value in case of error
  5282. */
  5283. int
  5284. xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
  5285. if ((comp == NULL) || (content == NULL))
  5286. return(-1);
  5287. return(xmlFARegExec(comp, content));
  5288. }
  5289. /**
  5290. * xmlRegexpIsDeterminist:
  5291. * @comp: the compiled regular expression
  5292. *
  5293. * Check if the regular expression is determinist
  5294. *
  5295. * Returns 1 if it yes, 0 if not and a negative value in case of error
  5296. */
  5297. int
  5298. xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
  5299. xmlAutomataPtr am;
  5300. int ret;
  5301. if (comp == NULL)
  5302. return(-1);
  5303. if (comp->determinist != -1)
  5304. return(comp->determinist);
  5305. am = xmlNewAutomata();
  5306. if (am == NULL)
  5307. return(-1);
  5308. if (am->states != NULL) {
  5309. int i;
  5310. for (i = 0;i < am->nbStates;i++)
  5311. xmlRegFreeState(am->states[i]);
  5312. xmlFree(am->states);
  5313. }
  5314. am->nbAtoms = comp->nbAtoms;
  5315. am->atoms = comp->atoms;
  5316. am->nbStates = comp->nbStates;
  5317. am->states = comp->states;
  5318. am->determinist = -1;
  5319. am->flags = comp->flags;
  5320. ret = xmlFAComputesDeterminism(am);
  5321. am->atoms = NULL;
  5322. am->states = NULL;
  5323. xmlFreeAutomata(am);
  5324. comp->determinist = ret;
  5325. return(ret);
  5326. }
  5327. /**
  5328. * xmlRegFreeRegexp:
  5329. * @regexp: the regexp
  5330. *
  5331. * Free a regexp
  5332. */
  5333. void
  5334. xmlRegFreeRegexp(xmlRegexpPtr regexp) {
  5335. int i;
  5336. if (regexp == NULL)
  5337. return;
  5338. if (regexp->string != NULL)
  5339. xmlFree(regexp->string);
  5340. if (regexp->states != NULL) {
  5341. for (i = 0;i < regexp->nbStates;i++)
  5342. xmlRegFreeState(regexp->states[i]);
  5343. xmlFree(regexp->states);
  5344. }
  5345. if (regexp->atoms != NULL) {
  5346. for (i = 0;i < regexp->nbAtoms;i++)
  5347. xmlRegFreeAtom(regexp->atoms[i]);
  5348. xmlFree(regexp->atoms);
  5349. }
  5350. if (regexp->counters != NULL)
  5351. xmlFree(regexp->counters);
  5352. if (regexp->compact != NULL)
  5353. xmlFree(regexp->compact);
  5354. if (regexp->transdata != NULL)
  5355. xmlFree(regexp->transdata);
  5356. if (regexp->stringMap != NULL) {
  5357. for (i = 0; i < regexp->nbstrings;i++)
  5358. xmlFree(regexp->stringMap[i]);
  5359. xmlFree(regexp->stringMap);
  5360. }
  5361. xmlFree(regexp);
  5362. }
  5363. #ifdef LIBXML_AUTOMATA_ENABLED
  5364. /************************************************************************
  5365. * *
  5366. * The Automata interface *
  5367. * *
  5368. ************************************************************************/
  5369. /**
  5370. * xmlNewAutomata:
  5371. *
  5372. * Create a new automata
  5373. *
  5374. * Returns the new object or NULL in case of failure
  5375. */
  5376. xmlAutomataPtr
  5377. xmlNewAutomata(void) {
  5378. xmlAutomataPtr ctxt;
  5379. ctxt = xmlRegNewParserCtxt(NULL);
  5380. if (ctxt == NULL)
  5381. return(NULL);
  5382. /* initialize the parser */
  5383. ctxt->end = NULL;
  5384. ctxt->start = ctxt->state = xmlRegNewState(ctxt);
  5385. if (ctxt->start == NULL) {
  5386. xmlFreeAutomata(ctxt);
  5387. return(NULL);
  5388. }
  5389. ctxt->start->type = XML_REGEXP_START_STATE;
  5390. if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
  5391. xmlRegFreeState(ctxt->start);
  5392. xmlFreeAutomata(ctxt);
  5393. return(NULL);
  5394. }
  5395. ctxt->flags = 0;
  5396. return(ctxt);
  5397. }
  5398. /**
  5399. * xmlFreeAutomata:
  5400. * @am: an automata
  5401. *
  5402. * Free an automata
  5403. */
  5404. void
  5405. xmlFreeAutomata(xmlAutomataPtr am) {
  5406. if (am == NULL)
  5407. return;
  5408. xmlRegFreeParserCtxt(am);
  5409. }
  5410. /**
  5411. * xmlAutomataSetFlags:
  5412. * @am: an automata
  5413. * @flags: a set of internal flags
  5414. *
  5415. * Set some flags on the automata
  5416. */
  5417. void
  5418. xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
  5419. if (am == NULL)
  5420. return;
  5421. am->flags |= flags;
  5422. }
  5423. /**
  5424. * xmlAutomataGetInitState:
  5425. * @am: an automata
  5426. *
  5427. * Initial state lookup
  5428. *
  5429. * Returns the initial state of the automata
  5430. */
  5431. xmlAutomataStatePtr
  5432. xmlAutomataGetInitState(xmlAutomataPtr am) {
  5433. if (am == NULL)
  5434. return(NULL);
  5435. return(am->start);
  5436. }
  5437. /**
  5438. * xmlAutomataSetFinalState:
  5439. * @am: an automata
  5440. * @state: a state in this automata
  5441. *
  5442. * Makes that state a final state
  5443. *
  5444. * Returns 0 or -1 in case of error
  5445. */
  5446. int
  5447. xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
  5448. if ((am == NULL) || (state == NULL))
  5449. return(-1);
  5450. state->type = XML_REGEXP_FINAL_STATE;
  5451. return(0);
  5452. }
  5453. /**
  5454. * xmlAutomataNewTransition:
  5455. * @am: an automata
  5456. * @from: the starting point of the transition
  5457. * @to: the target point of the transition or NULL
  5458. * @token: the input string associated to that transition
  5459. * @data: data passed to the callback function if the transition is activated
  5460. *
  5461. * If @to is NULL, this creates first a new target state in the automata
  5462. * and then adds a transition from the @from state to the target state
  5463. * activated by the value of @token
  5464. *
  5465. * Returns the target state or NULL in case of error
  5466. */
  5467. xmlAutomataStatePtr
  5468. xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5469. xmlAutomataStatePtr to, const xmlChar *token,
  5470. void *data) {
  5471. xmlRegAtomPtr atom;
  5472. if ((am == NULL) || (from == NULL) || (token == NULL))
  5473. return(NULL);
  5474. atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
  5475. if (atom == NULL)
  5476. return(NULL);
  5477. atom->data = data;
  5478. atom->valuep = xmlStrdup(token);
  5479. if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
  5480. xmlRegFreeAtom(atom);
  5481. return(NULL);
  5482. }
  5483. if (to == NULL)
  5484. return(am->state);
  5485. return(to);
  5486. }
  5487. /**
  5488. * xmlAutomataNewTransition2:
  5489. * @am: an automata
  5490. * @from: the starting point of the transition
  5491. * @to: the target point of the transition or NULL
  5492. * @token: the first input string associated to that transition
  5493. * @token2: the second input string associated to that transition
  5494. * @data: data passed to the callback function if the transition is activated
  5495. *
  5496. * If @to is NULL, this creates first a new target state in the automata
  5497. * and then adds a transition from the @from state to the target state
  5498. * activated by the value of @token
  5499. *
  5500. * Returns the target state or NULL in case of error
  5501. */
  5502. xmlAutomataStatePtr
  5503. xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5504. xmlAutomataStatePtr to, const xmlChar *token,
  5505. const xmlChar *token2, void *data) {
  5506. xmlRegAtomPtr atom;
  5507. if ((am == NULL) || (from == NULL) || (token == NULL))
  5508. return(NULL);
  5509. atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
  5510. if (atom == NULL)
  5511. return(NULL);
  5512. atom->data = data;
  5513. if ((token2 == NULL) || (*token2 == 0)) {
  5514. atom->valuep = xmlStrdup(token);
  5515. } else {
  5516. int lenn, lenp;
  5517. xmlChar *str;
  5518. lenn = strlen((char *) token2);
  5519. lenp = strlen((char *) token);
  5520. str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
  5521. if (str == NULL) {
  5522. xmlRegFreeAtom(atom);
  5523. return(NULL);
  5524. }
  5525. memcpy(&str[0], token, lenp);
  5526. str[lenp] = '|';
  5527. memcpy(&str[lenp + 1], token2, lenn);
  5528. str[lenn + lenp + 1] = 0;
  5529. atom->valuep = str;
  5530. }
  5531. if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
  5532. xmlRegFreeAtom(atom);
  5533. return(NULL);
  5534. }
  5535. if (to == NULL)
  5536. return(am->state);
  5537. return(to);
  5538. }
  5539. /**
  5540. * xmlAutomataNewNegTrans:
  5541. * @am: an automata
  5542. * @from: the starting point of the transition
  5543. * @to: the target point of the transition or NULL
  5544. * @token: the first input string associated to that transition
  5545. * @token2: the second input string associated to that transition
  5546. * @data: data passed to the callback function if the transition is activated
  5547. *
  5548. * If @to is NULL, this creates first a new target state in the automata
  5549. * and then adds a transition from the @from state to the target state
  5550. * activated by any value except (@token,@token2)
  5551. * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
  5552. # the semantic of XSD ##other
  5553. *
  5554. * Returns the target state or NULL in case of error
  5555. */
  5556. xmlAutomataStatePtr
  5557. xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5558. xmlAutomataStatePtr to, const xmlChar *token,
  5559. const xmlChar *token2, void *data) {
  5560. xmlRegAtomPtr atom;
  5561. xmlChar err_msg[200];
  5562. if ((am == NULL) || (from == NULL) || (token == NULL))
  5563. return(NULL);
  5564. atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
  5565. if (atom == NULL)
  5566. return(NULL);
  5567. atom->data = data;
  5568. atom->neg = 1;
  5569. if ((token2 == NULL) || (*token2 == 0)) {
  5570. atom->valuep = xmlStrdup(token);
  5571. } else {
  5572. int lenn, lenp;
  5573. xmlChar *str;
  5574. lenn = strlen((char *) token2);
  5575. lenp = strlen((char *) token);
  5576. str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
  5577. if (str == NULL) {
  5578. xmlRegFreeAtom(atom);
  5579. return(NULL);
  5580. }
  5581. memcpy(&str[0], token, lenp);
  5582. str[lenp] = '|';
  5583. memcpy(&str[lenp + 1], token2, lenn);
  5584. str[lenn + lenp + 1] = 0;
  5585. atom->valuep = str;
  5586. }
  5587. snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
  5588. err_msg[199] = 0;
  5589. atom->valuep2 = xmlStrdup(err_msg);
  5590. if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
  5591. xmlRegFreeAtom(atom);
  5592. return(NULL);
  5593. }
  5594. am->negs++;
  5595. if (to == NULL)
  5596. return(am->state);
  5597. return(to);
  5598. }
  5599. /**
  5600. * xmlAutomataNewCountTrans2:
  5601. * @am: an automata
  5602. * @from: the starting point of the transition
  5603. * @to: the target point of the transition or NULL
  5604. * @token: the input string associated to that transition
  5605. * @token2: the second input string associated to that transition
  5606. * @min: the minimum successive occurrences of token
  5607. * @max: the maximum successive occurrences of token
  5608. * @data: data associated to the transition
  5609. *
  5610. * If @to is NULL, this creates first a new target state in the automata
  5611. * and then adds a transition from the @from state to the target state
  5612. * activated by a succession of input of value @token and @token2 and
  5613. * whose number is between @min and @max
  5614. *
  5615. * Returns the target state or NULL in case of error
  5616. */
  5617. xmlAutomataStatePtr
  5618. xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5619. xmlAutomataStatePtr to, const xmlChar *token,
  5620. const xmlChar *token2,
  5621. int min, int max, void *data) {
  5622. xmlRegAtomPtr atom;
  5623. int counter;
  5624. if ((am == NULL) || (from == NULL) || (token == NULL))
  5625. return(NULL);
  5626. if (min < 0)
  5627. return(NULL);
  5628. if ((max < min) || (max < 1))
  5629. return(NULL);
  5630. atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
  5631. if (atom == NULL)
  5632. return(NULL);
  5633. if ((token2 == NULL) || (*token2 == 0)) {
  5634. atom->valuep = xmlStrdup(token);
  5635. } else {
  5636. int lenn, lenp;
  5637. xmlChar *str;
  5638. lenn = strlen((char *) token2);
  5639. lenp = strlen((char *) token);
  5640. str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
  5641. if (str == NULL) {
  5642. xmlRegFreeAtom(atom);
  5643. return(NULL);
  5644. }
  5645. memcpy(&str[0], token, lenp);
  5646. str[lenp] = '|';
  5647. memcpy(&str[lenp + 1], token2, lenn);
  5648. str[lenn + lenp + 1] = 0;
  5649. atom->valuep = str;
  5650. }
  5651. atom->data = data;
  5652. if (min == 0)
  5653. atom->min = 1;
  5654. else
  5655. atom->min = min;
  5656. atom->max = max;
  5657. /*
  5658. * associate a counter to the transition.
  5659. */
  5660. counter = xmlRegGetCounter(am);
  5661. am->counters[counter].min = min;
  5662. am->counters[counter].max = max;
  5663. /* xmlFAGenerateTransitions(am, from, to, atom); */
  5664. if (to == NULL) {
  5665. to = xmlRegNewState(am);
  5666. xmlRegStatePush(am, to);
  5667. }
  5668. xmlRegStateAddTrans(am, from, atom, to, counter, -1);
  5669. xmlRegAtomPush(am, atom);
  5670. am->state = to;
  5671. if (to == NULL)
  5672. to = am->state;
  5673. if (to == NULL)
  5674. return(NULL);
  5675. if (min == 0)
  5676. xmlFAGenerateEpsilonTransition(am, from, to);
  5677. return(to);
  5678. }
  5679. /**
  5680. * xmlAutomataNewCountTrans:
  5681. * @am: an automata
  5682. * @from: the starting point of the transition
  5683. * @to: the target point of the transition or NULL
  5684. * @token: the input string associated to that transition
  5685. * @min: the minimum successive occurrences of token
  5686. * @max: the maximum successive occurrences of token
  5687. * @data: data associated to the transition
  5688. *
  5689. * If @to is NULL, this creates first a new target state in the automata
  5690. * and then adds a transition from the @from state to the target state
  5691. * activated by a succession of input of value @token and whose number
  5692. * is between @min and @max
  5693. *
  5694. * Returns the target state or NULL in case of error
  5695. */
  5696. xmlAutomataStatePtr
  5697. xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5698. xmlAutomataStatePtr to, const xmlChar *token,
  5699. int min, int max, void *data) {
  5700. xmlRegAtomPtr atom;
  5701. int counter;
  5702. if ((am == NULL) || (from == NULL) || (token == NULL))
  5703. return(NULL);
  5704. if (min < 0)
  5705. return(NULL);
  5706. if ((max < min) || (max < 1))
  5707. return(NULL);
  5708. atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
  5709. if (atom == NULL)
  5710. return(NULL);
  5711. atom->valuep = xmlStrdup(token);
  5712. atom->data = data;
  5713. if (min == 0)
  5714. atom->min = 1;
  5715. else
  5716. atom->min = min;
  5717. atom->max = max;
  5718. /*
  5719. * associate a counter to the transition.
  5720. */
  5721. counter = xmlRegGetCounter(am);
  5722. am->counters[counter].min = min;
  5723. am->counters[counter].max = max;
  5724. /* xmlFAGenerateTransitions(am, from, to, atom); */
  5725. if (to == NULL) {
  5726. to = xmlRegNewState(am);
  5727. xmlRegStatePush(am, to);
  5728. }
  5729. xmlRegStateAddTrans(am, from, atom, to, counter, -1);
  5730. xmlRegAtomPush(am, atom);
  5731. am->state = to;
  5732. if (to == NULL)
  5733. to = am->state;
  5734. if (to == NULL)
  5735. return(NULL);
  5736. if (min == 0)
  5737. xmlFAGenerateEpsilonTransition(am, from, to);
  5738. return(to);
  5739. }
  5740. /**
  5741. * xmlAutomataNewOnceTrans2:
  5742. * @am: an automata
  5743. * @from: the starting point of the transition
  5744. * @to: the target point of the transition or NULL
  5745. * @token: the input string associated to that transition
  5746. * @token2: the second input string associated to that transition
  5747. * @min: the minimum successive occurrences of token
  5748. * @max: the maximum successive occurrences of token
  5749. * @data: data associated to the transition
  5750. *
  5751. * If @to is NULL, this creates first a new target state in the automata
  5752. * and then adds a transition from the @from state to the target state
  5753. * activated by a succession of input of value @token and @token2 and whose
  5754. * number is between @min and @max, moreover that transition can only be
  5755. * crossed once.
  5756. *
  5757. * Returns the target state or NULL in case of error
  5758. */
  5759. xmlAutomataStatePtr
  5760. xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5761. xmlAutomataStatePtr to, const xmlChar *token,
  5762. const xmlChar *token2,
  5763. int min, int max, void *data) {
  5764. xmlRegAtomPtr atom;
  5765. int counter;
  5766. if ((am == NULL) || (from == NULL) || (token == NULL))
  5767. return(NULL);
  5768. if (min < 1)
  5769. return(NULL);
  5770. if (max < min)
  5771. return(NULL);
  5772. atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
  5773. if (atom == NULL)
  5774. return(NULL);
  5775. if ((token2 == NULL) || (*token2 == 0)) {
  5776. atom->valuep = xmlStrdup(token);
  5777. } else {
  5778. int lenn, lenp;
  5779. xmlChar *str;
  5780. lenn = strlen((char *) token2);
  5781. lenp = strlen((char *) token);
  5782. str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
  5783. if (str == NULL) {
  5784. xmlRegFreeAtom(atom);
  5785. return(NULL);
  5786. }
  5787. memcpy(&str[0], token, lenp);
  5788. str[lenp] = '|';
  5789. memcpy(&str[lenp + 1], token2, lenn);
  5790. str[lenn + lenp + 1] = 0;
  5791. atom->valuep = str;
  5792. }
  5793. atom->data = data;
  5794. atom->quant = XML_REGEXP_QUANT_ONCEONLY;
  5795. atom->min = min;
  5796. atom->max = max;
  5797. /*
  5798. * associate a counter to the transition.
  5799. */
  5800. counter = xmlRegGetCounter(am);
  5801. am->counters[counter].min = 1;
  5802. am->counters[counter].max = 1;
  5803. /* xmlFAGenerateTransitions(am, from, to, atom); */
  5804. if (to == NULL) {
  5805. to = xmlRegNewState(am);
  5806. xmlRegStatePush(am, to);
  5807. }
  5808. xmlRegStateAddTrans(am, from, atom, to, counter, -1);
  5809. xmlRegAtomPush(am, atom);
  5810. am->state = to;
  5811. return(to);
  5812. }
  5813. /**
  5814. * xmlAutomataNewOnceTrans:
  5815. * @am: an automata
  5816. * @from: the starting point of the transition
  5817. * @to: the target point of the transition or NULL
  5818. * @token: the input string associated to that transition
  5819. * @min: the minimum successive occurrences of token
  5820. * @max: the maximum successive occurrences of token
  5821. * @data: data associated to the transition
  5822. *
  5823. * If @to is NULL, this creates first a new target state in the automata
  5824. * and then adds a transition from the @from state to the target state
  5825. * activated by a succession of input of value @token and whose number
  5826. * is between @min and @max, moreover that transition can only be crossed
  5827. * once.
  5828. *
  5829. * Returns the target state or NULL in case of error
  5830. */
  5831. xmlAutomataStatePtr
  5832. xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5833. xmlAutomataStatePtr to, const xmlChar *token,
  5834. int min, int max, void *data) {
  5835. xmlRegAtomPtr atom;
  5836. int counter;
  5837. if ((am == NULL) || (from == NULL) || (token == NULL))
  5838. return(NULL);
  5839. if (min < 1)
  5840. return(NULL);
  5841. if (max < min)
  5842. return(NULL);
  5843. atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
  5844. if (atom == NULL)
  5845. return(NULL);
  5846. atom->valuep = xmlStrdup(token);
  5847. atom->data = data;
  5848. atom->quant = XML_REGEXP_QUANT_ONCEONLY;
  5849. atom->min = min;
  5850. atom->max = max;
  5851. /*
  5852. * associate a counter to the transition.
  5853. */
  5854. counter = xmlRegGetCounter(am);
  5855. am->counters[counter].min = 1;
  5856. am->counters[counter].max = 1;
  5857. /* xmlFAGenerateTransitions(am, from, to, atom); */
  5858. if (to == NULL) {
  5859. to = xmlRegNewState(am);
  5860. xmlRegStatePush(am, to);
  5861. }
  5862. xmlRegStateAddTrans(am, from, atom, to, counter, -1);
  5863. xmlRegAtomPush(am, atom);
  5864. am->state = to;
  5865. return(to);
  5866. }
  5867. /**
  5868. * xmlAutomataNewState:
  5869. * @am: an automata
  5870. *
  5871. * Create a new disconnected state in the automata
  5872. *
  5873. * Returns the new state or NULL in case of error
  5874. */
  5875. xmlAutomataStatePtr
  5876. xmlAutomataNewState(xmlAutomataPtr am) {
  5877. xmlAutomataStatePtr to;
  5878. if (am == NULL)
  5879. return(NULL);
  5880. to = xmlRegNewState(am);
  5881. xmlRegStatePush(am, to);
  5882. return(to);
  5883. }
  5884. /**
  5885. * xmlAutomataNewEpsilon:
  5886. * @am: an automata
  5887. * @from: the starting point of the transition
  5888. * @to: the target point of the transition or NULL
  5889. *
  5890. * If @to is NULL, this creates first a new target state in the automata
  5891. * and then adds an epsilon transition from the @from state to the
  5892. * target state
  5893. *
  5894. * Returns the target state or NULL in case of error
  5895. */
  5896. xmlAutomataStatePtr
  5897. xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5898. xmlAutomataStatePtr to) {
  5899. if ((am == NULL) || (from == NULL))
  5900. return(NULL);
  5901. xmlFAGenerateEpsilonTransition(am, from, to);
  5902. if (to == NULL)
  5903. return(am->state);
  5904. return(to);
  5905. }
  5906. /**
  5907. * xmlAutomataNewAllTrans:
  5908. * @am: an automata
  5909. * @from: the starting point of the transition
  5910. * @to: the target point of the transition or NULL
  5911. * @lax: allow to transition if not all all transitions have been activated
  5912. *
  5913. * If @to is NULL, this creates first a new target state in the automata
  5914. * and then adds a an ALL transition from the @from state to the
  5915. * target state. That transition is an epsilon transition allowed only when
  5916. * all transitions from the @from node have been activated.
  5917. *
  5918. * Returns the target state or NULL in case of error
  5919. */
  5920. xmlAutomataStatePtr
  5921. xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5922. xmlAutomataStatePtr to, int lax) {
  5923. if ((am == NULL) || (from == NULL))
  5924. return(NULL);
  5925. xmlFAGenerateAllTransition(am, from, to, lax);
  5926. if (to == NULL)
  5927. return(am->state);
  5928. return(to);
  5929. }
  5930. /**
  5931. * xmlAutomataNewCounter:
  5932. * @am: an automata
  5933. * @min: the minimal value on the counter
  5934. * @max: the maximal value on the counter
  5935. *
  5936. * Create a new counter
  5937. *
  5938. * Returns the counter number or -1 in case of error
  5939. */
  5940. int
  5941. xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
  5942. int ret;
  5943. if (am == NULL)
  5944. return(-1);
  5945. ret = xmlRegGetCounter(am);
  5946. if (ret < 0)
  5947. return(-1);
  5948. am->counters[ret].min = min;
  5949. am->counters[ret].max = max;
  5950. return(ret);
  5951. }
  5952. /**
  5953. * xmlAutomataNewCountedTrans:
  5954. * @am: an automata
  5955. * @from: the starting point of the transition
  5956. * @to: the target point of the transition or NULL
  5957. * @counter: the counter associated to that transition
  5958. *
  5959. * If @to is NULL, this creates first a new target state in the automata
  5960. * and then adds an epsilon transition from the @from state to the target state
  5961. * which will increment the counter provided
  5962. *
  5963. * Returns the target state or NULL in case of error
  5964. */
  5965. xmlAutomataStatePtr
  5966. xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5967. xmlAutomataStatePtr to, int counter) {
  5968. if ((am == NULL) || (from == NULL) || (counter < 0))
  5969. return(NULL);
  5970. xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
  5971. if (to == NULL)
  5972. return(am->state);
  5973. return(to);
  5974. }
  5975. /**
  5976. * xmlAutomataNewCounterTrans:
  5977. * @am: an automata
  5978. * @from: the starting point of the transition
  5979. * @to: the target point of the transition or NULL
  5980. * @counter: the counter associated to that transition
  5981. *
  5982. * If @to is NULL, this creates first a new target state in the automata
  5983. * and then adds an epsilon transition from the @from state to the target state
  5984. * which will be allowed only if the counter is within the right range.
  5985. *
  5986. * Returns the target state or NULL in case of error
  5987. */
  5988. xmlAutomataStatePtr
  5989. xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  5990. xmlAutomataStatePtr to, int counter) {
  5991. if ((am == NULL) || (from == NULL) || (counter < 0))
  5992. return(NULL);
  5993. xmlFAGenerateCountedTransition(am, from, to, counter);
  5994. if (to == NULL)
  5995. return(am->state);
  5996. return(to);
  5997. }
  5998. /**
  5999. * xmlAutomataCompile:
  6000. * @am: an automata
  6001. *
  6002. * Compile the automata into a Reg Exp ready for being executed.
  6003. * The automata should be free after this point.
  6004. *
  6005. * Returns the compiled regexp or NULL in case of error
  6006. */
  6007. xmlRegexpPtr
  6008. xmlAutomataCompile(xmlAutomataPtr am) {
  6009. xmlRegexpPtr ret;
  6010. if ((am == NULL) || (am->error != 0)) return(NULL);
  6011. xmlFAEliminateEpsilonTransitions(am);
  6012. /* xmlFAComputesDeterminism(am); */
  6013. ret = xmlRegEpxFromParse(am);
  6014. return(ret);
  6015. }
  6016. /**
  6017. * xmlAutomataIsDeterminist:
  6018. * @am: an automata
  6019. *
  6020. * Checks if an automata is determinist.
  6021. *
  6022. * Returns 1 if true, 0 if not, and -1 in case of error
  6023. */
  6024. int
  6025. xmlAutomataIsDeterminist(xmlAutomataPtr am) {
  6026. int ret;
  6027. if (am == NULL)
  6028. return(-1);
  6029. ret = xmlFAComputesDeterminism(am);
  6030. return(ret);
  6031. }
  6032. #endif /* LIBXML_AUTOMATA_ENABLED */
  6033. #ifdef LIBXML_EXPR_ENABLED
  6034. /************************************************************************
  6035. * *
  6036. * Formal Expression handling code *
  6037. * *
  6038. ************************************************************************/
  6039. /************************************************************************
  6040. * *
  6041. * Expression handling context *
  6042. * *
  6043. ************************************************************************/
/*
 * Context shared by all expression nodes: it owns the hash table used to
 * keep sub-expressions unique and the dictionary used to intern atom names.
 */
struct _xmlExpCtxt {
    xmlDictPtr dict;        /* dictionary interning the atom names */
    xmlExpNodePtr *table;   /* hash table buckets, chained through node->next */
    int size;               /* number of buckets in table */
    int nbElems;            /* incremented each time a node is inserted in table */
    int nb_nodes;           /* nodes currently allocated (decremented on free) */
    int maxNodes;           /* value given to xmlExpNewCtxt(), at least 4096 */
    const char *expr;       /* NOTE(review): not used in this part of the file,
                               presumably the expression being parsed - confirm */
    const char *cur;        /* NOTE(review): presumably the parsing position
                               within expr - confirm */
    int nb_cons;            /* total number of nodes ever constructed */
    int tabSize;            /* NOTE(review): not used in this part of the file */
};
  6056. /**
  6057. * xmlExpNewCtxt:
  6058. * @maxNodes: the maximum number of nodes
  6059. * @dict: optional dictionary to use internally
  6060. *
  6061. * Creates a new context for manipulating expressions
  6062. *
  6063. * Returns the context or NULL in case of error
  6064. */
  6065. xmlExpCtxtPtr
  6066. xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
  6067. xmlExpCtxtPtr ret;
  6068. int size = 256;
  6069. if (maxNodes <= 4096)
  6070. maxNodes = 4096;
  6071. ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
  6072. if (ret == NULL)
  6073. return(NULL);
  6074. memset(ret, 0, sizeof(xmlExpCtxt));
  6075. ret->size = size;
  6076. ret->nbElems = 0;
  6077. ret->maxNodes = maxNodes;
  6078. ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
  6079. if (ret->table == NULL) {
  6080. xmlFree(ret);
  6081. return(NULL);
  6082. }
  6083. memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
  6084. if (dict == NULL) {
  6085. ret->dict = xmlDictCreate();
  6086. if (ret->dict == NULL) {
  6087. xmlFree(ret->table);
  6088. xmlFree(ret);
  6089. return(NULL);
  6090. }
  6091. } else {
  6092. ret->dict = dict;
  6093. xmlDictReference(ret->dict);
  6094. }
  6095. return(ret);
  6096. }
  6097. /**
  6098. * xmlExpFreeCtxt:
  6099. * @ctxt: an expression context
  6100. *
  6101. * Free an expression context
  6102. */
  6103. void
  6104. xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
  6105. if (ctxt == NULL)
  6106. return;
  6107. xmlDictFree(ctxt->dict);
  6108. if (ctxt->table != NULL)
  6109. xmlFree(ctxt->table);
  6110. xmlFree(ctxt);
  6111. }
  6112. /************************************************************************
  6113. * *
  6114. * Structure associated to an expression node *
  6115. * *
  6116. ************************************************************************/
  6117. #define MAX_NODES 10000
  6118. /* #define DEBUG_DERIV */
  6119. /*
  6120. * TODO:
  6121. * - Wildcards
  6122. * - public API for creation
  6123. *
  6124. * Started
  6125. * - regression testing
  6126. *
  6127. * Done
  6128. * - split into module and test tool
  6129. * - memleaks
  6130. */
/*
 * Flags OR-ed into the info field of an expression node.
 */
typedef enum {
    XML_EXP_NILABLE = (1 << 0)  /* set on nodes built as accepting the empty sequence */
} xmlExpNodeInfo;

/* non-zero when the XML_EXP_NILABLE bit is set in the node's info field */
#define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
/*
 * An expression node. Nodes are reference counted and kept unique through
 * the context hash table. The union holds the type specific part:
 * the bounds for a COUNT, the right child for SEQ and OR, the interned
 * string for an ATOM.
 */
struct _xmlExpNode {
    unsigned char type;/* xmlExpNodeType */
    unsigned char info;/* OR of xmlExpNodeInfo */
    unsigned short key;	/* the hash key */
    unsigned int ref;	/* The number of references */
    int c_max;		/* the maximum length it can consume, -1 if unbounded */
    xmlExpNodePtr exp_left; /* left child (SEQ, OR) or repeated expression (COUNT) */
    xmlExpNodePtr next;/* the next node in the hash table or free list */
    union {
	struct {
	    int f_min; /* lower bound of a COUNT */
	    int f_max; /* upper bound of a COUNT, -1 for unbounded */
	} count;
	struct {
	    xmlExpNodePtr f_right; /* right child of a SEQ or OR */
	} children;
        const xmlChar *f_str; /* name of an ATOM */
    } field;
};

/* shorthands to reach the members of the union */
#define exp_min field.count.f_min
#define exp_max field.count.f_max
/* #define exp_left field.children.f_left */
#define exp_right field.children.f_right
#define exp_str field.f_str
/* forward declaration, the definition comes after the hashing helpers */
static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);

/*
 * Statically allocated node of type XML_EXP_FORBID; xmlExpFree() ignores
 * it, so it is never reference counted nor released.
 */
static xmlExpNode forbiddenExpNode = {
    XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
};
xmlExpNodePtr forbiddenExp = &forbiddenExpNode;

/*
 * Statically allocated node of type XML_EXP_EMPTY; its info field is 1,
 * i.e. the XML_EXP_NILABLE bit is set. xmlExpFree() ignores it as well.
 */
static xmlExpNode emptyExpNode = {
    XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
};
xmlExpNodePtr emptyExp = &emptyExpNode;
  6168. /************************************************************************
  6169. * *
  6170. * The custom hash table for unicity and canonicalization *
  6171. * of sub-expressions pointers *
  6172. * *
  6173. ************************************************************************/
  6174. /*
  6175. * xmlExpHashNameComputeKey:
  6176. * Calculate the hash key for a token
  6177. */
  6178. static unsigned short
  6179. xmlExpHashNameComputeKey(const xmlChar *name) {
  6180. unsigned short value = 0L;
  6181. char ch;
  6182. if (name != NULL) {
  6183. value += 30 * (*name);
  6184. while ((ch = *name++) != 0) {
  6185. value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
  6186. }
  6187. }
  6188. return (value);
  6189. }
  6190. /*
  6191. * xmlExpHashComputeKey:
  6192. * Calculate the hash key for a compound expression
  6193. */
  6194. static unsigned short
  6195. xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
  6196. xmlExpNodePtr right) {
  6197. unsigned long value;
  6198. unsigned short ret;
  6199. switch (type) {
  6200. case XML_EXP_SEQ:
  6201. value = left->key;
  6202. value += right->key;
  6203. value *= 3;
  6204. ret = (unsigned short) value;
  6205. break;
  6206. case XML_EXP_OR:
  6207. value = left->key;
  6208. value += right->key;
  6209. value *= 7;
  6210. ret = (unsigned short) value;
  6211. break;
  6212. case XML_EXP_COUNT:
  6213. value = left->key;
  6214. value += right->key;
  6215. ret = (unsigned short) value;
  6216. break;
  6217. default:
  6218. ret = 0;
  6219. }
  6220. return(ret);
  6221. }
  6222. static xmlExpNodePtr
  6223. xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
  6224. xmlExpNodePtr ret;
  6225. if (ctxt->nb_nodes >= MAX_NODES)
  6226. return(NULL);
  6227. ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
  6228. if (ret == NULL)
  6229. return(NULL);
  6230. memset(ret, 0, sizeof(xmlExpNode));
  6231. ret->type = type;
  6232. ret->next = NULL;
  6233. ctxt->nb_nodes++;
  6234. ctxt->nb_cons++;
  6235. return(ret);
  6236. }
  6237. /**
  6238. * xmlExpHashGetEntry:
  6239. * @table: the hash table
  6240. *
  6241. * Get the unique entry from the hash table. The entry is created if
  6242. * needed. @left and @right are consumed, i.e. their ref count will
  6243. * be decremented by the operation.
  6244. *
  6245. * Returns the pointer or NULL in case of error
  6246. */
  6247. static xmlExpNodePtr
  6248. xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
  6249. xmlExpNodePtr left, xmlExpNodePtr right,
  6250. const xmlChar *name, int min, int max) {
  6251. unsigned short kbase, key;
  6252. xmlExpNodePtr entry;
  6253. xmlExpNodePtr insert;
  6254. if (ctxt == NULL)
  6255. return(NULL);
  6256. /*
  6257. * Check for duplicate and insertion location.
  6258. */
  6259. if (type == XML_EXP_ATOM) {
  6260. kbase = xmlExpHashNameComputeKey(name);
  6261. } else if (type == XML_EXP_COUNT) {
  6262. /* COUNT reduction rule 1 */
  6263. /* a{1} -> a */
  6264. if (min == max) {
  6265. if (min == 1) {
  6266. return(left);
  6267. }
  6268. if (min == 0) {
  6269. xmlExpFree(ctxt, left);
  6270. return(emptyExp);
  6271. }
  6272. }
  6273. if (min < 0) {
  6274. xmlExpFree(ctxt, left);
  6275. return(forbiddenExp);
  6276. }
  6277. if (max == -1)
  6278. kbase = min + 79;
  6279. else
  6280. kbase = max - min;
  6281. kbase += left->key;
  6282. } else if (type == XML_EXP_OR) {
  6283. /* Forbid reduction rules */
  6284. if (left->type == XML_EXP_FORBID) {
  6285. xmlExpFree(ctxt, left);
  6286. return(right);
  6287. }
  6288. if (right->type == XML_EXP_FORBID) {
  6289. xmlExpFree(ctxt, right);
  6290. return(left);
  6291. }
  6292. /* OR reduction rule 1 */
  6293. /* a | a reduced to a */
  6294. if (left == right) {
  6295. xmlExpFree(ctxt, right);
  6296. return(left);
  6297. }
  6298. /* OR canonicalization rule 1 */
  6299. /* linearize (a | b) | c into a | (b | c) */
  6300. if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
  6301. xmlExpNodePtr tmp = left;
  6302. left = right;
  6303. right = tmp;
  6304. }
  6305. /* OR reduction rule 2 */
  6306. /* a | (a | b) and b | (a | b) are reduced to a | b */
  6307. if (right->type == XML_EXP_OR) {
  6308. if ((left == right->exp_left) ||
  6309. (left == right->exp_right)) {
  6310. xmlExpFree(ctxt, left);
  6311. return(right);
  6312. }
  6313. }
  6314. /* OR canonicalization rule 2 */
  6315. /* linearize (a | b) | c into a | (b | c) */
  6316. if (left->type == XML_EXP_OR) {
  6317. xmlExpNodePtr tmp;
  6318. /* OR canonicalization rule 2 */
  6319. if ((left->exp_right->type != XML_EXP_OR) &&
  6320. (left->exp_right->key < left->exp_left->key)) {
  6321. tmp = left->exp_right;
  6322. left->exp_right = left->exp_left;
  6323. left->exp_left = tmp;
  6324. }
  6325. left->exp_right->ref++;
  6326. tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
  6327. NULL, 0, 0);
  6328. left->exp_left->ref++;
  6329. tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
  6330. NULL, 0, 0);
  6331. xmlExpFree(ctxt, left);
  6332. return(tmp);
  6333. }
  6334. if (right->type == XML_EXP_OR) {
  6335. /* Ordering in the tree */
  6336. /* C | (A | B) -> A | (B | C) */
  6337. if (left->key > right->exp_right->key) {
  6338. xmlExpNodePtr tmp;
  6339. right->exp_right->ref++;
  6340. tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
  6341. left, NULL, 0, 0);
  6342. right->exp_left->ref++;
  6343. tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
  6344. tmp, NULL, 0, 0);
  6345. xmlExpFree(ctxt, right);
  6346. return(tmp);
  6347. }
  6348. /* Ordering in the tree */
  6349. /* B | (A | C) -> A | (B | C) */
  6350. if (left->key > right->exp_left->key) {
  6351. xmlExpNodePtr tmp;
  6352. right->exp_right->ref++;
  6353. tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
  6354. right->exp_right, NULL, 0, 0);
  6355. right->exp_left->ref++;
  6356. tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
  6357. tmp, NULL, 0, 0);
  6358. xmlExpFree(ctxt, right);
  6359. return(tmp);
  6360. }
  6361. }
  6362. /* we know both types are != XML_EXP_OR here */
  6363. else if (left->key > right->key) {
  6364. xmlExpNodePtr tmp = left;
  6365. left = right;
  6366. right = tmp;
  6367. }
  6368. kbase = xmlExpHashComputeKey(type, left, right);
  6369. } else if (type == XML_EXP_SEQ) {
  6370. /* Forbid reduction rules */
  6371. if (left->type == XML_EXP_FORBID) {
  6372. xmlExpFree(ctxt, right);
  6373. return(left);
  6374. }
  6375. if (right->type == XML_EXP_FORBID) {
  6376. xmlExpFree(ctxt, left);
  6377. return(right);
  6378. }
  6379. /* Empty reduction rules */
  6380. if (right->type == XML_EXP_EMPTY) {
  6381. return(left);
  6382. }
  6383. if (left->type == XML_EXP_EMPTY) {
  6384. return(right);
  6385. }
  6386. kbase = xmlExpHashComputeKey(type, left, right);
  6387. } else
  6388. return(NULL);
  6389. key = kbase % ctxt->size;
  6390. if (ctxt->table[key] != NULL) {
  6391. for (insert = ctxt->table[key]; insert != NULL;
  6392. insert = insert->next) {
  6393. if ((insert->key == kbase) &&
  6394. (insert->type == type)) {
  6395. if (type == XML_EXP_ATOM) {
  6396. if (name == insert->exp_str) {
  6397. insert->ref++;
  6398. return(insert);
  6399. }
  6400. } else if (type == XML_EXP_COUNT) {
  6401. if ((insert->exp_min == min) && (insert->exp_max == max) &&
  6402. (insert->exp_left == left)) {
  6403. insert->ref++;
  6404. left->ref--;
  6405. return(insert);
  6406. }
  6407. } else if ((insert->exp_left == left) &&
  6408. (insert->exp_right == right)) {
  6409. insert->ref++;
  6410. left->ref--;
  6411. right->ref--;
  6412. return(insert);
  6413. }
  6414. }
  6415. }
  6416. }
  6417. entry = xmlExpNewNode(ctxt, type);
  6418. if (entry == NULL)
  6419. return(NULL);
  6420. entry->key = kbase;
  6421. if (type == XML_EXP_ATOM) {
  6422. entry->exp_str = name;
  6423. entry->c_max = 1;
  6424. } else if (type == XML_EXP_COUNT) {
  6425. entry->exp_min = min;
  6426. entry->exp_max = max;
  6427. entry->exp_left = left;
  6428. if ((min == 0) || (IS_NILLABLE(left)))
  6429. entry->info |= XML_EXP_NILABLE;
  6430. if (max < 0)
  6431. entry->c_max = -1;
  6432. else
  6433. entry->c_max = max * entry->exp_left->c_max;
  6434. } else {
  6435. entry->exp_left = left;
  6436. entry->exp_right = right;
  6437. if (type == XML_EXP_OR) {
  6438. if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
  6439. entry->info |= XML_EXP_NILABLE;
  6440. if ((entry->exp_left->c_max == -1) ||
  6441. (entry->exp_right->c_max == -1))
  6442. entry->c_max = -1;
  6443. else if (entry->exp_left->c_max > entry->exp_right->c_max)
  6444. entry->c_max = entry->exp_left->c_max;
  6445. else
  6446. entry->c_max = entry->exp_right->c_max;
  6447. } else {
  6448. if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
  6449. entry->info |= XML_EXP_NILABLE;
  6450. if ((entry->exp_left->c_max == -1) ||
  6451. (entry->exp_right->c_max == -1))
  6452. entry->c_max = -1;
  6453. else
  6454. entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
  6455. }
  6456. }
  6457. entry->ref = 1;
  6458. if (ctxt->table[key] != NULL)
  6459. entry->next = ctxt->table[key];
  6460. ctxt->table[key] = entry;
  6461. ctxt->nbElems++;
  6462. return(entry);
  6463. }
  6464. /**
  6465. * xmlExpFree:
  6466. * @ctxt: the expression context
  6467. * @exp: the expression
  6468. *
  6469. * Dereference the expression
  6470. */
  6471. void
  6472. xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
  6473. if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
  6474. return;
  6475. exp->ref--;
  6476. if (exp->ref == 0) {
  6477. unsigned short key;
  6478. /* Unlink it first from the hash table */
  6479. key = exp->key % ctxt->size;
  6480. if (ctxt->table[key] == exp) {
  6481. ctxt->table[key] = exp->next;
  6482. } else {
  6483. xmlExpNodePtr tmp;
  6484. tmp = ctxt->table[key];
  6485. while (tmp != NULL) {
  6486. if (tmp->next == exp) {
  6487. tmp->next = exp->next;
  6488. break;
  6489. }
  6490. tmp = tmp->next;
  6491. }
  6492. }
  6493. if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
  6494. xmlExpFree(ctxt, exp->exp_left);
  6495. xmlExpFree(ctxt, exp->exp_right);
  6496. } else if (exp->type == XML_EXP_COUNT) {
  6497. xmlExpFree(ctxt, exp->exp_left);
  6498. }
  6499. xmlFree(exp);
  6500. ctxt->nb_nodes--;
  6501. }
  6502. }
  6503. /**
  6504. * xmlExpRef:
  6505. * @exp: the expression
  6506. *
  6507. * Increase the reference count of the expression
  6508. */
  6509. void
  6510. xmlExpRef(xmlExpNodePtr exp) {
  6511. if (exp != NULL)
  6512. exp->ref++;
  6513. }
  6514. /**
  6515. * xmlExpNewAtom:
  6516. * @ctxt: the expression context
  6517. * @name: the atom name
  6518. * @len: the atom name length in byte (or -1);
  6519. *
  6520. * Get the atom associated to this name from that context
  6521. *
  6522. * Returns the node or NULL in case of error
  6523. */
  6524. xmlExpNodePtr
  6525. xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
  6526. if ((ctxt == NULL) || (name == NULL))
  6527. return(NULL);
  6528. name = xmlDictLookup(ctxt->dict, name, len);
  6529. if (name == NULL)
  6530. return(NULL);
  6531. return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
  6532. }
  6533. /**
  6534. * xmlExpNewOr:
  6535. * @ctxt: the expression context
  6536. * @left: left expression
  6537. * @right: right expression
  6538. *
  6539. * Get the atom associated to the choice @left | @right
  6540. * Note that @left and @right are consumed in the operation, to keep
  6541. * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
  6542. * this is true even in case of failure (unless ctxt == NULL).
  6543. *
  6544. * Returns the node or NULL in case of error
  6545. */
  6546. xmlExpNodePtr
  6547. xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
  6548. if (ctxt == NULL)
  6549. return(NULL);
  6550. if ((left == NULL) || (right == NULL)) {
  6551. xmlExpFree(ctxt, left);
  6552. xmlExpFree(ctxt, right);
  6553. return(NULL);
  6554. }
  6555. return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
  6556. }
  6557. /**
  6558. * xmlExpNewSeq:
  6559. * @ctxt: the expression context
  6560. * @left: left expression
  6561. * @right: right expression
  6562. *
  6563. * Get the atom associated to the sequence @left , @right
  6564. * Note that @left and @right are consumed in the operation, to keep
  6565. * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
  6566. * this is true even in case of failure (unless ctxt == NULL).
  6567. *
  6568. * Returns the node or NULL in case of error
  6569. */
  6570. xmlExpNodePtr
  6571. xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
  6572. if (ctxt == NULL)
  6573. return(NULL);
  6574. if ((left == NULL) || (right == NULL)) {
  6575. xmlExpFree(ctxt, left);
  6576. xmlExpFree(ctxt, right);
  6577. return(NULL);
  6578. }
  6579. return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
  6580. }
  6581. /**
  6582. * xmlExpNewRange:
  6583. * @ctxt: the expression context
  6584. * @subset: the expression to be repeated
  6585. * @min: the lower bound for the repetition
  6586. * @max: the upper bound for the repetition, -1 means infinite
  6587. *
  6588. * Get the atom associated to the range (@subset){@min, @max}
  6589. * Note that @subset is consumed in the operation, to keep
  6590. * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
  6591. * this is true even in case of failure (unless ctxt == NULL).
  6592. *
  6593. * Returns the node or NULL in case of error
  6594. */
  6595. xmlExpNodePtr
  6596. xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
  6597. if (ctxt == NULL)
  6598. return(NULL);
  6599. if ((subset == NULL) || (min < 0) || (max < -1) ||
  6600. ((max >= 0) && (min > max))) {
  6601. xmlExpFree(ctxt, subset);
  6602. return(NULL);
  6603. }
  6604. return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
  6605. NULL, NULL, min, max));
  6606. }
  6607. /************************************************************************
  6608. * *
  6609. * Public API for operations on expressions *
  6610. * *
  6611. ************************************************************************/
  6612. static int
  6613. xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
  6614. const xmlChar**list, int len, int nb) {
  6615. int tmp, tmp2;
  6616. tail:
  6617. switch (exp->type) {
  6618. case XML_EXP_EMPTY:
  6619. return(0);
  6620. case XML_EXP_ATOM:
  6621. for (tmp = 0;tmp < nb;tmp++)
  6622. if (list[tmp] == exp->exp_str)
  6623. return(0);
  6624. if (nb >= len)
  6625. return(-2);
  6626. list[nb] = exp->exp_str;
  6627. return(1);
  6628. case XML_EXP_COUNT:
  6629. exp = exp->exp_left;
  6630. goto tail;
  6631. case XML_EXP_SEQ:
  6632. case XML_EXP_OR:
  6633. tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
  6634. if (tmp < 0)
  6635. return(tmp);
  6636. tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
  6637. nb + tmp);
  6638. if (tmp2 < 0)
  6639. return(tmp2);
  6640. return(tmp + tmp2);
  6641. }
  6642. return(-1);
  6643. }
  6644. /**
  6645. * xmlExpGetLanguage:
  6646. * @ctxt: the expression context
  6647. * @exp: the expression
  6648. * @langList: where to store the tokens
  6649. * @len: the allocated length of @list
  6650. *
  6651. * Find all the strings used in @exp and store them in @list
  6652. *
  6653. * Returns the number of unique strings found, -1 in case of errors and
  6654. * -2 if there is more than @len strings
  6655. */
  6656. int
  6657. xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
  6658. const xmlChar**langList, int len) {
  6659. if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
  6660. return(-1);
  6661. return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
  6662. }
  6663. static int
  6664. xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
  6665. const xmlChar**list, int len, int nb) {
  6666. int tmp, tmp2;
  6667. tail:
  6668. switch (exp->type) {
  6669. case XML_EXP_FORBID:
  6670. return(0);
  6671. case XML_EXP_EMPTY:
  6672. return(0);
  6673. case XML_EXP_ATOM:
  6674. for (tmp = 0;tmp < nb;tmp++)
  6675. if (list[tmp] == exp->exp_str)
  6676. return(0);
  6677. if (nb >= len)
  6678. return(-2);
  6679. list[nb] = exp->exp_str;
  6680. return(1);
  6681. case XML_EXP_COUNT:
  6682. exp = exp->exp_left;
  6683. goto tail;
  6684. case XML_EXP_SEQ:
  6685. tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
  6686. if (tmp < 0)
  6687. return(tmp);
  6688. if (IS_NILLABLE(exp->exp_left)) {
  6689. tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
  6690. nb + tmp);
  6691. if (tmp2 < 0)
  6692. return(tmp2);
  6693. tmp += tmp2;
  6694. }
  6695. return(tmp);
  6696. case XML_EXP_OR:
  6697. tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
  6698. if (tmp < 0)
  6699. return(tmp);
  6700. tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
  6701. nb + tmp);
  6702. if (tmp2 < 0)
  6703. return(tmp2);
  6704. return(tmp + tmp2);
  6705. }
  6706. return(-1);
  6707. }
  6708. /**
  6709. * xmlExpGetStart:
  6710. * @ctxt: the expression context
  6711. * @exp: the expression
  6712. * @tokList: where to store the tokens
  6713. * @len: the allocated length of @list
  6714. *
  6715. * Find all the strings that appears at the start of the languages
  6716. * accepted by @exp and store them in @list. E.g. for (a, b) | c
  6717. * it will return the list [a, c]
  6718. *
  6719. * Returns the number of unique strings found, -1 in case of errors and
  6720. * -2 if there is more than @len strings
  6721. */
  6722. int
  6723. xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
  6724. const xmlChar**tokList, int len) {
  6725. if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
  6726. return(-1);
  6727. return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
  6728. }
  6729. /**
  6730. * xmlExpIsNillable:
  6731. * @exp: the expression
  6732. *
  6733. * Finds if the expression is nillable, i.e. if it accepts the empty sequence
  6734. *
  6735. * Returns 1 if nillable, 0 if not and -1 in case of error
  6736. */
  6737. int
  6738. xmlExpIsNillable(xmlExpNodePtr exp) {
  6739. if (exp == NULL)
  6740. return(-1);
  6741. return(IS_NILLABLE(exp) != 0);
  6742. }
  6743. static xmlExpNodePtr
  6744. xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
  6745. {
  6746. xmlExpNodePtr ret;
  6747. switch (exp->type) {
  6748. case XML_EXP_EMPTY:
  6749. return(forbiddenExp);
  6750. case XML_EXP_FORBID:
  6751. return(forbiddenExp);
  6752. case XML_EXP_ATOM:
  6753. if (exp->exp_str == str) {
  6754. #ifdef DEBUG_DERIV
  6755. printf("deriv atom: equal => Empty\n");
  6756. #endif
  6757. ret = emptyExp;
  6758. } else {
  6759. #ifdef DEBUG_DERIV
  6760. printf("deriv atom: mismatch => forbid\n");
  6761. #endif
  6762. /* TODO wildcards here */
  6763. ret = forbiddenExp;
  6764. }
  6765. return(ret);
  6766. case XML_EXP_OR: {
  6767. xmlExpNodePtr tmp;
  6768. #ifdef DEBUG_DERIV
  6769. printf("deriv or: => or(derivs)\n");
  6770. #endif
  6771. tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
  6772. if (tmp == NULL) {
  6773. return(NULL);
  6774. }
  6775. ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
  6776. if (ret == NULL) {
  6777. xmlExpFree(ctxt, tmp);
  6778. return(NULL);
  6779. }
  6780. ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
  6781. NULL, 0, 0);
  6782. return(ret);
  6783. }
  6784. case XML_EXP_SEQ:
  6785. #ifdef DEBUG_DERIV
  6786. printf("deriv seq: starting with left\n");
  6787. #endif
  6788. ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
  6789. if (ret == NULL) {
  6790. return(NULL);
  6791. } else if (ret == forbiddenExp) {
  6792. if (IS_NILLABLE(exp->exp_left)) {
  6793. #ifdef DEBUG_DERIV
  6794. printf("deriv seq: left failed but nillable\n");
  6795. #endif
  6796. ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
  6797. }
  6798. } else {
  6799. #ifdef DEBUG_DERIV
  6800. printf("deriv seq: left match => sequence\n");
  6801. #endif
  6802. exp->exp_right->ref++;
  6803. ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
  6804. NULL, 0, 0);
  6805. }
  6806. return(ret);
  6807. case XML_EXP_COUNT: {
  6808. int min, max;
  6809. xmlExpNodePtr tmp;
  6810. if (exp->exp_max == 0)
  6811. return(forbiddenExp);
  6812. ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
  6813. if (ret == NULL)
  6814. return(NULL);
  6815. if (ret == forbiddenExp) {
  6816. #ifdef DEBUG_DERIV
  6817. printf("deriv count: pattern mismatch => forbid\n");
  6818. #endif
  6819. return(ret);
  6820. }
  6821. if (exp->exp_max == 1)
  6822. return(ret);
  6823. if (exp->exp_max < 0) /* unbounded */
  6824. max = -1;
  6825. else
  6826. max = exp->exp_max - 1;
  6827. if (exp->exp_min > 0)
  6828. min = exp->exp_min - 1;
  6829. else
  6830. min = 0;
  6831. exp->exp_left->ref++;
  6832. tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
  6833. NULL, min, max);
  6834. if (ret == emptyExp) {
  6835. #ifdef DEBUG_DERIV
  6836. printf("deriv count: match to empty => new count\n");
  6837. #endif
  6838. return(tmp);
  6839. }
  6840. #ifdef DEBUG_DERIV
  6841. printf("deriv count: match => sequence with new count\n");
  6842. #endif
  6843. return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
  6844. NULL, 0, 0));
  6845. }
  6846. }
  6847. return(NULL);
  6848. }
  6849. /**
  6850. * xmlExpStringDerive:
  6851. * @ctxt: the expression context
  6852. * @exp: the expression
  6853. * @str: the string
  6854. * @len: the string len in bytes if available
  6855. *
  6856. * Do one step of Brzozowski derivation of the expression @exp with
  6857. * respect to the input string
  6858. *
  6859. * Returns the resulting expression or NULL in case of internal error
  6860. */
  6861. xmlExpNodePtr
  6862. xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
  6863. const xmlChar *str, int len) {
  6864. const xmlChar *input;
  6865. if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
  6866. return(NULL);
  6867. }
  6868. /*
  6869. * check the string is in the dictionary, if yes use an interned
  6870. * copy, otherwise we know it's not an acceptable input
  6871. */
  6872. input = xmlDictExists(ctxt->dict, str, len);
  6873. if (input == NULL) {
  6874. return(forbiddenExp);
  6875. }
  6876. return(xmlExpStringDeriveInt(ctxt, exp, input));
  6877. }
  6878. static int
  6879. xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
  6880. int ret = 1;
  6881. if (sub->c_max == -1) {
  6882. if (exp->c_max != -1)
  6883. ret = 0;
  6884. } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
  6885. ret = 0;
  6886. }
  6887. #if 0
  6888. if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
  6889. ret = 0;
  6890. #endif
  6891. return(ret);
  6892. }
  6893. static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
  6894. xmlExpNodePtr sub);
  6895. /**
  6896. * xmlExpDivide:
  6897. * @ctxt: the expressions context
  6898. * @exp: the englobing expression
  6899. * @sub: the subexpression
  6900. * @mult: the multiple expression
  6901. * @remain: the remain from the derivation of the multiple
  6902. *
  6903. * Check if exp is a multiple of sub, i.e. if there is a finite number n
  6904. * so that sub{n} subsume exp
  6905. *
  6906. * Returns the multiple value if successful, 0 if it is not a multiple
  6907. * and -1 in case of internal error.
  6908. */
  6909. static int
  6910. xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
  6911. xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
  6912. int i;
  6913. xmlExpNodePtr tmp, tmp2;
  6914. if (mult != NULL) *mult = NULL;
  6915. if (remain != NULL) *remain = NULL;
  6916. if (exp->c_max == -1) return(0);
  6917. if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
  6918. for (i = 1;i <= exp->c_max;i++) {
  6919. sub->ref++;
  6920. tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
  6921. sub, NULL, NULL, i, i);
  6922. if (tmp == NULL) {
  6923. return(-1);
  6924. }
  6925. if (!xmlExpCheckCard(tmp, exp)) {
  6926. xmlExpFree(ctxt, tmp);
  6927. continue;
  6928. }
  6929. tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
  6930. if (tmp2 == NULL) {
  6931. xmlExpFree(ctxt, tmp);
  6932. return(-1);
  6933. }
  6934. if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
  6935. if (remain != NULL)
  6936. *remain = tmp2;
  6937. else
  6938. xmlExpFree(ctxt, tmp2);
  6939. if (mult != NULL)
  6940. *mult = tmp;
  6941. else
  6942. xmlExpFree(ctxt, tmp);
  6943. #ifdef DEBUG_DERIV
  6944. printf("Divide succeeded %d\n", i);
  6945. #endif
  6946. return(i);
  6947. }
  6948. xmlExpFree(ctxt, tmp);
  6949. xmlExpFree(ctxt, tmp2);
  6950. }
  6951. #ifdef DEBUG_DERIV
  6952. printf("Divide failed\n");
  6953. #endif
  6954. return(0);
  6955. }
  6956. /**
  6957. * xmlExpExpDeriveInt:
  6958. * @ctxt: the expressions context
  6959. * @exp: the englobing expression
  6960. * @sub: the subexpression
  6961. *
  6962. * Try to do a step of Brzozowski derivation but at a higher level
  6963. * the input being a subexpression.
  6964. *
  6965. * Returns the resulting expression or NULL in case of internal error
  6966. */
  6967. static xmlExpNodePtr
  6968. xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
  6969. xmlExpNodePtr ret, tmp, tmp2, tmp3;
  6970. const xmlChar **tab;
  6971. int len, i;
  6972. /*
  6973. * In case of equality and if the expression can only consume a finite
  6974. * amount, then the derivation is empty
  6975. */
  6976. if ((exp == sub) && (exp->c_max >= 0)) {
  6977. #ifdef DEBUG_DERIV
  6978. printf("Equal(exp, sub) and finite -> Empty\n");
  6979. #endif
  6980. return(emptyExp);
  6981. }
  6982. /*
  6983. * decompose sub sequence first
  6984. */
  6985. if (sub->type == XML_EXP_EMPTY) {
  6986. #ifdef DEBUG_DERIV
  6987. printf("Empty(sub) -> Empty\n");
  6988. #endif
  6989. exp->ref++;
  6990. return(exp);
  6991. }
  6992. if (sub->type == XML_EXP_SEQ) {
  6993. #ifdef DEBUG_DERIV
  6994. printf("Seq(sub) -> decompose\n");
  6995. #endif
  6996. tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
  6997. if (tmp == NULL)
  6998. return(NULL);
  6999. if (tmp == forbiddenExp)
  7000. return(tmp);
  7001. ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
  7002. xmlExpFree(ctxt, tmp);
  7003. return(ret);
  7004. }
  7005. if (sub->type == XML_EXP_OR) {
  7006. #ifdef DEBUG_DERIV
  7007. printf("Or(sub) -> decompose\n");
  7008. #endif
  7009. tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
  7010. if (tmp == forbiddenExp)
  7011. return(tmp);
  7012. if (tmp == NULL)
  7013. return(NULL);
  7014. ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
  7015. if ((ret == NULL) || (ret == forbiddenExp)) {
  7016. xmlExpFree(ctxt, tmp);
  7017. return(ret);
  7018. }
  7019. return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
  7020. }
  7021. if (!xmlExpCheckCard(exp, sub)) {
  7022. #ifdef DEBUG_DERIV
  7023. printf("CheckCard(exp, sub) failed -> Forbid\n");
  7024. #endif
  7025. return(forbiddenExp);
  7026. }
  7027. switch (exp->type) {
  7028. case XML_EXP_EMPTY:
  7029. if (sub == emptyExp)
  7030. return(emptyExp);
  7031. #ifdef DEBUG_DERIV
  7032. printf("Empty(exp) -> Forbid\n");
  7033. #endif
  7034. return(forbiddenExp);
  7035. case XML_EXP_FORBID:
  7036. #ifdef DEBUG_DERIV
  7037. printf("Forbid(exp) -> Forbid\n");
  7038. #endif
  7039. return(forbiddenExp);
  7040. case XML_EXP_ATOM:
  7041. if (sub->type == XML_EXP_ATOM) {
  7042. /* TODO: handle wildcards */
  7043. if (exp->exp_str == sub->exp_str) {
  7044. #ifdef DEBUG_DERIV
  7045. printf("Atom match -> Empty\n");
  7046. #endif
  7047. return(emptyExp);
  7048. }
  7049. #ifdef DEBUG_DERIV
  7050. printf("Atom mismatch -> Forbid\n");
  7051. #endif
  7052. return(forbiddenExp);
  7053. }
  7054. if ((sub->type == XML_EXP_COUNT) &&
  7055. (sub->exp_max == 1) &&
  7056. (sub->exp_left->type == XML_EXP_ATOM)) {
  7057. /* TODO: handle wildcards */
  7058. if (exp->exp_str == sub->exp_left->exp_str) {
  7059. #ifdef DEBUG_DERIV
  7060. printf("Atom match -> Empty\n");
  7061. #endif
  7062. return(emptyExp);
  7063. }
  7064. #ifdef DEBUG_DERIV
  7065. printf("Atom mismatch -> Forbid\n");
  7066. #endif
  7067. return(forbiddenExp);
  7068. }
  7069. #ifdef DEBUG_DERIV
  7070. printf("Complex exp vs Atom -> Forbid\n");
  7071. #endif
  7072. return(forbiddenExp);
  7073. case XML_EXP_SEQ:
  7074. /* try to get the sequence consumed only if possible */
  7075. if (xmlExpCheckCard(exp->exp_left, sub)) {
  7076. /* See if the sequence can be consumed directly */
  7077. #ifdef DEBUG_DERIV
  7078. printf("Seq trying left only\n");
  7079. #endif
  7080. ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
  7081. if ((ret != forbiddenExp) && (ret != NULL)) {
  7082. #ifdef DEBUG_DERIV
  7083. printf("Seq trying left only worked\n");
  7084. #endif
  7085. /*
  7086. * TODO: assumption here that we are determinist
  7087. * i.e. we won't get to a nillable exp left
  7088. * subset which could be matched by the right
  7089. * part too.
  7090. * e.g.: (a | b)+,(a | c) and 'a+,a'
  7091. */
  7092. exp->exp_right->ref++;
  7093. return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
  7094. exp->exp_right, NULL, 0, 0));
  7095. }
  7096. #ifdef DEBUG_DERIV
  7097. } else {
  7098. printf("Seq: left too short\n");
  7099. #endif
  7100. }
  7101. /* Try instead to decompose */
  7102. if (sub->type == XML_EXP_COUNT) {
  7103. int min, max;
  7104. #ifdef DEBUG_DERIV
  7105. printf("Seq: sub is a count\n");
  7106. #endif
  7107. ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
  7108. if (ret == NULL)
  7109. return(NULL);
  7110. if (ret != forbiddenExp) {
  7111. #ifdef DEBUG_DERIV
  7112. printf("Seq , Count match on left\n");
  7113. #endif
  7114. if (sub->exp_max < 0)
  7115. max = -1;
  7116. else
  7117. max = sub->exp_max -1;
  7118. if (sub->exp_min > 0)
  7119. min = sub->exp_min -1;
  7120. else
  7121. min = 0;
  7122. exp->exp_right->ref++;
  7123. tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
  7124. exp->exp_right, NULL, 0, 0);
  7125. if (tmp == NULL)
  7126. return(NULL);
  7127. sub->exp_left->ref++;
  7128. tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
  7129. sub->exp_left, NULL, NULL, min, max);
  7130. if (tmp2 == NULL) {
  7131. xmlExpFree(ctxt, tmp);
  7132. return(NULL);
  7133. }
  7134. ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
  7135. xmlExpFree(ctxt, tmp);
  7136. xmlExpFree(ctxt, tmp2);
  7137. return(ret);
  7138. }
  7139. }
  7140. /* we made no progress on structured operations */
  7141. break;
  7142. case XML_EXP_OR:
  7143. #ifdef DEBUG_DERIV
  7144. printf("Or , trying both side\n");
  7145. #endif
  7146. ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
  7147. if (ret == NULL)
  7148. return(NULL);
  7149. tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
  7150. if (tmp == NULL) {
  7151. xmlExpFree(ctxt, ret);
  7152. return(NULL);
  7153. }
  7154. return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
  7155. case XML_EXP_COUNT: {
  7156. int min, max;
  7157. if (sub->type == XML_EXP_COUNT) {
  7158. /*
  7159. * Try to see if the loop is completely subsumed
  7160. */
  7161. tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
  7162. if (tmp == NULL)
  7163. return(NULL);
  7164. if (tmp == forbiddenExp) {
  7165. int mult;
  7166. #ifdef DEBUG_DERIV
  7167. printf("Count, Count inner don't subsume\n");
  7168. #endif
  7169. mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
  7170. NULL, &tmp);
  7171. if (mult <= 0) {
  7172. #ifdef DEBUG_DERIV
  7173. printf("Count, Count not multiple => forbidden\n");
  7174. #endif
  7175. return(forbiddenExp);
  7176. }
  7177. if (sub->exp_max == -1) {
  7178. max = -1;
  7179. if (exp->exp_max == -1) {
  7180. if (exp->exp_min <= sub->exp_min * mult)
  7181. min = 0;
  7182. else
  7183. min = exp->exp_min - sub->exp_min * mult;
  7184. } else {
  7185. #ifdef DEBUG_DERIV
  7186. printf("Count, Count finite can't subsume infinite\n");
  7187. #endif
  7188. xmlExpFree(ctxt, tmp);
  7189. return(forbiddenExp);
  7190. }
  7191. } else {
  7192. if (exp->exp_max == -1) {
  7193. #ifdef DEBUG_DERIV
  7194. printf("Infinite loop consume mult finite loop\n");
  7195. #endif
  7196. if (exp->exp_min > sub->exp_min * mult) {
  7197. max = -1;
  7198. min = exp->exp_min - sub->exp_min * mult;
  7199. } else {
  7200. max = -1;
  7201. min = 0;
  7202. }
  7203. } else {
  7204. if (exp->exp_max < sub->exp_max * mult) {
  7205. #ifdef DEBUG_DERIV
  7206. printf("loops max mult mismatch => forbidden\n");
  7207. #endif
  7208. xmlExpFree(ctxt, tmp);
  7209. return(forbiddenExp);
  7210. }
  7211. if (sub->exp_max * mult > exp->exp_min)
  7212. min = 0;
  7213. else
  7214. min = exp->exp_min - sub->exp_max * mult;
  7215. max = exp->exp_max - sub->exp_max * mult;
  7216. }
  7217. }
  7218. } else if (!IS_NILLABLE(tmp)) {
  7219. /*
  7220. * TODO: loop here to try to grow if working on finite
  7221. * blocks.
  7222. */
  7223. #ifdef DEBUG_DERIV
  7224. printf("Count, Count remain not nillable => forbidden\n");
  7225. #endif
  7226. xmlExpFree(ctxt, tmp);
  7227. return(forbiddenExp);
  7228. } else if (sub->exp_max == -1) {
  7229. if (exp->exp_max == -1) {
  7230. if (exp->exp_min <= sub->exp_min) {
  7231. #ifdef DEBUG_DERIV
  7232. printf("Infinite loops Okay => COUNT(0,Inf)\n");
  7233. #endif
  7234. max = -1;
  7235. min = 0;
  7236. } else {
  7237. #ifdef DEBUG_DERIV
  7238. printf("Infinite loops min => Count(X,Inf)\n");
  7239. #endif
  7240. max = -1;
  7241. min = exp->exp_min - sub->exp_min;
  7242. }
  7243. } else if (exp->exp_min > sub->exp_min) {
  7244. #ifdef DEBUG_DERIV
  7245. printf("loops min mismatch 1 => forbidden ???\n");
  7246. #endif
  7247. xmlExpFree(ctxt, tmp);
  7248. return(forbiddenExp);
  7249. } else {
  7250. max = -1;
  7251. min = 0;
  7252. }
  7253. } else {
  7254. if (exp->exp_max == -1) {
  7255. #ifdef DEBUG_DERIV
  7256. printf("Infinite loop consume finite loop\n");
  7257. #endif
  7258. if (exp->exp_min > sub->exp_min) {
  7259. max = -1;
  7260. min = exp->exp_min - sub->exp_min;
  7261. } else {
  7262. max = -1;
  7263. min = 0;
  7264. }
  7265. } else {
  7266. if (exp->exp_max < sub->exp_max) {
  7267. #ifdef DEBUG_DERIV
  7268. printf("loops max mismatch => forbidden\n");
  7269. #endif
  7270. xmlExpFree(ctxt, tmp);
  7271. return(forbiddenExp);
  7272. }
  7273. if (sub->exp_max > exp->exp_min)
  7274. min = 0;
  7275. else
  7276. min = exp->exp_min - sub->exp_max;
  7277. max = exp->exp_max - sub->exp_max;
  7278. }
  7279. }
  7280. #ifdef DEBUG_DERIV
  7281. printf("loops match => SEQ(COUNT())\n");
  7282. #endif
  7283. exp->exp_left->ref++;
  7284. tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
  7285. NULL, NULL, min, max);
  7286. if (tmp2 == NULL) {
  7287. return(NULL);
  7288. }
  7289. ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
  7290. NULL, 0, 0);
  7291. return(ret);
  7292. }
  7293. tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
  7294. if (tmp == NULL)
  7295. return(NULL);
  7296. if (tmp == forbiddenExp) {
  7297. #ifdef DEBUG_DERIV
  7298. printf("loop mismatch => forbidden\n");
  7299. #endif
  7300. return(forbiddenExp);
  7301. }
  7302. if (exp->exp_min > 0)
  7303. min = exp->exp_min - 1;
  7304. else
  7305. min = 0;
  7306. if (exp->exp_max < 0)
  7307. max = -1;
  7308. else
  7309. max = exp->exp_max - 1;
  7310. #ifdef DEBUG_DERIV
  7311. printf("loop match => SEQ(COUNT())\n");
  7312. #endif
  7313. exp->exp_left->ref++;
  7314. tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
  7315. NULL, NULL, min, max);
  7316. if (tmp2 == NULL)
  7317. return(NULL);
  7318. ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
  7319. NULL, 0, 0);
  7320. return(ret);
  7321. }
  7322. }
  7323. #ifdef DEBUG_DERIV
  7324. printf("Fallback to derivative\n");
  7325. #endif
  7326. if (IS_NILLABLE(sub)) {
  7327. if (!(IS_NILLABLE(exp)))
  7328. return(forbiddenExp);
  7329. else
  7330. ret = emptyExp;
  7331. } else
  7332. ret = NULL;
  7333. /*
  7334. * here the structured derivation made no progress so
  7335. * we use the default token based derivation to force one more step
  7336. */
  7337. if (ctxt->tabSize == 0)
  7338. ctxt->tabSize = 40;
  7339. tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
  7340. sizeof(const xmlChar *));
  7341. if (tab == NULL) {
  7342. return(NULL);
  7343. }
  7344. /*
  7345. * collect all the strings accepted by the subexpression on input
  7346. */
  7347. len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
  7348. while (len < 0) {
  7349. const xmlChar **temp;
  7350. temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
  7351. sizeof(const xmlChar *));
  7352. if (temp == NULL) {
  7353. xmlFree((xmlChar **) tab);
  7354. return(NULL);
  7355. }
  7356. tab = temp;
  7357. ctxt->tabSize *= 2;
  7358. len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
  7359. }
  7360. for (i = 0;i < len;i++) {
  7361. tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
  7362. if ((tmp == NULL) || (tmp == forbiddenExp)) {
  7363. xmlExpFree(ctxt, ret);
  7364. xmlFree((xmlChar **) tab);
  7365. return(tmp);
  7366. }
  7367. tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
  7368. if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
  7369. xmlExpFree(ctxt, tmp);
  7370. xmlExpFree(ctxt, ret);
  7371. xmlFree((xmlChar **) tab);
  7372. return(tmp);
  7373. }
  7374. tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
  7375. xmlExpFree(ctxt, tmp);
  7376. xmlExpFree(ctxt, tmp2);
  7377. if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
  7378. xmlExpFree(ctxt, ret);
  7379. xmlFree((xmlChar **) tab);
  7380. return(tmp3);
  7381. }
  7382. if (ret == NULL)
  7383. ret = tmp3;
  7384. else {
  7385. ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
  7386. if (ret == NULL) {
  7387. xmlFree((xmlChar **) tab);
  7388. return(NULL);
  7389. }
  7390. }
  7391. }
  7392. xmlFree((xmlChar **) tab);
  7393. return(ret);
  7394. }
  7395. /**
  7396. * xmlExpExpDerive:
  7397. * @ctxt: the expressions context
  7398. * @exp: the englobing expression
  7399. * @sub: the subexpression
  7400. *
  7401. * Evaluates the expression resulting from @exp consuming a sub expression @sub
  7402. * Based on algebraic derivation and sometimes direct Brzozowski derivation
  7403. * it usually takes less than linear time and can handle expressions generating
  7404. * infinite languages.
  7405. *
  7406. * Returns the resulting expression or NULL in case of internal error, the
  7407. * result must be freed
  7408. */
  7409. xmlExpNodePtr
  7410. xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
  7411. if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
  7412. return(NULL);
  7413. /*
  7414. * O(1) speedups
  7415. */
  7416. if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
  7417. #ifdef DEBUG_DERIV
  7418. printf("Sub nillable and not exp : can't subsume\n");
  7419. #endif
  7420. return(forbiddenExp);
  7421. }
  7422. if (xmlExpCheckCard(exp, sub) == 0) {
  7423. #ifdef DEBUG_DERIV
  7424. printf("sub generate longer sequences than exp : can't subsume\n");
  7425. #endif
  7426. return(forbiddenExp);
  7427. }
  7428. return(xmlExpExpDeriveInt(ctxt, exp, sub));
  7429. }
  7430. /**
  7431. * xmlExpSubsume:
  7432. * @ctxt: the expressions context
  7433. * @exp: the englobing expression
  7434. * @sub: the subexpression
  7435. *
  7436. * Check whether @exp accepts all the languages accepted by @sub
  7437. * the input being a subexpression.
  7438. *
  7439. * Returns 1 if true 0 if false and -1 in case of failure.
  7440. */
  7441. int
  7442. xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
  7443. xmlExpNodePtr tmp;
  7444. if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
  7445. return(-1);
  7446. /*
  7447. * TODO: speedup by checking the language of sub is a subset of the
  7448. * language of exp
  7449. */
  7450. /*
  7451. * O(1) speedups
  7452. */
  7453. if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
  7454. #ifdef DEBUG_DERIV
  7455. printf("Sub nillable and not exp : can't subsume\n");
  7456. #endif
  7457. return(0);
  7458. }
  7459. if (xmlExpCheckCard(exp, sub) == 0) {
  7460. #ifdef DEBUG_DERIV
  7461. printf("sub generate longer sequences than exp : can't subsume\n");
  7462. #endif
  7463. return(0);
  7464. }
  7465. tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
  7466. #ifdef DEBUG_DERIV
  7467. printf("Result derivation :\n");
  7468. PRINT_EXP(tmp);
  7469. #endif
  7470. if (tmp == NULL)
  7471. return(-1);
  7472. if (tmp == forbiddenExp)
  7473. return(0);
  7474. if (tmp == emptyExp)
  7475. return(1);
  7476. if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
  7477. xmlExpFree(ctxt, tmp);
  7478. return(1);
  7479. }
  7480. xmlExpFree(ctxt, tmp);
  7481. return(0);
  7482. }
  7483. /************************************************************************
  7484. * *
  7485. * Parsing expression *
  7486. * *
  7487. ************************************************************************/
  7488. static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
  7489. #undef CUR
  7490. #define CUR (*ctxt->cur)
  7491. #undef NEXT
  7492. #define NEXT ctxt->cur++;
  7493. #undef IS_BLANK
  7494. #define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
  7495. #define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
  7496. static int
  7497. xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
  7498. int ret = 0;
  7499. SKIP_BLANKS
  7500. if (CUR == '*') {
  7501. NEXT
  7502. return(-1);
  7503. }
  7504. if ((CUR < '0') || (CUR > '9'))
  7505. return(-1);
  7506. while ((CUR >= '0') && (CUR <= '9')) {
  7507. ret = ret * 10 + (CUR - '0');
  7508. NEXT
  7509. }
  7510. return(ret);
  7511. }
  7512. static xmlExpNodePtr
  7513. xmlExpParseOr(xmlExpCtxtPtr ctxt) {
  7514. const char *base;
  7515. xmlExpNodePtr ret;
  7516. const xmlChar *val;
  7517. SKIP_BLANKS
  7518. base = ctxt->cur;
  7519. if (*ctxt->cur == '(') {
  7520. NEXT
  7521. ret = xmlExpParseExpr(ctxt);
  7522. SKIP_BLANKS
  7523. if (*ctxt->cur != ')') {
  7524. fprintf(stderr, "unbalanced '(' : %s\n", base);
  7525. xmlExpFree(ctxt, ret);
  7526. return(NULL);
  7527. }
  7528. NEXT;
  7529. SKIP_BLANKS
  7530. goto parse_quantifier;
  7531. }
  7532. while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
  7533. (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
  7534. (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
  7535. NEXT;
  7536. val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
  7537. if (val == NULL)
  7538. return(NULL);
  7539. ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
  7540. if (ret == NULL)
  7541. return(NULL);
  7542. SKIP_BLANKS
  7543. parse_quantifier:
  7544. if (CUR == '{') {
  7545. int min, max;
  7546. NEXT
  7547. min = xmlExpParseNumber(ctxt);
  7548. if (min < 0) {
  7549. xmlExpFree(ctxt, ret);
  7550. return(NULL);
  7551. }
  7552. SKIP_BLANKS
  7553. if (CUR == ',') {
  7554. NEXT
  7555. max = xmlExpParseNumber(ctxt);
  7556. SKIP_BLANKS
  7557. } else
  7558. max = min;
  7559. if (CUR != '}') {
  7560. xmlExpFree(ctxt, ret);
  7561. return(NULL);
  7562. }
  7563. NEXT
  7564. ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
  7565. min, max);
  7566. SKIP_BLANKS
  7567. } else if (CUR == '?') {
  7568. NEXT
  7569. ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
  7570. 0, 1);
  7571. SKIP_BLANKS
  7572. } else if (CUR == '+') {
  7573. NEXT
  7574. ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
  7575. 1, -1);
  7576. SKIP_BLANKS
  7577. } else if (CUR == '*') {
  7578. NEXT
  7579. ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
  7580. 0, -1);
  7581. SKIP_BLANKS
  7582. }
  7583. return(ret);
  7584. }
  7585. static xmlExpNodePtr
  7586. xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
  7587. xmlExpNodePtr ret, right;
  7588. ret = xmlExpParseOr(ctxt);
  7589. SKIP_BLANKS
  7590. while (CUR == '|') {
  7591. NEXT
  7592. right = xmlExpParseOr(ctxt);
  7593. if (right == NULL) {
  7594. xmlExpFree(ctxt, ret);
  7595. return(NULL);
  7596. }
  7597. ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
  7598. if (ret == NULL)
  7599. return(NULL);
  7600. }
  7601. return(ret);
  7602. }
  7603. static xmlExpNodePtr
  7604. xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
  7605. xmlExpNodePtr ret, right;
  7606. ret = xmlExpParseSeq(ctxt);
  7607. SKIP_BLANKS
  7608. while (CUR == ',') {
  7609. NEXT
  7610. right = xmlExpParseSeq(ctxt);
  7611. if (right == NULL) {
  7612. xmlExpFree(ctxt, ret);
  7613. return(NULL);
  7614. }
  7615. ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
  7616. if (ret == NULL)
  7617. return(NULL);
  7618. }
  7619. return(ret);
  7620. }
  7621. /**
  7622. * xmlExpParse:
  7623. * @ctxt: the expressions context
  7624. * @expr: the 0 terminated string
  7625. *
  7626. * Minimal parser for regexps, it understand the following constructs
  7627. * - string terminals
  7628. * - choice operator |
  7629. * - sequence operator ,
  7630. * - subexpressions (...)
  7631. * - usual cardinality operators + * and ?
  7632. * - finite sequences { min, max }
  7633. * - infinite sequences { min, * }
  7634. * There is minimal checkings made especially no checking on strings values
  7635. *
  7636. * Returns a new expression or NULL in case of failure
  7637. */
  7638. xmlExpNodePtr
  7639. xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
  7640. xmlExpNodePtr ret;
  7641. ctxt->expr = expr;
  7642. ctxt->cur = expr;
  7643. ret = xmlExpParseExpr(ctxt);
  7644. SKIP_BLANKS
  7645. if (*ctxt->cur != 0) {
  7646. xmlExpFree(ctxt, ret);
  7647. return(NULL);
  7648. }
  7649. return(ret);
  7650. }
  7651. static void
  7652. xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
  7653. xmlExpNodePtr c;
  7654. if (expr == NULL) return;
  7655. if (glob) xmlBufferWriteChar(buf, "(");
  7656. switch (expr->type) {
  7657. case XML_EXP_EMPTY:
  7658. xmlBufferWriteChar(buf, "empty");
  7659. break;
  7660. case XML_EXP_FORBID:
  7661. xmlBufferWriteChar(buf, "forbidden");
  7662. break;
  7663. case XML_EXP_ATOM:
  7664. xmlBufferWriteCHAR(buf, expr->exp_str);
  7665. break;
  7666. case XML_EXP_SEQ:
  7667. c = expr->exp_left;
  7668. if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
  7669. xmlExpDumpInt(buf, c, 1);
  7670. else
  7671. xmlExpDumpInt(buf, c, 0);
  7672. xmlBufferWriteChar(buf, " , ");
  7673. c = expr->exp_right;
  7674. if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
  7675. xmlExpDumpInt(buf, c, 1);
  7676. else
  7677. xmlExpDumpInt(buf, c, 0);
  7678. break;
  7679. case XML_EXP_OR:
  7680. c = expr->exp_left;
  7681. if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
  7682. xmlExpDumpInt(buf, c, 1);
  7683. else
  7684. xmlExpDumpInt(buf, c, 0);
  7685. xmlBufferWriteChar(buf, " | ");
  7686. c = expr->exp_right;
  7687. if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
  7688. xmlExpDumpInt(buf, c, 1);
  7689. else
  7690. xmlExpDumpInt(buf, c, 0);
  7691. break;
  7692. case XML_EXP_COUNT: {
  7693. char rep[40];
  7694. c = expr->exp_left;
  7695. if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
  7696. xmlExpDumpInt(buf, c, 1);
  7697. else
  7698. xmlExpDumpInt(buf, c, 0);
  7699. if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
  7700. rep[0] = '?';
  7701. rep[1] = 0;
  7702. } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
  7703. rep[0] = '*';
  7704. rep[1] = 0;
  7705. } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
  7706. rep[0] = '+';
  7707. rep[1] = 0;
  7708. } else if (expr->exp_max == expr->exp_min) {
  7709. snprintf(rep, 39, "{%d}", expr->exp_min);
  7710. } else if (expr->exp_max < 0) {
  7711. snprintf(rep, 39, "{%d,inf}", expr->exp_min);
  7712. } else {
  7713. snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
  7714. }
  7715. rep[39] = 0;
  7716. xmlBufferWriteChar(buf, rep);
  7717. break;
  7718. }
  7719. default:
  7720. fprintf(stderr, "Error in tree\n");
  7721. }
  7722. if (glob)
  7723. xmlBufferWriteChar(buf, ")");
  7724. }
  7725. /**
  7726. * xmlExpDump:
  7727. * @buf: a buffer to receive the output
  7728. * @expr: the compiled expression
  7729. *
  7730. * Serialize the expression as compiled to the buffer
  7731. */
  7732. void
  7733. xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
  7734. if ((buf == NULL) || (expr == NULL))
  7735. return;
  7736. xmlExpDumpInt(buf, expr, 0);
  7737. }
  7738. /**
  7739. * xmlExpMaxToken:
  7740. * @expr: a compiled expression
  7741. *
  7742. * Indicate the maximum number of input a expression can accept
  7743. *
  7744. * Returns the maximum length or -1 in case of error
  7745. */
  7746. int
  7747. xmlExpMaxToken(xmlExpNodePtr expr) {
  7748. if (expr == NULL)
  7749. return(-1);
  7750. return(expr->c_max);
  7751. }
  7752. /**
  7753. * xmlExpCtxtNbNodes:
  7754. * @ctxt: an expression context
  7755. *
  7756. * Debugging facility provides the number of allocated nodes at a that point
  7757. *
  7758. * Returns the number of nodes in use or -1 in case of error
  7759. */
  7760. int
  7761. xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
  7762. if (ctxt == NULL)
  7763. return(-1);
  7764. return(ctxt->nb_nodes);
  7765. }
  7766. /**
  7767. * xmlExpCtxtNbCons:
  7768. * @ctxt: an expression context
  7769. *
  7770. * Debugging facility provides the number of allocated nodes over lifetime
  7771. *
  7772. * Returns the number of nodes ever allocated or -1 in case of error
  7773. */
  7774. int
  7775. xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
  7776. if (ctxt == NULL)
  7777. return(-1);
  7778. return(ctxt->nb_cons);
  7779. }
  7780. #endif /* LIBXML_EXPR_ENABLED */
  7781. #define bottom_xmlregexp
  7782. #include "elfgcchack.h"
  7783. #endif /* LIBXML_REGEXP_ENABLED */