parsetree.cpp 59 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139
  1. /*
  2. * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
  3. */
  4. /* This file is part of Ragel.
  5. *
  6. * Ragel is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Ragel is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Ragel; if not, write to the Free Software
  18. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. */
  20. #include <iostream>
  21. #include <iomanip>
  22. #include <errno.h>
  23. #include <limits.h>
  24. #include <stdlib.h>
  25. /* Parsing. */
  26. #include "ragel.h"
  27. #include "rlparse.h"
  28. #include "parsetree.h"
  29. using namespace std;
  30. ostream &operator<<( ostream &out, const NameRef &nameRef );
  31. ostream &operator<<( ostream &out, const NameInst &nameInst );
  32. /* Convert the literal string which comes in from the scanner into an array of
  33. * characters with escapes and options interpreted. Also null terminates the
  34. * string. Though this null termination should not be relied on for
  35. * interpreting literals in the parser because the string may contain \0 */
  36. char *prepareLitString( const InputLoc &loc, const char *data, long length,
  37. long &resLen, bool &caseInsensitive )
  38. {
  39. char *resData = new char[length+1];
  40. caseInsensitive = false;
  41. const char *src = data + 1;
  42. const char *end = data + length - 1;
  43. while ( *end != '\'' && *end != '\"' ) {
  44. if ( *end == 'i' )
  45. caseInsensitive = true;
  46. else {
  47. error( loc ) << "literal string '" << *end <<
  48. "' option not supported" << endl;
  49. }
  50. end -= 1;
  51. }
  52. char *dest = resData;
  53. long len = 0;
  54. while ( src != end ) {
  55. if ( *src == '\\' ) {
  56. switch ( src[1] ) {
  57. case '0': dest[len++] = '\0'; break;
  58. case 'a': dest[len++] = '\a'; break;
  59. case 'b': dest[len++] = '\b'; break;
  60. case 't': dest[len++] = '\t'; break;
  61. case 'n': dest[len++] = '\n'; break;
  62. case 'v': dest[len++] = '\v'; break;
  63. case 'f': dest[len++] = '\f'; break;
  64. case 'r': dest[len++] = '\r'; break;
  65. case '\n': break;
  66. default: dest[len++] = src[1]; break;
  67. }
  68. src += 2;
  69. }
  70. else {
  71. dest[len++] = *src++;
  72. }
  73. }
  74. resLen = len;
  75. resData[resLen] = 0;
  76. return resData;
  77. }
  78. FsmAp *VarDef::walk( ParseData *pd )
  79. {
  80. /* We enter into a new name scope. */
  81. NameFrame nameFrame = pd->enterNameScope( true, 1 );
  82. /* Recurse on the expression. */
  83. FsmAp *rtnVal = machineDef->walk( pd );
  84. /* Do the tranfer of local error actions. */
  85. LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
  86. if ( localErrDictEl != 0 ) {
  87. for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
  88. rtnVal->transferErrorActions( state, localErrDictEl->value );
  89. }
  90. /* If the expression below is a join operation with multiple expressions
  91. * then it just had epsilon transisions resolved. If it is a join
  92. * with only a single expression then run the epsilon op now. */
  93. if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
  94. rtnVal->epsilonOp();
  95. /* We can now unset entry points that are not longer used. */
  96. pd->unsetObsoleteEntries( rtnVal );
  97. /* If the name of the variable is referenced then add the entry point to
  98. * the graph. */
  99. if ( pd->curNameInst->numRefs > 0 )
  100. rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
  101. /* Pop the name scope. */
  102. pd->popNameScope( nameFrame );
  103. return rtnVal;
  104. }
  105. void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
  106. {
  107. /* The variable definition enters a new scope. */
  108. NameInst *prevNameInst = pd->curNameInst;
  109. pd->curNameInst = pd->addNameInst( loc, name, false );
  110. if ( machineDef->type == MachineDef::LongestMatchType )
  111. pd->curNameInst->isLongestMatch = true;
  112. /* Recurse. */
  113. machineDef->makeNameTree( pd );
  114. /* The name scope ends, pop the name instantiation. */
  115. pd->curNameInst = prevNameInst;
  116. }
  117. void VarDef::resolveNameRefs( ParseData *pd )
  118. {
  119. /* Entering into a new scope. */
  120. NameFrame nameFrame = pd->enterNameScope( true, 1 );
  121. /* Recurse. */
  122. machineDef->resolveNameRefs( pd );
  123. /* The name scope ends, pop the name instantiation. */
  124. pd->popNameScope( nameFrame );
  125. }
  126. InputLoc LongestMatchPart::getLoc()
  127. {
  128. return action != 0 ? action->loc : semiLoc;
  129. }
  130. /*
  131. * If there are any LMs then all of the following entry points must reset
  132. * tokstart:
  133. *
  134. * 1. fentry(StateRef)
  135. * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
  136. * 3. targt of any transition that has an fcall (the return loc).
  137. * 4. start state of all longest match routines.
  138. */
  139. Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
  140. const char *name, InlineList *inlineList )
  141. {
  142. Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
  143. action->actionRefs.append( pd->curNameInst );
  144. pd->actionList.append( action );
  145. action->isLmAction = true;
  146. return action;
  147. }
  148. void LongestMatch::makeActions( ParseData *pd )
  149. {
  150. /* Make actions that set the action id. */
  151. for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
  152. /* For each part create actions for setting the match type. We need
  153. * to do this so that the actions will go into the actionIndex. */
  154. InlineList *inlineList = new InlineList;
  155. inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
  156. InlineItem::LmSetActId ) );
  157. char *actName = new char[50];
  158. sprintf( actName, "store%i", lmi->longestMatchId );
  159. lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
  160. }
  161. /* Make actions that execute the user action and restart on the last
  162. * character. */
  163. for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
  164. /* For each part create actions for setting the match type. We need
  165. * to do this so that the actions will go into the actionIndex. */
  166. InlineList *inlineList = new InlineList;
  167. inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
  168. InlineItem::LmOnLast ) );
  169. char *actName = new char[50];
  170. sprintf( actName, "last%i", lmi->longestMatchId );
  171. lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
  172. }
  173. /* Make actions that execute the user action and restart on the next
  174. * character. These actions will set tokend themselves (it is the current
  175. * char). */
  176. for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
  177. /* For each part create actions for setting the match type. We need
  178. * to do this so that the actions will go into the actionIndex. */
  179. InlineList *inlineList = new InlineList;
  180. inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
  181. InlineItem::LmOnNext ) );
  182. char *actName = new char[50];
  183. sprintf( actName, "next%i", lmi->longestMatchId );
  184. lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
  185. }
  186. /* Make actions that execute the user action and restart at tokend. These
  187. * actions execute some time after matching the last char. */
  188. for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
  189. /* For each part create actions for setting the match type. We need
  190. * to do this so that the actions will go into the actionIndex. */
  191. InlineList *inlineList = new InlineList;
  192. inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
  193. InlineItem::LmOnLagBehind ) );
  194. char *actName = new char[50];
  195. sprintf( actName, "lag%i", lmi->longestMatchId );
  196. lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
  197. }
  198. InputLoc loc;
  199. loc.line = 1;
  200. loc.col = 1;
  201. loc.fileName = "NONE";
  202. /* Create the error action. */
  203. InlineList *il6 = new InlineList;
  204. il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
  205. lmActSelect = newAction( pd, loc, "switch", il6 );
  206. }
  207. void LongestMatch::findName( ParseData *pd )
  208. {
  209. NameInst *nameInst = pd->curNameInst;
  210. while ( nameInst->name == 0 ) {
  211. nameInst = nameInst->parent;
  212. /* Since every machine must must have a name, we should always find a
  213. * name for the longest match. */
  214. assert( nameInst != 0 );
  215. }
  216. name = nameInst->name;
  217. }
  218. void LongestMatch::makeNameTree( ParseData *pd )
  219. {
  220. /* Create an anonymous scope for the longest match. Will be used for
  221. * restarting machine after matching a token. */
  222. NameInst *prevNameInst = pd->curNameInst;
  223. pd->curNameInst = pd->addNameInst( loc, 0, false );
  224. /* Recurse into all parts of the longest match operator. */
  225. for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
  226. lmi->join->makeNameTree( pd );
  227. /* Traverse the name tree upwards to find a name for this lm. */
  228. findName( pd );
  229. /* Also make the longest match's actions at this point. */
  230. makeActions( pd );
  231. /* The name scope ends, pop the name instantiation. */
  232. pd->curNameInst = prevNameInst;
  233. }
  234. void LongestMatch::resolveNameRefs( ParseData *pd )
  235. {
  236. /* The longest match gets its own name scope. */
  237. NameFrame nameFrame = pd->enterNameScope( true, 1 );
  238. /* Take an action reference for each longest match item and recurse. */
  239. for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
  240. /* Record the reference if the item has an action. */
  241. if ( lmi->action != 0 )
  242. lmi->action->actionRefs.append( pd->localNameScope );
  243. /* Recurse down the join. */
  244. lmi->join->resolveNameRefs( pd );
  245. }
  246. /* The name scope ends, pop the name instantiation. */
  247. pd->popNameScope( nameFrame );
  248. }
  249. void LongestMatch::restart( FsmAp *graph, TransAp *trans )
  250. {
  251. StateAp *fromState = trans->fromState;
  252. graph->detachTrans( fromState, trans->toState, trans );
  253. graph->attachTrans( fromState, graph->startState, trans );
  254. }
  255. void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
  256. {
  257. graph->markReachableFromHereStopFinal( graph->startState );
  258. for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
  259. if ( ms->stateBits & STB_ISMARKED ) {
  260. ms->lmItemSet.insert( 0 );
  261. ms->stateBits &= ~ STB_ISMARKED;
  262. }
  263. }
  264. /* Transfer the first item of non-empty lmAction tables to the item sets
  265. * of the states that follow. Exclude states that have no transitions out.
  266. * This must happen on a separate pass so that on each iteration of the
  267. * next pass we have the item set entries from all lmAction tables. */
  268. for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
  269. for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
  270. if ( trans->lmActionTable.length() > 0 ) {
  271. LmActionTableEl *lmAct = trans->lmActionTable.data;
  272. StateAp *toState = trans->toState;
  273. assert( toState );
  274. /* Can only optimize this if there are no transitions out.
  275. * Note there can be out transitions going nowhere with
  276. * actions and they too must inhibit this optimization. */
  277. if ( toState->outList.length() > 0 ) {
  278. /* Fill the item sets. */
  279. graph->markReachableFromHereStopFinal( toState );
  280. for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
  281. if ( ms->stateBits & STB_ISMARKED ) {
  282. ms->lmItemSet.insert( lmAct->value );
  283. ms->stateBits &= ~ STB_ISMARKED;
  284. }
  285. }
  286. }
  287. }
  288. }
  289. }
  290. /* The lmItem sets are now filled, telling us which longest match rules
  291. * can succeed in which states. First determine if we need to make sure
  292. * act is defaulted to zero. We need to do this if there are any states
  293. * with lmItemSet.length() > 1 and NULL is included. That is, that the
  294. * switch may get called when in fact nothing has been matched. */
  295. int maxItemSetLength = 0;
  296. graph->markReachableFromHereStopFinal( graph->startState );
  297. for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
  298. if ( ms->stateBits & STB_ISMARKED ) {
  299. if ( ms->lmItemSet.length() > maxItemSetLength )
  300. maxItemSetLength = ms->lmItemSet.length();
  301. ms->stateBits &= ~ STB_ISMARKED;
  302. }
  303. }
  304. /* The actions executed on starting to match a token. */
  305. graph->isolateStartState();
  306. graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
  307. graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
  308. if ( maxItemSetLength > 1 ) {
  309. /* The longest match action switch may be called when tokens are
  310. * matched, in which case act must be initialized, there must be a
  311. * case to handle the error, and the generated machine will require an
  312. * error state. */
  313. lmSwitchHandlesError = true;
  314. pd->lmRequiresErrorState = true;
  315. graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
  316. }
  317. /* The place to store transitions to restart. It maybe possible for the
  318. * restarting to affect the searching through the graph that follows. For
  319. * now take the safe route and save the list of transitions to restart
  320. * until after all searching is done. */
  321. Vector<TransAp*> restartTrans;
  322. /* Set actions that do immediate token recognition, set the longest match part
  323. * id and set the token ending. */
  324. for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
  325. for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
  326. if ( trans->lmActionTable.length() > 0 ) {
  327. LmActionTableEl *lmAct = trans->lmActionTable.data;
  328. StateAp *toState = trans->toState;
  329. assert( toState );
  330. /* Can only optimize this if there are no transitions out.
  331. * Note there can be out transitions going nowhere with
  332. * actions and they too must inhibit this optimization. */
  333. if ( toState->outList.length() == 0 ) {
  334. /* Can execute the immediate action for the longest match
  335. * part. Redirect the action to the start state.
  336. *
  337. * NOTE: When we need to inhibit on_last due to leaving
  338. * actions the above test suffices. If the state has out
  339. * actions then it will fail because the out action will
  340. * have been transferred to an error transition, which
  341. * makes the outlist non-empty. */
  342. trans->actionTable.setAction( lmAct->key,
  343. lmAct->value->actOnLast );
  344. restartTrans.append( trans );
  345. }
  346. else {
  347. /* Look for non final states that have a non-empty item
  348. * set. If these are present then we need to record the
  349. * end of the token. Also Find the highest item set
  350. * length reachable from here (excluding at transtions to
  351. * final states). */
  352. bool nonFinalNonEmptyItemSet = false;
  353. maxItemSetLength = 0;
  354. graph->markReachableFromHereStopFinal( toState );
  355. for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
  356. if ( ms->stateBits & STB_ISMARKED ) {
  357. if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
  358. nonFinalNonEmptyItemSet = true;
  359. if ( ms->lmItemSet.length() > maxItemSetLength )
  360. maxItemSetLength = ms->lmItemSet.length();
  361. ms->stateBits &= ~ STB_ISMARKED;
  362. }
  363. }
  364. /* If there are reachable states that are not final and
  365. * have non empty item sets or that have an item set
  366. * length greater than one then we need to set tokend
  367. * because the error action that matches the token will
  368. * require it. */
  369. if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
  370. trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
  371. /* Some states may not know which longest match item to
  372. * execute, must set it. */
  373. if ( maxItemSetLength > 1 ) {
  374. /* There are transitions out, another match may come. */
  375. trans->actionTable.setAction( lmAct->key,
  376. lmAct->value->setActId );
  377. }
  378. }
  379. }
  380. }
  381. }
  382. /* Now that all graph searching is done it certainly safe set the
  383. * restarting. It may be safe above, however this must be verified. */
  384. for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
  385. restart( graph, *pt );
  386. int lmErrActionOrd = pd->curActionOrd++;
  387. /* Embed the error for recognizing a char. */
  388. for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
  389. if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
  390. if ( st->isFinState() ) {
  391. /* On error execute the onActNext action, which knows that
  392. * the last character of the token was one back and restart. */
  393. graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
  394. &st->lmItemSet[0]->actOnNext, 1 );
  395. st->eofActionTable.setAction( lmErrActionOrd,
  396. st->lmItemSet[0]->actOnNext );
  397. st->eofTarget = graph->startState;
  398. }
  399. else {
  400. graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
  401. &st->lmItemSet[0]->actLagBehind, 1 );
  402. st->eofActionTable.setAction( lmErrActionOrd,
  403. st->lmItemSet[0]->actLagBehind );
  404. st->eofTarget = graph->startState;
  405. }
  406. }
  407. else if ( st->lmItemSet.length() > 1 ) {
  408. /* Need to use the select. Take note of which items the select
  409. * is needed for so only the necessary actions are included. */
  410. for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
  411. if ( *plmi != 0 )
  412. (*plmi)->inLmSelect = true;
  413. }
  414. /* On error, execute the action select and go to the start state. */
  415. graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
  416. &lmActSelect, 1 );
  417. st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
  418. st->eofTarget = graph->startState;
  419. }
  420. }
  421. /* Finally, the start state should be made final. */
  422. graph->setFinState( graph->startState );
  423. }
  424. void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
  425. {
  426. for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
  427. if ( st->outActionTable.length() > 0 )
  428. graph->setErrorActions( st, st->outActionTable );
  429. }
  430. }
  431. FsmAp *LongestMatch::walk( ParseData *pd )
  432. {
  433. /* The longest match has it's own name scope. */
  434. NameFrame nameFrame = pd->enterNameScope( true, 1 );
  435. /* Make each part of the longest match. */
  436. FsmAp **parts = new FsmAp*[longestMatchList->length()];
  437. LmPartList::Iter lmi = *longestMatchList;
  438. for ( int i = 0; lmi.lte(); lmi++, i++ ) {
  439. /* Create the machine and embed the setting of the longest match id. */
  440. parts[i] = lmi->join->walk( pd );
  441. parts[i]->longMatchAction( pd->curActionOrd++, lmi );
  442. }
  443. /* Before we union the patterns we need to deal with leaving actions. They
  444. * are transfered to error transitions out of the final states (like local
  445. * error actions) and to eof actions. In the scanner we need to forbid
  446. * on_last for any final state that has an leaving action. */
  447. for ( int i = 0; i < longestMatchList->length(); i++ )
  448. transferScannerLeavingActions( parts[i] );
  449. /* Union machines one and up with machine zero. The grammar dictates that
  450. * there will always be at least one part. */
  451. FsmAp *rtnVal = parts[0];
  452. for ( int i = 1; i < longestMatchList->length(); i++ ) {
  453. rtnVal->unionOp( parts[i] );
  454. afterOpMinimize( rtnVal );
  455. }
  456. runLongestMatch( pd, rtnVal );
  457. /* Pop the name scope. */
  458. pd->popNameScope( nameFrame );
  459. delete[] parts;
  460. return rtnVal;
  461. }
  462. FsmAp *MachineDef::walk( ParseData *pd )
  463. {
  464. FsmAp *rtnVal = 0;
  465. switch ( type ) {
  466. case JoinType:
  467. rtnVal = join->walk( pd );
  468. break;
  469. case LongestMatchType:
  470. rtnVal = longestMatch->walk( pd );
  471. break;
  472. case LengthDefType:
  473. condData->lastCondKey.increment();
  474. rtnVal = new FsmAp();
  475. rtnVal->concatFsm( condData->lastCondKey );
  476. break;
  477. }
  478. return rtnVal;
  479. }
  480. void MachineDef::makeNameTree( ParseData *pd )
  481. {
  482. switch ( type ) {
  483. case JoinType:
  484. join->makeNameTree( pd );
  485. break;
  486. case LongestMatchType:
  487. longestMatch->makeNameTree( pd );
  488. break;
  489. case LengthDefType:
  490. break;
  491. }
  492. }
  493. void MachineDef::resolveNameRefs( ParseData *pd )
  494. {
  495. switch ( type ) {
  496. case JoinType:
  497. join->resolveNameRefs( pd );
  498. break;
  499. case LongestMatchType:
  500. longestMatch->resolveNameRefs( pd );
  501. break;
  502. case LengthDefType:
  503. break;
  504. }
  505. }
  506. /* Construct with a location and the first expression. */
  507. Join::Join( const InputLoc &loc, Expression *expr )
  508. :
  509. loc(loc)
  510. {
  511. exprList.append( expr );
  512. }
  513. /* Construct with a location and the first expression. */
  514. Join::Join( Expression *expr )
  515. {
  516. exprList.append( expr );
  517. }
  518. /* Walk an expression node. */
  519. FsmAp *Join::walk( ParseData *pd )
  520. {
  521. if ( exprList.length() > 1 )
  522. return walkJoin( pd );
  523. else
  524. return exprList.head->walk( pd );
  525. }
  526. /* There is a list of expressions to join. */
  527. FsmAp *Join::walkJoin( ParseData *pd )
  528. {
  529. /* We enter into a new name scope. */
  530. NameFrame nameFrame = pd->enterNameScope( true, 1 );
  531. /* Evaluate the machines. */
  532. FsmAp **fsms = new FsmAp*[exprList.length()];
  533. ExprList::Iter expr = exprList;
  534. for ( int e = 0; e < exprList.length(); e++, expr++ )
  535. fsms[e] = expr->walk( pd );
  536. /* Get the start and final names. Final is
  537. * guaranteed to exist, start is not. */
  538. NameInst *startName = pd->curNameInst->start;
  539. NameInst *finalName = pd->curNameInst->final;
  540. int startId = -1;
  541. if ( startName != 0 ) {
  542. /* Take note that there was an implicit link to the start machine. */
  543. pd->localNameScope->referencedNames.append( startName );
  544. startId = startName->id;
  545. }
  546. /* A final id of -1 indicates there is no epsilon that references the
  547. * final state, therefor do not create one or set an entry point to it. */
  548. int finalId = -1;
  549. if ( finalName->numRefs > 0 )
  550. finalId = finalName->id;
  551. /* Join machines 1 and up onto machine 0. */
  552. FsmAp *retFsm = fsms[0];
  553. retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
  554. /* We can now unset entry points that are not longer used. */
  555. pd->unsetObsoleteEntries( retFsm );
  556. /* Pop the name scope. */
  557. pd->popNameScope( nameFrame );
  558. delete[] fsms;
  559. return retFsm;
  560. }
  561. void Join::makeNameTree( ParseData *pd )
  562. {
  563. if ( exprList.length() > 1 ) {
  564. /* Create the new anonymous scope. */
  565. NameInst *prevNameInst = pd->curNameInst;
  566. pd->curNameInst = pd->addNameInst( loc, 0, false );
  567. /* Join scopes need an implicit "final" target. */
  568. pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
  569. pd->nextNameId++, false );
  570. /* Recurse into all expressions in the list. */
  571. for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
  572. expr->makeNameTree( pd );
  573. /* The name scope ends, pop the name instantiation. */
  574. pd->curNameInst = prevNameInst;
  575. }
  576. else {
  577. /* Recurse into the single expression. */
  578. exprList.head->makeNameTree( pd );
  579. }
  580. }
  581. void Join::resolveNameRefs( ParseData *pd )
  582. {
  583. /* Branch on whether or not there is to be a join. */
  584. if ( exprList.length() > 1 ) {
  585. /* The variable definition enters a new scope. */
  586. NameFrame nameFrame = pd->enterNameScope( true, 1 );
  587. /* The join scope must contain a start label. */
  588. NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
  589. if ( resolved.length() > 0 ) {
  590. /* Take the first. */
  591. pd->curNameInst->start = resolved[0];
  592. if ( resolved.length() > 1 ) {
  593. /* Complain about the multiple references. */
  594. error(loc) << "join operation has multiple start labels" << endl;
  595. errorStateLabels( resolved );
  596. }
  597. }
  598. /* Make sure there is a start label. */
  599. if ( pd->curNameInst->start != 0 ) {
  600. /* There is an implicit reference to start name. */
  601. pd->curNameInst->start->numRefs += 1;
  602. }
  603. else {
  604. /* No start label. */
  605. error(loc) << "join operation has no start label" << endl;
  606. }
  607. /* Recurse into all expressions in the list. */
  608. for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
  609. expr->resolveNameRefs( pd );
  610. /* The name scope ends, pop the name instantiation. */
  611. pd->popNameScope( nameFrame );
  612. }
  613. else {
  614. /* Recurse into the single expression. */
  615. exprList.head->resolveNameRefs( pd );
  616. }
  617. }
  618. /* Clean up after an expression node. */
  619. Expression::~Expression()
  620. {
  621. switch ( type ) {
  622. case OrType: case IntersectType: case SubtractType:
  623. case StrongSubtractType:
  624. delete expression;
  625. delete term;
  626. break;
  627. case TermType:
  628. delete term;
  629. break;
  630. case BuiltinType:
  631. break;
  632. }
  633. }
  634. /* Evaluate a single expression node. */
  635. FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
  636. {
  637. FsmAp *rtnVal = 0;
  638. switch ( type ) {
  639. case OrType: {
  640. /* Evaluate the expression. */
  641. rtnVal = expression->walk( pd, false );
  642. /* Evaluate the term. */
  643. FsmAp *rhs = term->walk( pd );
  644. /* Perform union. */
  645. rtnVal->unionOp( rhs );
  646. afterOpMinimize( rtnVal, lastInSeq );
  647. break;
  648. }
  649. case IntersectType: {
  650. /* Evaluate the expression. */
  651. rtnVal = expression->walk( pd );
  652. /* Evaluate the term. */
  653. FsmAp *rhs = term->walk( pd );
  654. /* Perform intersection. */
  655. rtnVal->intersectOp( rhs );
  656. afterOpMinimize( rtnVal, lastInSeq );
  657. break;
  658. }
  659. case SubtractType: {
  660. /* Evaluate the expression. */
  661. rtnVal = expression->walk( pd );
  662. /* Evaluate the term. */
  663. FsmAp *rhs = term->walk( pd );
  664. /* Perform subtraction. */
  665. rtnVal->subtractOp( rhs );
  666. afterOpMinimize( rtnVal, lastInSeq );
  667. break;
  668. }
  669. case StrongSubtractType: {
  670. /* Evaluate the expression. */
  671. rtnVal = expression->walk( pd );
  672. /* Evaluate the term and pad it with any* machines. */
  673. FsmAp *rhs = dotStarFsm( pd );
  674. FsmAp *termFsm = term->walk( pd );
  675. FsmAp *trailAnyStar = dotStarFsm( pd );
  676. rhs->concatOp( termFsm );
  677. rhs->concatOp( trailAnyStar );
  678. /* Perform subtraction. */
  679. rtnVal->subtractOp( rhs );
  680. afterOpMinimize( rtnVal, lastInSeq );
  681. break;
  682. }
  683. case TermType: {
  684. /* Return result of the term. */
  685. rtnVal = term->walk( pd );
  686. break;
  687. }
  688. case BuiltinType: {
  689. /* Duplicate the builtin. */
  690. rtnVal = makeBuiltin( builtin, pd );
  691. break;
  692. }
  693. }
  694. return rtnVal;
  695. }
  696. void Expression::makeNameTree( ParseData *pd )
  697. {
  698. switch ( type ) {
  699. case OrType:
  700. case IntersectType:
  701. case SubtractType:
  702. case StrongSubtractType:
  703. expression->makeNameTree( pd );
  704. term->makeNameTree( pd );
  705. break;
  706. case TermType:
  707. term->makeNameTree( pd );
  708. break;
  709. case BuiltinType:
  710. break;
  711. }
  712. }
  713. void Expression::resolveNameRefs( ParseData *pd )
  714. {
  715. switch ( type ) {
  716. case OrType:
  717. case IntersectType:
  718. case SubtractType:
  719. case StrongSubtractType:
  720. expression->resolveNameRefs( pd );
  721. term->resolveNameRefs( pd );
  722. break;
  723. case TermType:
  724. term->resolveNameRefs( pd );
  725. break;
  726. case BuiltinType:
  727. break;
  728. }
  729. }
  730. /* Clean up after a term node. */
  731. Term::~Term()
  732. {
  733. switch ( type ) {
  734. case ConcatType:
  735. case RightStartType:
  736. case RightFinishType:
  737. case LeftType:
  738. delete term;
  739. delete factorWithAug;
  740. break;
  741. case FactorWithAugType:
  742. delete factorWithAug;
  743. break;
  744. }
  745. }
  746. /* Evaluate a term node. */
  747. FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
  748. {
  749. FsmAp *rtnVal = 0;
  750. switch ( type ) {
  751. case ConcatType: {
  752. /* Evaluate the Term. */
  753. rtnVal = term->walk( pd, false );
  754. /* Evaluate the FactorWithRep. */
  755. FsmAp *rhs = factorWithAug->walk( pd );
  756. /* Perform concatenation. */
  757. rtnVal->concatOp( rhs );
  758. afterOpMinimize( rtnVal, lastInSeq );
  759. break;
  760. }
  761. case RightStartType: {
  762. /* Evaluate the Term. */
  763. rtnVal = term->walk( pd );
  764. /* Evaluate the FactorWithRep. */
  765. FsmAp *rhs = factorWithAug->walk( pd );
  766. /* Set up the priority descriptors. The left machine gets the
  767. * lower priority where as the right get the higher start priority. */
  768. priorDescs[0].key = pd->nextPriorKey++;
  769. priorDescs[0].priority = 0;
  770. rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
  771. /* The start transitions of the right machine gets the higher
  772. * priority. Use the same unique key. */
  773. priorDescs[1].key = priorDescs[0].key;
  774. priorDescs[1].priority = 1;
  775. rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
  776. /* Perform concatenation. */
  777. rtnVal->concatOp( rhs );
  778. afterOpMinimize( rtnVal, lastInSeq );
  779. break;
  780. }
  781. case RightFinishType: {
  782. /* Evaluate the Term. */
  783. rtnVal = term->walk( pd );
  784. /* Evaluate the FactorWithRep. */
  785. FsmAp *rhs = factorWithAug->walk( pd );
  786. /* Set up the priority descriptors. The left machine gets the
  787. * lower priority where as the finishing transitions to the right
  788. * get the higher priority. */
  789. priorDescs[0].key = pd->nextPriorKey++;
  790. priorDescs[0].priority = 0;
  791. rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
  792. /* The finishing transitions of the right machine get the higher
  793. * priority. Use the same unique key. */
  794. priorDescs[1].key = priorDescs[0].key;
  795. priorDescs[1].priority = 1;
  796. rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
  797. /* If the right machine's start state is final we need to guard
  798. * against the left machine persisting by moving through the empty
  799. * string. */
  800. if ( rhs->startState->isFinState() ) {
  801. rhs->startState->outPriorTable.setPrior(
  802. pd->curPriorOrd++, &priorDescs[1] );
  803. }
  804. /* Perform concatenation. */
  805. rtnVal->concatOp( rhs );
  806. afterOpMinimize( rtnVal, lastInSeq );
  807. break;
  808. }
  809. case LeftType: {
  810. /* Evaluate the Term. */
  811. rtnVal = term->walk( pd );
  812. /* Evaluate the FactorWithRep. */
  813. FsmAp *rhs = factorWithAug->walk( pd );
  814. /* Set up the priority descriptors. The left machine gets the
  815. * higher priority. */
  816. priorDescs[0].key = pd->nextPriorKey++;
  817. priorDescs[0].priority = 1;
  818. rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
  819. /* The right machine gets the lower priority. We cannot use
  820. * allTransPrior here in case the start state of the right machine
  821. * is final. It would allow the right machine thread to run along
  822. * with the left if just passing through the start state. Using
  823. * startFsmPrior prevents this. */
  824. priorDescs[1].key = priorDescs[0].key;
  825. priorDescs[1].priority = 0;
  826. rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
  827. /* Perform concatenation. */
  828. rtnVal->concatOp( rhs );
  829. afterOpMinimize( rtnVal, lastInSeq );
  830. break;
  831. }
  832. case FactorWithAugType: {
  833. rtnVal = factorWithAug->walk( pd );
  834. break;
  835. }
  836. }
  837. return rtnVal;
  838. }
  839. void Term::makeNameTree( ParseData *pd )
  840. {
  841. switch ( type ) {
  842. case ConcatType:
  843. case RightStartType:
  844. case RightFinishType:
  845. case LeftType:
  846. term->makeNameTree( pd );
  847. factorWithAug->makeNameTree( pd );
  848. break;
  849. case FactorWithAugType:
  850. factorWithAug->makeNameTree( pd );
  851. break;
  852. }
  853. }
  854. void Term::resolveNameRefs( ParseData *pd )
  855. {
  856. switch ( type ) {
  857. case ConcatType:
  858. case RightStartType:
  859. case RightFinishType:
  860. case LeftType:
  861. term->resolveNameRefs( pd );
  862. factorWithAug->resolveNameRefs( pd );
  863. break;
  864. case FactorWithAugType:
  865. factorWithAug->resolveNameRefs( pd );
  866. break;
  867. }
  868. }
  869. /* Clean up after a factor with augmentation node. */
  870. FactorWithAug::~FactorWithAug()
  871. {
  872. delete factorWithRep;
  873. /* Walk the vector of parser actions, deleting function names. */
  874. /* Clean up priority descriptors. */
  875. if ( priorDescs != 0 )
  876. delete[] priorDescs;
  877. }
  878. void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
  879. {
  880. /* Assign actions. */
  881. for ( int i = 0; i < actions.length(); i++ ) {
  882. switch ( actions[i].type ) {
  883. /* Transition actions. */
  884. case at_start:
  885. graph->startFsmAction( actionOrd[i], actions[i].action );
  886. afterOpMinimize( graph );
  887. break;
  888. case at_all:
  889. graph->allTransAction( actionOrd[i], actions[i].action );
  890. break;
  891. case at_finish:
  892. graph->finishFsmAction( actionOrd[i], actions[i].action );
  893. break;
  894. case at_leave:
  895. graph->leaveFsmAction( actionOrd[i], actions[i].action );
  896. break;
  897. /* Global error actions. */
  898. case at_start_gbl_error:
  899. graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
  900. afterOpMinimize( graph );
  901. break;
  902. case at_all_gbl_error:
  903. graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
  904. break;
  905. case at_final_gbl_error:
  906. graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
  907. break;
  908. case at_not_start_gbl_error:
  909. graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
  910. break;
  911. case at_not_final_gbl_error:
  912. graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
  913. break;
  914. case at_middle_gbl_error:
  915. graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
  916. break;
  917. /* Local error actions. */
  918. case at_start_local_error:
  919. graph->startErrorAction( actionOrd[i], actions[i].action,
  920. actions[i].localErrKey );
  921. afterOpMinimize( graph );
  922. break;
  923. case at_all_local_error:
  924. graph->allErrorAction( actionOrd[i], actions[i].action,
  925. actions[i].localErrKey );
  926. break;
  927. case at_final_local_error:
  928. graph->finalErrorAction( actionOrd[i], actions[i].action,
  929. actions[i].localErrKey );
  930. break;
  931. case at_not_start_local_error:
  932. graph->notStartErrorAction( actionOrd[i], actions[i].action,
  933. actions[i].localErrKey );
  934. break;
  935. case at_not_final_local_error:
  936. graph->notFinalErrorAction( actionOrd[i], actions[i].action,
  937. actions[i].localErrKey );
  938. break;
  939. case at_middle_local_error:
  940. graph->middleErrorAction( actionOrd[i], actions[i].action,
  941. actions[i].localErrKey );
  942. break;
  943. /* EOF actions. */
  944. case at_start_eof:
  945. graph->startEOFAction( actionOrd[i], actions[i].action );
  946. afterOpMinimize( graph );
  947. break;
  948. case at_all_eof:
  949. graph->allEOFAction( actionOrd[i], actions[i].action );
  950. break;
  951. case at_final_eof:
  952. graph->finalEOFAction( actionOrd[i], actions[i].action );
  953. break;
  954. case at_not_start_eof:
  955. graph->notStartEOFAction( actionOrd[i], actions[i].action );
  956. break;
  957. case at_not_final_eof:
  958. graph->notFinalEOFAction( actionOrd[i], actions[i].action );
  959. break;
  960. case at_middle_eof:
  961. graph->middleEOFAction( actionOrd[i], actions[i].action );
  962. break;
  963. /* To State Actions. */
  964. case at_start_to_state:
  965. graph->startToStateAction( actionOrd[i], actions[i].action );
  966. afterOpMinimize( graph );
  967. break;
  968. case at_all_to_state:
  969. graph->allToStateAction( actionOrd[i], actions[i].action );
  970. break;
  971. case at_final_to_state:
  972. graph->finalToStateAction( actionOrd[i], actions[i].action );
  973. break;
  974. case at_not_start_to_state:
  975. graph->notStartToStateAction( actionOrd[i], actions[i].action );
  976. break;
  977. case at_not_final_to_state:
  978. graph->notFinalToStateAction( actionOrd[i], actions[i].action );
  979. break;
  980. case at_middle_to_state:
  981. graph->middleToStateAction( actionOrd[i], actions[i].action );
  982. break;
  983. /* From State Actions. */
  984. case at_start_from_state:
  985. graph->startFromStateAction( actionOrd[i], actions[i].action );
  986. afterOpMinimize( graph );
  987. break;
  988. case at_all_from_state:
  989. graph->allFromStateAction( actionOrd[i], actions[i].action );
  990. break;
  991. case at_final_from_state:
  992. graph->finalFromStateAction( actionOrd[i], actions[i].action );
  993. break;
  994. case at_not_start_from_state:
  995. graph->notStartFromStateAction( actionOrd[i], actions[i].action );
  996. break;
  997. case at_not_final_from_state:
  998. graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
  999. break;
  1000. case at_middle_from_state:
  1001. graph->middleFromStateAction( actionOrd[i], actions[i].action );
  1002. break;
  1003. /* Remaining cases, prevented by the parser. */
  1004. default:
  1005. assert( false );
  1006. break;
  1007. }
  1008. }
  1009. }
  1010. void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
  1011. {
  1012. /* Assign priorities. */
  1013. for ( int i = 0; i < priorityAugs.length(); i++ ) {
  1014. switch ( priorityAugs[i].type ) {
  1015. case at_start:
  1016. graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
  1017. /* Start fsm priorities are a special case that may require
  1018. * minimization afterwards. */
  1019. afterOpMinimize( graph );
  1020. break;
  1021. case at_all:
  1022. graph->allTransPrior( priorOrd[i], &priorDescs[i] );
  1023. break;
  1024. case at_finish:
  1025. graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
  1026. break;
  1027. case at_leave:
  1028. graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
  1029. break;
  1030. default:
  1031. /* Parser Prevents this case. */
  1032. break;
  1033. }
  1034. }
  1035. }
  1036. void FactorWithAug::assignConditions( FsmAp *graph )
  1037. {
  1038. for ( int i = 0; i < conditions.length(); i++ ) {
  1039. switch ( conditions[i].type ) {
  1040. /* Transition actions. */
  1041. case at_start:
  1042. graph->startFsmCondition( conditions[i].action, conditions[i].sense );
  1043. afterOpMinimize( graph );
  1044. break;
  1045. case at_all:
  1046. graph->allTransCondition( conditions[i].action, conditions[i].sense );
  1047. break;
  1048. case at_leave:
  1049. graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
  1050. break;
  1051. default:
  1052. break;
  1053. }
  1054. }
  1055. }
  1056. /* Evaluate a factor with augmentation node. */
  1057. FsmAp *FactorWithAug::walk( ParseData *pd )
  1058. {
  1059. /* Enter into the scopes created for the labels. */
  1060. NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
  1061. /* Make the array of function orderings. */
  1062. int *actionOrd = 0;
  1063. if ( actions.length() > 0 )
  1064. actionOrd = new int[actions.length()];
  1065. /* First walk the list of actions, assigning order to all starting
  1066. * actions. */
  1067. for ( int i = 0; i < actions.length(); i++ ) {
  1068. if ( actions[i].type == at_start ||
  1069. actions[i].type == at_start_gbl_error ||
  1070. actions[i].type == at_start_local_error ||
  1071. actions[i].type == at_start_to_state ||
  1072. actions[i].type == at_start_from_state ||
  1073. actions[i].type == at_start_eof )
  1074. actionOrd[i] = pd->curActionOrd++;
  1075. }
  1076. /* Evaluate the factor with repetition. */
  1077. FsmAp *rtnVal = factorWithRep->walk( pd );
  1078. /* Compute the remaining action orderings. */
  1079. for ( int i = 0; i < actions.length(); i++ ) {
  1080. if ( actions[i].type != at_start &&
  1081. actions[i].type != at_start_gbl_error &&
  1082. actions[i].type != at_start_local_error &&
  1083. actions[i].type != at_start_to_state &&
  1084. actions[i].type != at_start_from_state &&
  1085. actions[i].type != at_start_eof )
  1086. actionOrd[i] = pd->curActionOrd++;
  1087. }
  1088. /* Embed conditions. */
  1089. assignConditions( rtnVal );
  1090. /* Embed actions. */
  1091. assignActions( pd, rtnVal , actionOrd );
  1092. /* Make the array of priority orderings. Orderings are local to this walk
  1093. * of the factor with augmentation. */
  1094. int *priorOrd = 0;
  1095. if ( priorityAugs.length() > 0 )
  1096. priorOrd = new int[priorityAugs.length()];
  1097. /* Walk all priorities, assigning the priority ordering. */
  1098. for ( int i = 0; i < priorityAugs.length(); i++ )
  1099. priorOrd[i] = pd->curPriorOrd++;
  1100. /* If the priority descriptors have not been made, make them now. Make
  1101. * priority descriptors for each priority asignment that will be passed to
  1102. * the fsm. Used to keep track of the key, value and used bit. */
  1103. if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
  1104. priorDescs = new PriorDesc[priorityAugs.length()];
  1105. for ( int i = 0; i < priorityAugs.length(); i++ ) {
  1106. /* Init the prior descriptor for the priority setting. */
  1107. priorDescs[i].key = priorityAugs[i].priorKey;
  1108. priorDescs[i].priority = priorityAugs[i].priorValue;
  1109. }
  1110. }
  1111. /* Assign priorities into the machine. */
  1112. assignPriorities( rtnVal, priorOrd );
  1113. /* Assign epsilon transitions. */
  1114. for ( int e = 0; e < epsilonLinks.length(); e++ ) {
  1115. /* Get the name, which may not exist. If it doesn't then silently
  1116. * ignore it because an error has already been reported. */
  1117. NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
  1118. if ( epTarg != 0 ) {
  1119. /* Make the epsilon transitions. */
  1120. rtnVal->epsilonTrans( epTarg->id );
  1121. /* Note that we have made a link to the name. */
  1122. pd->localNameScope->referencedNames.append( epTarg );
  1123. }
  1124. }
  1125. /* Set entry points for labels. */
  1126. if ( labels.length() > 0 ) {
  1127. /* Pop the names. */
  1128. pd->resetNameScope( nameFrame );
  1129. /* Make labels that are referenced into entry points. */
  1130. for ( int i = 0; i < labels.length(); i++ ) {
  1131. pd->enterNameScope( false, 1 );
  1132. /* Will always be found. */
  1133. NameInst *name = pd->curNameInst;
  1134. /* If the name is referenced then set the entry point. */
  1135. if ( name->numRefs > 0 )
  1136. rtnVal->setEntry( name->id, rtnVal->startState );
  1137. }
  1138. pd->popNameScope( nameFrame );
  1139. }
  1140. if ( priorOrd != 0 )
  1141. delete[] priorOrd;
  1142. if ( actionOrd != 0 )
  1143. delete[] actionOrd;
  1144. return rtnVal;
  1145. }
  1146. void FactorWithAug::makeNameTree( ParseData *pd )
  1147. {
  1148. /* Add the labels to the tree of instantiated names. Each label
  1149. * makes a new scope. */
  1150. NameInst *prevNameInst = pd->curNameInst;
  1151. for ( int i = 0; i < labels.length(); i++ )
  1152. pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
  1153. /* Recurse, then pop the names. */
  1154. factorWithRep->makeNameTree( pd );
  1155. pd->curNameInst = prevNameInst;
  1156. }
  1157. void FactorWithAug::resolveNameRefs( ParseData *pd )
  1158. {
  1159. /* Enter into the name scope created by any labels. */
  1160. NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
  1161. /* Note action references. */
  1162. for ( int i = 0; i < actions.length(); i++ )
  1163. actions[i].action->actionRefs.append( pd->localNameScope );
  1164. /* Recurse first. IMPORTANT: we must do the exact same traversal as when
  1165. * the tree is constructed. */
  1166. factorWithRep->resolveNameRefs( pd );
  1167. /* Resolve epsilon transitions. */
  1168. for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
  1169. /* Get the link. */
  1170. EpsilonLink &link = epsilonLinks[ep];
  1171. NameInst *resolvedName = 0;
  1172. if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
  1173. /* Epsilon drawn to an implicit final state. An implicit final is
  1174. * only available in join operations. */
  1175. resolvedName = pd->localNameScope->final;
  1176. }
  1177. else {
  1178. /* Do an search for the name. */
  1179. NameSet resolved;
  1180. pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
  1181. if ( resolved.length() > 0 ) {
  1182. /* Take the first one. */
  1183. resolvedName = resolved[0];
  1184. if ( resolved.length() > 1 ) {
  1185. /* Complain about the multiple references. */
  1186. error(link.loc) << "state reference " << link.target <<
  1187. " resolves to multiple entry points" << endl;
  1188. errorStateLabels( resolved );
  1189. }
  1190. }
  1191. }
  1192. /* This is tricky, we stuff resolved epsilon transitions into one long
  1193. * vector in the parse data structure. Since the name resolution and
  1194. * graph generation both do identical walks of the parse tree we
  1195. * should always find the link resolutions in the right place. */
  1196. pd->epsilonResolvedLinks.append( resolvedName );
  1197. if ( resolvedName != 0 ) {
  1198. /* Found the name, bump of the reference count on it. */
  1199. resolvedName->numRefs += 1;
  1200. }
  1201. else {
  1202. /* Complain, no recovery action, the epsilon op will ignore any
  1203. * epsilon transitions whose names did not resolve. */
  1204. error(link.loc) << "could not resolve label " << link.target << endl;
  1205. }
  1206. }
  1207. if ( labels.length() > 0 )
  1208. pd->popNameScope( nameFrame );
  1209. }
  1210. /* Clean up after a factor with repetition node. */
  1211. FactorWithRep::~FactorWithRep()
  1212. {
  1213. switch ( type ) {
  1214. case StarType: case StarStarType: case OptionalType: case PlusType:
  1215. case ExactType: case MaxType: case MinType: case RangeType:
  1216. delete factorWithRep;
  1217. break;
  1218. case FactorWithNegType:
  1219. delete factorWithNeg;
  1220. break;
  1221. }
  1222. }
  1223. /* Evaluate a factor with repetition node. */
  1224. FsmAp *FactorWithRep::walk( ParseData *pd )
  1225. {
  1226. FsmAp *retFsm = 0;
  1227. switch ( type ) {
  1228. case StarType: {
  1229. /* Evaluate the FactorWithRep. */
  1230. retFsm = factorWithRep->walk( pd );
  1231. if ( retFsm->startState->isFinState() ) {
  1232. warning(loc) << "applying kleene star to a machine that "
  1233. "accepts zero length word" << endl;
  1234. retFsm->unsetFinState( retFsm->startState );
  1235. }
  1236. /* Shift over the start action orders then do the kleene star. */
  1237. pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
  1238. retFsm->starOp( );
  1239. afterOpMinimize( retFsm );
  1240. break;
  1241. }
  1242. case StarStarType: {
  1243. /* Evaluate the FactorWithRep. */
  1244. retFsm = factorWithRep->walk( pd );
  1245. if ( retFsm->startState->isFinState() ) {
  1246. warning(loc) << "applying kleene star to a machine that "
  1247. "accepts zero length word" << endl;
  1248. }
  1249. /* Set up the prior descs. All gets priority one, whereas leaving gets
  1250. * priority zero. Make a unique key so that these priorities don't
  1251. * interfere with any priorities set by the user. */
  1252. priorDescs[0].key = pd->nextPriorKey++;
  1253. priorDescs[0].priority = 1;
  1254. retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
  1255. /* Leaveing gets priority 0. Use same unique key. */
  1256. priorDescs[1].key = priorDescs[0].key;
  1257. priorDescs[1].priority = 0;
  1258. retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
  1259. /* Shift over the start action orders then do the kleene star. */
  1260. pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
  1261. retFsm->starOp( );
  1262. afterOpMinimize( retFsm );
  1263. break;
  1264. }
  1265. case OptionalType: {
  1266. /* Make the null fsm. */
  1267. FsmAp *nu = new FsmAp();
  1268. nu->lambdaFsm( );
  1269. /* Evaluate the FactorWithRep. */
  1270. retFsm = factorWithRep->walk( pd );
  1271. /* Perform the question operator. */
  1272. retFsm->unionOp( nu );
  1273. afterOpMinimize( retFsm );
  1274. break;
  1275. }
  1276. case PlusType: {
  1277. /* Evaluate the FactorWithRep. */
  1278. retFsm = factorWithRep->walk( pd );
  1279. if ( retFsm->startState->isFinState() ) {
  1280. warning(loc) << "applying plus operator to a machine that "
  1281. "accepts zero length word" << endl;
  1282. }
  1283. /* Need a duplicated for the star end. */
  1284. FsmAp *dup = new FsmAp( *retFsm );
  1285. /* The start func orders need to be shifted before doing the star. */
  1286. pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
  1287. /* Star the duplicate. */
  1288. dup->starOp( );
  1289. afterOpMinimize( dup );
  1290. retFsm->concatOp( dup );
  1291. afterOpMinimize( retFsm );
  1292. break;
  1293. }
  1294. case ExactType: {
  1295. /* Get an int from the repetition amount. */
  1296. if ( lowerRep == 0 ) {
  1297. /* No copies. Don't need to evaluate the factorWithRep.
  1298. * This Defeats the purpose so give a warning. */
  1299. warning(loc) << "exactly zero repetitions results "
  1300. "in the null machine" << endl;
  1301. retFsm = new FsmAp();
  1302. retFsm->lambdaFsm();
  1303. }
  1304. else {
  1305. /* Evaluate the first FactorWithRep. */
  1306. retFsm = factorWithRep->walk( pd );
  1307. if ( retFsm->startState->isFinState() ) {
  1308. warning(loc) << "applying repetition to a machine that "
  1309. "accepts zero length word" << endl;
  1310. }
  1311. /* The start func orders need to be shifted before doing the
  1312. * repetition. */
  1313. pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
  1314. /* Do the repetition on the machine. Already guarded against n == 0 */
  1315. retFsm->repeatOp( lowerRep );
  1316. afterOpMinimize( retFsm );
  1317. }
  1318. break;
  1319. }
  1320. case MaxType: {
  1321. /* Get an int from the repetition amount. */
  1322. if ( upperRep == 0 ) {
  1323. /* No copies. Don't need to evaluate the factorWithRep.
  1324. * This Defeats the purpose so give a warning. */
  1325. warning(loc) << "max zero repetitions results "
  1326. "in the null machine" << endl;
  1327. retFsm = new FsmAp();
  1328. retFsm->lambdaFsm();
  1329. }
  1330. else {
  1331. /* Evaluate the first FactorWithRep. */
  1332. retFsm = factorWithRep->walk( pd );
  1333. if ( retFsm->startState->isFinState() ) {
  1334. warning(loc) << "applying max repetition to a machine that "
  1335. "accepts zero length word" << endl;
  1336. }
  1337. /* The start func orders need to be shifted before doing the
  1338. * repetition. */
  1339. pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
  1340. /* Do the repetition on the machine. Already guarded against n == 0 */
  1341. retFsm->optionalRepeatOp( upperRep );
  1342. afterOpMinimize( retFsm );
  1343. }
  1344. break;
  1345. }
  1346. case MinType: {
  1347. /* Evaluate the repeated machine. */
  1348. retFsm = factorWithRep->walk( pd );
  1349. if ( retFsm->startState->isFinState() ) {
  1350. warning(loc) << "applying min repetition to a machine that "
  1351. "accepts zero length word" << endl;
  1352. }
  1353. /* The start func orders need to be shifted before doing the repetition
  1354. * and the kleene star. */
  1355. pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
  1356. if ( lowerRep == 0 ) {
  1357. /* Acts just like a star op on the machine to return. */
  1358. retFsm->starOp( );
  1359. afterOpMinimize( retFsm );
  1360. }
  1361. else {
  1362. /* Take a duplicate for the plus. */
  1363. FsmAp *dup = new FsmAp( *retFsm );
  1364. /* Do repetition on the first half. */
  1365. retFsm->repeatOp( lowerRep );
  1366. afterOpMinimize( retFsm );
  1367. /* Star the duplicate. */
  1368. dup->starOp( );
  1369. afterOpMinimize( dup );
  1370. /* Tak on the kleene star. */
  1371. retFsm->concatOp( dup );
  1372. afterOpMinimize( retFsm );
  1373. }
  1374. break;
  1375. }
  1376. case RangeType: {
  1377. /* Check for bogus range. */
  1378. if ( upperRep - lowerRep < 0 ) {
  1379. error(loc) << "invalid range repetition" << endl;
  1380. /* Return null machine as recovery. */
  1381. retFsm = new FsmAp();
  1382. retFsm->lambdaFsm();
  1383. }
  1384. else if ( lowerRep == 0 && upperRep == 0 ) {
  1385. /* No copies. Don't need to evaluate the factorWithRep. This
  1386. * defeats the purpose so give a warning. */
  1387. warning(loc) << "zero to zero repetitions results "
  1388. "in the null machine" << endl;
  1389. retFsm = new FsmAp();
  1390. retFsm->lambdaFsm();
  1391. }
  1392. else {
  1393. /* Now need to evaluate the repeated machine. */
  1394. retFsm = factorWithRep->walk( pd );
  1395. if ( retFsm->startState->isFinState() ) {
  1396. warning(loc) << "applying range repetition to a machine that "
  1397. "accepts zero length word" << endl;
  1398. }
  1399. /* The start func orders need to be shifted before doing both kinds
  1400. * of repetition. */
  1401. pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
  1402. if ( lowerRep == 0 ) {
  1403. /* Just doing max repetition. Already guarded against n == 0. */
  1404. retFsm->optionalRepeatOp( upperRep );
  1405. afterOpMinimize( retFsm );
  1406. }
  1407. else if ( lowerRep == upperRep ) {
  1408. /* Just doing exact repetition. Already guarded against n == 0. */
  1409. retFsm->repeatOp( lowerRep );
  1410. afterOpMinimize( retFsm );
  1411. }
  1412. else {
  1413. /* This is the case that 0 < lowerRep < upperRep. Take a
  1414. * duplicate for the optional repeat. */
  1415. FsmAp *dup = new FsmAp( *retFsm );
  1416. /* Do repetition on the first half. */
  1417. retFsm->repeatOp( lowerRep );
  1418. afterOpMinimize( retFsm );
  1419. /* Do optional repetition on the second half. */
  1420. dup->optionalRepeatOp( upperRep - lowerRep );
  1421. afterOpMinimize( dup );
  1422. /* Tak on the duplicate machine. */
  1423. retFsm->concatOp( dup );
  1424. afterOpMinimize( retFsm );
  1425. }
  1426. }
  1427. break;
  1428. }
  1429. case FactorWithNegType: {
  1430. /* Evaluate the Factor. Pass it up. */
  1431. retFsm = factorWithNeg->walk( pd );
  1432. break;
  1433. }}
  1434. return retFsm;
  1435. }
  1436. void FactorWithRep::makeNameTree( ParseData *pd )
  1437. {
  1438. switch ( type ) {
  1439. case StarType:
  1440. case StarStarType:
  1441. case OptionalType:
  1442. case PlusType:
  1443. case ExactType:
  1444. case MaxType:
  1445. case MinType:
  1446. case RangeType:
  1447. factorWithRep->makeNameTree( pd );
  1448. break;
  1449. case FactorWithNegType:
  1450. factorWithNeg->makeNameTree( pd );
  1451. break;
  1452. }
  1453. }
  1454. void FactorWithRep::resolveNameRefs( ParseData *pd )
  1455. {
  1456. switch ( type ) {
  1457. case StarType:
  1458. case StarStarType:
  1459. case OptionalType:
  1460. case PlusType:
  1461. case ExactType:
  1462. case MaxType:
  1463. case MinType:
  1464. case RangeType:
  1465. factorWithRep->resolveNameRefs( pd );
  1466. break;
  1467. case FactorWithNegType:
  1468. factorWithNeg->resolveNameRefs( pd );
  1469. break;
  1470. }
  1471. }
  1472. /* Clean up after a factor with negation node. */
  1473. FactorWithNeg::~FactorWithNeg()
  1474. {
  1475. switch ( type ) {
  1476. case NegateType:
  1477. case CharNegateType:
  1478. delete factorWithNeg;
  1479. break;
  1480. case FactorType:
  1481. delete factor;
  1482. break;
  1483. }
  1484. }
  1485. /* Evaluate a factor with negation node. */
  1486. FsmAp *FactorWithNeg::walk( ParseData *pd )
  1487. {
  1488. FsmAp *retFsm = 0;
  1489. switch ( type ) {
  1490. case NegateType: {
  1491. /* Evaluate the factorWithNeg. */
  1492. FsmAp *toNegate = factorWithNeg->walk( pd );
  1493. /* Negation is subtract from dot-star. */
  1494. retFsm = dotStarFsm( pd );
  1495. retFsm->subtractOp( toNegate );
  1496. afterOpMinimize( retFsm );
  1497. break;
  1498. }
  1499. case CharNegateType: {
  1500. /* Evaluate the factorWithNeg. */
  1501. FsmAp *toNegate = factorWithNeg->walk( pd );
  1502. /* CharNegation is subtract from dot. */
  1503. retFsm = dotFsm( pd );
  1504. retFsm->subtractOp( toNegate );
  1505. afterOpMinimize( retFsm );
  1506. break;
  1507. }
  1508. case FactorType: {
  1509. /* Evaluate the Factor. Pass it up. */
  1510. retFsm = factor->walk( pd );
  1511. break;
  1512. }}
  1513. return retFsm;
  1514. }
  1515. void FactorWithNeg::makeNameTree( ParseData *pd )
  1516. {
  1517. switch ( type ) {
  1518. case NegateType:
  1519. case CharNegateType:
  1520. factorWithNeg->makeNameTree( pd );
  1521. break;
  1522. case FactorType:
  1523. factor->makeNameTree( pd );
  1524. break;
  1525. }
  1526. }
  1527. void FactorWithNeg::resolveNameRefs( ParseData *pd )
  1528. {
  1529. switch ( type ) {
  1530. case NegateType:
  1531. case CharNegateType:
  1532. factorWithNeg->resolveNameRefs( pd );
  1533. break;
  1534. case FactorType:
  1535. factor->resolveNameRefs( pd );
  1536. break;
  1537. }
  1538. }
  1539. /* Clean up after a factor node. */
  1540. Factor::~Factor()
  1541. {
  1542. switch ( type ) {
  1543. case LiteralType:
  1544. delete literal;
  1545. break;
  1546. case RangeType:
  1547. delete range;
  1548. break;
  1549. case OrExprType:
  1550. delete reItem;
  1551. break;
  1552. case RegExprType:
  1553. delete regExpr;
  1554. break;
  1555. case ReferenceType:
  1556. break;
  1557. case ParenType:
  1558. delete join;
  1559. break;
  1560. case LongestMatchType:
  1561. delete longestMatch;
  1562. break;
  1563. }
  1564. }
  1565. /* Evaluate a factor node. */
  1566. FsmAp *Factor::walk( ParseData *pd )
  1567. {
  1568. FsmAp *rtnVal = 0;
  1569. switch ( type ) {
  1570. case LiteralType:
  1571. rtnVal = literal->walk( pd );
  1572. break;
  1573. case RangeType:
  1574. rtnVal = range->walk( pd );
  1575. break;
  1576. case OrExprType:
  1577. rtnVal = reItem->walk( pd, 0 );
  1578. break;
  1579. case RegExprType:
  1580. rtnVal = regExpr->walk( pd, 0 );
  1581. break;
  1582. case ReferenceType:
  1583. rtnVal = varDef->walk( pd );
  1584. break;
  1585. case ParenType:
  1586. rtnVal = join->walk( pd );
  1587. break;
  1588. case LongestMatchType:
  1589. rtnVal = longestMatch->walk( pd );
  1590. break;
  1591. }
  1592. return rtnVal;
  1593. }
  1594. void Factor::makeNameTree( ParseData *pd )
  1595. {
  1596. switch ( type ) {
  1597. case LiteralType:
  1598. case RangeType:
  1599. case OrExprType:
  1600. case RegExprType:
  1601. break;
  1602. case ReferenceType:
  1603. varDef->makeNameTree( loc, pd );
  1604. break;
  1605. case ParenType:
  1606. join->makeNameTree( pd );
  1607. break;
  1608. case LongestMatchType:
  1609. longestMatch->makeNameTree( pd );
  1610. break;
  1611. }
  1612. }
  1613. void Factor::resolveNameRefs( ParseData *pd )
  1614. {
  1615. switch ( type ) {
  1616. case LiteralType:
  1617. case RangeType:
  1618. case OrExprType:
  1619. case RegExprType:
  1620. break;
  1621. case ReferenceType:
  1622. varDef->resolveNameRefs( pd );
  1623. break;
  1624. case ParenType:
  1625. join->resolveNameRefs( pd );
  1626. break;
  1627. case LongestMatchType:
  1628. longestMatch->resolveNameRefs( pd );
  1629. break;
  1630. }
  1631. }
  1632. /* Clean up a range object. Must delete the two literals. */
  1633. Range::~Range()
  1634. {
  1635. delete lowerLit;
  1636. delete upperLit;
  1637. }
  1638. /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
  1639. FsmAp *Range::walk( ParseData *pd )
  1640. {
  1641. /* Construct and verify the suitability of the lower end of the range. */
  1642. FsmAp *lowerFsm = lowerLit->walk( pd );
  1643. if ( !lowerFsm->checkSingleCharMachine() ) {
  1644. error(lowerLit->token.loc) <<
  1645. "bad range lower end, must be a single character" << endl;
  1646. }
  1647. /* Construct and verify the upper end. */
  1648. FsmAp *upperFsm = upperLit->walk( pd );
  1649. if ( !upperFsm->checkSingleCharMachine() ) {
  1650. error(upperLit->token.loc) <<
  1651. "bad range upper end, must be a single character" << endl;
  1652. }
  1653. /* Grab the keys from the machines, then delete them. */
  1654. Key lowKey = lowerFsm->startState->outList.head->lowKey;
  1655. Key highKey = upperFsm->startState->outList.head->lowKey;
  1656. delete lowerFsm;
  1657. delete upperFsm;
  1658. /* Validate the range. */
  1659. if ( lowKey > highKey ) {
  1660. /* Recover by setting upper to lower; */
  1661. error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
  1662. highKey = lowKey;
  1663. }
  1664. /* Return the range now that it is validated. */
  1665. FsmAp *retFsm = new FsmAp();
  1666. retFsm->rangeFsm( lowKey, highKey );
  1667. return retFsm;
  1668. }
  1669. /* Evaluate a literal object. */
  1670. FsmAp *Literal::walk( ParseData *pd )
  1671. {
  1672. /* FsmAp to return, is the alphabet signed. */
  1673. FsmAp *rtnVal = 0;
  1674. switch ( type ) {
  1675. case Number: {
  1676. /* Make the fsm key in int format. */
  1677. Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
  1678. /* Make the new machine. */
  1679. rtnVal = new FsmAp();
  1680. rtnVal->concatFsm( fsmKey );
  1681. break;
  1682. }
  1683. case LitString: {
  1684. /* Make the array of keys in int format. */
  1685. long length;
  1686. bool caseInsensitive;
  1687. char *data = prepareLitString( token.loc, token.data, token.length,
  1688. length, caseInsensitive );
  1689. Key *arr = new Key[length];
  1690. makeFsmKeyArray( arr, data, length, pd );
  1691. /* Make the new machine. */
  1692. rtnVal = new FsmAp();
  1693. if ( caseInsensitive )
  1694. rtnVal->concatFsmCI( arr, length );
  1695. else
  1696. rtnVal->concatFsm( arr, length );
  1697. delete[] data;
  1698. delete[] arr;
  1699. break;
  1700. }}
  1701. return rtnVal;
  1702. }
  1703. /* Clean up after a regular expression object. */
  1704. RegExpr::~RegExpr()
  1705. {
  1706. switch ( type ) {
  1707. case RecurseItem:
  1708. delete regExpr;
  1709. delete item;
  1710. break;
  1711. case Empty:
  1712. break;
  1713. }
  1714. }
  1715. /* Evaluate a regular expression object. */
  1716. FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
  1717. {
  1718. /* This is the root regex, pass down a pointer to this. */
  1719. if ( rootRegex == 0 )
  1720. rootRegex = this;
  1721. FsmAp *rtnVal = 0;
  1722. switch ( type ) {
  1723. case RecurseItem: {
  1724. /* Walk both items. */
  1725. rtnVal = regExpr->walk( pd, rootRegex );
  1726. FsmAp *fsm2 = item->walk( pd, rootRegex );
  1727. rtnVal->concatOp( fsm2 );
  1728. break;
  1729. }
  1730. case Empty: {
  1731. rtnVal = new FsmAp();
  1732. rtnVal->lambdaFsm();
  1733. break;
  1734. }
  1735. }
  1736. return rtnVal;
  1737. }
  1738. /* Clean up after an item in a regular expression. */
  1739. ReItem::~ReItem()
  1740. {
  1741. switch ( type ) {
  1742. case Data:
  1743. case Dot:
  1744. break;
  1745. case OrBlock:
  1746. case NegOrBlock:
  1747. delete orBlock;
  1748. break;
  1749. }
  1750. }
  1751. /* Evaluate a regular expression object. */
  1752. FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
  1753. {
  1754. /* The fsm to return, is the alphabet signed? */
  1755. FsmAp *rtnVal = 0;
  1756. switch ( type ) {
  1757. case Data: {
  1758. /* Move the data into an integer array and make a concat fsm. */
  1759. Key *arr = new Key[token.length];
  1760. makeFsmKeyArray( arr, token.data, token.length, pd );
  1761. /* Make the concat fsm. */
  1762. rtnVal = new FsmAp();
  1763. if ( rootRegex != 0 && rootRegex->caseInsensitive )
  1764. rtnVal->concatFsmCI( arr, token.length );
  1765. else
  1766. rtnVal->concatFsm( arr, token.length );
  1767. delete[] arr;
  1768. break;
  1769. }
  1770. case Dot: {
  1771. /* Make the dot fsm. */
  1772. rtnVal = dotFsm( pd );
  1773. break;
  1774. }
  1775. case OrBlock: {
  1776. /* Get the or block and minmize it. */
  1777. rtnVal = orBlock->walk( pd, rootRegex );
  1778. if ( rtnVal == 0 ) {
  1779. rtnVal = new FsmAp();
  1780. rtnVal->lambdaFsm();
  1781. }
  1782. rtnVal->minimizePartition2();
  1783. break;
  1784. }
  1785. case NegOrBlock: {
  1786. /* Get the or block and minimize it. */
  1787. FsmAp *fsm = orBlock->walk( pd, rootRegex );
  1788. fsm->minimizePartition2();
  1789. /* Make a dot fsm and subtract from it. */
  1790. rtnVal = dotFsm( pd );
  1791. rtnVal->subtractOp( fsm );
  1792. rtnVal->minimizePartition2();
  1793. break;
  1794. }
  1795. }
  1796. /* If the item is followed by a star, then apply the star op. */
  1797. if ( star ) {
  1798. if ( rtnVal->startState->isFinState() ) {
  1799. warning(loc) << "applying kleene star to a machine that "
  1800. "accepts zero length word" << endl;
  1801. }
  1802. rtnVal->starOp();
  1803. rtnVal->minimizePartition2();
  1804. }
  1805. return rtnVal;
  1806. }
  1807. /* Clean up after an or block of a regular expression. */
  1808. ReOrBlock::~ReOrBlock()
  1809. {
  1810. switch ( type ) {
  1811. case RecurseItem:
  1812. delete orBlock;
  1813. delete item;
  1814. break;
  1815. case Empty:
  1816. break;
  1817. }
  1818. }
  1819. /* Evaluate an or block of a regular expression. */
  1820. FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
  1821. {
  1822. FsmAp *rtnVal = 0;
  1823. switch ( type ) {
  1824. case RecurseItem: {
  1825. /* Evaluate the two fsm. */
  1826. FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
  1827. FsmAp *fsm2 = item->walk( pd, rootRegex );
  1828. if ( fsm1 == 0 )
  1829. rtnVal = fsm2;
  1830. else {
  1831. fsm1->unionOp( fsm2 );
  1832. rtnVal = fsm1;
  1833. }
  1834. break;
  1835. }
  1836. case Empty: {
  1837. rtnVal = 0;
  1838. break;
  1839. }
  1840. }
  1841. return rtnVal;;
  1842. }
  1843. /* Evaluate an or block item of a regular expression. */
  1844. FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
  1845. {
  1846. /* The return value, is the alphabet signed? */
  1847. FsmAp *rtnVal = 0;
  1848. switch ( type ) {
  1849. case Data: {
  1850. /* Make the or machine. */
  1851. rtnVal = new FsmAp();
  1852. /* Put the or data into an array of ints. Note that we find unique
  1853. * keys. Duplicates are silently ignored. The alternative would be to
  1854. * issue warning or an error but since we can't with [a0-9a] or 'a' |
  1855. * 'a' don't bother here. */
  1856. KeySet keySet;
  1857. makeFsmUniqueKeyArray( keySet, token.data, token.length,
  1858. rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
  1859. /* Run the or operator. */
  1860. rtnVal->orFsm( keySet.data, keySet.length() );
  1861. break;
  1862. }
  1863. case Range: {
  1864. /* Make the upper and lower keys. */
  1865. Key lowKey = makeFsmKeyChar( lower, pd );
  1866. Key highKey = makeFsmKeyChar( upper, pd );
  1867. /* Validate the range. */
  1868. if ( lowKey > highKey ) {
  1869. /* Recover by setting upper to lower; */
  1870. error(loc) << "lower end of range is greater then upper end" << endl;
  1871. highKey = lowKey;
  1872. }
  1873. /* Make the range machine. */
  1874. rtnVal = new FsmAp();
  1875. rtnVal->rangeFsm( lowKey, highKey );
  1876. if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
  1877. if ( lowKey <= 'Z' && 'A' <= highKey ) {
  1878. Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
  1879. Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
  1880. otherLow = 'a' + ( otherLow - 'A' );
  1881. otherHigh = 'a' + ( otherHigh - 'A' );
  1882. FsmAp *otherRange = new FsmAp();
  1883. otherRange->rangeFsm( otherLow, otherHigh );
  1884. rtnVal->unionOp( otherRange );
  1885. rtnVal->minimizePartition2();
  1886. }
  1887. else if ( lowKey <= 'z' && 'a' <= highKey ) {
  1888. Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
  1889. Key otherHigh = 'z' < highKey ? Key('z') : highKey;
  1890. otherLow = 'A' + ( otherLow - 'a' );
  1891. otherHigh = 'A' + ( otherHigh - 'a' );
  1892. FsmAp *otherRange = new FsmAp();
  1893. otherRange->rangeFsm( otherLow, otherHigh );
  1894. rtnVal->unionOp( otherRange );
  1895. rtnVal->minimizePartition2();
  1896. }
  1897. }
  1898. break;
  1899. }}
  1900. return rtnVal;
  1901. }