parsedata.cpp 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491
  1. /*
  2. * Copyright 2001-2008 Adrian Thurston <thurston@complang.org>
  3. */
  4. /* This file is part of Ragel.
  5. *
  6. * Ragel is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Ragel is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Ragel; if not, write to the Free Software
  18. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. */
  20. #include <iostream>
  21. #include <iomanip>
  22. #include <errno.h>
  23. #include <stdlib.h>
  24. #include <limits.h>
  25. #include "ragel.h"
  26. #include "rlparse.h"
  27. #include "parsedata.h"
  28. #include "parsetree.h"
  29. #include "mergesort.h"
  30. #include "xmlcodegen.h"
  31. #include "version.h"
  32. #include "inputdata.h"
  33. using namespace std;
  34. char mainMachine[] = "main";
  35. void Token::set( const char *str, int len )
  36. {
  37. length = len;
  38. data = new char[len+1];
  39. memcpy( data, str, len );
  40. data[len] = 0;
  41. }
  42. void Token::append( const Token &other )
  43. {
  44. int newLength = length + other.length;
  45. char *newString = new char[newLength+1];
  46. memcpy( newString, data, length );
  47. memcpy( newString + length, other.data, other.length );
  48. newString[newLength] = 0;
  49. data = newString;
  50. length = newLength;
  51. }
  52. /* Perform minimization after an operation according
  53. * to the command line args. */
  54. void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
  55. {
  56. /* Switch on the prefered minimization algorithm. */
  57. if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) {
  58. /* First clean up the graph. FsmAp operations may leave these
  59. * lying around. There should be no dead end states. The subtract
  60. * intersection operators are the only places where they may be
  61. * created and those operators clean them up. */
  62. fsm->removeUnreachableStates();
  63. switch ( minimizeLevel ) {
  64. case MinimizeApprox:
  65. fsm->minimizeApproximate();
  66. break;
  67. case MinimizePartition1:
  68. fsm->minimizePartition1();
  69. break;
  70. case MinimizePartition2:
  71. fsm->minimizePartition2();
  72. break;
  73. case MinimizeStable:
  74. fsm->minimizeStable();
  75. break;
  76. }
  77. }
  78. }
  79. /* Count the transitions in the fsm by walking the state list. */
  80. int countTransitions( FsmAp *fsm )
  81. {
  82. int numTrans = 0;
  83. StateAp *state = fsm->stateList.head;
  84. while ( state != 0 ) {
  85. numTrans += state->outList.length();
  86. state = state->next;
  87. }
  88. return numTrans;
  89. }
  90. Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
  91. {
  92. /* Reset errno so we can check for overflow or underflow. In the event of
  93. * an error, sets the return val to the upper or lower bound being tested
  94. * against. */
  95. errno = 0;
  96. unsigned int size = keyOps->alphType->size;
  97. bool unusedBits = size < sizeof(unsigned long);
  98. unsigned long ul = strtoul( str, 0, 16 );
  99. if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
  100. error(loc) << "literal " << str << " overflows the alphabet type" << endl;
  101. ul = 1 << (size * 8);
  102. }
  103. if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
  104. ul |= ( (unsigned long)(-1L) >> (size*8) ) << (size*8);
  105. return Key( (long)ul );
  106. }
  107. #ifdef _MSC_VER
  108. # define strtoll _strtoi64
  109. #endif
  110. Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
  111. {
  112. if ( keyOps->alphType->isSigned ) {
  113. /* Convert the number to a decimal. First reset errno so we can check
  114. * for overflow or underflow. */
  115. errno = 0;
  116. long long minVal = keyOps->alphType->sMinVal;
  117. long long maxVal = keyOps->alphType->sMaxVal;
  118. long long ll = strtoll( str, 0, 10 );
  119. /* Check for underflow. */
  120. if ( ( errno == ERANGE && ll < 0 ) || ll < minVal) {
  121. error(loc) << "literal " << str << " underflows the alphabet type" << endl;
  122. ll = minVal;
  123. }
  124. /* Check for overflow. */
  125. else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) {
  126. error(loc) << "literal " << str << " overflows the alphabet type" << endl;
  127. ll = maxVal;
  128. }
  129. return Key( (long)ll );
  130. }
  131. else {
  132. /* Convert the number to a decimal. First reset errno so we can check
  133. * for overflow or underflow. */
  134. errno = 0;
  135. unsigned long long minVal = keyOps->alphType->uMinVal;
  136. unsigned long long maxVal = keyOps->alphType->uMaxVal;
  137. unsigned long long ull = strtoull( str, 0, 10 );
  138. /* Check for underflow. */
  139. if ( ( errno == ERANGE && ull < 0 ) || ull < minVal) {
  140. error(loc) << "literal " << str << " underflows the alphabet type" << endl;
  141. ull = minVal;
  142. }
  143. /* Check for overflow. */
  144. else if ( ( errno == ERANGE && ull > 0 ) || ull > maxVal ) {
  145. error(loc) << "literal " << str << " overflows the alphabet type" << endl;
  146. ull = maxVal;
  147. }
  148. return Key( (unsigned long)ull );
  149. }
  150. }
  151. /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
  152. * number returned by the parser. Validates that the number doesn't overflow
  153. * the alphabet type. */
  154. Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
  155. {
  156. /* Switch on hex/decimal format. */
  157. if ( str[0] == '0' && str[1] == 'x' )
  158. return makeFsmKeyHex( str, loc, pd );
  159. else
  160. return makeFsmKeyDec( str, loc, pd );
  161. }
  162. /* Make an fsm int format (what the fsm graph uses) from a single character.
  163. * Performs proper conversion depending on signed/unsigned property of the
  164. * alphabet. */
  165. Key makeFsmKeyChar( char c, ParseData *pd )
  166. {
  167. if ( keyOps->isSigned ) {
  168. /* Copy from a char type. */
  169. return Key( c );
  170. }
  171. else {
  172. /* Copy from an unsigned byte type. */
  173. return Key( (unsigned char)c );
  174. }
  175. }
  176. /* Make an fsm key array in int format (what the fsm graph uses) from a string
  177. * of characters. Performs proper conversion depending on signed/unsigned
  178. * property of the alphabet. */
  179. void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
  180. {
  181. if ( keyOps->isSigned ) {
  182. /* Copy from a char star type. */
  183. char *src = data;
  184. for ( int i = 0; i < len; i++ )
  185. result[i] = Key(src[i]);
  186. }
  187. else {
  188. /* Copy from an unsigned byte ptr type. */
  189. unsigned char *src = (unsigned char*) data;
  190. for ( int i = 0; i < len; i++ )
  191. result[i] = Key(src[i]);
  192. }
  193. }
  194. /* Like makeFsmKeyArray except the result has only unique keys. They ordering
  195. * will be changed. */
  196. void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
  197. bool caseInsensitive, ParseData *pd )
  198. {
  199. /* Use a transitions list for getting unique keys. */
  200. if ( keyOps->isSigned ) {
  201. /* Copy from a char star type. */
  202. char *src = data;
  203. for ( int si = 0; si < len; si++ ) {
  204. Key key( src[si] );
  205. result.insert( key );
  206. if ( caseInsensitive ) {
  207. if ( key.isLower() )
  208. result.insert( key.toUpper() );
  209. else if ( key.isUpper() )
  210. result.insert( key.toLower() );
  211. }
  212. }
  213. }
  214. else {
  215. /* Copy from an unsigned byte ptr type. */
  216. unsigned char *src = (unsigned char*) data;
  217. for ( int si = 0; si < len; si++ ) {
  218. Key key( src[si] );
  219. result.insert( key );
  220. if ( caseInsensitive ) {
  221. if ( key.isLower() )
  222. result.insert( key.toUpper() );
  223. else if ( key.isUpper() )
  224. result.insert( key.toLower() );
  225. }
  226. }
  227. }
  228. }
  229. FsmAp *dotFsm( ParseData *pd )
  230. {
  231. FsmAp *retFsm = new FsmAp();
  232. retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
  233. return retFsm;
  234. }
  235. FsmAp *dotStarFsm( ParseData *pd )
  236. {
  237. FsmAp *retFsm = new FsmAp();
  238. retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
  239. return retFsm;
  240. }
  241. /* Make a builtin type. Depends on the signed nature of the alphabet type. */
  242. FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
  243. {
  244. /* FsmAp created to return. */
  245. FsmAp *retFsm = 0;
  246. bool isSigned = keyOps->isSigned;
  247. switch ( builtin ) {
  248. case BT_Any: {
  249. /* All characters. */
  250. retFsm = dotFsm( pd );
  251. break;
  252. }
  253. case BT_Ascii: {
  254. /* Ascii characters 0 to 127. */
  255. retFsm = new FsmAp();
  256. retFsm->rangeFsm( 0, 127 );
  257. break;
  258. }
  259. case BT_Extend: {
  260. /* Ascii extended characters. This is the full byte range. Dependent
  261. * on signed, vs no signed. If the alphabet is one byte then just use
  262. * dot fsm. */
  263. if ( isSigned ) {
  264. retFsm = new FsmAp();
  265. retFsm->rangeFsm( -128, 127 );
  266. }
  267. else {
  268. retFsm = new FsmAp();
  269. retFsm->rangeFsm( 0, 255 );
  270. }
  271. break;
  272. }
  273. case BT_Alpha: {
  274. /* Alpha [A-Za-z]. */
  275. FsmAp *upper = new FsmAp(), *lower = new FsmAp();
  276. upper->rangeFsm( 'A', 'Z' );
  277. lower->rangeFsm( 'a', 'z' );
  278. upper->unionOp( lower );
  279. upper->minimizePartition2();
  280. retFsm = upper;
  281. break;
  282. }
  283. case BT_Digit: {
  284. /* Digits [0-9]. */
  285. retFsm = new FsmAp();
  286. retFsm->rangeFsm( '0', '9' );
  287. break;
  288. }
  289. case BT_Alnum: {
  290. /* Alpha numerics [0-9A-Za-z]. */
  291. FsmAp *digit = new FsmAp(), *lower = new FsmAp();
  292. FsmAp *upper = new FsmAp();
  293. digit->rangeFsm( '0', '9' );
  294. upper->rangeFsm( 'A', 'Z' );
  295. lower->rangeFsm( 'a', 'z' );
  296. digit->unionOp( upper );
  297. digit->unionOp( lower );
  298. digit->minimizePartition2();
  299. retFsm = digit;
  300. break;
  301. }
  302. case BT_Lower: {
  303. /* Lower case characters. */
  304. retFsm = new FsmAp();
  305. retFsm->rangeFsm( 'a', 'z' );
  306. break;
  307. }
  308. case BT_Upper: {
  309. /* Upper case characters. */
  310. retFsm = new FsmAp();
  311. retFsm->rangeFsm( 'A', 'Z' );
  312. break;
  313. }
  314. case BT_Cntrl: {
  315. /* Control characters. */
  316. FsmAp *cntrl = new FsmAp();
  317. FsmAp *highChar = new FsmAp();
  318. cntrl->rangeFsm( 0, 31 );
  319. highChar->concatFsm( 127 );
  320. cntrl->unionOp( highChar );
  321. cntrl->minimizePartition2();
  322. retFsm = cntrl;
  323. break;
  324. }
  325. case BT_Graph: {
  326. /* Graphical ascii characters [!-~]. */
  327. retFsm = new FsmAp();
  328. retFsm->rangeFsm( '!', '~' );
  329. break;
  330. }
  331. case BT_Print: {
  332. /* Printable characters. Same as graph except includes space. */
  333. retFsm = new FsmAp();
  334. retFsm->rangeFsm( ' ', '~' );
  335. break;
  336. }
  337. case BT_Punct: {
  338. /* Punctuation. */
  339. FsmAp *range1 = new FsmAp();
  340. FsmAp *range2 = new FsmAp();
  341. FsmAp *range3 = new FsmAp();
  342. FsmAp *range4 = new FsmAp();
  343. range1->rangeFsm( '!', '/' );
  344. range2->rangeFsm( ':', '@' );
  345. range3->rangeFsm( '[', '`' );
  346. range4->rangeFsm( '{', '~' );
  347. range1->unionOp( range2 );
  348. range1->unionOp( range3 );
  349. range1->unionOp( range4 );
  350. range1->minimizePartition2();
  351. retFsm = range1;
  352. break;
  353. }
  354. case BT_Space: {
  355. /* Whitespace: [\t\v\f\n\r ]. */
  356. FsmAp *cntrl = new FsmAp();
  357. FsmAp *space = new FsmAp();
  358. cntrl->rangeFsm( '\t', '\r' );
  359. space->concatFsm( ' ' );
  360. cntrl->unionOp( space );
  361. cntrl->minimizePartition2();
  362. retFsm = cntrl;
  363. break;
  364. }
  365. case BT_Xdigit: {
  366. /* Hex digits [0-9A-Fa-f]. */
  367. FsmAp *digit = new FsmAp();
  368. FsmAp *upper = new FsmAp();
  369. FsmAp *lower = new FsmAp();
  370. digit->rangeFsm( '0', '9' );
  371. upper->rangeFsm( 'A', 'F' );
  372. lower->rangeFsm( 'a', 'f' );
  373. digit->unionOp( upper );
  374. digit->unionOp( lower );
  375. digit->minimizePartition2();
  376. retFsm = digit;
  377. break;
  378. }
  379. case BT_Lambda: {
  380. retFsm = new FsmAp();
  381. retFsm->lambdaFsm();
  382. break;
  383. }
  384. case BT_Empty: {
  385. retFsm = new FsmAp();
  386. retFsm->emptyFsm();
  387. break;
  388. }}
  389. return retFsm;
  390. }
  391. /* Check if this name inst or any name inst below is referenced. */
  392. bool NameInst::anyRefsRec()
  393. {
  394. if ( numRefs > 0 )
  395. return true;
  396. /* Recurse on children until true. */
  397. for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
  398. if ( (*ch)->anyRefsRec() )
  399. return true;
  400. }
  401. return false;
  402. }
  403. /*
  404. * ParseData
  405. */
  406. /* Initialize the structure that will collect info during the parse of a
  407. * machine. */
  408. ParseData::ParseData( const char *fileName, char *sectionName,
  409. const InputLoc &sectionLoc )
  410. :
  411. sectionGraph(0),
  412. generatingSectionSubset(false),
  413. nextPriorKey(0),
  414. /* 0 is reserved for global error actions. */
  415. nextLocalErrKey(1),
  416. nextNameId(0),
  417. nextCondId(0),
  418. alphTypeSet(false),
  419. getKeyExpr(0),
  420. accessExpr(0),
  421. prePushExpr(0),
  422. postPopExpr(0),
  423. pExpr(0),
  424. peExpr(0),
  425. eofExpr(0),
  426. csExpr(0),
  427. topExpr(0),
  428. stackExpr(0),
  429. actExpr(0),
  430. tokstartExpr(0),
  431. tokendExpr(0),
  432. dataExpr(0),
  433. lowerNum(0),
  434. upperNum(0),
  435. fileName(fileName),
  436. sectionName(sectionName),
  437. sectionLoc(sectionLoc),
  438. curActionOrd(0),
  439. curPriorOrd(0),
  440. rootName(0),
  441. exportsRootName(0),
  442. nextEpsilonResolvedLink(0),
  443. nextLongestMatchId(1),
  444. lmRequiresErrorState(false),
  445. cgd(0)
  446. {
  447. /* Initialize the dictionary of graphs. This is our symbol table. The
  448. * initialization needs to be done on construction which happens at the
  449. * beginning of a machine spec so any assignment operators can reference
  450. * the builtins. */
  451. initGraphDict();
  452. }
  453. /* Clean up the data collected during a parse. */
  454. ParseData::~ParseData()
  455. {
  456. /* Delete all the nodes in the action list. Will cause all the
  457. * string data that represents the actions to be deallocated. */
  458. actionList.empty();
  459. }
  460. /* Make a name id in the current name instantiation scope if it is not
  461. * already there. */
  462. NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel )
  463. {
  464. /* Create the name instantitaion object and insert it. */
  465. NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
  466. curNameInst->childVect.append( newNameInst );
  467. if ( data != 0 )
  468. curNameInst->children.insertMulti( data, newNameInst );
  469. return newNameInst;
  470. }
  471. void ParseData::initNameWalk()
  472. {
  473. curNameInst = rootName;
  474. curNameChild = 0;
  475. }
  476. void ParseData::initExportsNameWalk()
  477. {
  478. curNameInst = exportsRootName;
  479. curNameChild = 0;
  480. }
  481. /* Goes into the next child scope. The number of the child is already set up.
  482. * We need this for the syncronous name tree and parse tree walk to work
  483. * properly. It is reset on entry into a scope and advanced on poping of a
  484. * scope. A call to enterNameScope should be accompanied by a corresponding
  485. * popNameScope. */
  486. NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
  487. {
  488. /* Save off the current data. */
  489. NameFrame retFrame;
  490. retFrame.prevNameInst = curNameInst;
  491. retFrame.prevNameChild = curNameChild;
  492. retFrame.prevLocalScope = localNameScope;
  493. /* Enter into the new name scope. */
  494. for ( int i = 0; i < numScopes; i++ ) {
  495. curNameInst = curNameInst->childVect[curNameChild];
  496. curNameChild = 0;
  497. }
  498. if ( isLocal )
  499. localNameScope = curNameInst;
  500. return retFrame;
  501. }
  502. /* Return from a child scope to a parent. The parent info must be specified as
  503. * an argument and is obtained from the corresponding call to enterNameScope.
  504. * */
  505. void ParseData::popNameScope( const NameFrame &frame )
  506. {
  507. /* Pop the name scope. */
  508. curNameInst = frame.prevNameInst;
  509. curNameChild = frame.prevNameChild+1;
  510. localNameScope = frame.prevLocalScope;
  511. }
  512. void ParseData::resetNameScope( const NameFrame &frame )
  513. {
  514. /* Pop the name scope. */
  515. curNameInst = frame.prevNameInst;
  516. curNameChild = frame.prevNameChild;
  517. localNameScope = frame.prevLocalScope;
  518. }
  519. void ParseData::unsetObsoleteEntries( FsmAp *graph )
  520. {
  521. /* Loop the reference names and increment the usage. Names that are no
  522. * longer needed will be unset in graph. */
  523. for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
  524. /* Get the name. */
  525. NameInst *name = *ref;
  526. name->numUses += 1;
  527. /* If the name is no longer needed unset its corresponding entry. */
  528. if ( name->numUses == name->numRefs ) {
  529. assert( graph->entryPoints.find( name->id ) != 0 );
  530. graph->unsetEntry( name->id );
  531. assert( graph->entryPoints.find( name->id ) == 0 );
  532. }
  533. }
  534. }
  535. NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
  536. {
  537. /* Queue needed for breadth-first search, load it with the start node. */
  538. NameInstList nameQueue;
  539. nameQueue.append( refFrom );
  540. NameSet result;
  541. while ( nameQueue.length() > 0 ) {
  542. /* Pull the next from location off the queue. */
  543. NameInst *from = nameQueue.detachFirst();
  544. /* Look for the name. */
  545. NameMapEl *low, *high;
  546. if ( from->children.findMulti( data, low, high ) ) {
  547. /* Record all instances of the name. */
  548. for ( ; low <= high; low++ )
  549. result.insert( low->value );
  550. }
  551. /* Name not there, do breadth-first operation of appending all
  552. * childrent to the processing queue. */
  553. for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
  554. if ( !recLabelsOnly || (*name)->isLabel )
  555. nameQueue.append( *name );
  556. }
  557. }
  558. /* Queue exhausted and name never found. */
  559. return result;
  560. }
  561. void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
  562. const NameRef &nameRef, int namePos )
  563. {
  564. /* Look for the name in the owning scope of the factor with aug. */
  565. NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
  566. /* If there are more parts to the name then continue on. */
  567. if ( ++namePos < nameRef.length() ) {
  568. /* There are more components to the name, search using all the part
  569. * results as the base. */
  570. for ( NameSet::Iter name = partResult; name.lte(); name++ )
  571. resolveFrom( result, *name, nameRef, namePos );
  572. }
  573. else {
  574. /* This is the last component, append the part results to the final
  575. * results. */
  576. result.insert( partResult );
  577. }
  578. }
  579. /* Write out a name reference. */
  580. ostream &operator<<( ostream &out, const NameRef &nameRef )
  581. {
  582. int pos = 0;
  583. if ( nameRef[pos] == 0 ) {
  584. out << "::";
  585. pos += 1;
  586. }
  587. out << nameRef[pos++];
  588. for ( ; pos < nameRef.length(); pos++ )
  589. out << "::" << nameRef[pos];
  590. return out;
  591. }
  592. ostream &operator<<( ostream &out, const NameInst &nameInst )
  593. {
  594. /* Count the number fully qualified name parts. */
  595. int numParents = 0;
  596. NameInst *curParent = nameInst.parent;
  597. while ( curParent != 0 ) {
  598. numParents += 1;
  599. curParent = curParent->parent;
  600. }
  601. /* Make an array and fill it in. */
  602. curParent = nameInst.parent;
  603. NameInst **parents = new NameInst*[numParents];
  604. for ( int p = numParents-1; p >= 0; p-- ) {
  605. parents[p] = curParent;
  606. curParent = curParent->parent;
  607. }
  608. /* Write the parents out, skip the root. */
  609. for ( int p = 1; p < numParents; p++ )
  610. out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
  611. /* Write the name and cleanup. */
  612. out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
  613. delete[] parents;
  614. return out;
  615. }
  616. struct CmpNameInstLoc
  617. {
  618. static int compare( const NameInst *ni1, const NameInst *ni2 )
  619. {
  620. if ( ni1->loc.line < ni2->loc.line )
  621. return -1;
  622. else if ( ni1->loc.line > ni2->loc.line )
  623. return 1;
  624. else if ( ni1->loc.col < ni2->loc.col )
  625. return -1;
  626. else if ( ni1->loc.col > ni2->loc.col )
  627. return 1;
  628. return 0;
  629. }
  630. };
  631. void errorStateLabels( const NameSet &resolved )
  632. {
  633. MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
  634. mergeSort.sort( resolved.data, resolved.length() );
  635. for ( NameSet::Iter res = resolved; res.lte(); res++ )
  636. error((*res)->loc) << " -> " << **res << endl;
  637. }
  638. NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
  639. {
  640. NameInst *nameInst = 0;
  641. /* Do the local search if the name is not strictly a root level name
  642. * search. */
  643. if ( nameRef[0] != 0 ) {
  644. /* If the action is referenced, resolve all of them. */
  645. if ( action != 0 && action->actionRefs.length() > 0 ) {
  646. /* Look for the name in all referencing scopes. */
  647. NameSet resolved;
  648. for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
  649. resolveFrom( resolved, *actRef, nameRef, 0 );
  650. if ( resolved.length() > 0 ) {
  651. /* Take the first one. */
  652. nameInst = resolved[0];
  653. if ( resolved.length() > 1 ) {
  654. /* Complain about the multiple references. */
  655. error(loc) << "state reference " << nameRef <<
  656. " resolves to multiple entry points" << endl;
  657. errorStateLabels( resolved );
  658. }
  659. }
  660. }
  661. }
  662. /* If not found in the local scope, look in global. */
  663. if ( nameInst == 0 ) {
  664. NameSet resolved;
  665. int fromPos = nameRef[0] != 0 ? 0 : 1;
  666. resolveFrom( resolved, rootName, nameRef, fromPos );
  667. if ( resolved.length() > 0 ) {
  668. /* Take the first. */
  669. nameInst = resolved[0];
  670. if ( resolved.length() > 1 ) {
  671. /* Complain about the multiple references. */
  672. error(loc) << "state reference " << nameRef <<
  673. " resolves to multiple entry points" << endl;
  674. errorStateLabels( resolved );
  675. }
  676. }
  677. }
  678. if ( nameInst == 0 ) {
  679. /* If not found then complain. */
  680. error(loc) << "could not resolve state reference " << nameRef << endl;
  681. }
  682. return nameInst;
  683. }
  684. void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
  685. {
  686. for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
  687. switch ( item->type ) {
  688. case InlineItem::Entry: case InlineItem::Goto:
  689. case InlineItem::Call: case InlineItem::Next: {
  690. /* Resolve, pass action for local search. */
  691. NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
  692. /* Name lookup error reporting is handled by resolveStateRef. */
  693. if ( target != 0 ) {
  694. /* Check if the target goes into a longest match. */
  695. NameInst *search = target->parent;
  696. while ( search != 0 ) {
  697. if ( search->isLongestMatch ) {
  698. error(item->loc) << "cannot enter inside a longest "
  699. "match construction as an entry point" << endl;
  700. break;
  701. }
  702. search = search->parent;
  703. }
  704. /* Record the reference in the name. This will cause the
  705. * entry point to survive to the end of the graph
  706. * generating walk. */
  707. target->numRefs += 1;
  708. }
  709. item->nameTarg = target;
  710. break;
  711. }
  712. default:
  713. break;
  714. }
  715. /* Some of the item types may have children. */
  716. if ( item->children != 0 )
  717. resolveNameRefs( item->children, action );
  718. }
  719. }
  720. /* Resolve references to labels in actions. */
  721. void ParseData::resolveActionNameRefs()
  722. {
  723. for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
  724. /* Only care about the actions that are referenced. */
  725. if ( act->actionRefs.length() > 0 )
  726. resolveNameRefs( act->inlineList, act );
  727. }
  728. }
  729. /* Walk a name tree starting at from and fill the name index. */
  730. void ParseData::fillNameIndex( NameInst *from )
  731. {
  732. /* Fill the value for from in the name index. */
  733. nameIndex[from->id] = from;
  734. /* Recurse on the implicit final state and then all children. */
  735. if ( from->final != 0 )
  736. fillNameIndex( from->final );
  737. for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
  738. fillNameIndex( *name );
  739. }
  740. void ParseData::makeRootNames()
  741. {
  742. /* Create the root name. */
  743. rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
  744. exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
  745. }
  746. /* Build the name tree and supporting data structures. */
  747. void ParseData::makeNameTree( GraphDictEl *dictEl )
  748. {
  749. /* Set up curNameInst for the walk. */
  750. initNameWalk();
  751. if ( dictEl != 0 ) {
  752. /* A start location has been specified. */
  753. dictEl->value->makeNameTree( dictEl->loc, this );
  754. }
  755. else {
  756. /* First make the name tree. */
  757. for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
  758. /* Recurse on the instance. */
  759. glel->value->makeNameTree( glel->loc, this );
  760. }
  761. }
  762. /* The number of nodes in the tree can now be given by nextNameId */
  763. nameIndex = new NameInst*[nextNameId];
  764. memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
  765. fillNameIndex( rootName );
  766. fillNameIndex( exportsRootName );
  767. }
  768. void ParseData::createBuiltin( const char *name, BuiltinMachine builtin )
  769. {
  770. Expression *expression = new Expression( builtin );
  771. Join *join = new Join( expression );
  772. MachineDef *machineDef = new MachineDef( join );
  773. VarDef *varDef = new VarDef( name, machineDef );
  774. GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
  775. graphDict.insert( graphDictEl );
  776. }
  777. /* Initialize the graph dict with builtin types. */
  778. void ParseData::initGraphDict( )
  779. {
  780. createBuiltin( "any", BT_Any );
  781. createBuiltin( "ascii", BT_Ascii );
  782. createBuiltin( "extend", BT_Extend );
  783. createBuiltin( "alpha", BT_Alpha );
  784. createBuiltin( "digit", BT_Digit );
  785. createBuiltin( "alnum", BT_Alnum );
  786. createBuiltin( "lower", BT_Lower );
  787. createBuiltin( "upper", BT_Upper );
  788. createBuiltin( "cntrl", BT_Cntrl );
  789. createBuiltin( "graph", BT_Graph );
  790. createBuiltin( "print", BT_Print );
  791. createBuiltin( "punct", BT_Punct );
  792. createBuiltin( "space", BT_Space );
  793. createBuiltin( "xdigit", BT_Xdigit );
  794. createBuiltin( "null", BT_Lambda );
  795. createBuiltin( "zlen", BT_Lambda );
  796. createBuiltin( "empty", BT_Empty );
  797. }
  798. /* Set the alphabet type. If the types are not valid returns false. */
  799. bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
  800. {
  801. alphTypeLoc = loc;
  802. userAlphType = findAlphType( s1, s2 );
  803. alphTypeSet = true;
  804. return userAlphType != 0;
  805. }
  806. /* Set the alphabet type. If the types are not valid returns false. */
  807. bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
  808. {
  809. alphTypeLoc = loc;
  810. userAlphType = findAlphType( s1 );
  811. alphTypeSet = true;
  812. return userAlphType != 0;
  813. }
  814. bool ParseData::setVariable( char *var, InlineList *inlineList )
  815. {
  816. bool set = true;
  817. if ( strcmp( var, "p" ) == 0 )
  818. pExpr = inlineList;
  819. else if ( strcmp( var, "pe" ) == 0 )
  820. peExpr = inlineList;
  821. else if ( strcmp( var, "eof" ) == 0 )
  822. eofExpr = inlineList;
  823. else if ( strcmp( var, "cs" ) == 0 )
  824. csExpr = inlineList;
  825. else if ( strcmp( var, "data" ) == 0 )
  826. dataExpr = inlineList;
  827. else if ( strcmp( var, "top" ) == 0 )
  828. topExpr = inlineList;
  829. else if ( strcmp( var, "stack" ) == 0 )
  830. stackExpr = inlineList;
  831. else if ( strcmp( var, "act" ) == 0 )
  832. actExpr = inlineList;
  833. else if ( strcmp( var, "ts" ) == 0 )
  834. tokstartExpr = inlineList;
  835. else if ( strcmp( var, "te" ) == 0 )
  836. tokendExpr = inlineList;
  837. else
  838. set = false;
  839. return set;
  840. }
  841. /* Initialize the key operators object that will be referenced by all fsms
  842. * created. */
  843. void ParseData::initKeyOps( )
  844. {
  845. /* Signedness and bounds. */
  846. HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
  847. thisKeyOps.setAlphType( alphType );
  848. if ( lowerNum != 0 ) {
  849. /* If ranges are given then interpret the alphabet type. */
  850. thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
  851. thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
  852. }
  853. thisCondData.lastCondKey = thisKeyOps.maxKey;
  854. }
  855. void ParseData::printNameInst( NameInst *nameInst, int level )
  856. {
  857. for ( int i = 0; i < level; i++ )
  858. cerr << " ";
  859. cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
  860. " id: " << nameInst->id <<
  861. " refs: " << nameInst->numRefs <<
  862. " uses: " << nameInst->numUses << endl;
  863. for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
  864. printNameInst( *name, level+1 );
  865. }
  866. /* Remove duplicates of unique actions from an action table. */
  867. void ParseData::removeDups( ActionTable &table )
  868. {
  869. /* Scan through the table looking for unique actions to
  870. * remove duplicates of. */
  871. for ( int i = 0; i < table.length(); i++ ) {
  872. /* Remove any duplicates ahead of i. */
  873. for ( int r = i+1; r < table.length(); ) {
  874. if ( table[r].value == table[i].value )
  875. table.vremove(r);
  876. else
  877. r += 1;
  878. }
  879. }
  880. }
  881. /* Remove duplicates from action lists. This operates only on transition and
  882. * eof action lists and so should be called once all actions have been
  883. * transfered to their final resting place. */
  884. void ParseData::removeActionDups( FsmAp *graph )
  885. {
  886. /* Loop all states. */
  887. for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
  888. /* Loop all transitions. */
  889. for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
  890. removeDups( trans->actionTable );
  891. removeDups( state->toStateActionTable );
  892. removeDups( state->fromStateActionTable );
  893. removeDups( state->eofActionTable );
  894. }
  895. }
  896. Action *ParseData::newAction( const char *name, InlineList *inlineList )
  897. {
  898. InputLoc loc;
  899. loc.line = 1;
  900. loc.col = 1;
  901. loc.fileName = "NONE";
  902. Action *action = new Action( loc, name, inlineList, nextCondId++ );
  903. action->actionRefs.append( rootName );
  904. actionList.append( action );
  905. return action;
  906. }
  907. void ParseData::initLongestMatchData()
  908. {
  909. if ( lmList.length() > 0 ) {
  910. /* The initTokStart action resets the token start. */
  911. InlineList *il1 = new InlineList;
  912. il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
  913. initTokStart = newAction( "initts", il1 );
  914. initTokStart->isLmAction = true;
  915. /* The initActId action gives act a default value. */
  916. InlineList *il4 = new InlineList;
  917. il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
  918. initActId = newAction( "initact", il4 );
  919. initActId->isLmAction = true;
  920. /* The setTokStart action sets tokstart. */
  921. InlineList *il5 = new InlineList;
  922. il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
  923. setTokStart = newAction( "ts", il5 );
  924. setTokStart->isLmAction = true;
  925. /* The setTokEnd action sets tokend. */
  926. InlineList *il3 = new InlineList;
  927. il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
  928. setTokEnd = newAction( "te", il3 );
  929. setTokEnd->isLmAction = true;
  930. /* The action will also need an ordering: ahead of all user action
  931. * embeddings. */
  932. initTokStartOrd = curActionOrd++;
  933. initActIdOrd = curActionOrd++;
  934. setTokStartOrd = curActionOrd++;
  935. setTokEndOrd = curActionOrd++;
  936. }
  937. }
  938. /* After building the graph, do some extra processing to ensure the runtime
  939. * data of the longest mactch operators is consistent. */
  940. void ParseData::setLongestMatchData( FsmAp *graph )
  941. {
  942. if ( lmList.length() > 0 ) {
  943. /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
  944. * init the tokstart. */
  945. for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
  946. /* This is run after duplicates are removed, we must guard against
  947. * inserting a duplicate. */
  948. ActionTable &actionTable = en->value->toStateActionTable;
  949. if ( ! actionTable.hasAction( initTokStart ) )
  950. actionTable.setAction( initTokStartOrd, initTokStart );
  951. }
  952. /* Find the set of states that are the target of transitions with
  953. * actions that have calls. These states will be targeted by fret
  954. * statements. */
  955. StateSet states;
  956. for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
  957. for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
  958. for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
  959. if ( ati->value->anyCall && trans->toState != 0 )
  960. states.insert( trans->toState );
  961. }
  962. }
  963. }
  964. /* Init tokstart upon entering the above collected states. */
  965. for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
  966. /* This is run after duplicates are removed, we must guard against
  967. * inserting a duplicate. */
  968. ActionTable &actionTable = (*ps)->toStateActionTable;
  969. if ( ! actionTable.hasAction( initTokStart ) )
  970. actionTable.setAction( initTokStartOrd, initTokStart );
  971. }
  972. }
  973. }
  974. /* Make the graph from a graph dict node. Does minimization and state sorting. */
  975. FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
  976. {
  977. /* Build the graph from a walk of the parse tree. */
  978. FsmAp *graph = gdNode->value->walk( this );
  979. /* Resolve any labels that point to multiple states. Any labels that are
  980. * still around are referenced only by gotos and calls and they need to be
  981. * made into deterministic entry points. */
  982. graph->deterministicEntry();
  983. /*
  984. * All state construction is now complete.
  985. */
  986. /* Transfer actions from the out action tables to eof action tables. */
  987. for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ )
  988. graph->transferOutActions( *state );
  989. /* Transfer global error actions. */
  990. for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
  991. graph->transferErrorActions( state, 0 );
  992. if ( ::wantDupsRemoved )
  993. removeActionDups( graph );
  994. /* Remove unreachable states. There should be no dead end states. The
  995. * subtract and intersection operators are the only places where they may
  996. * be created and those operators clean them up. */
  997. graph->removeUnreachableStates();
  998. /* No more fsm operations are to be done. Action ordering numbers are
  999. * no longer of use and will just hinder minimization. Clear them. */
  1000. graph->nullActionKeys();
  1001. /* Transition priorities are no longer of use. We can clear them
  1002. * because they will just hinder minimization as well. Clear them. */
  1003. graph->clearAllPriorities();
  1004. if ( minimizeOpt != MinimizeNone ) {
  1005. /* Minimize here even if we minimized at every op. Now that function
  1006. * keys have been cleared we may get a more minimal fsm. */
  1007. switch ( minimizeLevel ) {
  1008. case MinimizeApprox:
  1009. graph->minimizeApproximate();
  1010. break;
  1011. case MinimizeStable:
  1012. graph->minimizeStable();
  1013. break;
  1014. case MinimizePartition1:
  1015. graph->minimizePartition1();
  1016. break;
  1017. case MinimizePartition2:
  1018. graph->minimizePartition2();
  1019. break;
  1020. }
  1021. }
  1022. graph->compressTransitions();
  1023. return graph;
  1024. }
  1025. void ParseData::printNameTree()
  1026. {
  1027. /* Print the name instance map. */
  1028. for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
  1029. printNameInst( *name, 0 );
  1030. cerr << "name index:" << endl;
  1031. /* Show that the name index is correct. */
  1032. for ( int ni = 0; ni < nextNameId; ni++ ) {
  1033. cerr << ni << ": ";
  1034. const char *name = nameIndex[ni]->name;
  1035. cerr << ( name != 0 ? name : "<ANON>" ) << endl;
  1036. }
  1037. }
  1038. FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
  1039. {
  1040. /* Build the name tree and supporting data structures. */
  1041. makeNameTree( gdNode );
  1042. /* Resove name references from gdNode. */
  1043. initNameWalk();
  1044. gdNode->value->resolveNameRefs( this );
  1045. /* Do not resolve action references. Since we are not building the entire
  1046. * graph there's a good chance that many name references will fail. This
  1047. * is okay since generating part of the graph is usually only done when
  1048. * inspecting the compiled machine. */
  1049. /* Same story for extern entry point references. */
  1050. /* Flag this case so that the XML code generator is aware that we haven't
  1051. * looked up name references in actions. It can then avoid segfaulting. */
  1052. generatingSectionSubset = true;
  1053. /* Just building the specified graph. */
  1054. initNameWalk();
  1055. FsmAp *mainGraph = makeInstance( gdNode );
  1056. return mainGraph;
  1057. }
  1058. FsmAp *ParseData::makeAll()
  1059. {
  1060. /* Build the name tree and supporting data structures. */
  1061. makeNameTree( 0 );
  1062. /* Resove name references in the tree. */
  1063. initNameWalk();
  1064. for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
  1065. glel->value->resolveNameRefs( this );
  1066. /* Resolve action code name references. */
  1067. resolveActionNameRefs();
  1068. /* Force name references to the top level instantiations. */
  1069. for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
  1070. (*inst)->numRefs += 1;
  1071. FsmAp *mainGraph = 0;
  1072. FsmAp **graphs = new FsmAp*[instanceList.length()];
  1073. int numOthers = 0;
  1074. /* Make all the instantiations, we know that main exists in this list. */
  1075. initNameWalk();
  1076. for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
  1077. if ( strcmp( glel->key, mainMachine ) == 0 ) {
  1078. /* Main graph is always instantiated. */
  1079. mainGraph = makeInstance( glel );
  1080. }
  1081. else {
  1082. /* Instantiate and store in others array. */
  1083. graphs[numOthers++] = makeInstance( glel );
  1084. }
  1085. }
  1086. if ( mainGraph == 0 )
  1087. mainGraph = graphs[--numOthers];
  1088. if ( numOthers > 0 ) {
  1089. /* Add all the other graphs into main. */
  1090. mainGraph->globOp( graphs, numOthers );
  1091. }
  1092. delete[] graphs;
  1093. return mainGraph;
  1094. }
  1095. void ParseData::analyzeAction( Action *action, InlineList *inlineList )
  1096. {
  1097. /* FIXME: Actions used as conditions should be very constrained. */
  1098. for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
  1099. if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
  1100. action->anyCall = true;
  1101. /* Need to recurse into longest match items. */
  1102. if ( item->type == InlineItem::LmSwitch ) {
  1103. LongestMatch *lm = item->longestMatch;
  1104. for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
  1105. if ( lmi->action != 0 )
  1106. analyzeAction( action, lmi->action->inlineList );
  1107. }
  1108. }
  1109. if ( item->type == InlineItem::LmOnLast ||
  1110. item->type == InlineItem::LmOnNext ||
  1111. item->type == InlineItem::LmOnLagBehind )
  1112. {
  1113. LongestMatchPart *lmi = item->longestMatchPart;
  1114. if ( lmi->action != 0 )
  1115. analyzeAction( action, lmi->action->inlineList );
  1116. }
  1117. if ( item->children != 0 )
  1118. analyzeAction( action, item->children );
  1119. }
  1120. }
  1121. /* Check actions for bad uses of fsm directives. We don't go inside longest
  1122. * match items in actions created by ragel, since we just want the user
  1123. * actions. */
  1124. void ParseData::checkInlineList( Action *act, InlineList *inlineList )
  1125. {
  1126. for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
  1127. /* EOF checks. */
  1128. if ( act->numEofRefs > 0 ) {
  1129. switch ( item->type ) {
  1130. /* Currently no checks. */
  1131. default:
  1132. break;
  1133. }
  1134. }
  1135. /* Recurse. */
  1136. if ( item->children != 0 )
  1137. checkInlineList( act, item->children );
  1138. }
  1139. }
  1140. void ParseData::checkAction( Action *action )
  1141. {
  1142. /* Check for actions with calls that are embedded within a longest match
  1143. * machine. */
  1144. if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
  1145. for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
  1146. NameInst *check = *ar;
  1147. while ( check != 0 ) {
  1148. if ( check->isLongestMatch ) {
  1149. error(action->loc) << "within a scanner, fcall is permitted"
  1150. " only in pattern actions" << endl;
  1151. break;
  1152. }
  1153. check = check->parent;
  1154. }
  1155. }
  1156. }
  1157. checkInlineList( action, action->inlineList );
  1158. }
  1159. void ParseData::analyzeGraph( FsmAp *graph )
  1160. {
  1161. for ( ActionList::Iter act = actionList; act.lte(); act++ )
  1162. analyzeAction( act, act->inlineList );
  1163. for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
  1164. /* The transition list. */
  1165. for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
  1166. for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
  1167. at->value->numTransRefs += 1;
  1168. }
  1169. for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
  1170. at->value->numToStateRefs += 1;
  1171. for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
  1172. at->value->numFromStateRefs += 1;
  1173. for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
  1174. at->value->numEofRefs += 1;
  1175. for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
  1176. for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
  1177. (*sci)->numCondRefs += 1;
  1178. }
  1179. }
  1180. /* Checks for bad usage of directives in action code. */
  1181. for ( ActionList::Iter act = actionList; act.lte(); act++ )
  1182. checkAction( act );
  1183. }
  1184. void ParseData::makeExportsNameTree()
  1185. {
  1186. /* Make a name tree for the exports. */
  1187. initExportsNameWalk();
  1188. /* First make the name tree. */
  1189. for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
  1190. if ( gdel->value->isExport ) {
  1191. /* Recurse on the instance. */
  1192. gdel->value->makeNameTree( gdel->loc, this );
  1193. }
  1194. }
  1195. }
  1196. void ParseData::makeExports()
  1197. {
  1198. makeExportsNameTree();
  1199. /* Resove name references in the tree. */
  1200. initExportsNameWalk();
  1201. for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
  1202. if ( gdel->value->isExport )
  1203. gdel->value->resolveNameRefs( this );
  1204. }
  1205. /* Make all the instantiations, we know that main exists in this list. */
  1206. initExportsNameWalk();
  1207. for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
  1208. /* Check if this var def is an export. */
  1209. if ( gdel->value->isExport ) {
  1210. /* Build the graph from a walk of the parse tree. */
  1211. FsmAp *graph = gdel->value->walk( this );
  1212. /* Build the graph from a walk of the parse tree. */
  1213. if ( !graph->checkSingleCharMachine() ) {
  1214. error(gdel->loc) << "bad export machine, must define "
  1215. "a single character" << endl;
  1216. }
  1217. else {
  1218. /* Safe to extract the key and declare the export. */
  1219. Key exportKey = graph->startState->outList.head->lowKey;
  1220. exportList.append( new Export( gdel->value->name, exportKey ) );
  1221. }
  1222. }
  1223. }
  1224. }
  1225. /* Construct the machine and catch failures which can occur during
  1226. * construction. */
  1227. void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
  1228. {
  1229. try {
  1230. /* This machine construction can fail. */
  1231. prepareMachineGenTBWrapped( graphDictEl );
  1232. }
  1233. catch ( FsmConstructFail fail ) {
  1234. switch ( fail.reason ) {
  1235. case FsmConstructFail::CondNoKeySpace: {
  1236. InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
  1237. error(loc) << "sorry, no more characters are "
  1238. "available in the alphabet space" << endl;
  1239. error(loc) << " for conditions, please use a "
  1240. "smaller alphtype or reduce" << endl;
  1241. error(loc) << " the span of characters on which "
  1242. "conditions are embedded" << endl;
  1243. break;
  1244. }
  1245. }
  1246. }
  1247. }
  1248. void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
  1249. {
  1250. beginProcessing();
  1251. initKeyOps();
  1252. makeRootNames();
  1253. initLongestMatchData();
  1254. /* Make the graph, do minimization. */
  1255. if ( graphDictEl == 0 )
  1256. sectionGraph = makeAll();
  1257. else
  1258. sectionGraph = makeSpecific( graphDictEl );
  1259. /* Compute exports from the export definitions. */
  1260. makeExports();
  1261. /* If any errors have occured in the input file then don't write anything. */
  1262. if ( gblErrorCount > 0 )
  1263. return;
  1264. analyzeGraph( sectionGraph );
  1265. /* Depends on the graph analysis. */
  1266. setLongestMatchData( sectionGraph );
  1267. /* Decide if an error state is necessary.
  1268. * 1. There is an error transition
  1269. * 2. There is a gap in the transitions
  1270. * 3. The longest match operator requires it. */
  1271. if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
  1272. sectionGraph->errState = sectionGraph->addState();
  1273. /* State numbers need to be assigned such that all final states have a
  1274. * larger state id number than all non-final states. This enables the
  1275. * first_final mechanism to function correctly. We also want states to be
  1276. * ordered in a predictable fashion. So we first apply a depth-first
  1277. * search, then do a stable sort by final state status, then assign
  1278. * numbers. */
  1279. sectionGraph->depthFirstOrdering();
  1280. sectionGraph->sortStatesByFinal();
  1281. sectionGraph->setStateNumbers( 0 );
  1282. }
  1283. void ParseData::generateReduced( InputData &inputData )
  1284. {
  1285. beginProcessing();
  1286. cgd = makeCodeGen( inputData.inputFileName, sectionName, *inputData.outStream );
  1287. /* Make the generator. */
  1288. BackendGen backendGen( sectionName, this, sectionGraph, cgd );
  1289. /* Write out with it. */
  1290. backendGen.makeBackend();
  1291. if ( printStatistics ) {
  1292. cerr << "fsm name : " << sectionName << endl;
  1293. cerr << "num states: " << sectionGraph->stateList.length() << endl;
  1294. cerr << endl;
  1295. }
  1296. }
  1297. void ParseData::generateXML( ostream &out )
  1298. {
  1299. beginProcessing();
  1300. /* Make the generator. */
  1301. XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
  1302. /* Write out with it. */
  1303. codeGen.writeXML();
  1304. if ( printStatistics ) {
  1305. cerr << "fsm name : " << sectionName << endl;
  1306. cerr << "num states: " << sectionGraph->stateList.length() << endl;
  1307. cerr << endl;
  1308. }
  1309. }