nfa.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709
  1. /* nfa - NFA construction routines */
  2. /*-
  3. * Copyright (c) 1990 The Regents of the University of California.
  4. * All rights reserved.
  5. *
  6. * This code is derived from software contributed to Berkeley by
  7. * Vern Paxson.
  8. *
  9. * The United States Government has rights in this work pursuant
  10. * to contract no. DE-AC03-76SF00098 between the United States
  11. * Department of Energy and the University of California.
  12. *
  13. * Redistribution and use in source and binary forms with or without
  14. * modification are permitted provided that: (1) source distributions retain
  15. * this entire copyright notice and comment, and (2) distributions including
  16. * binaries display the following acknowledgement: ``This product includes
  17. * software developed by the University of California, Berkeley and its
  18. * contributors'' in the documentation or other materials provided with the
  19. * distribution and in all advertising materials mentioning features or use
  20. * of this software. Neither the name of the University nor the names of
  21. * its contributors may be used to endorse or promote products derived from
  22. * this software without specific prior written permission.
  23. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  24. * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  25. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  26. */
  27. /* $Header: /opt/vlysenkov/CVSROOT/arcadia/contrib/tools/flex-old/nfa.c,v 1.2 2007-11-30 02:28:15 pg Exp $ */
  28. #include "flexdef.h"
  29. /* declare functions that have forward references */
  30. int dupmachine PROTO((int));
  31. void mkxtion PROTO((int, int));
  32. /* add_accept - add an accepting state to a machine
  33. *
  34. * accepting_number becomes mach's accepting number.
  35. */
  36. void add_accept( mach, accepting_number )
  37. int mach, accepting_number;
  38. {
  39. /* Hang the accepting number off an epsilon state. if it is associated
  40. * with a state that has a non-epsilon out-transition, then the state
  41. * will accept BEFORE it makes that transition, i.e., one character
  42. * too soon.
  43. */
  44. if ( transchar[finalst[mach]] == SYM_EPSILON )
  45. accptnum[finalst[mach]] = accepting_number;
  46. else
  47. {
  48. int astate = mkstate( SYM_EPSILON );
  49. accptnum[astate] = accepting_number;
  50. (void) link_machines( mach, astate );
  51. }
  52. }
  53. /* copysingl - make a given number of copies of a singleton machine
  54. *
  55. * synopsis
  56. *
  57. * newsng = copysingl( singl, num );
  58. *
  59. * newsng - a new singleton composed of num copies of singl
  60. * singl - a singleton machine
  61. * num - the number of copies of singl to be present in newsng
  62. */
  63. int copysingl( singl, num )
  64. int singl, num;
  65. {
  66. int copy, i;
  67. copy = mkstate( SYM_EPSILON );
  68. for ( i = 1; i <= num; ++i )
  69. copy = link_machines( copy, dupmachine( singl ) );
  70. return copy;
  71. }
  72. /* dumpnfa - debugging routine to write out an nfa */
  73. void dumpnfa( state1 )
  74. int state1;
  75. {
  76. int sym, tsp1, tsp2, anum, ns;
  77. fprintf( stderr,
  78. _( "\n\n********** beginning dump of nfa with start state %d\n" ),
  79. state1 );
  80. /* We probably should loop starting at firstst[state1] and going to
  81. * lastst[state1], but they're not maintained properly when we "or"
  82. * all of the rules together. So we use our knowledge that the machine
  83. * starts at state 1 and ends at lastnfa.
  84. */
  85. /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
  86. for ( ns = 1; ns <= lastnfa; ++ns )
  87. {
  88. fprintf( stderr, _( "state # %4d\t" ), ns );
  89. sym = transchar[ns];
  90. tsp1 = trans1[ns];
  91. tsp2 = trans2[ns];
  92. anum = accptnum[ns];
  93. fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 );
  94. if ( anum != NIL )
  95. fprintf( stderr, " [%d]", anum );
  96. fprintf( stderr, "\n" );
  97. }
  98. fprintf( stderr, _( "********** end of dump\n" ) );
  99. }
  100. /* dupmachine - make a duplicate of a given machine
  101. *
  102. * synopsis
  103. *
  104. * copy = dupmachine( mach );
  105. *
  106. * copy - holds duplicate of mach
  107. * mach - machine to be duplicated
  108. *
  109. * note that the copy of mach is NOT an exact duplicate; rather, all the
  110. * transition states values are adjusted so that the copy is self-contained,
  111. * as the original should have been.
  112. *
  113. * also note that the original MUST be contiguous, with its low and high
  114. * states accessible by the arrays firstst and lastst
  115. */
  116. int dupmachine( mach )
  117. int mach;
  118. {
  119. int i, init, state_offset;
  120. int state = 0;
  121. int last = lastst[mach];
  122. for ( i = firstst[mach]; i <= last; ++i )
  123. {
  124. state = mkstate( transchar[i] );
  125. if ( trans1[i] != NO_TRANSITION )
  126. {
  127. mkxtion( finalst[state], trans1[i] + state - i );
  128. if ( transchar[i] == SYM_EPSILON &&
  129. trans2[i] != NO_TRANSITION )
  130. mkxtion( finalst[state],
  131. trans2[i] + state - i );
  132. }
  133. accptnum[state] = accptnum[i];
  134. }
  135. if ( state == 0 )
  136. flexfatal( _( "empty machine in dupmachine()" ) );
  137. state_offset = state - i + 1;
  138. init = mach + state_offset;
  139. firstst[init] = firstst[mach] + state_offset;
  140. finalst[init] = finalst[mach] + state_offset;
  141. lastst[init] = lastst[mach] + state_offset;
  142. return init;
  143. }
  144. /* finish_rule - finish up the processing for a rule
  145. *
  146. * An accepting number is added to the given machine. If variable_trail_rule
  147. * is true then the rule has trailing context and both the head and trail
  148. * are variable size. Otherwise if headcnt or trailcnt is non-zero then
  149. * the machine recognizes a pattern with trailing context and headcnt is
  150. * the number of characters in the matched part of the pattern, or zero
  151. * if the matched part has variable length. trailcnt is the number of
  152. * trailing context characters in the pattern, or zero if the trailing
  153. * context has variable length.
  154. */
  155. void finish_rule( mach, variable_trail_rule, headcnt, trailcnt )
  156. int mach, variable_trail_rule, headcnt, trailcnt;
  157. {
  158. char action_text[MAXLINE];
  159. add_accept( mach, num_rules );
  160. /* We did this in new_rule(), but it often gets the wrong
  161. * number because we do it before we start parsing the current rule.
  162. */
  163. rule_linenum[num_rules] = linenum;
  164. /* If this is a continued action, then the line-number has already
  165. * been updated, giving us the wrong number.
  166. */
  167. if ( continued_action )
  168. --rule_linenum[num_rules];
  169. sprintf( action_text, "case %d:\n", num_rules );
  170. add_action( action_text );
  171. if ( variable_trail_rule )
  172. {
  173. rule_type[num_rules] = RULE_VARIABLE;
  174. if ( performance_report > 0 )
  175. fprintf( stderr,
  176. _( "Variable trailing context rule at line %d\n" ),
  177. rule_linenum[num_rules] );
  178. variable_trailing_context_rules = true;
  179. }
  180. else
  181. {
  182. rule_type[num_rules] = RULE_NORMAL;
  183. if ( headcnt > 0 || trailcnt > 0 )
  184. {
  185. /* Do trailing context magic to not match the trailing
  186. * characters.
  187. */
  188. char *scanner_cp = "yy_c_buf_p = yy_cp";
  189. char *scanner_bp = "yy_bp";
  190. add_action(
  191. "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
  192. if ( headcnt > 0 )
  193. {
  194. sprintf( action_text, "%s = %s + %d;\n",
  195. scanner_cp, scanner_bp, headcnt );
  196. add_action( action_text );
  197. }
  198. else
  199. {
  200. sprintf( action_text, "%s -= %d;\n",
  201. scanner_cp, trailcnt );
  202. add_action( action_text );
  203. }
  204. add_action(
  205. "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
  206. }
  207. }
  208. /* Okay, in the action code at this point yytext and yyleng have
  209. * their proper final values for this rule, so here's the point
  210. * to do any user action. But don't do it for continued actions,
  211. * as that'll result in multiple YY_RULE_SETUP's.
  212. */
  213. if ( ! continued_action )
  214. add_action( "YY_RULE_SETUP\n" );
  215. line_directive_out( (FILE *) 0, 1 );
  216. }
  217. /* link_machines - connect two machines together
  218. *
  219. * synopsis
  220. *
  221. * new = link_machines( first, last );
  222. *
  223. * new - a machine constructed by connecting first to last
  224. * first - the machine whose successor is to be last
  225. * last - the machine whose predecessor is to be first
  226. *
  227. * note: this routine concatenates the machine first with the machine
  228. * last to produce a machine new which will pattern-match first first
  229. * and then last, and will fail if either of the sub-patterns fails.
  230. * FIRST is set to new by the operation. last is unmolested.
  231. */
  232. int link_machines( first, last )
  233. int first, last;
  234. {
  235. if ( first == NIL )
  236. return last;
  237. else if ( last == NIL )
  238. return first;
  239. else
  240. {
  241. mkxtion( finalst[first], last );
  242. finalst[first] = finalst[last];
  243. lastst[first] = MAX( lastst[first], lastst[last] );
  244. firstst[first] = MIN( firstst[first], firstst[last] );
  245. return first;
  246. }
  247. }
  248. /* mark_beginning_as_normal - mark each "beginning" state in a machine
  249. * as being a "normal" (i.e., not trailing context-
  250. * associated) states
  251. *
  252. * The "beginning" states are the epsilon closure of the first state
  253. */
  254. void mark_beginning_as_normal( mach )
  255. int mach;
  256. {
  257. switch ( state_type[mach] )
  258. {
  259. case STATE_NORMAL:
  260. /* Oh, we've already visited here. */
  261. return;
  262. case STATE_TRAILING_CONTEXT:
  263. state_type[mach] = STATE_NORMAL;
  264. if ( transchar[mach] == SYM_EPSILON )
  265. {
  266. if ( trans1[mach] != NO_TRANSITION )
  267. mark_beginning_as_normal(
  268. trans1[mach] );
  269. if ( trans2[mach] != NO_TRANSITION )
  270. mark_beginning_as_normal(
  271. trans2[mach] );
  272. }
  273. break;
  274. default:
  275. flexerror(
  276. _( "bad state type in mark_beginning_as_normal()" ) );
  277. break;
  278. }
  279. }
  280. /* mkbranch - make a machine that branches to two machines
  281. *
  282. * synopsis
  283. *
  284. * branch = mkbranch( first, second );
  285. *
  286. * branch - a machine which matches either first's pattern or second's
  287. * first, second - machines whose patterns are to be or'ed (the | operator)
  288. *
  289. * Note that first and second are NEITHER destroyed by the operation. Also,
  290. * the resulting machine CANNOT be used with any other "mk" operation except
  291. * more mkbranch's. Compare with mkor()
  292. */
  293. int mkbranch( first, second )
  294. int first, second;
  295. {
  296. int eps;
  297. if ( first == NO_TRANSITION )
  298. return second;
  299. else if ( second == NO_TRANSITION )
  300. return first;
  301. eps = mkstate( SYM_EPSILON );
  302. mkxtion( eps, first );
  303. mkxtion( eps, second );
  304. return eps;
  305. }
  306. /* mkclos - convert a machine into a closure
  307. *
  308. * synopsis
  309. * new = mkclos( state );
  310. *
  311. * new - a new state which matches the closure of "state"
  312. */
  313. int mkclos( state )
  314. int state;
  315. {
  316. return mkopt( mkposcl( state ) );
  317. }
  318. /* mkopt - make a machine optional
  319. *
  320. * synopsis
  321. *
  322. * new = mkopt( mach );
  323. *
  324. * new - a machine which optionally matches whatever mach matched
  325. * mach - the machine to make optional
  326. *
  327. * notes:
  328. * 1. mach must be the last machine created
  329. * 2. mach is destroyed by the call
  330. */
  331. int mkopt( mach )
  332. int mach;
  333. {
  334. int eps;
  335. if ( ! SUPER_FREE_EPSILON(finalst[mach]) )
  336. {
  337. eps = mkstate( SYM_EPSILON );
  338. mach = link_machines( mach, eps );
  339. }
  340. /* Can't skimp on the following if FREE_EPSILON(mach) is true because
  341. * some state interior to "mach" might point back to the beginning
  342. * for a closure.
  343. */
  344. eps = mkstate( SYM_EPSILON );
  345. mach = link_machines( eps, mach );
  346. mkxtion( mach, finalst[mach] );
  347. return mach;
  348. }
  349. /* mkor - make a machine that matches either one of two machines
  350. *
  351. * synopsis
  352. *
  353. * new = mkor( first, second );
  354. *
  355. * new - a machine which matches either first's pattern or second's
  356. * first, second - machines whose patterns are to be or'ed (the | operator)
  357. *
  358. * note that first and second are both destroyed by the operation
  359. * the code is rather convoluted because an attempt is made to minimize
  360. * the number of epsilon states needed
  361. */
  362. int mkor( first, second )
  363. int first, second;
  364. {
  365. int eps, orend;
  366. if ( first == NIL )
  367. return second;
  368. else if ( second == NIL )
  369. return first;
  370. else
  371. {
  372. /* See comment in mkopt() about why we can't use the first
  373. * state of "first" or "second" if they satisfy "FREE_EPSILON".
  374. */
  375. eps = mkstate( SYM_EPSILON );
  376. first = link_machines( eps, first );
  377. mkxtion( first, second );
  378. if ( SUPER_FREE_EPSILON(finalst[first]) &&
  379. accptnum[finalst[first]] == NIL )
  380. {
  381. orend = finalst[first];
  382. mkxtion( finalst[second], orend );
  383. }
  384. else if ( SUPER_FREE_EPSILON(finalst[second]) &&
  385. accptnum[finalst[second]] == NIL )
  386. {
  387. orend = finalst[second];
  388. mkxtion( finalst[first], orend );
  389. }
  390. else
  391. {
  392. eps = mkstate( SYM_EPSILON );
  393. first = link_machines( first, eps );
  394. orend = finalst[first];
  395. mkxtion( finalst[second], orend );
  396. }
  397. }
  398. finalst[first] = orend;
  399. return first;
  400. }
  401. /* mkposcl - convert a machine into a positive closure
  402. *
  403. * synopsis
  404. * new = mkposcl( state );
  405. *
  406. * new - a machine matching the positive closure of "state"
  407. */
  408. int mkposcl( state )
  409. int state;
  410. {
  411. int eps;
  412. if ( SUPER_FREE_EPSILON(finalst[state]) )
  413. {
  414. mkxtion( finalst[state], state );
  415. return state;
  416. }
  417. else
  418. {
  419. eps = mkstate( SYM_EPSILON );
  420. mkxtion( eps, state );
  421. return link_machines( state, eps );
  422. }
  423. }
  424. /* mkrep - make a replicated machine
  425. *
  426. * synopsis
  427. * new = mkrep( mach, lb, ub );
  428. *
  429. * new - a machine that matches whatever "mach" matched from "lb"
  430. * number of times to "ub" number of times
  431. *
  432. * note
  433. * if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach"
  434. */
  435. int mkrep( mach, lb, ub )
  436. int mach, lb, ub;
  437. {
  438. int base_mach, tail, copy, i;
  439. base_mach = copysingl( mach, lb - 1 );
  440. if ( ub == INFINITY )
  441. {
  442. copy = dupmachine( mach );
  443. mach = link_machines( mach,
  444. link_machines( base_mach, mkclos( copy ) ) );
  445. }
  446. else
  447. {
  448. tail = mkstate( SYM_EPSILON );
  449. for ( i = lb; i < ub; ++i )
  450. {
  451. copy = dupmachine( mach );
  452. tail = mkopt( link_machines( copy, tail ) );
  453. }
  454. mach = link_machines( mach, link_machines( base_mach, tail ) );
  455. }
  456. return mach;
  457. }
  458. /* mkstate - create a state with a transition on a given symbol
  459. *
  460. * synopsis
  461. *
  462. * state = mkstate( sym );
  463. *
  464. * state - a new state matching sym
  465. * sym - the symbol the new state is to have an out-transition on
  466. *
  467. * note that this routine makes new states in ascending order through the
  468. * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
  469. * relies on machines being made in ascending order and that they are
  470. * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
  471. * that it admittedly is)
  472. */
  473. int mkstate( sym )
  474. int sym;
  475. {
  476. if ( ++lastnfa >= current_mns )
  477. {
  478. if ( (current_mns += MNS_INCREMENT) >= maximum_mns )
  479. lerrif(
  480. _( "input rules are too complicated (>= %d NFA states)" ),
  481. current_mns );
  482. ++num_reallocs;
  483. firstst = reallocate_integer_array( firstst, current_mns );
  484. lastst = reallocate_integer_array( lastst, current_mns );
  485. finalst = reallocate_integer_array( finalst, current_mns );
  486. transchar = reallocate_integer_array( transchar, current_mns );
  487. trans1 = reallocate_integer_array( trans1, current_mns );
  488. trans2 = reallocate_integer_array( trans2, current_mns );
  489. accptnum = reallocate_integer_array( accptnum, current_mns );
  490. assoc_rule =
  491. reallocate_integer_array( assoc_rule, current_mns );
  492. state_type =
  493. reallocate_integer_array( state_type, current_mns );
  494. }
  495. firstst[lastnfa] = lastnfa;
  496. finalst[lastnfa] = lastnfa;
  497. lastst[lastnfa] = lastnfa;
  498. transchar[lastnfa] = sym;
  499. trans1[lastnfa] = NO_TRANSITION;
  500. trans2[lastnfa] = NO_TRANSITION;
  501. accptnum[lastnfa] = NIL;
  502. assoc_rule[lastnfa] = num_rules;
  503. state_type[lastnfa] = current_state_type;
  504. /* Fix up equivalence classes base on this transition. Note that any
  505. * character which has its own transition gets its own equivalence
  506. * class. Thus only characters which are only in character classes
  507. * have a chance at being in the same equivalence class. E.g. "a|b"
  508. * puts 'a' and 'b' into two different equivalence classes. "[ab]"
  509. * puts them in the same equivalence class (barring other differences
  510. * elsewhere in the input).
  511. */
  512. if ( sym < 0 )
  513. {
  514. /* We don't have to update the equivalence classes since
  515. * that was already done when the ccl was created for the
  516. * first time.
  517. */
  518. }
  519. else if ( sym == SYM_EPSILON )
  520. ++numeps;
  521. else
  522. {
  523. check_char( sym );
  524. if ( useecs )
  525. /* Map NUL's to csize. */
  526. mkechar( sym ? sym : csize, nextecm, ecgroup );
  527. }
  528. return lastnfa;
  529. }
  530. /* mkxtion - make a transition from one state to another
  531. *
  532. * synopsis
  533. *
  534. * mkxtion( statefrom, stateto );
  535. *
  536. * statefrom - the state from which the transition is to be made
  537. * stateto - the state to which the transition is to be made
  538. */
  539. void mkxtion( statefrom, stateto )
  540. int statefrom, stateto;
  541. {
  542. if ( trans1[statefrom] == NO_TRANSITION )
  543. trans1[statefrom] = stateto;
  544. else if ( (transchar[statefrom] != SYM_EPSILON) ||
  545. (trans2[statefrom] != NO_TRANSITION) )
  546. flexfatal( _( "found too many transitions in mkxtion()" ) );
  547. else
  548. { /* second out-transition for an epsilon state */
  549. ++eps2;
  550. trans2[statefrom] = stateto;
  551. }
  552. }
  553. /* new_rule - initialize for a new rule */
  554. void new_rule()
  555. {
  556. if ( ++num_rules >= current_max_rules )
  557. {
  558. ++num_reallocs;
  559. current_max_rules += MAX_RULES_INCREMENT;
  560. rule_type = reallocate_integer_array( rule_type,
  561. current_max_rules );
  562. rule_linenum = reallocate_integer_array( rule_linenum,
  563. current_max_rules );
  564. rule_useful = reallocate_integer_array( rule_useful,
  565. current_max_rules );
  566. }
  567. if ( num_rules > MAX_RULE )
  568. lerrif( _( "too many rules (> %d)!" ), MAX_RULE );
  569. rule_linenum[num_rules] = linenum;
  570. rule_useful[num_rules] = false;
  571. }