rlscan.rl 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272
  1. /*
  2. * Copyright 2006-2007 Adrian Thurston <thurston@complang.org>
  3. * Copyright 2011 Josef Goettgens
  4. */
  5. /* This file is part of Ragel.
  6. *
  7. * Ragel is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * Ragel is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with Ragel; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. */
  21. #include <iostream>
  22. #include <fstream>
  23. #include <string.h>
  24. #include "ragel.h"
  25. #include "rlscan.h"
  26. #include "inputdata.h"
  27. //#define LOG_TOKENS
  28. using std::ifstream;
  29. using std::istream;
  30. using std::ostream;
  31. using std::cout;
  32. using std::cerr;
  33. using std::endl;
  /* How the inline code block currently being scanned is terminated. The
   * inline_code scanners compare inlineBlockType against these values to
   * decide when to return to the scanner that called them. */
  enum InlineBlockType
  {
      /* The block ends at the '}' that brings curly_count back to zero. */
      CurlyDelimited,
      /* The block ends at the first ';'. */
      SemiTerminated
  };
  39. /*
  40. * The Scanner for Importing
  41. */
  %%{
      machine inline_token_scan;

      # The machine runs over an array of int token ids (token_data), not
      # over characters.
      alphtype int;

      # Scanner state variables are referenced with a tok_ prefix (tok_ts, ...).
      access tok_;

      # Import scanner tokens.
      import "rlparse.h";

      # Every recognised three-token sequence is rewritten as the four-token
      # statement "name = value ;" and sent to inclToParser. The strings and
      # lengths come from token_strings/token_lens, which are indexed in
      # parallel with token_data.
      main := |*
          # Define of number.
          IMP_Define IMP_Word IMP_UInt => {
              /* Index of the first token of this match in the token buffers. */
              int base = tok_ts - token_data;
              /* The name follows the define token; the number is third. */
              int nameOff = 1;
              int numOff = 2;

              directToParser( inclToParser, fileName, line, column, TK_Word,
                      token_strings[base+nameOff], token_lens[base+nameOff] );
              directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
              directToParser( inclToParser, fileName, line, column, TK_UInt,
                      token_strings[base+numOff], token_lens[base+numOff] );
              directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
          };

          # Assignment of number.
          IMP_Word '=' IMP_UInt => {
              /* Index of the first token of this match in the token buffers. */
              int base = tok_ts - token_data;
              /* The name is the first token; the number is third (after '='). */
              int nameOff = 0;
              int numOff = 2;

              directToParser( inclToParser, fileName, line, column, TK_Word,
                      token_strings[base+nameOff], token_lens[base+nameOff] );
              directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
              directToParser( inclToParser, fileName, line, column, TK_UInt,
                      token_strings[base+numOff], token_lens[base+numOff] );
              directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
          };

          # Define of literal.
          IMP_Define IMP_Word IMP_Literal => {
              /* Index of the first token of this match in the token buffers. */
              int base = tok_ts - token_data;
              /* The name follows the define token; the literal is third. */
              int nameOff = 1;
              int litOff = 2;

              directToParser( inclToParser, fileName, line, column, TK_Word,
                      token_strings[base+nameOff], token_lens[base+nameOff] );
              directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
              directToParser( inclToParser, fileName, line, column, TK_Literal,
                      token_strings[base+litOff], token_lens[base+litOff] );
              directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
          };

          # Assignment of literal.
          IMP_Word '=' IMP_Literal => {
              /* Index of the first token of this match in the token buffers. */
              int base = tok_ts - token_data;
              /* The name is the first token; the literal is third (after '='). */
              int nameOff = 0;
              int litOff = 2;

              directToParser( inclToParser, fileName, line, column, TK_Word,
                      token_strings[base+nameOff], token_lens[base+nameOff] );
              directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
              directToParser( inclToParser, fileName, line, column, TK_Literal,
                      token_strings[base+litOff], token_lens[base+litOff] );
              directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
          };

          # Catch everything else. No action is attached, so any other token
          # is consumed without sending anything to the parser.
          any;
      *|;
  }%%

  %% write data;
  102. void Scanner::flushImport()
  103. {
  104. int *p = token_data;
  105. int *pe = token_data + cur_token;
  106. int *eof = 0;
  107. %%{
  108. machine inline_token_scan;
  109. write init;
  110. write exec;
  111. }%%
  112. if ( tok_ts == 0 )
  113. cur_token = 0;
  114. else {
  115. cur_token = pe - tok_ts;
  116. int ts_offset = tok_ts - token_data;
  117. memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) );
  118. memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
  119. memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
  120. }
  121. }
  122. void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine,
  123. int tokColumn, int type, char *tokdata, int toklen )
  124. {
  125. InputLoc loc;
  126. #ifdef LOG_TOKENS
  127. cerr << "scanner:" << tokLine << ":" << tokColumn <<
  128. ": sending token to the parser " << Parser_lelNames[type];
  129. cerr << " " << toklen;
  130. if ( tokdata != 0 )
  131. cerr << " " << tokdata;
  132. cerr << endl;
  133. #endif
  134. loc.fileName = tokFileName;
  135. loc.line = tokLine;
  136. loc.col = tokColumn;
  137. toParser->token( loc, type, tokdata, toklen );
  138. }
  139. void Scanner::importToken( int token, char *start, char *end )
  140. {
  141. if ( cur_token == max_tokens )
  142. flushImport();
  143. token_data[cur_token] = token;
  144. if ( start == 0 ) {
  145. token_strings[cur_token] = 0;
  146. token_lens[cur_token] = 0;
  147. }
  148. else {
  149. int toklen = end-start;
  150. token_lens[cur_token] = toklen;
  151. token_strings[cur_token] = new char[toklen+1];
  152. memcpy( token_strings[cur_token], start, toklen );
  153. token_strings[cur_token][toklen] = 0;
  154. }
  155. cur_token++;
  156. }
  157. void Scanner::pass( int token, char *start, char *end )
  158. {
  159. if ( importMachines )
  160. importToken( token, start, end );
  161. pass();
  162. }
  163. void Scanner::pass()
  164. {
  165. updateCol();
  166. /* If no errors and we are at the bottom of the include stack (the
  167. * source file listed on the command line) then write out the data. */
  168. if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
  169. id.inputItems.tail->data.write( ts, te-ts );
  170. }
  171. /*
  172. * The scanner for processing sections, includes, imports, etc.
  173. */
  %%{
      machine section_parse;
      # Input symbols are int token ids; processToken() feeds the machine
      # one token type at a time.
      alphtype int;
      # Emit the generated data for this machine at this point in the file.
      write data;
  }%%
  /* Initialise scanner state by running the Ragel-generated init code
   * (presumably for section_parse, the machine named just above). */
  void Scanner::init( )
  {
      %% write init;
  }
  183. bool Scanner::active()
  184. {
  185. if ( ignoreSection )
  186. return false;
  187. if ( parser == 0 && ! parserExistsError ) {
  188. scan_error() << "this specification has no name, nor does any previous"
  189. " specification" << endl;
  190. parserExistsError = true;
  191. }
  192. if ( parser == 0 )
  193. return false;
  194. return true;
  195. }
  196. ostream &Scanner::scan_error()
  197. {
  198. /* Maintain the error count. */
  199. gblErrorCount += 1;
  200. cerr << makeInputLoc( fileName, line, column ) << ": ";
  201. return cerr;
  202. }
  203. /* An approximate check for duplicate includes. Due to aliasing of files it's
  204. * possible for duplicates to creep in. */
  205. bool Scanner::duplicateInclude( char *inclFileName, char *inclSectionName )
  206. {
  207. for ( IncludeHistory::Iter hi = parser->includeHistory; hi.lte(); hi++ ) {
  208. if ( strcmp( hi->fileName, inclFileName ) == 0 &&
  209. strcmp( hi->sectionName, inclSectionName ) == 0 )
  210. {
  211. return true;
  212. }
  213. }
  214. return false;
  215. }
  216. void Scanner::updateCol()
  217. {
  218. char *from = lastnl;
  219. if ( from == 0 )
  220. from = ts;
  221. //cerr << "adding " << te - from << " to column" << endl;
  222. column += te - from;
  223. lastnl = 0;
  224. }
  225. void Scanner::handleMachine()
  226. {
  227. /* Assign a name to the machine. */
  228. char *machine = word;
  229. if ( !importMachines && inclSectionTarg == 0 ) {
  230. ignoreSection = false;
  231. ParserDictEl *pdEl = id.parserDict.find( machine );
  232. if ( pdEl == 0 ) {
  233. pdEl = new ParserDictEl( machine );
  234. pdEl->value = new Parser( fileName, machine, sectionLoc );
  235. pdEl->value->init();
  236. id.parserDict.insert( pdEl );
  237. id.parserList.append( pdEl->value );
  238. }
  239. parser = pdEl->value;
  240. }
  241. else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) {
  242. /* found include target */
  243. ignoreSection = false;
  244. parser = inclToParser;
  245. }
  246. else {
  247. /* ignoring section */
  248. ignoreSection = true;
  249. parser = 0;
  250. }
  251. }
  252. void Scanner::handleInclude()
  253. {
  254. if ( active() ) {
  255. char *inclSectionName = word;
  256. char **includeChecks = 0;
  257. /* Implement defaults for the input file and section name. */
  258. if ( inclSectionName == 0 )
  259. inclSectionName = parser->sectionName;
  260. if ( lit != 0 )
  261. includeChecks = makeIncludePathChecks( fileName, lit, lit_len );
  262. else {
  263. char *test = new char[strlen(fileName)+1];
  264. strcpy( test, fileName );
  265. includeChecks = new char*[2];
  266. includeChecks[0] = test;
  267. includeChecks[1] = 0;
  268. }
  269. long found = 0;
  270. ifstream *inFile = tryOpenInclude( includeChecks, found );
  271. if ( inFile == 0 ) {
  272. scan_error() << "include: failed to locate file" << endl;
  273. char **tried = includeChecks;
  274. while ( *tried != 0 )
  275. scan_error() << "include: attempted: \"" << *tried++ << '\"' << endl;
  276. }
  277. else {
  278. /* Don't include anything that's already been included. */
  279. if ( !duplicateInclude( includeChecks[found], inclSectionName ) ) {
  280. parser->includeHistory.append( IncludeHistoryItem(
  281. includeChecks[found], inclSectionName ) );
  282. Scanner scanner( id, includeChecks[found], *inFile, parser,
  283. inclSectionName, includeDepth+1, false );
  284. scanner.do_scan( );
  285. delete inFile;
  286. }
  287. }
  288. }
  289. }
  290. void Scanner::handleImport()
  291. {
  292. if ( active() ) {
  293. char **importChecks = makeIncludePathChecks( fileName, lit, lit_len );
  294. /* Open the input file for reading. */
  295. long found = 0;
  296. ifstream *inFile = tryOpenInclude( importChecks, found );
  297. if ( inFile == 0 ) {
  298. scan_error() << "import: could not open import file " <<
  299. "for reading" << endl;
  300. char **tried = importChecks;
  301. while ( *tried != 0 )
  302. scan_error() << "import: attempted: \"" << *tried++ << '\"' << endl;
  303. }
  304. Scanner scanner( id, importChecks[found], *inFile, parser,
  305. 0, includeDepth+1, true );
  306. scanner.do_scan( );
  307. scanner.importToken( 0, 0, 0 );
  308. scanner.flushImport();
  309. delete inFile;
  310. }
  311. }
  %%{
      machine section_parse;

      # Need the defines representing tokens.
      import "rlparse.h";

      # word/lit hold the most recent TK_Word / TK_Literal text seen in the
      # statement being parsed; handleMachine(), handleInclude() and
      # handleImport() read them.
      action clear_words { word = lit = 0; word_len = lit_len = 0; }
      action store_word { word = tokdata; word_len = toklen; }
      action store_lit { lit = tokdata; lit_len = toklen; }

      # Error reporting for malformed statements.
      action mach_err { scan_error() << "bad machine statement" << endl; }
      action incl_err { scan_error() << "bad include statement" << endl; }
      action import_err { scan_error() << "bad import statement" << endl; }
      action write_err { scan_error() << "bad write statement" << endl; }

      # Statement handlers, run when the closing ';' is consumed.
      action handle_machine { handleMachine(); }
      action handle_include { handleInclude(); }
      action handle_import { handleImport(); }

      # machine <name> ;
      machine_stmt =
          ( KW_Machine TK_Word @store_word ';' ) @handle_machine
          <>err mach_err <>eof mach_err;

      # Either a section name optionally followed by a file literal, or a
      # file literal alone. word and lit are cleared on entry so that
      # handleInclude() can tell which parts were given.
      include_names = (
          TK_Word @store_word ( TK_Literal @store_lit )? |
          TK_Literal @store_lit
      ) >clear_words;

      # include <names> ;
      include_stmt =
          ( KW_Include include_names ';' ) @handle_include
          <>err incl_err <>eof incl_err;

      # import "file" ;
      import_stmt =
          ( KW_Import TK_Literal @store_lit ';' ) @handle_import
          <>err import_err <>eof import_err;

      # On the write keyword: append a new Write input item describing the
      # current parser, but only when the section is active and neither
      # machineSpec nor machineName is set.
      action write_command
      {
          if ( active() && machineSpec == 0 && machineName == 0 ) {
              InputItem *inputItem = new InputItem;
              inputItem->type = InputItem::Write;
              inputItem->loc.fileName = fileName;
              inputItem->loc.line = line;
              inputItem->loc.col = column;
              inputItem->name = parser->sectionName;
              inputItem->pd = parser->pd;
              id.inputItems.append( inputItem );
          }
      }

      # Each word after the write keyword is appended (as a strdup'd copy)
      # to the argument list of the most recently appended input item.
      action write_arg
      {
          if ( active() && machineSpec == 0 && machineName == 0 )
              id.inputItems.tail->writeArgs.append( strdup(tokdata) );
      }

      # The closing ';' terminates the argument list with a null entry.
      action write_close
      {
          if ( active() && machineSpec == 0 && machineName == 0 )
              id.inputItems.tail->writeArgs.append( 0 );
      }

      # write <word>+ ;
      write_stmt =
          ( KW_Write @write_command
          ( TK_Word @write_arg )+ ';' @write_close )
          <>err write_err <>eof write_err;

      action handle_token
      {
          /* Send the token off to the parser. */
          if ( active() )
              directToParser( parser, fileName, line, column, type, tokdata, toklen );
      }

      # Catch everything else: any single token that is not one of the four
      # statement keywords is forwarded to the parser.
      everything_else =
          ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token;

      main := (
          machine_stmt |
          include_stmt |
          import_stmt |
          write_stmt |
          everything_else
      )*;
  }%%
  383. void Scanner::token( int type, char c )
  384. {
  385. token( type, &c, &c + 1 );
  386. }
  387. void Scanner::token( int type )
  388. {
  389. token( type, 0, 0 );
  390. }
  391. void Scanner::token( int type, char *start, char *end )
  392. {
  393. char *tokdata = 0;
  394. int toklen = 0;
  395. if ( start != 0 ) {
  396. toklen = end-start;
  397. tokdata = new char[toklen+1];
  398. memcpy( tokdata, start, toklen );
  399. tokdata[toklen] = 0;
  400. }
  401. processToken( type, tokdata, toklen );
  402. }
  403. void Scanner::processToken( int type, char *tokdata, int toklen )
  404. {
  405. int *p, *pe, *eof;
  406. if ( type < 0 )
  407. p = pe = eof = 0;
  408. else {
  409. p = &type;
  410. pe = &type + 1;
  411. eof = 0;
  412. }
  413. %%{
  414. machine section_parse;
  415. write exec;
  416. }%%
  417. updateCol();
  418. /* Record the last token for use in controlling the scan of subsequent
  419. * tokens. */
  420. lastToken = type;
  421. }
  422. void Scanner::startSection( )
  423. {
  424. parserExistsError = false;
  425. sectionLoc.fileName = fileName;
  426. sectionLoc.line = line;
  427. sectionLoc.col = column;
  428. }
  429. void Scanner::endSection( )
  430. {
  431. /* Execute the eof actions for the section parser. */
  432. processToken( -1, 0, 0 );
  433. /* Close off the section with the parser. */
  434. if ( active() ) {
  435. InputLoc loc;
  436. loc.fileName = fileName;
  437. loc.line = line;
  438. loc.col = column;
  439. parser->token( loc, TK_EndSection, 0, 0 );
  440. }
  441. if ( includeDepth == 0 ) {
  442. if ( machineSpec == 0 && machineName == 0 ) {
  443. /* The end section may include a newline on the end, so
  444. * we use the last line, which will count the newline. */
  445. InputItem *inputItem = new InputItem;
  446. inputItem->type = InputItem::HostData;
  447. inputItem->loc.line = line;
  448. inputItem->loc.col = column;
  449. id.inputItems.append( inputItem );
  450. }
  451. }
  452. }
  /*
   * Tell whether `path` is absolute.
   *
   * With _WIN32 defined: a drive letter, ':' and then '\' or '/'.
   * Otherwise: the path starts with '/'.
   *
   * path must be a non-null NUL-terminated string. An empty string is
   * handled (the checks stop at the terminator) and is not absolute.
   */
  bool isAbsolutePath( const char *path )
  {
  #ifdef _WIN32
      /* isalpha() is undefined for negative values other than EOF, so the
       * plain char must be converted to unsigned char first. */
      return isalpha( static_cast<unsigned char>( path[0] ) ) && path[1] == ':' &&
              ( path[2] == '\\' || path[2] == '/' );
  #else
      return path[0] == '/';
  #endif
  }
  /*
   * Build the path of `rel` as seen from the directory that contains `abs`.
   *
   * rel  path to resolve.
   * abs  path of a file; everything up to and including its last '/' is
   *      used as the directory prefix. If abs contains no '/', the prefix
   *      is empty and the result is a copy of rel. The previous code
   *      computed strrchr(abs, '/') + 1 unconditionally, which is undefined
   *      when there is no slash.
   *
   * Returns a NUL-terminated string allocated with new[]; the caller owns it.
   */
  inline char* resolvePath(const char* rel, const char* abs) {
      const size_t relLen = strlen(rel);

      /* Length of the directory part of abs, including the trailing '/'. */
      const char* lastSlash = strrchr(abs, '/');
      const size_t dirLen = lastSlash != 0 ? static_cast<size_t>(lastSlash - abs) + 1 : 0;

      char* ret = new char[dirLen + relLen + 1];
      memcpy(ret, abs, dirLen);
      /* Copy rel together with its terminating NUL. */
      memcpy(ret + dirLen, rel, relLen + 1);
      return ret;
  }
  471. char **Scanner::makeIncludePathChecks( const char *thisFileName,
  472. const char *fileName, int fnlen )
  473. {
  474. char **checks = 0;
  475. long nextCheck = 0;
  476. long length = 0;
  477. bool caseInsensitive = false;
  478. char *data = prepareLitString( InputLoc(), fileName, fnlen,
  479. length, caseInsensitive );
  480. /* Absolute path? */
  481. if ( isAbsolutePath( data ) ) {
  482. checks = new char*[2];
  483. checks[nextCheck++] = data;
  484. }
  485. else {
  486. checks = new char*[2 + id.includePaths.length()];
  487. /* Search from the the location of the current file. */
  488. const char *lastSlash = strrchr( thisFileName, '/' );
  489. if ( lastSlash == 0 )
  490. checks[nextCheck++] = data;
  491. else {
  492. checks[nextCheck++] = resolvePath(data, thisFileName);
  493. }
  494. /* Search from the include paths given on the command line. */
  495. for ( ArgsVector::Iter incp = id.includePaths; incp.lte(); incp++ ) {
  496. long pathLen = strlen( *incp );
  497. long checkLen = pathLen + 1 + length;
  498. char *check = new char[checkLen+1];
  499. memcpy( check, *incp, pathLen );
  500. check[pathLen] = '/';
  501. memcpy( check+pathLen+1, data, length );
  502. check[checkLen] = 0;
  503. checks[nextCheck++] = check;
  504. }
  505. }
  506. checks[nextCheck] = 0;
  507. return checks;
  508. }
  509. ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found )
  510. {
  511. char **check = pathChecks;
  512. ifstream *inFile = new ifstream;
  513. while ( *check != 0 ) {
  514. inFile->open( *check );
  515. if ( inFile->is_open() ) {
  516. found = check - pathChecks;
  517. return inFile;
  518. }
  519. /*
  520. * 03/26/2011 jg:
  521. * Don't rely on sloppy runtime behaviour: reset the state of the stream explicitly.
  522. * If inFile->open() fails, which happens when include dirs are tested, the fail bit
  523. * is set by the runtime library. Currently the VS runtime library opens new files,
  524. * but when it comes to reading it refuses to work.
  525. */
  526. inFile->clear();
  527. check += 1;
  528. }
  529. found = -1;
  530. delete inFile;
  531. return 0;
  532. }
  533. %%{
  534. machine rlscan;
  535. # This is sent by the driver code.
  536. EOF = 0;
  537. action inc_nl {
  538. lastnl = p;
  539. column = 0;
  540. line++;
  541. }
  542. NL = '\n' @inc_nl;
  543. # Identifiers, numbers, comments, and other common things.
  544. ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
  545. number = digit+;
  546. hex_number = '0x' [0-9a-fA-F]+;
  547. c_comment =
  548. '/*' ( any | NL )* :>> '*/';
  549. cpp_comment =
  550. '//' [^\n]* NL;
  551. c_cpp_comment = c_comment | cpp_comment;
  552. ruby_comment = '#' [^\n]* NL;
  553. # These literal forms are common to host code and ragel.
  554. s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
  555. d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
  556. host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/';
  557. whitespace = [ \t] | NL;
  558. pound_comment = '#' [^\n]* NL;
  559. # An inline block of code for Ruby.
  560. inline_code_ruby := |*
  561. # Inline expression keywords.
  562. "fpc" => { token( KW_PChar ); };
  563. "fc" => { token( KW_Char ); };
  564. "fcurs" => { token( KW_CurState ); };
  565. "ftargs" => { token( KW_TargState ); };
  566. "fentry" => {
  567. whitespaceOn = false;
  568. token( KW_Entry );
  569. };
  570. # Inline statement keywords.
  571. "fhold" => {
  572. whitespaceOn = false;
  573. token( KW_Hold );
  574. };
  575. "fexec" => { token( KW_Exec, 0, 0 ); };
  576. "fgoto" => {
  577. whitespaceOn = false;
  578. token( KW_Goto );
  579. };
  580. "fnext" => {
  581. whitespaceOn = false;
  582. token( KW_Next );
  583. };
  584. "fcall" => {
  585. whitespaceOn = false;
  586. token( KW_Call );
  587. };
  588. "fret" => {
  589. whitespaceOn = false;
  590. token( KW_Ret );
  591. };
  592. "fbreak" => {
  593. whitespaceOn = false;
  594. token( KW_Break );
  595. };
  596. ident => { token( TK_Word, ts, te ); };
  597. number => { token( TK_UInt, ts, te ); };
  598. hex_number => { token( TK_Hex, ts, te ); };
  599. ( s_literal | d_literal | host_re_literal )
  600. => { token( IL_Literal, ts, te ); };
  601. whitespace+ => {
  602. if ( whitespaceOn )
  603. token( IL_WhiteSpace, ts, te );
  604. };
  605. ruby_comment => { token( IL_Comment, ts, te ); };
  606. "::" => { token( TK_NameSep, ts, te ); };
  607. # Some symbols need to go to the parser as with their cardinal value as
  608. # the token type (as opposed to being sent as anonymous symbols)
  609. # because they are part of the sequences which we interpret. The * ) ;
  610. # symbols cause whitespace parsing to come back on. This gets turned
  611. # off by some keywords.
  612. ";" => {
  613. whitespaceOn = true;
  614. token( *ts, ts, te );
  615. if ( inlineBlockType == SemiTerminated )
  616. fret;
  617. };
  618. [*)] => {
  619. whitespaceOn = true;
  620. token( *ts, ts, te );
  621. };
  622. [,(] => { token( *ts, ts, te ); };
  623. '{' => {
  624. token( IL_Symbol, ts, te );
  625. curly_count += 1;
  626. };
  627. '}' => {
  628. if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
  629. /* Inline code block ends. */
  630. token( '}' );
  631. fret;
  632. }
  633. else {
  634. /* Either a semi terminated inline block or only the closing
  635. * brace of some inner scope, not the block's closing brace. */
  636. token( IL_Symbol, ts, te );
  637. }
  638. };
  639. EOF => {
  640. scan_error() << "unterminated code block" << endl;
  641. };
  642. # Send every other character as a symbol.
  643. any => { token( IL_Symbol, ts, te ); };
  644. *|;
  645. # An inline block of code for languages other than Ruby.
  646. inline_code := |*
  647. # Inline expression keywords.
  648. "fpc" => { token( KW_PChar ); };
  649. "fc" => { token( KW_Char ); };
  650. "fcurs" => { token( KW_CurState ); };
  651. "ftargs" => { token( KW_TargState ); };
  652. "fentry" => {
  653. whitespaceOn = false;
  654. token( KW_Entry );
  655. };
  656. # Inline statement keywords.
  657. "fhold" => {
  658. whitespaceOn = false;
  659. token( KW_Hold );
  660. };
  661. "fexec" => { token( KW_Exec, 0, 0 ); };
  662. "fgoto" => {
  663. whitespaceOn = false;
  664. token( KW_Goto );
  665. };
  666. "fnext" => {
  667. whitespaceOn = false;
  668. token( KW_Next );
  669. };
  670. "fcall" => {
  671. whitespaceOn = false;
  672. token( KW_Call );
  673. };
  674. "fret" => {
  675. whitespaceOn = false;
  676. token( KW_Ret );
  677. };
  678. "fbreak" => {
  679. whitespaceOn = false;
  680. token( KW_Break );
  681. };
  682. ident => { token( TK_Word, ts, te ); };
  683. number => { token( TK_UInt, ts, te ); };
  684. hex_number => { token( TK_Hex, ts, te ); };
  685. ( s_literal | d_literal )
  686. => { token( IL_Literal, ts, te ); };
  687. whitespace+ => {
  688. if ( whitespaceOn )
  689. token( IL_WhiteSpace, ts, te );
  690. };
  691. c_cpp_comment => { token( IL_Comment, ts, te ); };
  692. "::" => { token( TK_NameSep, ts, te ); };
  693. # Some symbols need to go to the parser as with their cardinal value as
  694. # the token type (as opposed to being sent as anonymous symbols)
  695. # because they are part of the sequences which we interpret. The * ) ;
  696. # symbols cause whitespace parsing to come back on. This gets turned
  697. # off by some keywords.
  698. ";" => {
  699. whitespaceOn = true;
  700. token( *ts, ts, te );
  701. if ( inlineBlockType == SemiTerminated )
  702. fret;
  703. };
  704. [*)] => {
  705. whitespaceOn = true;
  706. token( *ts, ts, te );
  707. };
  708. [,(] => { token( *ts, ts, te ); };
  709. '{' => {
  710. token( IL_Symbol, ts, te );
  711. curly_count += 1;
  712. };
  713. '}' => {
  714. if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
  715. /* Inline code block ends. */
  716. token( '}' );
  717. fret;
  718. }
  719. else {
  720. /* Either a semi terminated inline block or only the closing
  721. * brace of some inner scope, not the block's closing brace. */
  722. token( IL_Symbol, ts, te );
  723. }
  724. };
  725. EOF => {
  726. scan_error() << "unterminated code block" << endl;
  727. };
  728. # Send every other character as a symbol.
  729. any => { token( IL_Symbol, ts, te ); };
  730. *|;
  731. or_literal := |*
  732. # Escape sequences in OR expressions.
  733. '\\0' => { token( RE_Char, '\0' ); };
  734. '\\a' => { token( RE_Char, '\a' ); };
  735. '\\b' => { token( RE_Char, '\b' ); };
  736. '\\t' => { token( RE_Char, '\t' ); };
  737. '\\n' => { token( RE_Char, '\n' ); };
  738. '\\v' => { token( RE_Char, '\v' ); };
  739. '\\f' => { token( RE_Char, '\f' ); };
  740. '\\r' => { token( RE_Char, '\r' ); };
  741. '\\\n' => { updateCol(); };
  742. '\\' any => { token( RE_Char, ts+1, te ); };
  743. # Range dash in an OR expression.
  744. '-' => { token( RE_Dash, 0, 0 ); };
  745. # Terminate an OR expression.
  746. ']' => { token( RE_SqClose ); fret; };
  747. EOF => {
  748. scan_error() << "unterminated OR literal" << endl;
  749. };
  750. # Characters in an OR expression.
  751. [^\]] => { token( RE_Char, ts, te ); };
  752. *|;
  753. ragel_re_literal := |*
  754. # Escape sequences in regular expressions.
  755. '\\0' => { token( RE_Char, '\0' ); };
  756. '\\a' => { token( RE_Char, '\a' ); };
  757. '\\b' => { token( RE_Char, '\b' ); };
  758. '\\t' => { token( RE_Char, '\t' ); };
  759. '\\n' => { token( RE_Char, '\n' ); };
  760. '\\v' => { token( RE_Char, '\v' ); };
  761. '\\f' => { token( RE_Char, '\f' ); };
  762. '\\r' => { token( RE_Char, '\r' ); };
  763. '\\\n' => { updateCol(); };
  764. '\\' any => { token( RE_Char, ts+1, te ); };
  765. # Terminate a regular expression.
  766. '/' [i]? => {
  767. token( RE_Slash, ts, te );
  768. fgoto parser_def;
  769. };
  770. # Special characters.
  771. '.' => { token( RE_Dot ); };
  772. '*' => { token( RE_Star ); };
  773. '[' => { token( RE_SqOpen ); fcall or_literal; };
  774. '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
  775. EOF => {
  776. scan_error() << "unterminated regular expression" << endl;
  777. };
  778. # Characters in a regular expression.
  779. [^\/] => { token( RE_Char, ts, te ); };
  780. *|;
  781. # We need a separate token space here to avoid the ragel keywords.
  782. write_statement := |*
  783. ident => { token( TK_Word, ts, te ); } ;
  784. [ \t\n]+ => { updateCol(); };
  785. ';' => { token( ';' ); fgoto parser_def; };
  786. EOF => {
  787. scan_error() << "unterminated write statement" << endl;
  788. };
  789. *|;
  # Scanner for the inside of a ragel section. The main and main_ruby scanners
  # enter it with fcall when they see '%%{' or '%%'; it returns with fret on
  # '}%%', or on a newline when singleLineSpec is set.
  #
  # Pattern order matters: in a longest-match scanner the earlier pattern wins
  # when two patterns match the same text, which is why the keywords are listed
  # ahead of ident. Do not reorder entries without checking for overlaps.
  790. # Parser definitions.
  791. parser_def := |*
  792. #'length_cond' => { token( KW_Length ); };
  793. 'machine' => { token( KW_Machine ); };
  794. 'include' => { token( KW_Include ); };
  795. 'import' => { token( KW_Import ); };
  # After the 'write' keyword the rest of the statement is scanned by
  # write_statement, which jumps back here on ';'.
  796. 'write' => {
  797. token( KW_Write );
  798. fgoto write_statement;
  799. };
  800. 'action' => { token( KW_Action ); };
  801. 'alphtype' => { token( KW_AlphType ); };
  802. 'prepush' => { token( KW_PrePush ); };
  803. 'postpop' => { token( KW_PostPop ); };
  804. # FIXME: Enable this post 5.17.
  805. # 'range' => { token( KW_Range ); };
  # getkey, access and variable share one shape: emit the keyword, set
  # inlineBlockType to SemiTerminated, then fcall the inline-code scanner
  # (inline_code_ruby when the host language is Ruby, inline_code otherwise).
  806. 'getkey' => {
  807. token( KW_GetKey );
  808. inlineBlockType = SemiTerminated;
  809. if ( hostLang->lang == HostLang::Ruby )
  810. fcall inline_code_ruby;
  811. else
  812. fcall inline_code;
  813. };
  814. 'access' => {
  815. token( KW_Access );
  816. inlineBlockType = SemiTerminated;
  817. if ( hostLang->lang == HostLang::Ruby )
  818. fcall inline_code_ruby;
  819. else
  820. fcall inline_code;
  821. };
  822. 'variable' => {
  823. token( KW_Variable );
  824. inlineBlockType = SemiTerminated;
  825. if ( hostLang->lang == HostLang::Ruby )
  826. fcall inline_code_ruby;
  827. else
  828. fcall inline_code;
  829. };
  830. 'when' => { token( KW_When ); };
  831. 'inwhen' => { token( KW_InWhen ); };
  832. 'outwhen' => { token( KW_OutWhen ); };
  833. 'eof' => { token( KW_Eof ); };
  834. 'err' => { token( KW_Err ); };
  835. 'lerr' => { token( KW_Lerr ); };
  836. 'to' => { token( KW_To ); };
  837. 'from' => { token( KW_From ); };
  838. 'export' => { token( KW_Export ); };
  839. # Identifiers.
  840. ident => { token( TK_Word, ts, te ); } ;
  841. # Numbers
  842. number => { token( TK_UInt, ts, te ); };
  843. hex_number => { token( TK_Hex, ts, te ); };
  844. # Literals, with optionals.
  845. ( s_literal | d_literal ) [i]?
  846. => { token( TK_Literal, ts, te ); };
  # '[' and '[^' fcall the or_literal scanner; '/' jumps (fgoto, no return
  # address pushed) to the ragel_re_literal scanner.
  847. '[' => { token( RE_SqOpen ); fcall or_literal; };
  848. '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
  849. '/' => { token( RE_Slash ); fgoto ragel_re_literal; };
  850. # Ignore.
  851. pound_comment => { updateCol(); };
  852. ':=' => { token( TK_ColonEquals ); };
  853. # To State Actions.
  854. ">~" => { token( TK_StartToState ); };
  855. "$~" => { token( TK_AllToState ); };
  856. "%~" => { token( TK_FinalToState ); };
  857. "<~" => { token( TK_NotStartToState ); };
  858. "@~" => { token( TK_NotFinalToState ); };
  859. "<>~" => { token( TK_MiddleToState ); };
  860. # From State actions
  861. ">*" => { token( TK_StartFromState ); };
  862. "$*" => { token( TK_AllFromState ); };
  863. "%*" => { token( TK_FinalFromState ); };
  864. "<*" => { token( TK_NotStartFromState ); };
  865. "@*" => { token( TK_NotFinalFromState ); };
  866. "<>*" => { token( TK_MiddleFromState ); };
  867. # EOF Actions.
  868. ">/" => { token( TK_StartEOF ); };
  869. "$/" => { token( TK_AllEOF ); };
  870. "%/" => { token( TK_FinalEOF ); };
  871. "</" => { token( TK_NotStartEOF ); };
  872. "@/" => { token( TK_NotFinalEOF ); };
  873. "<>/" => { token( TK_MiddleEOF ); };
  874. # Global Error actions.
  875. ">!" => { token( TK_StartGblError ); };
  876. "$!" => { token( TK_AllGblError ); };
  877. "%!" => { token( TK_FinalGblError ); };
  878. "<!" => { token( TK_NotStartGblError ); };
  879. "@!" => { token( TK_NotFinalGblError ); };
  880. "<>!" => { token( TK_MiddleGblError ); };
  881. # Local error actions.
  882. ">^" => { token( TK_StartLocalError ); };
  883. "$^" => { token( TK_AllLocalError ); };
  884. "%^" => { token( TK_FinalLocalError ); };
  885. "<^" => { token( TK_NotStartLocalError ); };
  886. "@^" => { token( TK_NotFinalLocalError ); };
  887. "<>^" => { token( TK_MiddleLocalError ); };
  888. # Middle.
  889. "<>" => { token( TK_Middle ); };
  890. # Conditions.
  891. '>?' => { token( TK_StartCond ); };
  892. '$?' => { token( TK_AllCond ); };
  893. '%?' => { token( TK_LeavingCond ); };
  # Remaining multi-character operators.
  894. '..' => { token( TK_DotDot ); };
  895. '**' => { token( TK_StarStar ); };
  896. '--' => { token( TK_DashDash ); };
  897. '->' => { token( TK_Arrow ); };
  898. '=>' => { token( TK_DoubleArrow ); };
  899. ":>" => { token( TK_ColonGt ); };
  900. ":>>" => { token( TK_ColonGtGt ); };
  901. "<:" => { token( TK_LtColon ); };
  902. # Opening of longest match.
  903. "|*" => { token( TK_BarStar ); };
  904. # Separater for name references.
  905. "::" => { token( TK_NameSep, ts, te ); };
  # End of a section: calls endSection() and returns to the scanner that
  # fcall-ed parser_def.
  906. '}%%' => {
  907. updateCol();
  908. endSection();
  909. fret;
  910. };
  # Spaces, tabs and carriage returns emit no token.
  911. [ \t\r]+ => { updateCol(); };
  912. # If we are in a single line machine then newline may end the spec.
  913. NL => {
  914. updateCol();
  915. if ( singleLineSpec ) {
  916. endSection();
  917. fret;
  918. }
  919. };
  # An opening brace is always emitted as a '{' token. Unless the previous
  # token was KW_Export or KW_Entry, it also starts an inline code block:
  # curly_count is set to 1, inlineBlockType to CurlyDelimited, and the
  # inline-code scanner for the host language is fcall-ed.
  920. '{' => {
  921. if ( lastToken == KW_Export || lastToken == KW_Entry )
  922. token( '{' );
  923. else {
  924. token( '{' );
  925. curly_count = 1;
  926. inlineBlockType = CurlyDelimited;
  927. if ( hostLang->lang == HostLang::Ruby )
  928. fcall inline_code_ruby;
  929. else
  930. fcall inline_code;
  931. }
  932. };
  # Input ended while still inside a ragel section.
  933. EOF => {
  934. scan_error() << "unterminated ragel section" << endl;
  935. };
  # Fallback: any other single character is emitted as a token whose type is
  # the character itself.
  936. any => { token( *ts ); } ;
  937. *|;
  # Entry scanner for host code when the host language is Ruby: do_scan()
  # starts in rlscan_en_main_ruby when hostLang->lang is HostLang::Ruby.
  # Everything outside a ragel section is handed to pass(); '%%{' and '%%'
  # switch into parser_def.
  938. # Outside code scanner. These tokens get passed through.
  939. main_ruby := |*
  940. ident => { pass( IMP_Word, ts, te ); };
  941. number => { pass( IMP_UInt, ts, te ); };
  942. ruby_comment => { pass(); };
  # Unlike the non-Ruby main scanner, host_re_literal is also treated as a
  # literal here.
  943. ( s_literal | d_literal | host_re_literal )
  944. => { pass( IMP_Literal, ts, te ); };
  # Start of a multi-line section: fcall parser_def, which returns on '}%%'.
  945. '%%{' => {
  946. updateCol();
  947. singleLineSpec = false;
  948. startSection();
  949. fcall parser_def;
  950. };
  # Start of a single-line section: with singleLineSpec set, parser_def also
  # returns at the next newline.
  951. '%%' => {
  952. updateCol();
  953. singleLineSpec = true;
  954. startSection();
  955. fcall parser_def;
  956. };
  957. whitespace+ => { pass(); };
  # End of input is matched with no action attached.
  958. EOF;
  # Any other single character is passed through on its own.
  959. any => { pass( *ts, 0, 0 ); };
  960. *|;
  # Entry scanner for host code when the host language is not Ruby: do_scan()
  # starts in rlscan_en_main in that case. Everything outside a ragel section
  # is handed to pass(); '%%{' and '%%' switch into parser_def.
  961. # Outside code scanner. These tokens get passed through.
  962. main := |*
  # 'define' is listed ahead of ident so it is passed as IMP_Define rather
  # than as an ordinary word.
  963. 'define' => { pass( IMP_Define, 0, 0 ); };
  964. ident => { pass( IMP_Word, ts, te ); };
  965. number => { pass( IMP_UInt, ts, te ); };
  966. c_cpp_comment => { pass(); };
  967. ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); };
  # Start of a multi-line section: fcall parser_def, which returns on '}%%'.
  968. '%%{' => {
  969. updateCol();
  970. singleLineSpec = false;
  971. startSection();
  972. fcall parser_def;
  973. };
  # Start of a single-line section: with singleLineSpec set, parser_def also
  # returns at the next newline.
  974. '%%' => {
  975. updateCol();
  976. singleLineSpec = true;
  977. startSection();
  978. fcall parser_def;
  979. };
  980. whitespace+ => { pass(); };
  # End of input is matched with no action attached.
  981. EOF;
  # Any other single character is passed through on its own.
  982. any => { pass( *ts, 0, 0 ); };
  983. *|;
  984. }%%
  985. %% write data;
  986. void Scanner::do_scan()
  987. {
  988. int bufsize = 8;
  989. char *buf = new char[bufsize];
  990. int cs, act, have = 0;
  991. int top;
  992. /* The stack is two deep, one level for going into ragel defs from the main
  993. * machines which process outside code, and another for going into or literals
  994. * from either a ragel spec, or a regular expression. */
  995. int stack[2];
  996. int curly_count = 0;
  997. bool execute = true;
  998. bool singleLineSpec = false;
  999. InlineBlockType inlineBlockType = CurlyDelimited;
  1000. /* Init the section parser and the character scanner. */
  1001. init();
  1002. %% write init;
  1003. /* Set up the start state. FIXME: After 5.20 is released the nocs write
  1004. * init option should be used, the main machine eliminated and this statement moved
  1005. * above the write init. */
  1006. if ( hostLang->lang == HostLang::Ruby )
  1007. cs = rlscan_en_main_ruby;
  1008. else
  1009. cs = rlscan_en_main;
  1010. while ( execute ) {
  1011. char *p = buf + have;
  1012. int space = bufsize - have;
  1013. if ( space == 0 ) {
  1014. /* We filled up the buffer trying to scan a token. Grow it. */
  1015. bufsize = bufsize * 2;
  1016. char *newbuf = new char[bufsize];
  1017. /* Recompute p and space. */
  1018. p = newbuf + have;
  1019. space = bufsize - have;
  1020. /* Patch up pointers possibly in use. */
  1021. if ( ts != 0 )
  1022. ts = newbuf + ( ts - buf );
  1023. te = newbuf + ( te - buf );
  1024. /* Copy the new buffer in. */
  1025. memcpy( newbuf, buf, have );
  1026. delete[] buf;
  1027. buf = newbuf;
  1028. }
  1029. input.read( p, space );
  1030. int len = input.gcount();
  1031. char *pe = p + len;
  1032. /* If we see eof then append the eof var. */
  1033. char *eof = 0;
  1034. if ( len == 0 ) {
  1035. eof = pe;
  1036. execute = false;
  1037. }
  1038. %% write exec;
  1039. /* Check if we failed. */
  1040. if ( cs == rlscan_error ) {
  1041. /* Machine failed before finding a token. I'm not yet sure if this
  1042. * is reachable. */
  1043. scan_error() << "scanner error" << endl;
  1044. exit(1);
  1045. }
  1046. /* Decide if we need to preserve anything. */
  1047. char *preserve = ts;
  1048. /* Now set up the prefix. */
  1049. if ( preserve == 0 )
  1050. have = 0;
  1051. else {
  1052. /* There is data that needs to be shifted over. */
  1053. have = pe - preserve;
  1054. memmove( buf, preserve, have );
  1055. unsigned int shiftback = preserve - buf;
  1056. if ( ts != 0 )
  1057. ts -= shiftback;
  1058. te -= shiftback;
  1059. preserve = buf;
  1060. }
  1061. }
  1062. delete[] buf;
  1063. }