main.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. /*
  2. * Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
  3. */
  4. /* This file is part of Ragel.
  5. *
  6. * Ragel is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Ragel is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Ragel; if not, write to the Free Software
  18. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#ifndef _WIN32
# include <unistd.h>
#endif
#include <sstream>
#include <string>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#ifdef _WIN32
#include <windows.h>
#include <psapi.h>
#include <time.h>
#include <io.h>
#include <process.h>
#if _MSC_VER
#define S_IRUSR _S_IREAD
#define S_IWUSR _S_IWRITE
#endif
#endif
  44. /* Parsing. */
  45. #include "ragel.h"
  46. #include "rlscan.h"
  47. /* Parameters and output. */
  48. #include "pcheck.h"
  49. #include "vector.h"
  50. #include "version.h"
  51. #include "common.h"
  52. #include "inputdata.h"
  53. using std::istream;
  54. using std::ostream;
  55. using std::ifstream;
  56. using std::ofstream;
  57. using std::cin;
  58. using std::cout;
  59. using std::cerr;
  60. using std::endl;
  61. using std::ios;
  62. using std::streamsize;
  63. /* Controls minimization. */
  64. MinimizeLevel minimizeLevel = MinimizePartition2;
  65. MinimizeOpt minimizeOpt = MinimizeMostOps;
  66. /* Graphviz dot file generation. */
  67. const char *machineSpec = 0, *machineName = 0;
  68. bool machineSpecFound = false;
  69. bool wantDupsRemoved = true;
  70. bool printStatistics = false;
  71. bool generateXML = false;
  72. bool generateDot = false;
  73. /* Target language and output style. */
  74. CodeStyle codeStyle = GenTables;
  75. int numSplitPartitions = 0;
  76. bool noLineDirectives = false;
  77. bool displayPrintables = false;
  78. /* Target ruby impl */
  79. RubyImplEnum rubyImpl = MRI;
  80. /* Print a summary of the options. */
  81. void usage()
  82. {
  83. cout <<
  84. "usage: ragel [options] file\n"
  85. "general:\n"
  86. " -h, -H, -?, --help Print this usage and exit\n"
  87. " -v, --version Print version information and exit\n"
  88. " -o <file> Write output to <file>\n"
  89. " -s Print some statistics on stderr\n"
  90. " -d Do not remove duplicates from action lists\n"
  91. " -I <dir> Add <dir> to the list of directories to search\n"
  92. " for included an imported files\n"
  93. "error reporting format:\n"
  94. " --error-format=gnu file:line:column: message (default)\n"
  95. " --error-format=msvc file(line,column): message\n"
  96. "fsm minimization:\n"
  97. " -n Do not perform minimization\n"
  98. " -m Minimize at the end of the compilation\n"
  99. " -l Minimize after most operations (default)\n"
  100. " -e Minimize after every operation\n"
  101. "visualization:\n"
  102. " -x Run the frontend only: emit XML intermediate format\n"
  103. " -V Generate a dot file for Graphviz\n"
  104. " -p Display printable characters on labels\n"
  105. " -S <spec> FSM specification to output (for graphviz output)\n"
  106. " -M <machine> Machine definition/instantiation to output (for graphviz output)\n"
  107. "host language:\n"
  108. " -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
  109. " -D The host language is D\n"
  110. " -Z The host language is Go\n"
  111. " -J The host language is Java\n"
  112. " -R The host language is Ruby\n"
  113. " -A The host language is C#\n"
  114. " -O The host language is OCaml\n"
  115. "line directives: (C/D/Ruby/C#/OCaml)\n"
  116. " -L Inhibit writing of #line directives\n"
  117. "code style: (C/D/Java/Ruby/C#/OCaml)\n"
  118. " -T0 Table driven FSM (default)\n"
  119. "code style: (C/D/Ruby/C#/OCaml)\n"
  120. " -T1 Faster table driven FSM\n"
  121. " -F0 Flat table driven FSM\n"
  122. " -F1 Faster flat table-driven FSM\n"
  123. "code style: (C/D/C#/OCaml)\n"
  124. " -G0 Goto-driven FSM\n"
  125. " -G1 Faster goto-driven FSM\n"
  126. "code style: (C/D)\n"
  127. " -G2 Really fast goto-driven FSM\n"
  128. " -P<N> N-Way Split really fast goto-driven FSM\n"
  129. ;
  130. exit(0);
  131. }
  132. /* Print version information and exit. */
  133. void version()
  134. {
  135. cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
  136. "Copyright (c) 2001-2009 by Adrian Thurston" << endl;
  137. exit(0);
  138. }
  139. /* Error reporting format. */
  140. ErrorFormat errorFormat = ErrorFormatGNU;
  141. InputLoc makeInputLoc( const char *fileName, int line, int col)
  142. {
  143. InputLoc loc = { fileName, line, col };
  144. return loc;
  145. }
  146. ostream &operator<<( ostream &out, const InputLoc &loc )
  147. {
  148. assert( loc.fileName != 0 );
  149. switch ( errorFormat ) {
  150. case ErrorFormatMSVC:
  151. out << loc.fileName << "(" << loc.line;
  152. if ( loc.col )
  153. out << "," << loc.col;
  154. out << ")";
  155. break;
  156. default:
  157. out << loc.fileName << ":" << loc.line;
  158. if ( loc.col )
  159. out << ":" << loc.col;
  160. break;
  161. }
  162. return out;
  163. }
  164. /* Total error count. */
  165. int gblErrorCount = 0;
  166. /* Print the opening to a warning in the input, then return the error ostream. */
  167. ostream &warning( const InputLoc &loc )
  168. {
  169. cerr << loc << ": warning: ";
  170. return cerr;
  171. }
  172. /* Print the opening to a program error, then return the error stream. */
  173. ostream &error()
  174. {
  175. gblErrorCount += 1;
  176. cerr << PROGNAME ": ";
  177. return cerr;
  178. }
  179. ostream &error( const InputLoc &loc )
  180. {
  181. gblErrorCount += 1;
  182. cerr << loc << ": ";
  183. return cerr;
  184. }
  185. void escapeLineDirectivePath( std::ostream &out, char *path )
  186. {
  187. for ( char *pc = path; *pc != 0; pc++ ) {
  188. if ( *pc == '\\' )
  189. out << "\\\\";
  190. else
  191. out << *pc;
  192. }
  193. }
  194. void processArgs( int argc, const char **argv, InputData &id )
  195. {
  196. ParamCheck pc("xo:dnmleabjkS:M:I:CDEJZRAOvHh?-:sT:F:G:P:LpV", argc, argv);
  197. /* FIXME: Need to check code styles VS langauge. */
  198. while ( pc.check() ) {
  199. switch ( pc.state ) {
  200. case ParamCheck::match:
  201. switch ( pc.parameter ) {
  202. case 'V':
  203. generateDot = true;
  204. break;
  205. case 'x':
  206. generateXML = true;
  207. break;
  208. /* Output. */
  209. case 'o':
  210. if ( *pc.paramArg == 0 )
  211. error() << "a zero length output file name was given" << endl;
  212. else if ( id.outputFileName != 0 )
  213. error() << "more than one output file name was given" << endl;
  214. else {
  215. /* Ok, remember the output file name. */
  216. id.outputFileName = pc.paramArg;
  217. }
  218. break;
  219. /* Flag for turning off duplicate action removal. */
  220. case 'd':
  221. wantDupsRemoved = false;
  222. break;
  223. /* Minimization, mostly hidden options. */
  224. case 'n':
  225. minimizeOpt = MinimizeNone;
  226. break;
  227. case 'm':
  228. minimizeOpt = MinimizeEnd;
  229. break;
  230. case 'l':
  231. minimizeOpt = MinimizeMostOps;
  232. break;
  233. case 'e':
  234. minimizeOpt = MinimizeEveryOp;
  235. break;
  236. case 'a':
  237. minimizeLevel = MinimizeApprox;
  238. break;
  239. case 'b':
  240. minimizeLevel = MinimizeStable;
  241. break;
  242. case 'j':
  243. minimizeLevel = MinimizePartition1;
  244. break;
  245. case 'k':
  246. minimizeLevel = MinimizePartition2;
  247. break;
  248. /* Machine spec. */
  249. case 'S':
  250. if ( *pc.paramArg == 0 )
  251. error() << "please specify an argument to -S" << endl;
  252. else if ( machineSpec != 0 )
  253. error() << "more than one -S argument was given" << endl;
  254. else {
  255. /* Ok, remember the path to the machine to generate. */
  256. machineSpec = pc.paramArg;
  257. }
  258. break;
  259. /* Machine path. */
  260. case 'M':
  261. if ( *pc.paramArg == 0 )
  262. error() << "please specify an argument to -M" << endl;
  263. else if ( machineName != 0 )
  264. error() << "more than one -M argument was given" << endl;
  265. else {
  266. /* Ok, remember the machine name to generate. */
  267. machineName = pc.paramArg;
  268. }
  269. break;
  270. case 'I':
  271. if ( *pc.paramArg == 0 )
  272. error() << "please specify an argument to -I" << endl;
  273. else {
  274. id.includePaths.append( pc.paramArg );
  275. }
  276. break;
  277. /* Host language types. */
  278. case 'C':
  279. hostLang = &hostLangC;
  280. break;
  281. case 'D':
  282. hostLang = &hostLangD;
  283. break;
  284. case 'E':
  285. hostLang = &hostLangD2;
  286. break;
  287. case 'Z':
  288. hostLang = &hostLangGo;
  289. break;
  290. case 'J':
  291. hostLang = &hostLangJava;
  292. break;
  293. case 'R':
  294. hostLang = &hostLangRuby;
  295. break;
  296. case 'A':
  297. hostLang = &hostLangCSharp;
  298. break;
  299. case 'O':
  300. hostLang = &hostLangOCaml;
  301. break;
  302. /* Version and help. */
  303. case 'v':
  304. version();
  305. break;
  306. case 'H': case 'h': case '?':
  307. usage();
  308. break;
  309. case 's':
  310. printStatistics = true;
  311. break;
  312. case '-': {
  313. char *arg = strdup( pc.paramArg );
  314. char *eq = strchr( arg, '=' );
  315. if ( eq != 0 )
  316. *eq++ = 0;
  317. if ( strcmp( arg, "help" ) == 0 )
  318. usage();
  319. else if ( strcmp( arg, "version" ) == 0 )
  320. version();
  321. else if ( strcmp( arg, "error-format" ) == 0 ) {
  322. if ( eq == 0 )
  323. error() << "expecting '=value' for error-format" << endl;
  324. else if ( strcmp( eq, "gnu" ) == 0 )
  325. errorFormat = ErrorFormatGNU;
  326. else if ( strcmp( eq, "msvc" ) == 0 )
  327. errorFormat = ErrorFormatMSVC;
  328. else
  329. error() << "invalid value for error-format" << endl;
  330. }
  331. else if ( strcmp( arg, "rbx" ) == 0 )
  332. rubyImpl = Rubinius;
  333. else {
  334. error() << "--" << pc.paramArg <<
  335. " is an invalid argument" << endl;
  336. }
  337. free( arg );
  338. break;
  339. }
  340. /* Passthrough args. */
  341. case 'T':
  342. if ( pc.paramArg[0] == '0' )
  343. codeStyle = GenTables;
  344. else if ( pc.paramArg[0] == '1' )
  345. codeStyle = GenFTables;
  346. else {
  347. error() << "-T" << pc.paramArg[0] <<
  348. " is an invalid argument" << endl;
  349. exit(1);
  350. }
  351. break;
  352. case 'F':
  353. if ( pc.paramArg[0] == '0' )
  354. codeStyle = GenFlat;
  355. else if ( pc.paramArg[0] == '1' )
  356. codeStyle = GenFFlat;
  357. else {
  358. error() << "-F" << pc.paramArg[0] <<
  359. " is an invalid argument" << endl;
  360. exit(1);
  361. }
  362. break;
  363. case 'G':
  364. if ( pc.paramArg[0] == '0' )
  365. codeStyle = GenGoto;
  366. else if ( pc.paramArg[0] == '1' )
  367. codeStyle = GenFGoto;
  368. else if ( pc.paramArg[0] == '2' )
  369. codeStyle = GenIpGoto;
  370. else {
  371. error() << "-G" << pc.paramArg[0] <<
  372. " is an invalid argument" << endl;
  373. exit(1);
  374. }
  375. break;
  376. case 'P':
  377. codeStyle = GenSplit;
  378. numSplitPartitions = atoi( pc.paramArg );
  379. break;
  380. case 'p':
  381. displayPrintables = true;
  382. break;
  383. case 'L':
  384. noLineDirectives = true;
  385. break;
  386. }
  387. break;
  388. case ParamCheck::invalid:
  389. error() << "-" << pc.parameter << " is an invalid argument" << endl;
  390. break;
  391. case ParamCheck::noparam:
  392. /* It is interpreted as an input file. */
  393. if ( *pc.curArg == 0 )
  394. error() << "a zero length input file name was given" << endl;
  395. else if ( id.inputFileName != 0 )
  396. error() << "more than one input file name was given" << endl;
  397. else {
  398. /* OK, Remember the filename. */
  399. id.inputFileName = pc.curArg;
  400. }
  401. break;
  402. }
  403. }
  404. }
  405. void process( InputData &id )
  406. {
  407. /* Open the input file for reading. */
  408. assert( id.inputFileName != 0 );
  409. ifstream *inFile = new ifstream( id.inputFileName );
  410. if ( ! inFile->is_open() )
  411. error() << "could not open " << id.inputFileName << " for reading" << endp;
  412. /* Used for just a few things. */
  413. std::ostringstream hostData;
  414. /* Make the first input item. */
  415. InputItem *firstInputItem = new InputItem;
  416. firstInputItem->type = InputItem::HostData;
  417. firstInputItem->loc.fileName = id.inputFileName;
  418. firstInputItem->loc.line = 1;
  419. firstInputItem->loc.col = 1;
  420. id.inputItems.append( firstInputItem );
  421. Scanner scanner( id, id.inputFileName, *inFile, 0, 0, 0, false );
  422. scanner.do_scan();
  423. /* Finished, final check for errors.. */
  424. if ( gblErrorCount > 0 )
  425. exit(1);
  426. /* Now send EOF to all parsers. */
  427. id.terminateAllParsers();
  428. /* Bail on above error. */
  429. if ( gblErrorCount > 0 )
  430. exit(1);
  431. /* Locate the backend program */
  432. /* Compiles machines. */
  433. id.prepareMachineGen();
  434. if ( gblErrorCount > 0 )
  435. exit(1);
  436. id.makeOutputStream();
  437. /* Generates the reduced machine, which we use to write output. */
  438. if ( !generateXML ) {
  439. id.generateReduced();
  440. if ( gblErrorCount > 0 )
  441. exit(1);
  442. }
  443. id.verifyWritesHaveData();
  444. if ( gblErrorCount > 0 )
  445. exit(1);
  446. /*
  447. * From this point on we should not be reporting any errors.
  448. */
  449. id.openOutput();
  450. id.writeOutput();
  451. /* Close the input and the intermediate file. */
  452. delete inFile;
  453. /* If writing to a file, delete the ostream, causing it to flush.
  454. * Standard out is flushed automatically. */
  455. if ( id.outputFileName != 0 ) {
  456. delete id.outStream;
  457. delete id.outFilter;
  458. }
  459. assert( gblErrorCount == 0 );
  460. }
  461. char *makeIntermedTemplate( const char *baseFileName )
  462. {
  463. char *result = 0;
  464. const char *templ = "ragel-XXXXXX.xml";
  465. const char *lastSlash = strrchr( baseFileName, '/' );
  466. if ( lastSlash == 0 ) {
  467. result = new char[strlen(templ)+1];
  468. strcpy( result, templ );
  469. }
  470. else {
  471. int baseLen = lastSlash - baseFileName + 1;
  472. result = new char[baseLen + strlen(templ) + 1];
  473. memcpy( result, baseFileName, baseLen );
  474. strcpy( result+baseLen, templ );
  475. }
  476. return result;
  477. };
  478. /* Main, process args and call yyparse to start scanning input. */
  479. int main( int argc, const char **argv )
  480. {
  481. InputData id;
  482. processArgs( argc, argv, id );
  483. /* Require an input file. If we use standard in then we won't have a file
  484. * name on which to base the output. */
  485. if ( id.inputFileName == 0 )
  486. error() << "no input file given" << endl;
  487. /* Bail on argument processing errors. */
  488. if ( gblErrorCount > 0 )
  489. exit(1);
  490. /* Make sure we are not writing to the same file as the input file. */
  491. if ( id.inputFileName != 0 && id.outputFileName != 0 &&
  492. strcmp( id.inputFileName, id.outputFileName ) == 0 )
  493. {
  494. error() << "output file \"" << id.outputFileName <<
  495. "\" is the same as the input file" << endp;
  496. }
  497. for (char* p = (char*)id.inputFileName; *p != 0; p++) {
  498. if (*p == '\\')
  499. *p = '/';
  500. }
  501. process( id );
  502. return 0;
  503. }