parsedata.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. /*
  2. * Copyright 2001-2008 Adrian Thurston <thurston@complang.org>
  3. */
  4. /* This file is part of Ragel.
  5. *
  6. * Ragel is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Ragel is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Ragel; if not, write to the Free Software
  18. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. */
  20. #ifndef _PARSEDATA_H
  21. #define _PARSEDATA_H
  22. #include <iostream>
  23. #include <limits.h>
  24. #include <sstream>
  25. #include "avlmap.h"
  26. #include "bstmap.h"
  27. #include "vector.h"
  28. #include "dlist.h"
  29. #include "fsmgraph.h"
  30. #include "compare.h"
  31. #include "vector.h"
  32. #include "common.h"
  33. #include "parsetree.h"
  34. /* Forwards. */
  35. using std::ostream;
  36. struct VarDef;
  37. struct Join;
  38. struct Expression;
  39. struct Term;
  40. struct FactorWithAug;
  41. struct FactorWithLabel;
  42. struct FactorWithRep;
  43. struct FactorWithNeg;
  44. struct Factor;
  45. struct Literal;
  46. struct Range;
  47. struct RegExpr;
  48. struct ReItem;
  49. struct ReOrBlock;
  50. struct ReOrItem;
  51. struct LongestMatch;
  52. struct InputData;
  53. struct CodeGenData;
  54. typedef DList<LongestMatch> LmList;
  55. /* Graph dictionary. */
  56. struct GraphDictEl
  57. :
  58. public AvlTreeEl<GraphDictEl>,
  59. public DListEl<GraphDictEl>
  60. {
  61. GraphDictEl( const char *k )
  62. : key(k), value(0), isInstance(false) { }
  63. GraphDictEl( const char *k, VarDef *value )
  64. : key(k), value(value), isInstance(false) { }
  65. const char *getKey() { return key; }
  66. const char *key;
  67. VarDef *value;
  68. bool isInstance;
  69. /* Location info of graph definition. Points to variable name of assignment. */
  70. InputLoc loc;
  71. };
  72. typedef AvlTree<GraphDictEl, const char*, CmpStr> GraphDict;
  73. typedef DList<GraphDictEl> GraphList;
  74. /* Priority name dictionary. */
  75. typedef AvlMapEl<char*, int> PriorDictEl;
  76. typedef AvlMap<char*, int, CmpStr> PriorDict;
  77. /* Local error name dictionary. */
  78. typedef AvlMapEl<const char*, int> LocalErrDictEl;
  79. typedef AvlMap<const char*, int, CmpStr> LocalErrDict;
  80. /* Tree of instantiated names. */
  81. typedef BstMapEl<const char*, NameInst*> NameMapEl;
  82. typedef BstMap<const char*, NameInst*, CmpStr> NameMap;
  83. typedef Vector<NameInst*> NameVect;
  84. typedef BstSet<NameInst*> NameSet;
  85. /* Node in the tree of instantiated names. */
  86. struct NameInst
  87. {
  88. NameInst( const InputLoc &loc, NameInst *parent, const char *name, int id, bool isLabel ) :
  89. loc(loc), parent(parent), name(name), id(id), isLabel(isLabel),
  90. isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {}
  91. InputLoc loc;
  92. /* Keep parent pointers in the name tree to retrieve
  93. * fully qulified names. */
  94. NameInst *parent;
  95. const char *name;
  96. int id;
  97. bool isLabel;
  98. bool isLongestMatch;
  99. int numRefs;
  100. int numUses;
  101. /* Names underneath us, excludes anonymous names. */
  102. NameMap children;
  103. /* All names underneath us in order of appearance. */
  104. NameVect childVect;
  105. /* Join scopes need an implicit "final" target. */
  106. NameInst *start, *final;
  107. /* During a fsm generation walk, lists the names that are referenced by
  108. * epsilon operations in the current scope. After the link is made by the
  109. * epsilon reference and the join operation is complete, the label can
  110. * have its refcount decremented. Once there are no more references the
  111. * entry point can be removed from the fsm returned. */
  112. NameVect referencedNames;
  113. /* Pointers for the name search queue. */
  114. NameInst *prev, *next;
  115. /* Check if this name inst or any name inst below is referenced. */
  116. bool anyRefsRec();
  117. };
  118. typedef DList<NameInst> NameInstList;
  119. /* Stack frame used in walking the name tree. */
  120. struct NameFrame
  121. {
  122. NameInst *prevNameInst;
  123. int prevNameChild;
  124. NameInst *prevLocalScope;
  125. };
  126. struct LengthDef
  127. {
  128. LengthDef( char *name )
  129. : name(name) {}
  130. char *name;
  131. LengthDef *prev, *next;
  132. };
  133. typedef DList<LengthDef> LengthDefList;
  134. /* Class to collect information about the machine during the
  135. * parse of input. */
  136. struct ParseData
  137. {
  138. /* Create a new parse data object. This is done at the beginning of every
  139. * fsm specification. */
  140. ParseData( const char *fileName, char *sectionName, const InputLoc &sectionLoc );
  141. ~ParseData();
  142. /*
  143. * Setting up the graph dict.
  144. */
  145. /* Initialize a graph dict with the basic fsms. */
  146. void initGraphDict();
  147. void createBuiltin( const char *name, BuiltinMachine builtin );
  148. /* Make a name id in the current name instantiation scope if it is not
  149. * already there. */
  150. NameInst *addNameInst( const InputLoc &loc, const char *data, bool isLabel );
  151. void makeRootNames();
  152. void makeNameTree( GraphDictEl *gdNode );
  153. void makeExportsNameTree();
  154. void fillNameIndex( NameInst *from );
  155. void printNameTree();
  156. /* Increments the usage count on entry names. Names that are no longer
  157. * needed will have their entry points unset. */
  158. void unsetObsoleteEntries( FsmAp *graph );
  159. /* Resove name references in action code and epsilon transitions. */
  160. NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly );
  161. void resolveFrom( NameSet &result, NameInst *refFrom,
  162. const NameRef &nameRef, int namePos );
  163. NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action );
  164. void resolveNameRefs( InlineList *inlineList, Action *action );
  165. void resolveActionNameRefs();
  166. /* Set the alphabet type. If type types are not valid returns false. */
  167. bool setAlphType( const InputLoc &loc, char *s1, char *s2 );
  168. bool setAlphType( const InputLoc &loc, char *s1 );
  169. /* Override one of the variables ragel uses. */
  170. bool setVariable( char *var, InlineList *inlineList );
  171. /* Unique actions. */
  172. void removeDups( ActionTable &actionTable );
  173. void removeActionDups( FsmAp *graph );
  174. /* Dumping the name instantiation tree. */
  175. void printNameInst( NameInst *nameInst, int level );
  176. /* Make the graph from a graph dict node. Does minimization. */
  177. FsmAp *makeInstance( GraphDictEl *gdNode );
  178. FsmAp *makeSpecific( GraphDictEl *gdNode );
  179. FsmAp *makeAll();
  180. /* Checking the contents of actions. */
  181. void checkAction( Action *action );
  182. void checkInlineList( Action *act, InlineList *inlineList );
  183. void analyzeAction( Action *action, InlineList *inlineList );
  184. void analyzeGraph( FsmAp *graph );
  185. void makeExports();
  186. void prepareMachineGen( GraphDictEl *graphDictEl );
  187. void prepareMachineGenTBWrapped( GraphDictEl *graphDictEl );
  188. void generateXML( ostream &out );
  189. void generateReduced( InputData &inputData );
  190. FsmAp *sectionGraph;
  191. bool generatingSectionSubset;
  192. void initKeyOps();
  193. /*
  194. * Data collected during the parse.
  195. */
  196. /* Dictionary of graphs. Both instances and non-instances go here. */
  197. GraphDict graphDict;
  198. /* The list of instances. */
  199. GraphList instanceList;
  200. /* Dictionary of actions. Lets actions be defined and then referenced. */
  201. ActionDict actionDict;
  202. /* Dictionary of named priorities. */
  203. PriorDict priorDict;
  204. /* Dictionary of named local errors. */
  205. LocalErrDict localErrDict;
  206. /* List of actions. Will be pasted into a switch statement. */
  207. ActionList actionList;
  208. /* The id of the next priority name and label. */
  209. int nextPriorKey, nextLocalErrKey, nextNameId, nextCondId;
  210. /* The default priority number key for a machine. This is active during
  211. * the parse of the rhs of a machine assignment. */
  212. int curDefPriorKey;
  213. int curDefLocalErrKey;
  214. /* Alphabet type. */
  215. HostType *userAlphType;
  216. bool alphTypeSet;
  217. InputLoc alphTypeLoc;
  218. /* Element type and get key expression. */
  219. InlineList *getKeyExpr;
  220. InlineList *accessExpr;
  221. /* Stack management */
  222. InlineList *prePushExpr;
  223. InlineList *postPopExpr;
  224. /* Overriding variables. */
  225. InlineList *pExpr;
  226. InlineList *peExpr;
  227. InlineList *eofExpr;
  228. InlineList *csExpr;
  229. InlineList *topExpr;
  230. InlineList *stackExpr;
  231. InlineList *actExpr;
  232. InlineList *tokstartExpr;
  233. InlineList *tokendExpr;
  234. InlineList *dataExpr;
  235. /* The alphabet range. */
  236. char *lowerNum, *upperNum;
  237. Key lowKey, highKey;
  238. InputLoc rangeLowLoc, rangeHighLoc;
  239. /* The name of the file the fsm is from, and the spec name. */
  240. const char *fileName;
  241. char *sectionName;
  242. InputLoc sectionLoc;
  243. /* Counting the action and priority ordering. */
  244. int curActionOrd;
  245. int curPriorOrd;
  246. /* Root of the name tree. One root is for the instantiated machines. The
  247. * other root is for exported definitions. */
  248. NameInst *rootName;
  249. NameInst *exportsRootName;
  250. /* Name tree walking. */
  251. NameInst *curNameInst;
  252. int curNameChild;
  253. /* The place where resolved epsilon transitions go. These cannot go into
  254. * the parse tree because a single epsilon op can resolve more than once
  255. * to different nameInsts if the machine it's in is used more than once. */
  256. NameVect epsilonResolvedLinks;
  257. int nextEpsilonResolvedLink;
  258. /* Root of the name tree used for doing local name searches. */
  259. NameInst *localNameScope;
  260. void setLmInRetLoc( InlineList *inlineList );
  261. void initLongestMatchData();
  262. void setLongestMatchData( FsmAp *graph );
  263. void initNameWalk();
  264. void initExportsNameWalk();
  265. NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
  266. NameFrame enterNameScope( bool isLocal, int numScopes );
  267. void popNameScope( const NameFrame &frame );
  268. void resetNameScope( const NameFrame &frame );
  269. /* Make name ids to name inst pointers. */
  270. NameInst **nameIndex;
  271. /* Counter for assigning ids to longest match items. */
  272. int nextLongestMatchId;
  273. bool lmRequiresErrorState;
  274. /* List of all longest match parse tree items. */
  275. LmList lmList;
  276. Action *newAction( const char *name, InlineList *inlineList );
  277. Action *initTokStart;
  278. int initTokStartOrd;
  279. Action *setTokStart;
  280. int setTokStartOrd;
  281. Action *initActId;
  282. int initActIdOrd;
  283. Action *setTokEnd;
  284. int setTokEndOrd;
  285. void beginProcessing()
  286. {
  287. ::condData = &thisCondData;
  288. ::keyOps = &thisKeyOps;
  289. }
  290. CondData thisCondData;
  291. KeyOps thisKeyOps;
  292. ExportList exportList;
  293. LengthDefList lengthDefList;
  294. CodeGenData *cgd;
  295. };
  296. void afterOpMinimize( FsmAp *fsm, bool lastInSeq = true );
  297. Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd );
  298. Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd );
  299. Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd );
  300. Key makeFsmKeyChar( char c, ParseData *pd );
  301. void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd );
  302. void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
  303. bool caseInsensitive, ParseData *pd );
  304. FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd );
  305. FsmAp *dotFsm( ParseData *pd );
  306. FsmAp *dotStarFsm( ParseData *pd );
  307. void errorStateLabels( const NameSet &locations );
  308. #endif