123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772 |
- /*
- * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
- */
- /* This file is part of Ragel.
- *
- * Ragel is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Ragel is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Ragel; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
- #ifndef _PARSETREE_H
- #define _PARSETREE_H
- #include "ragel.h"
- #include "avlmap.h"
- #include "bstmap.h"
- #include "vector.h"
- #include "dlist.h"
- struct NameInst;
- /* Types of builtin machines. */
- enum BuiltinMachine
- {
- BT_Any,
- BT_Ascii,
- BT_Extend,
- BT_Alpha,
- BT_Digit,
- BT_Alnum,
- BT_Lower,
- BT_Upper,
- BT_Cntrl,
- BT_Graph,
- BT_Print,
- BT_Punct,
- BT_Space,
- BT_Xdigit,
- BT_Lambda,
- BT_Empty
- };
- struct ParseData;
- /* Leaf type. */
- struct Literal;
- /* Tree nodes. */
- struct Term;
- struct FactorWithAug;
- struct FactorWithRep;
- struct FactorWithNeg;
- struct Factor;
- struct Expression;
- struct Join;
- struct MachineDef;
- struct LongestMatch;
- struct LongestMatchPart;
- struct LmPartList;
- struct Range;
- struct LengthDef;
- /* Type of augmentation. Describes locations in the machine. */
- enum AugType
- {
- /* Transition actions/priorities. */
- at_start,
- at_all,
- at_finish,
- at_leave,
- /* Global error actions. */
- at_start_gbl_error,
- at_all_gbl_error,
- at_final_gbl_error,
- at_not_start_gbl_error,
- at_not_final_gbl_error,
- at_middle_gbl_error,
- /* Local error actions. */
- at_start_local_error,
- at_all_local_error,
- at_final_local_error,
- at_not_start_local_error,
- at_not_final_local_error,
- at_middle_local_error,
-
- /* To State Action embedding. */
- at_start_to_state,
- at_all_to_state,
- at_final_to_state,
- at_not_start_to_state,
- at_not_final_to_state,
- at_middle_to_state,
- /* From State Action embedding. */
- at_start_from_state,
- at_all_from_state,
- at_final_from_state,
- at_not_start_from_state,
- at_not_final_from_state,
- at_middle_from_state,
- /* EOF Action embedding. */
- at_start_eof,
- at_all_eof,
- at_final_eof,
- at_not_start_eof,
- at_not_final_eof,
- at_middle_eof
- };
- /* IMPORTANT: These must follow the same order as the state augs in AugType
- * since we will be using this to compose AugType. */
- enum StateAugType
- {
- sat_start = 0,
- sat_all,
- sat_final,
- sat_not_start,
- sat_not_final,
- sat_middle
- };
- struct Action;
- struct PriorDesc;
- struct RegExpr;
- struct ReItem;
- struct ReOrBlock;
- struct ReOrItem;
- struct ExplicitMachine;
- struct InlineItem;
- struct InlineList;
- /* Reference to a named state. */
- typedef Vector<char*> NameRef;
- typedef Vector<NameRef*> NameRefList;
- typedef Vector<NameInst*> NameTargList;
- /* Structure for storing location of epsilon transitons. */
- struct EpsilonLink
- {
- EpsilonLink( const InputLoc &loc, NameRef &target )
- : loc(loc), target(target) { }
- InputLoc loc;
- NameRef target;
- };
- struct Label
- {
- Label( const InputLoc &loc, char *data )
- : loc(loc), data(data) { }
- InputLoc loc;
- char *data;
- };
- /* Structrue represents an action assigned to some FactorWithAug node. The
- * factor with aug will keep an array of these. */
- struct ParserAction
- {
- ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
- : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
- InputLoc loc;
- AugType type;
- int localErrKey;
- Action *action;
- };
- struct ConditionTest
- {
- ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) :
- loc(loc), type(type), action(action), sense(sense) { }
- InputLoc loc;
- AugType type;
- Action *action;
- bool sense;
- };
- struct Token
- {
- char *data;
- int length;
- InputLoc loc;
- void append( const Token &other );
- void set( const char *str, int len );
- };
- char *prepareLitString( const InputLoc &loc, const char *src, long length,
- long &resLen, bool &caseInsensitive );
- /* Store the value and type of a priority augmentation. */
- struct PriorityAug
- {
- PriorityAug( AugType type, int priorKey, int priorValue ) :
- type(type), priorKey(priorKey), priorValue(priorValue) { }
- AugType type;
- int priorKey;
- int priorValue;
- };
- /*
- * A Variable Definition
- */
- struct VarDef
- {
- VarDef( const char *name, MachineDef *machineDef )
- : name(name), machineDef(machineDef), isExport(false) { }
-
- /* Parse tree traversal. */
- FsmAp *walk( ParseData *pd );
- void makeNameTree( const InputLoc &loc, ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- const char *name;
- MachineDef *machineDef;
- bool isExport;
- };
- /*
- * LongestMatch
- *
- * Wherever possible the item match will execute on the character. If not
- * possible the item match will execute on a lookahead character and either
- * hold the current char (if one away) or backup.
- *
- * How to handle the problem of backing up over a buffer break?
- *
- * Don't want to use pending out transitions for embedding item match because
- * the role of item match action is different: it may sometimes match on the
- * final transition, or may match on a lookahead character.
- *
- * Don't want to invent a new operator just for this. So just trail action
- * after machine, this means we can only use literal actions.
- *
- * The item action may
- *
- * What states of the machine will be final. The item actions that wrap around
- * on the last character will go straight to the start state.
- *
- * Some transitions will be lookahead transitions, they will hold the current
- * character. Crossing them with regular transitions must be restricted
- * because it does not make sense. The transition cannot simultaneously hold
- * and consume the current character.
- */
- struct LongestMatchPart
- {
- LongestMatchPart( Join *join, Action *action,
- InputLoc &semiLoc, int longestMatchId )
- :
- join(join), action(action), semiLoc(semiLoc),
- longestMatchId(longestMatchId), inLmSelect(false) { }
- InputLoc getLoc();
-
- Join *join;
- Action *action;
- InputLoc semiLoc;
- Action *setActId;
- Action *actOnLast;
- Action *actOnNext;
- Action *actLagBehind;
- int longestMatchId;
- bool inLmSelect;
- LongestMatch *longestMatch;
- LongestMatchPart *prev, *next;
- };
- /* Declare a new type so that ptreetypes.h need not include dlist.h. */
- struct LmPartList : DList<LongestMatchPart> {};
- struct LongestMatch
- {
- /* Construct with a list of joins */
- LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
- loc(loc), longestMatchList(longestMatchList), name(0),
- lmSwitchHandlesError(false) { }
- /* Tree traversal. */
- FsmAp *walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- void transferScannerLeavingActions( FsmAp *graph );
- void runLongestMatch( ParseData *pd, FsmAp *graph );
- Action *newAction( ParseData *pd, const InputLoc &loc, const char *name,
- InlineList *inlineList );
- void makeActions( ParseData *pd );
- void findName( ParseData *pd );
- void restart( FsmAp *graph, TransAp *trans );
- InputLoc loc;
- LmPartList *longestMatchList;
- const char *name;
- Action *lmActSelect;
- bool lmSwitchHandlesError;
- LongestMatch *next, *prev;
- };
- /* List of Expressions. */
- typedef DList<Expression> ExprList;
- struct MachineDef
- {
- enum Type {
- JoinType,
- LongestMatchType,
- LengthDefType
- };
- MachineDef( Join *join )
- : join(join), longestMatch(0), lengthDef(0), type(JoinType) {}
- MachineDef( LongestMatch *longestMatch )
- : join(0), longestMatch(longestMatch), lengthDef(0), type(LongestMatchType) {}
- MachineDef( LengthDef *lengthDef )
- : join(0), longestMatch(0), lengthDef(lengthDef), type(LengthDefType) {}
- FsmAp *walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- Join *join;
- LongestMatch *longestMatch;
- LengthDef *lengthDef;
- Type type;
- };
- /*
- * Join
- */
- struct Join
- {
- /* Construct with the first expression. */
- Join( Expression *expr );
- Join( const InputLoc &loc, Expression *expr );
- /* Tree traversal. */
- FsmAp *walk( ParseData *pd );
- FsmAp *walkJoin( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- /* Data. */
- InputLoc loc;
- ExprList exprList;
- };
- /*
- * Expression
- */
- struct Expression
- {
- enum Type {
- OrType,
- IntersectType,
- SubtractType,
- StrongSubtractType,
- TermType,
- BuiltinType
- };
- /* Construct with an expression on the left and a term on the right. */
- Expression( Expression *expression, Term *term, Type type ) :
- expression(expression), term(term),
- type(type), prev(this), next(this) { }
- /* Construct with only a term. */
- Expression( Term *term ) :
- expression(0), term(term),
- type(TermType) , prev(this), next(this) { }
-
- /* Construct with a builtin type. */
- Expression( BuiltinMachine builtin ) :
- expression(0), term(0), builtin(builtin),
- type(BuiltinType), prev(this), next(this) { }
- ~Expression();
- /* Tree traversal. */
- FsmAp *walk( ParseData *pd, bool lastInSeq = true );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- /* Node data. */
- Expression *expression;
- Term *term;
- BuiltinMachine builtin;
- Type type;
- Expression *prev, *next;
- };
- /*
- * Term
- */
- struct Term
- {
- enum Type {
- ConcatType,
- RightStartType,
- RightFinishType,
- LeftType,
- FactorWithAugType
- };
- Term( Term *term, FactorWithAug *factorWithAug ) :
- term(term), factorWithAug(factorWithAug), type(ConcatType) { }
- Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
- term(term), factorWithAug(factorWithAug), type(type) { }
- Term( FactorWithAug *factorWithAug ) :
- term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
-
- ~Term();
- FsmAp *walk( ParseData *pd, bool lastInSeq = true );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- Term *term;
- FactorWithAug *factorWithAug;
- Type type;
- /* Priority descriptor for RightFinish type. */
- PriorDesc priorDescs[2];
- };
- /* Third level of precedence. Augmenting nodes with actions and priorities. */
- struct FactorWithAug
- {
- FactorWithAug( FactorWithRep *factorWithRep ) :
- priorDescs(0), factorWithRep(factorWithRep) { }
- ~FactorWithAug();
- /* Tree traversal. */
- FsmAp *walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
- void assignPriorities( FsmAp *graph, int *priorOrd );
- void assignConditions( FsmAp *graph );
- /* Actions and priorities assigned to the factor node. */
- Vector<ParserAction> actions;
- Vector<PriorityAug> priorityAugs;
- PriorDesc *priorDescs;
- Vector<Label> labels;
- Vector<EpsilonLink> epsilonLinks;
- Vector<ConditionTest> conditions;
- FactorWithRep *factorWithRep;
- };
- /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
- * optional and plus. */
- struct FactorWithRep
- {
- enum Type {
- StarType,
- StarStarType,
- OptionalType,
- PlusType,
- ExactType,
- MaxType,
- MinType,
- RangeType,
- FactorWithNegType
- };
- FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
- int lowerRep, int upperRep, Type type ) :
- loc(loc), factorWithRep(factorWithRep),
- factorWithNeg(0), lowerRep(lowerRep),
- upperRep(upperRep), type(type) { }
-
- FactorWithRep( FactorWithNeg *factorWithNeg )
- : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
- ~FactorWithRep();
- /* Tree traversal. */
- FsmAp *walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- InputLoc loc;
- FactorWithRep *factorWithRep;
- FactorWithNeg *factorWithNeg;
- int lowerRep, upperRep;
- Type type;
- /* Priority descriptor for StarStar type. */
- PriorDesc priorDescs[2];
- };
- /* Fifth level of precedence. Provides Negation. */
- struct FactorWithNeg
- {
- enum Type {
- NegateType,
- CharNegateType,
- FactorType
- };
- FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
- loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
- FactorWithNeg( Factor *factor ) :
- factorWithNeg(0), factor(factor), type(FactorType) { }
- ~FactorWithNeg();
- /* Tree traversal. */
- FsmAp *walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- InputLoc loc;
- FactorWithNeg *factorWithNeg;
- Factor *factor;
- Type type;
- };
- /*
- * Factor
- */
- struct Factor
- {
- /* Language elements a factor node can be. */
- enum Type {
- LiteralType,
- RangeType,
- OrExprType,
- RegExprType,
- ReferenceType,
- ParenType,
- LongestMatchType,
- };
- /* Construct with a literal fsm. */
- Factor( Literal *literal ) :
- literal(literal), type(LiteralType) { }
- /* Construct with a range. */
- Factor( Range *range ) :
- range(range), type(RangeType) { }
-
- /* Construct with the or part of a regular expression. */
- Factor( ReItem *reItem ) :
- reItem(reItem), type(OrExprType) { }
- /* Construct with a regular expression. */
- Factor( RegExpr *regExpr ) :
- regExpr(regExpr), type(RegExprType) { }
- /* Construct with a reference to a var def. */
- Factor( const InputLoc &loc, VarDef *varDef ) :
- loc(loc), varDef(varDef), type(ReferenceType) {}
- /* Construct with a parenthesized join. */
- Factor( Join *join ) :
- join(join), type(ParenType) {}
-
- /* Construct with a longest match operator. */
- Factor( LongestMatch *longestMatch ) :
- longestMatch(longestMatch), type(LongestMatchType) {}
- /* Cleanup. */
- ~Factor();
- /* Tree traversal. */
- FsmAp *walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- InputLoc loc;
- Literal *literal;
- Range *range;
- ReItem *reItem;
- RegExpr *regExpr;
- VarDef *varDef;
- Join *join;
- LongestMatch *longestMatch;
- int lower, upper;
- Type type;
- };
- /* A range machine. Only ever composed of two literals. */
- struct Range
- {
- Range( Literal *lowerLit, Literal *upperLit )
- : lowerLit(lowerLit), upperLit(upperLit) { }
- ~Range();
- FsmAp *walk( ParseData *pd );
- Literal *lowerLit;
- Literal *upperLit;
- };
- /* Some literal machine. Can be a number or literal string. */
- struct Literal
- {
- enum LiteralType { Number, LitString };
- Literal( const Token &token, LiteralType type )
- : token(token), type(type) { }
- FsmAp *walk( ParseData *pd );
-
- Token token;
- LiteralType type;
- };
- /* Regular expression. */
- struct RegExpr
- {
- enum RegExpType { RecurseItem, Empty };
- /* Constructors. */
- RegExpr() :
- type(Empty), caseInsensitive(false) { }
- RegExpr(RegExpr *regExpr, ReItem *item) :
- regExpr(regExpr), item(item),
- type(RecurseItem), caseInsensitive(false) { }
- ~RegExpr();
- FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
- RegExpr *regExpr;
- ReItem *item;
- RegExpType type;
- bool caseInsensitive;
- };
- /* An item in a regular expression. */
- struct ReItem
- {
- enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
-
- ReItem( const InputLoc &loc, const Token &token )
- : loc(loc), token(token), star(false), type(Data) { }
- ReItem( const InputLoc &loc, ReItemType type )
- : loc(loc), star(false), type(type) { }
- ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
- : loc(loc), orBlock(orBlock), star(false), type(type) { }
- ~ReItem();
- FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
- InputLoc loc;
- Token token;
- ReOrBlock *orBlock;
- bool star;
- ReItemType type;
- };
- /* An or block item. */
- struct ReOrBlock
- {
- enum ReOrBlockType { RecurseItem, Empty };
- /* Constructors. */
- ReOrBlock()
- : type(Empty) { }
- ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
- : orBlock(orBlock), item(item), type(RecurseItem) { }
- ~ReOrBlock();
- FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
-
- ReOrBlock *orBlock;
- ReOrItem *item;
- ReOrBlockType type;
- };
- /* An item in an or block. */
- struct ReOrItem
- {
- enum ReOrItemType { Data, Range };
- ReOrItem( const InputLoc &loc, const Token &token )
- : loc(loc), token(token), type(Data) {}
- ReOrItem( const InputLoc &loc, char lower, char upper )
- : loc(loc), lower(lower), upper(upper), type(Range) { }
- FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
- InputLoc loc;
- Token token;
- char lower;
- char upper;
- ReOrItemType type;
- };
- /*
- * Inline code tree
- */
- struct InlineList;
- struct InlineItem
- {
- enum Type
- {
- Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
- Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
- LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
- LmInitTokStart, LmSetTokStart, Break
- };
- InlineItem( const InputLoc &loc, char *data, Type type ) :
- loc(loc), data(data), nameRef(0), children(0), type(type) { }
- InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
- loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
- InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
- LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
- nameRef(0), children(0), longestMatch(longestMatch),
- longestMatchPart(longestMatchPart), type(type) { }
- InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
- loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
- type(type) { }
- InlineItem( const InputLoc &loc, Type type ) :
- loc(loc), data(0), nameRef(0), children(0), type(type) { }
-
- InputLoc loc;
- char *data;
- NameRef *nameRef;
- NameInst *nameTarg;
- InlineList *children;
- LongestMatch *longestMatch;
- LongestMatchPart *longestMatchPart;
- Type type;
- InlineItem *prev, *next;
- };
- /* Normally this would be atypedef, but that would entail including DList from
- * ptreetypes, which should be just typedef forwards. */
- struct InlineList : public DList<InlineItem> { };
- #endif
|