// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//---------------------------------------------------------------------------------
//
// Generated Header File.  Do not edit by hand.
//    This file contains the state table for the ICU Regular Expression Pattern Parser.
//    It is generated by the Perl script "regexcst.pl" from
//    the rule parser state definitions file "regexcst.txt".
//
//   Copyright (C) 2002-2016 International Business Machines Corporation
//   and others. All rights reserved.
//
//---------------------------------------------------------------------------------
- #ifndef RBBIRPT_H
- #define RBBIRPT_H
- #include "unicode/utypes.h"
- U_NAMESPACE_BEGIN
//
// Character classes for regex pattern scanning.
//
// Each constant is a character-class code in the 128-255 range, i.e. a value
// that appears in the char-class column of the state table in place of a
// literal ASCII character (0-127).  kRuleSet_count is the number of classes
// defined here (codes 128 through 130).
//
static const uint8_t kRuleSet_digit_char = 128;
static const uint8_t kRuleSet_ascii_letter = 129;
static const uint8_t kRuleSet_rule_char = 130;
constexpr uint32_t kRuleSet_count = 131-128;
//
// Actions that a row of the pattern-parser state table can request.
//
// Enumerators are numbered implicitly from 0 in declaration order; this file
// is generated, so the order must not be rearranged by hand.
//
enum Regex_PatternParseAction {
    doSetBackslash_D,
    doBackslashh,
    doBackslashH,
    doSetLiteralEscaped,
    doOpenLookAheadNeg,
    doCompleteNamedBackRef,
    doPatStart,
    doBackslashS,
    doBackslashD,
    doNGStar,
    doNOP,
    doBackslashX,
    doSetLiteral,
    doContinueNamedCapture,
    doBackslashG,
    doBackslashR,
    doSetBegin,
    doSetBackslash_v,
    doPossessivePlus,
    doPerlInline,
    doBackslashZ,
    doSetAddAmp,
    doSetBeginDifference1,
    doIntervalError,
    doSetNegate,
    doIntervalInit,
    doSetIntersection2,
    doPossessiveInterval,
    doRuleError,
    doBackslashW,
    doContinueNamedBackRef,
    doOpenNonCaptureParen,
    doExit,
    doSetNamedChar,
    doSetBackslash_V,
    doConditionalExpr,
    doEscapeError,
    doBadOpenParenType,
    doPossessiveStar,
    doSetAddDash,
    doEscapedLiteralChar,
    doSetBackslash_w,
    doIntervalUpperDigit,
    doBackslashv,
    doSetBackslash_S,
    doSetNoCloseError,
    doSetProp,
    doBackslashB,
    doSetEnd,
    doSetRange,
    doMatchModeParen,
    doPlus,
    doBackslashV,
    doSetMatchMode,
    doBackslashz,
    doSetNamedRange,
    doOpenLookBehindNeg,
    doInterval,
    doBadNamedCapture,
    doBeginMatchMode,
    doBackslashd,
    doPatFinish,
    doNamedChar,
    doNGPlus,
    doSetDifference2,
    doSetBackslash_H,
    doCloseParen,
    doDotAny,
    doOpenCaptureParen,
    doEnterQuoteMode,
    doOpenAtomicParen,
    doBadModeFlag,
    doSetBackslash_d,
    doSetFinish,
    doProperty,
    doBeginNamedBackRef,
    doBackRef,
    doOpt,
    doDollar,
    doBeginNamedCapture,
    doNGInterval,
    doSetOpError,
    doSetPosixProp,
    doSetBeginIntersection1,
    doBackslashb,
    doSetBeginUnion,
    doIntevalLowerDigit,        // (sic) spelling is part of the identifier used by the state table
    doSetBackslash_h,
    doStar,
    doMatchMode,
    doBackslashA,
    doOpenLookBehind,
    doPossessiveOpt,
    doOrOperator,
    doBackslashw,
    doBackslashs,
    doLiteralChar,
    doSuppressComments,
    doCaret,
    doIntervalSame,
    doNGOpt,
    doOpenLookAhead,
    doSetBackslash_W,
    doMismatchedParenErr,
    doSetBackslash_s,
    rbbiLastAction};            // Sentinel: one past the last real action.
- //-------------------------------------------------------------------------------
- //
- // RegexTableEl represents the structure of a row in the transition table
- // for the pattern parser state machine.
- //-------------------------------------------------------------------------------
- struct RegexTableEl {
- Regex_PatternParseAction fAction;
- uint8_t fCharClass; // 0-127: an individual ASCII character
- // 128-255: character class index
- uint8_t fNextState; // 0-250: normal next-state numbers
- // 255: pop next-state from stack.
- uint8_t fPushState;
- UBool fNextChar;
- };
- static const struct RegexTableEl gRuleParseStateTable[] = {
- {doNOP, 0, 0, 0, true}
- , {doPatStart, 255, 2,0, false} // 1 start
- , {doLiteralChar, 254, 14,0, true} // 2 term
- , {doLiteralChar, 130, 14,0, true} // 3
- , {doSetBegin, 91 /* [ */, 123, 205, true} // 4
- , {doNOP, 40 /* ( */, 27,0, true} // 5
- , {doDotAny, 46 /* . */, 14,0, true} // 6
- , {doCaret, 94 /* ^ */, 14,0, true} // 7
- , {doDollar, 36 /* $ */, 14,0, true} // 8
- , {doNOP, 92 /* \ */, 89,0, true} // 9
- , {doOrOperator, 124 /* | */, 2,0, true} // 10
- , {doCloseParen, 41 /* ) */, 255,0, true} // 11
- , {doPatFinish, 253, 2,0, false} // 12
- , {doRuleError, 255, 206,0, false} // 13
- , {doNOP, 42 /* * */, 68,0, true} // 14 expr-quant
- , {doNOP, 43 /* + */, 71,0, true} // 15
- , {doNOP, 63 /* ? */, 74,0, true} // 16
- , {doIntervalInit, 123 /* { */, 77,0, true} // 17
- , {doNOP, 40 /* ( */, 23,0, true} // 18
- , {doNOP, 255, 20,0, false} // 19
- , {doOrOperator, 124 /* | */, 2,0, true} // 20 expr-cont
- , {doCloseParen, 41 /* ) */, 255,0, true} // 21
- , {doNOP, 255, 2,0, false} // 22
- , {doSuppressComments, 63 /* ? */, 25,0, true} // 23 open-paren-quant
- , {doNOP, 255, 27,0, false} // 24
- , {doNOP, 35 /* # */, 50, 14, true} // 25 open-paren-quant2
- , {doNOP, 255, 29,0, false} // 26
- , {doSuppressComments, 63 /* ? */, 29,0, true} // 27 open-paren
- , {doOpenCaptureParen, 255, 2, 14, false} // 28
- , {doOpenNonCaptureParen, 58 /* : */, 2, 14, true} // 29 open-paren-extended
- , {doOpenAtomicParen, 62 /* > */, 2, 14, true} // 30
- , {doOpenLookAhead, 61 /* = */, 2, 20, true} // 31
- , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, true} // 32
- , {doNOP, 60 /* < */, 46,0, true} // 33
- , {doNOP, 35 /* # */, 50, 2, true} // 34
- , {doBeginMatchMode, 105 /* i */, 53,0, false} // 35
- , {doBeginMatchMode, 100 /* d */, 53,0, false} // 36
- , {doBeginMatchMode, 109 /* m */, 53,0, false} // 37
- , {doBeginMatchMode, 115 /* s */, 53,0, false} // 38
- , {doBeginMatchMode, 117 /* u */, 53,0, false} // 39
- , {doBeginMatchMode, 119 /* w */, 53,0, false} // 40
- , {doBeginMatchMode, 120 /* x */, 53,0, false} // 41
- , {doBeginMatchMode, 45 /* - */, 53,0, false} // 42
- , {doConditionalExpr, 40 /* ( */, 206,0, true} // 43
- , {doPerlInline, 123 /* { */, 206,0, true} // 44
- , {doBadOpenParenType, 255, 206,0, false} // 45
- , {doOpenLookBehind, 61 /* = */, 2, 20, true} // 46 open-paren-lookbehind
- , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, true} // 47
- , {doBeginNamedCapture, 129, 64,0, false} // 48
- , {doBadOpenParenType, 255, 206,0, false} // 49
- , {doNOP, 41 /* ) */, 255,0, true} // 50 paren-comment
- , {doMismatchedParenErr, 253, 206,0, false} // 51
- , {doNOP, 255, 50,0, true} // 52
- , {doMatchMode, 105 /* i */, 53,0, true} // 53 paren-flag
- , {doMatchMode, 100 /* d */, 53,0, true} // 54
- , {doMatchMode, 109 /* m */, 53,0, true} // 55
- , {doMatchMode, 115 /* s */, 53,0, true} // 56
- , {doMatchMode, 117 /* u */, 53,0, true} // 57
- , {doMatchMode, 119 /* w */, 53,0, true} // 58
- , {doMatchMode, 120 /* x */, 53,0, true} // 59
- , {doMatchMode, 45 /* - */, 53,0, true} // 60
- , {doSetMatchMode, 41 /* ) */, 2,0, true} // 61
- , {doMatchModeParen, 58 /* : */, 2, 14, true} // 62
- , {doBadModeFlag, 255, 206,0, false} // 63
- , {doContinueNamedCapture, 129, 64,0, true} // 64 named-capture
- , {doContinueNamedCapture, 128, 64,0, true} // 65
- , {doOpenCaptureParen, 62 /* > */, 2, 14, true} // 66
- , {doBadNamedCapture, 255, 206,0, false} // 67
- , {doNGStar, 63 /* ? */, 20,0, true} // 68 quant-star
- , {doPossessiveStar, 43 /* + */, 20,0, true} // 69
- , {doStar, 255, 20,0, false} // 70
- , {doNGPlus, 63 /* ? */, 20,0, true} // 71 quant-plus
- , {doPossessivePlus, 43 /* + */, 20,0, true} // 72
- , {doPlus, 255, 20,0, false} // 73
- , {doNGOpt, 63 /* ? */, 20,0, true} // 74 quant-opt
- , {doPossessiveOpt, 43 /* + */, 20,0, true} // 75
- , {doOpt, 255, 20,0, false} // 76
- , {doNOP, 128, 79,0, false} // 77 interval-open
- , {doIntervalError, 255, 206,0, false} // 78
- , {doIntevalLowerDigit, 128, 79,0, true} // 79 interval-lower
- , {doNOP, 44 /* , */, 83,0, true} // 80
- , {doIntervalSame, 125 /* } */, 86,0, true} // 81
- , {doIntervalError, 255, 206,0, false} // 82
- , {doIntervalUpperDigit, 128, 83,0, true} // 83 interval-upper
- , {doNOP, 125 /* } */, 86,0, true} // 84
- , {doIntervalError, 255, 206,0, false} // 85
- , {doNGInterval, 63 /* ? */, 20,0, true} // 86 interval-type
- , {doPossessiveInterval, 43 /* + */, 20,0, true} // 87
- , {doInterval, 255, 20,0, false} // 88
- , {doBackslashA, 65 /* A */, 2,0, true} // 89 backslash
- , {doBackslashB, 66 /* B */, 2,0, true} // 90
- , {doBackslashb, 98 /* b */, 2,0, true} // 91
- , {doBackslashd, 100 /* d */, 14,0, true} // 92
- , {doBackslashD, 68 /* D */, 14,0, true} // 93
- , {doBackslashG, 71 /* G */, 2,0, true} // 94
- , {doBackslashh, 104 /* h */, 14,0, true} // 95
- , {doBackslashH, 72 /* H */, 14,0, true} // 96
- , {doNOP, 107 /* k */, 115,0, true} // 97
- , {doNamedChar, 78 /* N */, 14,0, false} // 98
- , {doProperty, 112 /* p */, 14,0, false} // 99
- , {doProperty, 80 /* P */, 14,0, false} // 100
- , {doBackslashR, 82 /* R */, 14,0, true} // 101
- , {doEnterQuoteMode, 81 /* Q */, 2,0, true} // 102
- , {doBackslashS, 83 /* S */, 14,0, true} // 103
- , {doBackslashs, 115 /* s */, 14,0, true} // 104
- , {doBackslashv, 118 /* v */, 14,0, true} // 105
- , {doBackslashV, 86 /* V */, 14,0, true} // 106
- , {doBackslashW, 87 /* W */, 14,0, true} // 107
- , {doBackslashw, 119 /* w */, 14,0, true} // 108
- , {doBackslashX, 88 /* X */, 14,0, true} // 109
- , {doBackslashZ, 90 /* Z */, 2,0, true} // 110
- , {doBackslashz, 122 /* z */, 2,0, true} // 111
- , {doBackRef, 128, 14,0, true} // 112
- , {doEscapeError, 253, 206,0, false} // 113
- , {doEscapedLiteralChar, 255, 14,0, true} // 114
- , {doBeginNamedBackRef, 60 /* < */, 117,0, true} // 115 named-backref
- , {doBadNamedCapture, 255, 206,0, false} // 116
- , {doContinueNamedBackRef, 129, 119,0, true} // 117 named-backref-2
- , {doBadNamedCapture, 255, 206,0, false} // 118
- , {doContinueNamedBackRef, 129, 119,0, true} // 119 named-backref-3
- , {doContinueNamedBackRef, 128, 119,0, true} // 120
- , {doCompleteNamedBackRef, 62 /* > */, 14,0, true} // 121
- , {doBadNamedCapture, 255, 206,0, false} // 122
- , {doSetNegate, 94 /* ^ */, 126,0, true} // 123 set-open
- , {doSetPosixProp, 58 /* : */, 128,0, false} // 124
- , {doNOP, 255, 126,0, false} // 125
- , {doSetLiteral, 93 /* ] */, 141,0, true} // 126 set-open2
- , {doNOP, 255, 131,0, false} // 127
- , {doSetEnd, 93 /* ] */, 255,0, true} // 128 set-posix
- , {doNOP, 58 /* : */, 131,0, false} // 129
- , {doRuleError, 255, 206,0, false} // 130
- , {doSetEnd, 93 /* ] */, 255,0, true} // 131 set-start
- , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 132
- , {doNOP, 92 /* \ */, 191,0, true} // 133
- , {doNOP, 45 /* - */, 137,0, true} // 134
- , {doNOP, 38 /* & */, 139,0, true} // 135
- , {doSetLiteral, 255, 141,0, true} // 136
- , {doRuleError, 45 /* - */, 206,0, false} // 137 set-start-dash
- , {doSetAddDash, 255, 141,0, false} // 138
- , {doRuleError, 38 /* & */, 206,0, false} // 139 set-start-amp
- , {doSetAddAmp, 255, 141,0, false} // 140
- , {doSetEnd, 93 /* ] */, 255,0, true} // 141 set-after-lit
- , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 142
- , {doNOP, 45 /* - */, 178,0, true} // 143
- , {doNOP, 38 /* & */, 169,0, true} // 144
- , {doNOP, 92 /* \ */, 191,0, true} // 145
- , {doSetNoCloseError, 253, 206,0, false} // 146
- , {doSetLiteral, 255, 141,0, true} // 147
- , {doSetEnd, 93 /* ] */, 255,0, true} // 148 set-after-set
- , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 149
- , {doNOP, 45 /* - */, 171,0, true} // 150
- , {doNOP, 38 /* & */, 166,0, true} // 151
- , {doNOP, 92 /* \ */, 191,0, true} // 152
- , {doSetNoCloseError, 253, 206,0, false} // 153
- , {doSetLiteral, 255, 141,0, true} // 154
- , {doSetEnd, 93 /* ] */, 255,0, true} // 155 set-after-range
- , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 156
- , {doNOP, 45 /* - */, 174,0, true} // 157
- , {doNOP, 38 /* & */, 176,0, true} // 158
- , {doNOP, 92 /* \ */, 191,0, true} // 159
- , {doSetNoCloseError, 253, 206,0, false} // 160
- , {doSetLiteral, 255, 141,0, true} // 161
- , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 162 set-after-op
- , {doSetOpError, 93 /* ] */, 206,0, false} // 163
- , {doNOP, 92 /* \ */, 191,0, true} // 164
- , {doSetLiteral, 255, 141,0, true} // 165
- , {doSetBeginIntersection1, 91 /* [ */, 123, 148, true} // 166 set-set-amp
- , {doSetIntersection2, 38 /* & */, 162,0, true} // 167
- , {doSetAddAmp, 255, 141,0, false} // 168
- , {doSetIntersection2, 38 /* & */, 162,0, true} // 169 set-lit-amp
- , {doSetAddAmp, 255, 141,0, false} // 170
- , {doSetBeginDifference1, 91 /* [ */, 123, 148, true} // 171 set-set-dash
- , {doSetDifference2, 45 /* - */, 162,0, true} // 172
- , {doSetAddDash, 255, 141,0, false} // 173
- , {doSetDifference2, 45 /* - */, 162,0, true} // 174 set-range-dash
- , {doSetAddDash, 255, 141,0, false} // 175
- , {doSetIntersection2, 38 /* & */, 162,0, true} // 176 set-range-amp
- , {doSetAddAmp, 255, 141,0, false} // 177
- , {doSetDifference2, 45 /* - */, 162,0, true} // 178 set-lit-dash
- , {doSetAddDash, 91 /* [ */, 141,0, false} // 179
- , {doSetAddDash, 93 /* ] */, 141,0, false} // 180
- , {doNOP, 92 /* \ */, 183,0, true} // 181
- , {doSetRange, 255, 155,0, true} // 182
- , {doSetOpError, 115 /* s */, 206,0, false} // 183 set-lit-dash-escape
- , {doSetOpError, 83 /* S */, 206,0, false} // 184
- , {doSetOpError, 119 /* w */, 206,0, false} // 185
- , {doSetOpError, 87 /* W */, 206,0, false} // 186
- , {doSetOpError, 100 /* d */, 206,0, false} // 187
- , {doSetOpError, 68 /* D */, 206,0, false} // 188
- , {doSetNamedRange, 78 /* N */, 155,0, false} // 189
- , {doSetRange, 255, 155,0, true} // 190
- , {doSetProp, 112 /* p */, 148,0, false} // 191 set-escape
- , {doSetProp, 80 /* P */, 148,0, false} // 192
- , {doSetNamedChar, 78 /* N */, 141,0, false} // 193
- , {doSetBackslash_s, 115 /* s */, 155,0, true} // 194
- , {doSetBackslash_S, 83 /* S */, 155,0, true} // 195
- , {doSetBackslash_w, 119 /* w */, 155,0, true} // 196
- , {doSetBackslash_W, 87 /* W */, 155,0, true} // 197
- , {doSetBackslash_d, 100 /* d */, 155,0, true} // 198
- , {doSetBackslash_D, 68 /* D */, 155,0, true} // 199
- , {doSetBackslash_h, 104 /* h */, 155,0, true} // 200
- , {doSetBackslash_H, 72 /* H */, 155,0, true} // 201
- , {doSetBackslash_v, 118 /* v */, 155,0, true} // 202
- , {doSetBackslash_V, 86 /* V */, 155,0, true} // 203
- , {doSetLiteralEscaped, 255, 141,0, true} // 204
- , {doSetFinish, 255, 14,0, false} // 205 set-finish
- , {doExit, 255, 206,0, true} // 206 errorDeath
- };
//
// Names of the parser states, indexed by state-table row number.
//
// Entry i is the name of the state whose first row is row i of
// gRuleParseStateTable, or 0 when no state begins at that row.  The array has
// one more entry than the table has rows; that final entry is 0.
//
static const char * const RegexStateNames[] = {
    0,                                                  //   0
    "start",                                            //   1
    "term",                                             //   2
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                    //   3-13
    "expr-quant",                                       //  14
    0, 0, 0, 0, 0,                                      //  15-19
    "expr-cont",                                        //  20
    0, 0,                                               //  21-22
    "open-paren-quant",                                 //  23
    0,                                                  //  24
    "open-paren-quant2",                                //  25
    0,                                                  //  26
    "open-paren",                                       //  27
    0,                                                  //  28
    "open-paren-extended",                              //  29
    0, 0, 0, 0, 0, 0, 0, 0,                             //  30-37
    0, 0, 0, 0, 0, 0, 0, 0,                             //  38-45
    "open-paren-lookbehind",                            //  46
    0, 0, 0,                                            //  47-49
    "paren-comment",                                    //  50
    0, 0,                                               //  51-52
    "paren-flag",                                       //  53
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                       //  54-63
    "named-capture",                                    //  64
    0, 0, 0,                                            //  65-67
    "quant-star",                                       //  68
    0, 0,                                               //  69-70
    "quant-plus",                                       //  71
    0, 0,                                               //  72-73
    "quant-opt",                                        //  74
    0, 0,                                               //  75-76
    "interval-open",                                    //  77
    0,                                                  //  78
    "interval-lower",                                   //  79
    0, 0, 0,                                            //  80-82
    "interval-upper",                                   //  83
    0, 0,                                               //  84-85
    "interval-type",                                    //  86
    0, 0,                                               //  87-88
    "backslash",                                        //  89
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                       //  90-99
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                       // 100-109
    0, 0, 0, 0, 0,                                      // 110-114
    "named-backref",                                    // 115
    0,                                                  // 116
    "named-backref-2",                                  // 117
    0,                                                  // 118
    "named-backref-3",                                  // 119
    0, 0, 0,                                            // 120-122
    "set-open",                                         // 123
    0, 0,                                               // 124-125
    "set-open2",                                        // 126
    0,                                                  // 127
    "set-posix",                                        // 128
    0, 0,                                               // 129-130
    "set-start",                                        // 131
    0, 0, 0, 0, 0,                                      // 132-136
    "set-start-dash",                                   // 137
    0,                                                  // 138
    "set-start-amp",                                    // 139
    0,                                                  // 140
    "set-after-lit",                                    // 141
    0, 0, 0, 0, 0, 0,                                   // 142-147
    "set-after-set",                                    // 148
    0, 0, 0, 0, 0, 0,                                   // 149-154
    "set-after-range",                                  // 155
    0, 0, 0, 0, 0, 0,                                   // 156-161
    "set-after-op",                                     // 162
    0, 0, 0,                                            // 163-165
    "set-set-amp",                                      // 166
    0, 0,                                               // 167-168
    "set-lit-amp",                                      // 169
    0,                                                  // 170
    "set-set-dash",                                     // 171
    0, 0,                                               // 172-173
    "set-range-dash",                                   // 174
    0,                                                  // 175
    "set-range-amp",                                    // 176
    0,                                                  // 177
    "set-lit-dash",                                     // 178
    0, 0, 0, 0,                                         // 179-182
    "set-lit-dash-escape",                              // 183
    0, 0, 0, 0, 0, 0, 0,                                // 184-190
    "set-escape",                                       // 191
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                       // 192-201
    0, 0, 0,                                            // 202-204
    "set-finish",                                       // 205
    "errorDeath",                                       // 206
    0};                                                 // 207
- U_NAMESPACE_END
- #endif
|