determine.h 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. /*
  2. * determine.h -- the FSM determination routine.
  3. *
  4. * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
  5. * Alexander Gololobov <agololobov@gmail.com>
  6. *
  7. * This file is part of Pire, the Perl Incompatible
  8. * Regular Expressions library.
  9. *
  10. * Pire is free software: you can redistribute it and/or modify
  11. * it under the terms of the GNU Lesser Public License as published by
  12. * the Free Software Foundation, either version 3 of the License, or
  13. * (at your option) any later version.
  14. *
  15. * Pire is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU Lesser Public License for more details.
  19. * You should have received a copy of the GNU Lesser Public License
  20. * along with Pire. If not, see <http://www.gnu.org/licenses>.
  21. */
  22. #ifndef PIRE_DETERMINE_H
  23. #define PIRE_DETERMINE_H
  24. #include <contrib/libs/pire/pire/stub/stl.h>
  25. #include "partition.h"
  26. namespace Pire {
  27. namespace Impl {
  28. /**
  29. * An interface of a determination task.
  30. * You don't have to derive from this class; it is just a start point template.
  31. */
  32. class DetermineTask {
  33. private:
  34. struct ImplementationSpecific1;
  35. struct ImplementationSpecific2;
  36. public:
  37. /// A type representing a new state (may be a set of old states, a pair of them, etc...)
  38. typedef ImplementationSpecific1 State;
  39. /// A type of letter equivalence classes table.
  40. typedef Partition<char, ImplementationSpecific2> LettersTbl;
  41. /// A container used for storing map of states to thier indices.
  42. typedef TMap<State, size_t> InvStates;
  43. /// Should return used letters' partition.
  44. const LettersTbl& Letters() const;
  45. /// Should return initial state (surprise!)
  46. State Initial() const;
  47. /// Should calculate next state, given the current state and a letter.
  48. State Next(State state, Char letter) const;
  49. /// Should return true iff the state need to be processed.
  50. bool IsRequired(const State& /*state*/) const { return true; }
  51. /// Called when the set of new states is closed.
  52. void AcceptStates(const TVector<State>& newstates);
  53. /// Called for each transition from one new state to another.
  54. void Connect(size_t from, size_t to, Char letter);
  55. typedef bool Result;
  56. Result Success() { return true; }
  57. Result Failure() { return false; }
  58. };
  59. /**
  60. * A helper function for FSM determining and all determine-like algorithms
  61. * like scanners' agglutination.
  62. *
  63. * Given an indirectly specified automaton (through Task::Initial() and Task::Next()
  64. * functions, see above), performs a breadth-first traversal, finding and enumerating
  65. * all effectively reachable states. Then passes all found states and transitions
  66. * between them back to the task.
  67. *
  68. * Initial state is always placed at zero position.
  69. *
  70. * Please note that the function does not take care of any payload (including final flags);
  71. * it is the task's responsibility to agglutinate them properly.
  72. *
  73. * Returns task.Succeed() if everything was done; task.Failure() if maximum limit of state count was reached.
  74. */
  75. template<class Task>
  76. typename Task::Result Determine(Task& task, size_t maxSize)
  77. {
  78. typedef typename Task::State State;
  79. typedef typename Task::InvStates InvStates;
  80. typedef TDeque< TVector<size_t> > TransitionTable;
  81. TVector<State> states;
  82. InvStates invstates;
  83. TransitionTable transitions;
  84. TVector<size_t> stateIndices;
  85. states.push_back(task.Initial());
  86. invstates.insert(typename InvStates::value_type(states[0], 0));
  87. for (size_t stateIdx = 0; stateIdx < states.size(); ++stateIdx) {
  88. if (!task.IsRequired(states[stateIdx]))
  89. continue;
  90. TransitionTable::value_type row(task.Letters().Size());
  91. for (auto&& letter : task.Letters()) {
  92. State newState = task.Next(states[stateIdx], letter.first);
  93. auto i = invstates.find(newState);
  94. if (i == invstates.end()) {
  95. if (!maxSize--)
  96. return task.Failure();
  97. i = invstates.insert(typename InvStates::value_type(newState, states.size())).first;
  98. states.push_back(newState);
  99. }
  100. row[letter.second.first] = i->second;
  101. }
  102. transitions.push_back(row);
  103. stateIndices.push_back(stateIdx);
  104. }
  105. TVector<Char> invletters(task.Letters().Size());
  106. for (auto&& letter : task.Letters())
  107. invletters[letter.second.first] = letter.first;
  108. task.AcceptStates(states);
  109. size_t from = 0;
  110. for (TransitionTable::iterator i = transitions.begin(), ie = transitions.end(); i != ie; ++i, ++from) {
  111. TVector<Char>::iterator l = invletters.begin();
  112. for (TransitionTable::value_type::iterator j = i->begin(), je = i->end(); j != je; ++j, ++l)
  113. task.Connect(stateIndices[from], *j, *l);
  114. }
  115. return task.Success();
  116. }
  117. // Faster transition table representation for determined FSM
  118. typedef TVector<size_t> DeterminedTransitions;
  119. }
  120. }
  121. #endif