antlr3collections.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. #ifndef ANTLR3COLLECTIONS_HPP
  2. #define ANTLR3COLLECTIONS_HPP
  3. // [The "BSD licence"]
  4. // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
  5. //
  6. // All rights reserved.
  7. //
  8. // Redistribution and use in source and binary forms, with or without
  9. // modification, are permitted provided that the following conditions
  10. // are met:
  11. // 1. Redistributions of source code must retain the above copyright
  12. // notice, this list of conditions and the following disclaimer.
  13. // 2. Redistributions in binary form must reproduce the above copyright
  14. // notice, this list of conditions and the following disclaimer in the
  15. // documentation and/or other materials provided with the distribution.
  16. // 3. The name of the author may not be used to endorse or promote products
  17. // derived from this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  20. // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  21. // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  22. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  23. // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  24. // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  28. // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. namespace antlr3 {
  30. /* -------------- TRIE Interfaces ---------------- */
  31. /** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE
  32. */
  33. template< class ImplTraits, class DataType >
  34. class TrieEntry : public ImplTraits::AllocPolicyType
  35. {
  36. public:
  37. typedef typename ImplTraits::AllocPolicyType AllocPolicy;
  38. private:
  39. DataType m_data;
  40. TrieEntry* m_next; /* Allows duplicate entries for same key in insertion order */
  41. public:
  42. TrieEntry(const DataType& data, TrieEntry* next);
  43. DataType& get_data();
  44. const DataType& get_data() const;
  45. TrieEntry* get_next() const;
  46. void set_next( TrieEntry* next );
  47. };
  48. /** Structure that defines an element/node in an ANTLR_INT_TRIE
  49. */
  50. template< class ImplTraits, class DataType >
  51. class IntTrieNode : public ImplTraits::AllocPolicyType
  52. {
  53. public:
  54. typedef TrieEntry<ImplTraits, DataType> TrieEntryType;
  55. typedef TrieEntryType BucketsType;
  56. private:
  57. ANTLR_UINT32 m_bitNum; /**< This is the left/right bit index for traversal along the nodes */
  58. ANTLR_INTKEY m_key; /**< This is the actual key that the entry represents if it is a terminal node */
  59. BucketsType* m_buckets; /**< This is the data bucket(s) that the key indexes, which may be NULL */
  60. IntTrieNode* m_leftN; /**< Pointer to the left node from here when sKey & bitNum = 0 */
  61. IntTrieNode* m_rightN; /**< Pointer to the right node from here when sKey & bitNum, = 1 */
  62. public:
  63. IntTrieNode();
  64. ~IntTrieNode();
  65. ANTLR_UINT32 get_bitNum() const;
  66. ANTLR_INTKEY get_key() const;
  67. BucketsType* get_buckets() const;
  68. IntTrieNode* get_leftN() const;
  69. IntTrieNode* get_rightN() const;
  70. void set_bitNum( ANTLR_UINT32 bitNum );
  71. void set_key( ANTLR_INTKEY key );
  72. void set_buckets( BucketsType* buckets );
  73. void set_leftN( IntTrieNode* leftN );
  74. void set_rightN( IntTrieNode* rightN );
  75. };
  76. /** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation,
  77. * as you might expect, the key is turned into a "string" by looking at bit(key, depth)
  78. * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63)
  79. * and potentially a huge trie. This is the algorithm for a Patricia Trie.
  80. * Note also that this trie [can] accept multiple entries for the same key and is
  81. * therefore a kind of elastic bucket patricia trie.
  82. *
  83. * If you find this code useful, please feel free to 'steal' it for any purpose
  84. * as covered by the BSD license under which ANTLR is issued. You can cut the code
  85. * but as the ANTLR library is only about 50K (Windows Vista), you might find it
  86. * easier to just link the library. Please keep all comments and licenses and so on
  87. * in any version of this you create of course.
  88. *
  89. * Jim Idle.
  90. *
  91. */
  92. class IntTrieBase
  93. {
  94. public:
  95. static const ANTLR_UINT8* get_bitIndex();
  96. static const ANTLR_UINT64* get_bitMask();
  97. };
  98. template< class ImplTraits, class DataType >
  99. class IntTrie : public ImplTraits::AllocPolicyType, public IntTrieBase
  100. {
  101. public:
  102. typedef TrieEntry<ImplTraits, DataType> TrieEntryType;
  103. typedef IntTrieNode<ImplTraits, DataType> IntTrieNodeType;
  104. private:
  105. IntTrieNodeType* m_root; /* Root node of this integer trie */
  106. IntTrieNodeType* m_current; /* Used to traverse the TRIE with the next() method */
  107. ANTLR_UINT32 m_count; /* Current entry count */
  108. bool m_allowDups; /* Whether this trie accepts duplicate keys */
  109. public:
  110. /* INT TRIE Implementation of depth 64 bits, being the number of bits
  111. * in a 64 bit integer.
  112. */
  113. IntTrie( ANTLR_UINT32 depth );
  114. /** Search the int Trie and return a pointer to the first bucket indexed
  115. * by the key if it is contained in the trie, otherwise NULL.
  116. */
  117. TrieEntryType* get( ANTLR_INTKEY key);
  118. bool del( ANTLR_INTKEY key);
  119. /** Add an entry into the INT trie.
  120. * Basically we descend the trie as we do when searching it, which will
  121. * locate the only node in the trie that can be reached by the bit pattern of the
  122. * key. If the key is actually at that node, then if the trie accepts duplicates
  123. * we add the supplied data in a new chained bucket to that data node. If it does
  124. * not accept duplicates then we merely return FALSE in case the caller wants to know
  125. * whether the key was already in the trie.
  126. * If the node we locate is not the key we are looking to add, then we insert a new node
  127. * into the trie with a bit index of the leftmost differing bit and the left or right
  128. * node pointing to itself or the data node we are inserting 'before'.
  129. */
  130. bool add( ANTLR_INTKEY key, const DataType& data );
  131. ~IntTrie();
  132. };
  133. /**
  134. * A topological sort system that given a set of dependencies of a node m on node n,
  135. * can sort them in dependency order. This is a generally useful utility object
  136. * that does not care what the things are it is sorting. Generally the set
  137. * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR.
  138. * I have provided a sort method that given ANTLR3_VECTOR as an input will sort
  139. * the vector entries in place, as well as a sort method that just returns an
  140. * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but
  141. * some set of your own device.
  142. *
  143. * Of the two main algorithms that could be used, I chose to use the depth first
  144. * search for unvisited nodes as a) This runs in linear time, and b) it is what
  145. * we used in the ANTLR Tool to perform a topological sort of the input grammar files
  146. * based on their dependencies.
  147. */
  148. template<class ImplTraits>
  149. class Topo : public ImplTraits::AllocPolicyType
  150. {
  151. public:
  152. typedef typename ImplTraits::BitsetType BitsetType;
  153. typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
  154. private:
  155. /**
  156. * A vector of vectors of edges, built by calling the addEdge method()
  157. * to indicate that node number n depends on node number m. Each entry in the vector
  158. * contains a bitset, which has a bit index set for each node upon which the
  159. * entry node depends.
  160. */
  161. BitsetType** m_edges;
  162. /**
  163. * A vector used to build up the sorted output order. Note that
  164. * as the vector contains UINT32 then the maximum node index is
  165. * 'limited' to 2^32, as nodes should be zero based.
  166. */
  167. ANTLR_UINT32* m_sorted;
  168. /**
  169. * A vector used to detect cycles in the edge dependecies. It is used
  170. * as a stack and each time we descend a node to one of its edges we
  171. * add the node into this stack. If we find a node that we have already
  172. * visited in the stack, then it means there wasa cycle such as 9->8->1->9
  173. * as the only way a node can be on the stack is if we are currently
  174. * descnding from it as we remove it from the stack as we exit from
  175. * descending its dependencies
  176. */
  177. ANTLR_UINT32* m_cycle;
  178. /**
  179. * A flag that indicates the algorithm found a cycle in the edges
  180. * such as 9->8->1->9
  181. * If this flag is set after you have called one of the sort routines
  182. * then the detected cycle will be contained in the cycle array and
  183. * cycleLimit will point to the one after the last entry in the cycle.
  184. */
  185. bool m_hasCycle;
  186. /**
  187. * A watermark used to accumulate potential cycles in the cycle array.
  188. * This should be zero when we are done. Check hasCycle after calling one
  189. * of the sort methods and if it is true then you can find the cycle
  190. * in cycle[0]...cycle[cycleMark-1]
  191. */
  192. ANTLR_UINT32 m_cycleMark;
  193. /**
  194. * One more than the largest node index that is contained in edges/sorted.
  195. */
  196. ANTLR_UINT32 m_limit;
  197. /**
  198. * The set of visited nodes as determined by a set entry in
  199. * the bitmap.
  200. */
  201. BitsetType* m_visited;
  202. public:
  203. Topo();
  204. /**
  205. * A method that adds an edge from one node to another. An edge
  206. * of n -> m indicates that node n is dependent on node m. Note that
  207. * while building these edges, it is perfectly OK to add nodes out of
  208. * sequence. So, if you have edges:
  209. *
  210. * 3 -> 0
  211. * 2 -> 1
  212. * 1 -> 3
  213. *
  214. * The you can add them in that order and so add node 3 before nodes 2 and 1
  215. *
  216. */
  217. void addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency);
  218. /**
  219. * A method that returns a pointer to an array of sorted node indexes.
  220. * The array is sorted in topological sorted order. Note that the array
  221. * is only as large as the largest node index you created an edge for. This means
  222. * that if you had an input of 32 nodes, but that largest node with an edge
  223. * was 16, then the returned array will be the sorted order of the first 16
  224. * nodes and the last 16 nodes of your array are basically fine as they are
  225. * as they had no dependencies and do not need any particular sort order.
  226. *
  227. * NB: If the structure that contains the array is freed, then the sorted
  228. * array will be freed too so you should use the value of limit to
  229. * make a long term copy of this array if you do not want to keep the topo
  230. * structure around as well.
  231. */
  232. ANTLR_UINT32* sortToArray();
  233. /**
  234. * A method that sorts the supplied ANTLR3_VECTOR in place based
  235. * on the previously supplied edge data.
  236. */
  237. template<typename DataType>
  238. void sortVector( typename ImplTraits::template VectorType<DataType>& v);
  239. void DFS(ANTLR_UINT32 node);
  240. /**
  241. * A method to free this structure and any associated memory.
  242. */
  243. ~Topo();
  244. };
  245. }
  246. #include "antlr3collections.inl"
  247. #endif