lr0.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. /* Generate the LR(0) parser states for Bison.
  2. Copyright (C) 1984, 1986, 1989, 2000-2002, 2004-2015, 2018-2021 Free
  3. Software Foundation, Inc.
  4. This file is part of Bison, the GNU Compiler Compiler.
  5. This program is free software: you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation, either version 3 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <https://www.gnu.org/licenses/>. */
  15. /* See comments in state.h for the data structures that represent it.
  16. The entry point is generate_states. */
  17. #include <config.h>
  18. #include "system.h"
  19. #include <bitset.h>
  20. #include "closure.h"
  21. #include "complain.h"
  22. #include "getargs.h"
  23. #include "gram.h"
  24. #include "lalr.h"
  25. #include "lr0.h"
  26. #include "reader.h"
  27. #include "reduce.h"
  28. #include "state.h"
  29. #include "symtab.h"
  30. typedef struct state_list
  31. {
  32. struct state_list *next;
  33. state *state;
  34. } state_list;
  35. static state_list *first_state = NULL;
  36. static state_list *last_state = NULL;
  37. /* Print CORE for debugging. */
  38. static void
  39. core_print (size_t core_size, item_index *core, FILE *out)
  40. {
  41. for (int i = 0; i < core_size; ++i)
  42. {
  43. item_print (ritem + core[i], NULL, out);
  44. fputc ('\n', out);
  45. }
  46. }
  47. /*-----------------------------------------------------------------.
  48. | A state was just discovered by transitioning on SYM from another |
  49. | state. Queue this state for later examination, in order to find |
  50. | its outgoing transitions. Return it. |
  51. `-----------------------------------------------------------------*/
  52. static state *
  53. state_list_append (symbol_number sym, size_t core_size, item_index *core)
  54. {
  55. state_list *node = xmalloc (sizeof *node);
  56. state *res = state_new (sym, core_size, core);
  57. if (trace_flag & trace_automaton)
  58. fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
  59. nstates, sym, symbols[sym]->tag);
  60. node->next = NULL;
  61. node->state = res;
  62. if (!first_state)
  63. first_state = node;
  64. if (last_state)
  65. last_state->next = node;
  66. last_state = node;
  67. return res;
  68. }
  69. /* Symbols that can be "shifted" (including nonterminals) from the
  70. current state. */
  71. bitset shift_symbol;
  72. static rule **redset;
  73. /* For the current state, the list of pointers to states that can be
  74. reached via a shift/goto. Could be indexed by the reaching symbol,
  75. but labels of incoming transitions can be recovered by the state
  76. itself. */
  77. static state **shiftset;
  78. /* KERNEL_BASE[symbol-number] -> list of item indices (offsets inside
  79. RITEM) of length KERNEL_SIZE[symbol-number]. */
  80. static item_index **kernel_base;
  81. static int *kernel_size;
  82. /* A single dimension array that serves as storage for
  83. KERNEL_BASE. */
  84. static item_index *kernel_items;
  85. static void
  86. allocate_itemsets (void)
  87. {
  88. /* Count the number of occurrences of all the symbols in RITEMS.
  89. Note that useless productions (hence useless nonterminals) are
  90. browsed too, hence we need to allocate room for _all_ the
  91. symbols. */
  92. size_t count = 0;
  93. size_t *symbol_count = xcalloc (nsyms + nuseless_nonterminals,
  94. sizeof *symbol_count);
  95. for (rule_number r = 0; r < nrules; ++r)
  96. for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
  97. {
  98. symbol_number sym = item_number_as_symbol_number (*rhsp);
  99. count += 1;
  100. symbol_count[sym] += 1;
  101. }
  102. /* See comments before new_itemsets. All the vectors of items
  103. live inside KERNEL_ITEMS. The number of active items after
  104. some symbol S cannot be more than the number of times that S
  105. appears as an item, which is SYMBOL_COUNT[S].
  106. We allocate that much space for each symbol. */
  107. kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
  108. kernel_items = xnmalloc (count, sizeof *kernel_items);
  109. count = 0;
  110. for (symbol_number i = 0; i < nsyms; i++)
  111. {
  112. kernel_base[i] = kernel_items + count;
  113. count += symbol_count[i];
  114. }
  115. free (symbol_count);
  116. kernel_size = xnmalloc (nsyms, sizeof *kernel_size);
  117. }
  118. /* Print the current kernel (in KERNEL_BASE). */
  119. static void
  120. kernel_print (FILE *out)
  121. {
  122. for (symbol_number i = 0; i < nsyms; ++i)
  123. if (kernel_size[i])
  124. {
  125. fprintf (out, "kernel[%s] =\n", symbols[i]->tag);
  126. core_print (kernel_size[i], kernel_base[i], out);
  127. }
  128. }
  129. /* Make sure the kernel is in sane state. */
  130. static void
  131. kernel_check (void)
  132. {
  133. for (symbol_number i = 0; i < nsyms - 1; ++i)
  134. assert (kernel_base[i] + kernel_size[i] <= kernel_base[i + 1]);
  135. }
  136. static void
  137. allocate_storage (void)
  138. {
  139. allocate_itemsets ();
  140. shiftset = xnmalloc (nsyms, sizeof *shiftset);
  141. redset = xnmalloc (nrules, sizeof *redset);
  142. state_hash_new ();
  143. shift_symbol = bitset_create (nsyms, BITSET_FIXED);
  144. }
  145. static void
  146. free_storage (void)
  147. {
  148. bitset_free (shift_symbol);
  149. free (redset);
  150. free (shiftset);
  151. free (kernel_base);
  152. free (kernel_size);
  153. free (kernel_items);
  154. state_hash_free ();
  155. }
  156. /*------------------------------------------------------------------.
  157. | Find which term/nterm symbols can be "shifted" in S, and for each |
  158. | one record which items would be active after that transition. |
  159. | Uses the contents of itemset. |
  160. | |
  161. | shift_symbol is a bitset of the term/nterm symbols that can be |
  162. | shifted. For each symbol in the grammar, kernel_base[symbol] |
  163. | points to a vector of item numbers activated if that symbol is |
  164. | shifted, and kernel_size[symbol] is their numbers. |
  165. | |
  166. | itemset is sorted on item index in ritem, which is sorted on rule |
  167. | number. Compute each kernel_base[symbol] with the same sort. |
  168. `------------------------------------------------------------------*/
  169. static void
  170. new_itemsets (state *s)
  171. {
  172. if (trace_flag & trace_automaton)
  173. fprintf (stderr, "new_itemsets: begin: state = %d\n", s->number);
  174. memset (kernel_size, 0, nsyms * sizeof *kernel_size);
  175. bitset_zero (shift_symbol);
  176. if (trace_flag & trace_automaton)
  177. {
  178. fprintf (stderr, "initial kernel:\n");
  179. kernel_print (stderr);
  180. }
  181. for (size_t i = 0; i < nitemset; ++i)
  182. if (item_number_is_symbol_number (ritem[itemset[i]]))
  183. {
  184. if (trace_flag & trace_automaton)
  185. {
  186. fputs ("working on: ", stderr);
  187. item_print (ritem + itemset[i], NULL, stderr);
  188. fputc ('\n', stderr);
  189. }
  190. symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]);
  191. bitset_set (shift_symbol, sym);
  192. kernel_base[sym][kernel_size[sym]] = itemset[i] + 1;
  193. kernel_size[sym]++;
  194. }
  195. if (trace_flag & trace_automaton)
  196. {
  197. fprintf (stderr, "final kernel:\n");
  198. kernel_print (stderr);
  199. fprintf (stderr, "new_itemsets: end: state = %d\n\n", s->number);
  200. }
  201. kernel_check ();
  202. }
  203. /*--------------------------------------------------------------.
  204. | Find the state we would get to (from the current state) by |
  205. | shifting SYM. Create a new state if no equivalent one exists |
  206. | already. Used by append_states. |
  207. `--------------------------------------------------------------*/
  208. static state *
  209. get_state (symbol_number sym, size_t core_size, item_index *core)
  210. {
  211. if (trace_flag & trace_automaton)
  212. {
  213. fprintf (stderr, "Entering get_state, symbol = %d (%s), core:\n",
  214. sym, symbols[sym]->tag);
  215. core_print (core_size, core, stderr);
  216. fputc ('\n', stderr);
  217. }
  218. state *s = state_hash_lookup (core_size, core);
  219. if (!s)
  220. s = state_list_append (sym, core_size, core);
  221. if (trace_flag & trace_automaton)
  222. fprintf (stderr, "Exiting get_state => %d\n", s->number);
  223. return s;
  224. }
  225. /*---------------------------------------------------------------.
  226. | Use the information computed by new_itemsets to find the state |
  227. | numbers reached by each shift transition from S. |
  228. | |
  229. | SHIFTSET is set up as a vector of those states. |
  230. `---------------------------------------------------------------*/
  231. static void
  232. append_states (state *s)
  233. {
  234. if (trace_flag & trace_automaton)
  235. fprintf (stderr, "append_states: begin: state = %d\n", s->number);
  236. bitset_iterator iter;
  237. symbol_number sym;
  238. int i = 0;
  239. BITSET_FOR_EACH (iter, shift_symbol, sym, 0)
  240. {
  241. shiftset[i] = get_state (sym, kernel_size[sym], kernel_base[sym]);
  242. ++i;
  243. }
  244. if (trace_flag & trace_automaton)
  245. fprintf (stderr, "append_states: end: state = %d\n", s->number);
  246. }
  247. /*----------------------------------------------------------------.
  248. | Find which rules can be used for reduction transitions from the |
  249. | current state and make a reductions structure for the state to |
  250. | record their rule numbers. |
  251. `----------------------------------------------------------------*/
  252. static void
  253. save_reductions (state *s)
  254. {
  255. int count = 0;
  256. /* Find and count the active items that represent ends of rules. */
  257. for (size_t i = 0; i < nitemset; ++i)
  258. {
  259. item_number item = ritem[itemset[i]];
  260. if (item_number_is_rule_number (item))
  261. {
  262. rule_number r = item_number_as_rule_number (item);
  263. redset[count++] = &rules[r];
  264. if (r == 0)
  265. {
  266. /* This is "reduce 0", i.e., accept. */
  267. aver (!final_state);
  268. final_state = s;
  269. }
  270. }
  271. }
  272. if (trace_flag & trace_automaton)
  273. {
  274. fprintf (stderr, "reduction[%d] = {\n", s->number);
  275. for (int i = 0; i < count; ++i)
  276. {
  277. rule_print (redset[i], NULL, stderr);
  278. fputc ('\n', stderr);
  279. }
  280. fputs ("}\n", stderr);
  281. }
  282. /* Make a reductions structure and copy the data into it. */
  283. state_reductions_set (s, count, redset);
  284. }
  285. /*---------------.
  286. | Build STATES. |
  287. `---------------*/
  288. static void
  289. set_states (void)
  290. {
  291. states = xcalloc (nstates, sizeof *states);
  292. while (first_state)
  293. {
  294. state_list *this = first_state;
  295. /* Pessimization, but simplification of the code: make sure all
  296. the states have valid transitions and reductions members,
  297. even if reduced to 0. It is too soon for errs, which are
  298. computed later, but set_conflicts. */
  299. state *s = this->state;
  300. if (!s->transitions)
  301. state_transitions_set (s, 0, 0);
  302. if (!s->reductions)
  303. state_reductions_set (s, 0, 0);
  304. states[s->number] = s;
  305. first_state = this->next;
  306. free (this);
  307. }
  308. first_state = NULL;
  309. last_state = NULL;
  310. }
  311. /*-------------------------------------------------------------------.
  312. | Compute the LR(0) parser states (see state.h for details) from the |
  313. | grammar. |
  314. `-------------------------------------------------------------------*/
  315. void
  316. generate_states (void)
  317. {
  318. allocate_storage ();
  319. closure_new (nritems);
  320. /* Create the initial state. The 0 at the lhs is the index of the
  321. item of this initial rule. */
  322. item_index initial_core = 0;
  323. state_list_append (0, 1, &initial_core);
  324. /* States are queued when they are created; process them all. */
  325. for (state_list *list = first_state; list; list = list->next)
  326. {
  327. state *s = list->state;
  328. if (trace_flag & trace_automaton)
  329. fprintf (stderr, "Processing state %d (reached by %s)\n",
  330. s->number,
  331. symbols[s->accessing_symbol]->tag);
  332. /* Set up itemset for the transitions out of this state. itemset gets a
  333. vector of all the items that could be accepted next. */
  334. closure (s->items, s->nitems);
  335. /* Record the reductions allowed out of this state. */
  336. save_reductions (s);
  337. /* Find the itemsets of the states that shifts/gotos can reach. */
  338. new_itemsets (s);
  339. /* Find or create the core structures for those states. */
  340. append_states (s);
  341. /* Create the shifts structures for the shifts to those states,
  342. now that the state numbers transitioning to are known. */
  343. state_transitions_set (s, bitset_count (shift_symbol), shiftset);
  344. }
  345. /* discard various storage */
  346. free_storage ();
  347. /* Set up STATES. */
  348. set_states ();
  349. }