aboutsummaryrefslogtreecommitdiff
path: root/lr0-table.c
diff options
context:
space:
mode:
authorkartofen <mladenovnasko0@gmail.com>2025-06-21 23:52:33 +0300
committerkartofen <mladenovnasko0@gmail.com>2025-06-21 23:52:33 +0300
commit1cbaa39e3cc7e8948cbd5be5f2d18170fcdebfe0 (patch)
treee6dfb5bd8788664d48b04165e5aa2fc3b0590797 /lr0-table.c
parent333acd33d8fa58ca463677b704153a90fc8fad44 (diff)
make lr0 and lexer modular
Diffstat (limited to 'lr0-table.c')
-rw-r--r--lr0-table.c326
1 files changed, 326 insertions, 0 deletions
diff --git a/lr0-table.c b/lr0-table.c
new file mode 100644
index 0000000..721bdb5
--- /dev/null
+++ b/lr0-table.c
@@ -0,0 +1,326 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef MAX_SYMBOLS
+#define MAX_SYMBOLS 64
+#endif
+
+#ifdef XCALLOC_IMPLEMENTED
+extern void *xcalloc(size_t n, size_t size);
+#else
+void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); }
+#endif
+
+typedef unsigned int symbol;
+
+extern const size_t total_symbols;
+extern int symbol_is_terminal(symbol s);
+extern int symbol_is_nonterminal(symbol s);
+extern int symbol_is_input_end(symbol s);
+
+extern struct production {
+ symbol LHS;
+ symbol *RHS;
+ size_t nRHS;
+} grammar[];
+extern const size_t total_productions;
+
+void grammar_print()
+{
+ for(size_t i = 0; i < total_productions; i++) {
+ printf("%d --> ", grammar[i].LHS);
+ for(size_t j = 0; j < grammar[i].nRHS; j++)
+ printf("%d ", grammar[i].RHS[j]);
+ printf("\n");
+ }
+}
+
+static int follow[MAX_SYMBOLS][MAX_SYMBOLS];
+
+void follow_table_fill();
+void follow_table_print();
+
+struct action {
+ enum action_type {
+ ACTION_NOT_SET = 0, ACTION_SHIFT,
+ ACTION_GOTO, ACTION_REDUCE,
+ ACTION_ACCEPT
+ } type;
+ size_t arg;
+};
+
+#define TABLE_CAP 64
+static struct action table[TABLE_CAP][MAX_SYMBOLS]; // *(table[total_symbols]);
+static size_t table_states;
+
+int table_insert(size_t state, symbol sym, struct action a);
+void table_print();
+
+struct item {
+ size_t prod_idx;
+ size_t dot;
+};
+
+int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx) ? 1 : 0; }
+
+#define SEEN_SETS_CAP 64
+static struct {
+ struct item *items;
+ size_t nitems;
+ size_t state;
+} seen_sets[SEEN_SETS_CAP];
+static size_t nseen_sets;
+
+void seen_sets_free() { for(size_t i = 0; i < nseen_sets; i++) free(seen_sets[i].items);}
+
+size_t itemset_handle(struct item *set, size_t nset);
+int itemset_insert(size_t state, struct item *initial_set, size_t ninitial);
+size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout);
+void itemset_print(struct item *set, size_t nset)
+{
+ printf("{");
+ for(size_t i = 0; i < nset; i++)
+ printf("{%zu, %zu}, ", set[i].prod_idx, set[i].dot);
+ printf("}\n");
+}
+
+size_t itemset_handle(struct item *set, size_t nset)
+{
+ // 1. is set in seen_sets
+ for(size_t i = 0; i < nseen_sets; i++) {
+ if(seen_sets[i].nitems != nset) continue;
+
+ int _seen = 0;
+ for(size_t j = 0; j < nset; j++) {
+ _seen = 0;
+ for(size_t k = 0; k < nset; k++)
+ if(item_eq(&seen_sets[i].items[k], &set[j])) _seen = 1;
+ if(!_seen) break;
+ }
+ if(_seen) return seen_sets[i].state;
+ }
+
+ // 2. add set to seen_sets
+ if(nseen_sets >= SEEN_SETS_CAP) {
+ fprintf(stderr, "ERROR: SEEN_SET_CAP exceeded\n");
+ exit(1);
+ }
+
+ seen_sets[nseen_sets].items = xcalloc(nset, sizeof(*set));
+ seen_sets[nseen_sets].nitems = nset;
+ for(size_t i = 0; i < nset; i++)
+ seen_sets[nseen_sets].items[i] = set[i];
+
+ // 3. insert new state
+ size_t new_state = seen_sets[nseen_sets++].state = table_states++;
+ if(new_state >= TABLE_CAP) {
+ fprintf(stderr, "ERROR: TABLE_CAP exceeded\n");
+ exit(1);
+ }
+
+ if(itemset_insert(new_state, set, nset)) {
+ fprintf(stderr, "ERROR: itemset_insert failed\n");
+ exit(1);
+ }
+
+ return new_state;
+}
+
+#define CLOSURE_SET_CAP 64
+#define GOTO_SET_CAP 32
+int itemset_insert(size_t state, struct item *initial_set, size_t ninitial)
+{
+ struct item closure_set[CLOSURE_SET_CAP];
+
+ size_t nclosure = itemset_closure(initial_set, ninitial, closure_set, CLOSURE_SET_CAP);
+ if(nclosure > CLOSURE_SET_CAP) {
+ fprintf(stderr, "ERROR: CLOSURE_SET_CAP exceeded\n");
+ return 1;
+ }
+
+ for(size_t sym = 0; sym < total_symbols; sym++) {
+ struct item goto_set[GOTO_SET_CAP];
+ size_t ngoto = 0;
+
+ for(size_t j = 0; j < nclosure; j++) {
+ struct production *p = &grammar[closure_set[j].prod_idx];
+ size_t dot = closure_set[j].dot;
+
+ if(dot == p->nRHS) {
+ if(!follow[p->LHS][sym]) continue;
+ if(table_insert(state, sym, (struct action){
+ ACTION_REDUCE, closure_set[j].prod_idx}))
+ return 1;
+ continue;
+ }
+
+ if(p->RHS[dot] == sym) {
+ if(ngoto >= GOTO_SET_CAP) {
+ fprintf(stderr, "ERROR: GOTO_SET_CAP exceeded\n");
+ return 1;
+ }
+ goto_set[ngoto] = closure_set[j];
+ goto_set[ngoto++].dot++;
+ }
+ }
+
+ if(ngoto == 0) continue;
+
+ if(symbol_is_input_end(sym)) {
+ if(table_insert(state, sym, (struct action){ACTION_ACCEPT, 0}))
+ return 1;
+ continue;
+ }
+
+ size_t new_state = itemset_handle(goto_set, ngoto);
+
+ if(table_insert(state, sym, (struct action){
+ symbol_is_terminal(sym) ? ACTION_SHIFT : ACTION_GOTO,
+ new_state})) return 1;
+ }
+
+ return 0;
+}
+
+size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout_max)
+{
+ size_t nout = nin;
+
+ if(nout > nout_max) return nout;
+ for(size_t i = 0; i < nin; i++) out_set[i] = in_set[i];
+
+ int *is_in_closure = xcalloc(total_productions, sizeof(int));
+ for(size_t i = 0; i < nout; i++)
+ {
+ struct production *p = &grammar[out_set[i].prod_idx];
+ if(out_set[i].dot == p->nRHS) continue;
+ symbol sym = p->RHS[out_set[i].dot];
+
+ for(size_t j = 0; j < total_productions; j++)
+ if(grammar[j].LHS == sym) {
+ if(is_in_closure[j]) continue;
+ is_in_closure[j] = 1;
+
+ if(nout++ >= nout_max) goto cleanup;
+ out_set[nout-1] = (struct item){j, 0};
+ }
+ }
+
+cleanup:
+ free(is_in_closure);
+ return nout;
+}
+
+void follow_table_fill()
+{
+ for(size_t i = 0; i < total_productions; i++) {
+ struct production *p = &grammar[i];
+ for(size_t j = 1; j < p->nRHS; j++)
+ follow[p->RHS[j-1]][p->RHS[j]] = 1;
+ }
+
+#define set(e) if((e) != 1) changed = (e) = 1
+
+ int changed;
+ do {
+ changed = 0;
+ for(size_t i = 0; i < total_productions; i++)
+ for(size_t sym = 0; sym < total_symbols; sym++) {
+ struct production *p = &grammar[i];
+ if(follow[p->LHS][sym])
+ set(follow[p->RHS[p->nRHS-1]][sym]);
+ if(follow[sym][p->LHS])
+ set(follow[sym][p->RHS[0]]);
+ }
+ } while(changed);
+}
+
+void follow_table_print()
+{
+ printf(" ");
+ for(size_t i = 0; i < total_symbols; i++) printf("%2zu ", i);
+ printf("\n");
+
+ for(size_t i = 0; i < total_symbols; i++) {
+ printf("%2zu ", i);
+ for(size_t j = 0; j < total_symbols; j++)
+ printf(" %s ", follow[i][j] ? "x" : " ");
+ printf("\n");
+ }
+}
+
+int table_insert(size_t state, symbol sym, struct action a)
+{
+ if(table[state][sym].type != ACTION_NOT_SET) {
+ fprintf(stderr, "TABLE COLLISION on state '%zu' sym '%d'\n", state, sym);
+ fprintf(stderr, "\t{%d %zu} vs {%d %zu}",
+ table[state][sym].type, table[state][sym].arg,
+ a.type, a.arg);
+ return 1;
+ }
+
+ table[state][sym] = a;
+ return 0;
+}
+
+void table_print()
+{
+ printf(" ");
+ for(size_t sym = 0; sym < total_symbols; sym++) printf("%2zu ", sym);
+ printf("\n");
+
+ char action_to_char[] = {[ACTION_SHIFT] = 's', [ACTION_REDUCE] = 'r', [ACTION_GOTO] = 'g'};
+ for(size_t i = 0; i < table_states; i++) {
+ printf("%2zu ", i);
+ for(size_t sym = 0; sym < total_symbols; sym++)
+ if(table[i][sym].type == ACTION_ACCEPT) printf(" a ");
+ else if(table[i][sym].type) printf("%c%-2zu ", action_to_char[table[i][sym].type], table[i][sym].arg);
+ else printf(" ");
+ printf("\n");
+ }
+}
+
+#ifdef _LR0_TABLE_STANDALONE
+
+enum symbol {
+ PLUS = 0,
+ MINUS,
+ LPAREN,
+ RPAREN,
+ N0, N1,
+ END_INPUT,
+
+ EP, E, T, N,
+ SYMBOLS_END,
+};
+const size_t total_symbols = SYMBOLS_END;
+
+int symbol_is_terminal(symbol s) { return s < E; }
+int symbol_is_nonterminal(symbol s) { return s >= E; }
+int symbol_is_input_end(symbol s) { return s == END_INPUT; }
+
+#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
+struct production grammar[] = {
+ PROD(EP, ->, E, END_INPUT),
+ PROD(E, -->, E, PLUS, T),
+ PROD(E, -->, E, MINUS, T),
+ PROD(E, -->, T),
+ PROD(T, -->, LPAREN, E, RPAREN),
+ PROD(T, -->, N),
+ PROD(N, -->, N0),
+ PROD(N, -->, N1),
+};
+
+const size_t total_productions = sizeof(grammar)/sizeof(*grammar);
+
+int main(void)
+{
+ follow_table_fill();
+
+ itemset_handle((struct item[]){{0, 0}}, 1);
+ table_print();
+
+ seen_sets_free();
+}
+
+#endif