From 7f796bc571941a9c14eeb3a65d349d628f022275 Mon Sep 17 00:00:00 2001 From: kartofen Date: Mon, 30 Jun 2025 00:26:51 +0300 Subject: CLR yesss --- clr-table.c | 319 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 319 insertions(+) create mode 100644 clr-table.c (limited to 'clr-table.c') diff --git a/clr-table.c b/clr-table.c new file mode 100644 index 0000000..92e415f --- /dev/null +++ b/clr-table.c @@ -0,0 +1,319 @@ +#include +#include + +#ifndef XCALLOC_IMPLEMENTED +#define XCALLOC_IMPLEMENTED +void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); } +#else +extern void *xcalloc(size_t n, size_t size); +#endif + +typedef unsigned int symbol; +extern const size_t total_symbols; + +extern int symbol_is_terminal(symbol s); +extern int symbol_is_input_end(symbol s); + +extern struct production { + symbol LHS; + symbol *RHS; + size_t nRHS; +} grammar[]; +extern const size_t total_productions; + +void grammar_print() +{ + for(size_t i = 0; i < total_productions; i++) { + printf("%d --> ", grammar[i].LHS); + for(size_t j = 0; j < grammar[i].nRHS; j++) + printf("%d ", grammar[i].RHS[j]); + printf("\n"); + } +} + +extern int **first; +extern int **follow; + +struct action { + enum action_type { + ACTION_NOT_SET = 0, ACTION_SHIFT, + ACTION_GOTO, ACTION_REDUCE, + ACTION_ACCEPT + } type; + size_t arg; +}; + +#define TABLE_CAP 64 +struct action *table[TABLE_CAP]; +size_t table_states = 0; + +void table_allocate() { for(size_t i = 0; i < TABLE_CAP; i++) table[i] = xcalloc(total_symbols, sizeof(*table[i])); } +void table_free() { for(size_t i = 0; i < TABLE_CAP; i++) free(table[i]); } +int table_insert(size_t state, symbol sym, struct action a); +void table_print(); + +struct item { + size_t prod_idx; + size_t dot; + symbol lookahead; +}; + +int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx && i1->lookahead == i2->lookahead) ? 1 : 0; } + +#define SEEN_SETS_CAP 64 +static struct { + struct item *items; + size_t nitems; + size_t state; +} seen_sets[SEEN_SETS_CAP]; +static size_t nseen_sets; + +void seen_sets_free() { for(size_t i = 0; i < nseen_sets; i++) free(seen_sets[i].items);} + +size_t itemset_handle(struct item *set, size_t nset); +int itemset_insert(size_t state, struct item *initial_set, size_t ninitial); +size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout); +void itemset_print(struct item *set, size_t nset) +{ + printf("{"); + for(size_t i = 0; i < nset; i++) + printf("{%zu, %zu, %d}, ", set[i].prod_idx, set[i].dot, set[i].lookahead); + printf("}\n"); +} + +size_t itemset_handle(struct item *set, size_t nset) +{ + // 1. is set in seen_sets + for(size_t i = 0; i < nseen_sets; i++) { + if(seen_sets[i].nitems != nset) continue; + + int _seen = 0; + for(size_t j = 0; j < nset; j++) { + _seen = 0; + for(size_t k = 0; k < nset; k++) + if(item_eq(&seen_sets[i].items[k], &set[j])) _seen = 1; + if(!_seen) break; + } + if(_seen) return seen_sets[i].state; + } + + // 2. add set to seen_sets + if(nseen_sets >= SEEN_SETS_CAP) { + fprintf(stderr, "ERROR: SEEN_SET_CAP exceeded\n"); + exit(1); + } + + seen_sets[nseen_sets].items = xcalloc(nset, sizeof(*set)); + seen_sets[nseen_sets].nitems = nset; + for(size_t i = 0; i < nset; i++) + seen_sets[nseen_sets].items[i] = set[i]; + + // 3. insert new state + size_t new_state = seen_sets[nseen_sets++].state = table_states++; + if(new_state >= TABLE_CAP) { + fprintf(stderr, "ERROR: TABLE_CAP exceeded\n"); + exit(1); + } + + if(itemset_insert(new_state, set, nset)) { + fprintf(stderr, "ERROR: itemset_insert failed\n"); + exit(1); + } + + return new_state; +} + +#define CLOSURE_SET_CAP 64 +#define GOTO_SET_CAP 32 +int itemset_insert(size_t state, struct item *initial_set, size_t ninitial) +{ + struct item closure_set[CLOSURE_SET_CAP]; + + size_t nclosure = itemset_closure(initial_set, ninitial, closure_set, CLOSURE_SET_CAP); + if(nclosure > CLOSURE_SET_CAP) { + fprintf(stderr, "ERROR: CLOSURE_SET_CAP exceeded\n"); + return 1; + } + + for(size_t sym = 0; sym < total_symbols; sym++) { + struct item goto_set[GOTO_SET_CAP]; + size_t ngoto = 0; + + for(size_t j = 0; j < nclosure; j++) { + struct production *p = &grammar[closure_set[j].prod_idx]; + size_t dot = closure_set[j].dot; + + if(dot == p->nRHS) { + if(sym != 0) continue; + if(table_insert(state, closure_set[j].lookahead, (struct action) { + ACTION_REDUCE, closure_set[j].prod_idx})) + return 1; + continue; + } + + if(p->RHS[dot] == sym) { + if(ngoto >= GOTO_SET_CAP) { + fprintf(stderr, "ERROR: GOTO_SET_CAP exceeded\n"); + return 1; + } + goto_set[ngoto] = closure_set[j]; + goto_set[ngoto++].dot++; + } + } + + if(ngoto == 0) continue; + + if(symbol_is_input_end(sym)) { + if(table_insert(state, sym, (struct action){ACTION_ACCEPT, 0})) + return 1; + continue; + } + + size_t new_state = itemset_handle(goto_set, ngoto); + + if(table_insert(state, sym, (struct action){ + symbol_is_terminal(sym) ? ACTION_SHIFT : ACTION_GOTO, + new_state})) return 1; + } + + return 0; +} + +size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout_max) +{ + size_t nout = nin; + + if(nout > nout_max) return nout; + for(size_t i = 0; i < nin; i++) out_set[i] = in_set[i]; + + int **is_in_closure = xcalloc(total_productions, sizeof(*is_in_closure)); + for(size_t i = 0; i < total_productions; i++) + is_in_closure[i] = xcalloc(total_symbols, sizeof(**is_in_closure)); + +#define add_item(prod_idx, lookahead, ...) do { \ + if(is_in_closure[prod_idx][lookahead]) break; \ + is_in_closure[prod_idx][lookahead] = 1; \ + if(nout++ >= nout_max) goto cleanup; \ + out_set[nout-1] = __VA_ARGS__; \ + } while(0) + + for(size_t i = 0; i < nout; i++) + { + struct production *p = &grammar[out_set[i].prod_idx]; + if(out_set[i].dot == p->nRHS) continue; + symbol sym = p->RHS[out_set[i].dot]; + + for(size_t j = 0; j < total_productions; j++) + if(grammar[j].LHS == sym) { + if(out_set[i].dot + 1 == p->nRHS) { + add_item(j, out_set[i].lookahead, + (struct item){j, 0, out_set[i].lookahead}); + continue; + } + + symbol next_symbol = p->RHS[out_set[i].dot+1]; + for(size_t terminal = 0; terminal < total_symbols; terminal++) { + if(!symbol_is_terminal(terminal)) continue; + if(first[next_symbol][terminal]) + add_item(j, terminal, (struct item){j, 0, terminal}); + } + } + } + +cleanup: + for(size_t i = 0; i < total_productions; i++) free(is_in_closure[i]); + free(is_in_closure); + return nout; +} + +int table_insert(size_t state, symbol sym, struct action a) +{ + if(table[state][sym].type != ACTION_NOT_SET) { + fprintf(stderr, "TABLE COLLISION on state '%zu' sym '%d'\n", state, sym); + fprintf(stderr, "\t{%d %zu} vs {%d %zu}\n", + table[state][sym].type, table[state][sym].arg, + a.type, a.arg); + return 1; + } + + table[state][sym] = a; + return 0; +} + +void table_print() +{ + printf(" "); + for(size_t sym = 0; sym < total_symbols; sym++) printf("%2zu ", sym); + printf("\n"); + + char action_to_char[] = {[ACTION_SHIFT] = 's', [ACTION_REDUCE] = 'r', [ACTION_GOTO] = 'g'}; + for(size_t i = 0; i < table_states; i++) { + printf("%2zu ", i); + for(size_t sym = 0; sym < total_symbols; sym++) + if(table[i][sym].type == ACTION_ACCEPT) printf(" a "); + else if(table[i][sym].type) printf("%c%-2zu ", action_to_char[table[i][sym].type], table[i][sym].arg); + else printf(" "); + printf("\n"); + } +} + +#ifdef _CLR_TABLE_STANDALONE + +#include "util-tables.c" + +#ifndef CHOOSE_GRAMMAR +#define CHOOSE_GRAMMAR 1 // 0 or 1 +#endif + +enum symbol { +#if (CHOOSE_GRAMMAR == 0) + ID, EQUAL, STAR, + END_INPUT, + EP, E, L, R, + SYMBOLS_END, +#else + SC, SD, + END_INPUT, + EP, E, C, + SYMBOLS_END +#endif +}; + +const size_t total_symbols = SYMBOLS_END; + +int symbol_is_terminal(symbol s) { return s < EP; } +int symbol_is_input_end(symbol s) { return s == END_INPUT; } + +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +struct production grammar[] = { +#if (CHOOSE_GRAMMAR == 0) + PROD(EP, ->, E, END_INPUT), + PROD(E, -->, L, EQUAL, R), + PROD(E, -->, R), + PROD(L, -->, STAR, R), + PROD(L, -->, ID), + PROD(R, -->, L), +#else + PROD(EP, ->, E, END_INPUT), + PROD(E, -->, C, C), + PROD(C, -->, SC, C), + PROD(C, -->, SD), +#endif +}; + +const size_t total_productions = sizeof(grammar)/sizeof(*grammar); + +int main(void) +{ + util_tables_fill(); + table_allocate(); + + itemset_handle((struct item[]){{0, 0, END_INPUT}}, 1); + table_print(); + + seen_sets_free(); + table_free(); + util_tables_free(); +} + +#endif -- cgit v1.2.3