#include #include #include #include // TODO: handle conflicts (itemset_insert returns 2 on table problem) #ifndef XCALLOC_IMPLEMENTED #define XCALLOC_IMPLEMENTED void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); } #else extern void *xcalloc(size_t n, size_t size); #endif // Requirements #include "parts/symbol.h" #include "parts/grammar.h" #include "parts/util-tables.h" // Implements #include "parts/table.h" #define TABLE_CAP 64 static struct action *__table[TABLE_CAP]; struct action **table = __table; size_t table_states = 0; static jmp_buf fail_jmpbuf; static void table_allocate() { for(size_t i = 0; i < TABLE_CAP; i++) table[i] = xcalloc(total_symbols, sizeof(*table[i])); } static void table_deallocate() { for(size_t i = 0; i < TABLE_CAP; i++) free(table[i]); } static int table_insert(size_t state, symbol sym, struct action a); struct item { size_t prod_idx; size_t dot; symbol lookahead; }; static int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx && i1->lookahead == i2->lookahead) ? 1 : 0; } #ifdef _LAZY_LALR static int item_core_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx) ? 1 : 0; } #endif #define SEEN_SETS_CAP 64 static struct { struct item *items; size_t nitems; size_t state; } seen_sets[SEEN_SETS_CAP]; static size_t nseen_sets; static void seen_sets_free() { for(size_t i = 0; i < nseen_sets; i++) free(seen_sets[i].items);} static size_t itemset_handle(struct item *set, size_t nset); static int itemset_insert(size_t state, struct item *initial_set, size_t ninitial); static size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout); static void itemset_print(struct item *set, size_t nset) { printf("{"); for(size_t i = 0; i < nset; i++) printf("{%zu, %zu, %d}, ", set[i].prod_idx, set[i].dot, set[i].lookahead); printf("}\n"); } static size_t itemset_handle(struct item *set, size_t nset) { #ifdef _LAZY_LALR size_t use_state = SIZE_MAX; #endif // 1. is set in seen_sets for(size_t i = 0; i < nseen_sets; i++) { if(seen_sets[i].nitems == nset) { int _seen = 0; for(size_t j = 0; j < nset; j++) { _seen = 0; for(size_t k = 0; k < nset; k++) if(item_eq(&seen_sets[i].items[k], &set[j])) _seen = 1; if(!_seen) break; } if(_seen) return seen_sets[i].state; } #ifdef _LAZY_LALR int _same_core = 1; for(size_t j = 0; j < nset; j++) { for(size_t k = 0; k < seen_sets[i].nitems; k++) if(!item_core_eq(&seen_sets[i].items[k], &set[j])) _same_core = 0; if(!_same_core) break; } if(_same_core) { use_state = seen_sets[i].state; /*break;*/ } #endif } // 2. add set to seen_sets if(nseen_sets >= SEEN_SETS_CAP) { fprintf(stderr, "ERROR: SEEN_SET_CAP exceeded\n"); longjmp(fail_jmpbuf, 1); } seen_sets[nseen_sets].items = xcalloc(nset, sizeof(*set)); seen_sets[nseen_sets].nitems = nset; for(size_t i = 0; i < nset; i++) seen_sets[nseen_sets].items[i] = set[i]; // 3. insert new state #ifdef _LAZY_LALR size_t new_state = seen_sets[nseen_sets++].state = (use_state != SIZE_MAX) ? use_state : table_states++; #else size_t new_state = seen_sets[nseen_sets++].state = table_states++; #endif if(new_state >= TABLE_CAP) { fprintf(stderr, "ERROR: TABLE_CAP exceeded\n"); longjmp(fail_jmpbuf, 1); } if(itemset_insert(new_state, set, nset)) { fprintf(stderr, "ERROR: itemset_insert failed\n"); longjmp(fail_jmpbuf, 1); } return new_state; } #define CLOSURE_SET_CAP 64 #define GOTO_SET_CAP 32 static int itemset_insert(size_t state, struct item *initial_set, size_t ninitial) { struct item closure_set[CLOSURE_SET_CAP]; size_t nclosure = itemset_closure(initial_set, ninitial, closure_set, CLOSURE_SET_CAP); if(nclosure > CLOSURE_SET_CAP) { fprintf(stderr, "ERROR: CLOSURE_SET_CAP exceeded\n"); return 1; } for(size_t sym = 0; sym < total_symbols; sym++) { struct item goto_set[GOTO_SET_CAP]; size_t ngoto = 0; for(size_t j = 0; j < nclosure; j++) { struct production *p = &grammar[closure_set[j].prod_idx]; size_t dot = closure_set[j].dot; if(dot == p->nRHS) { if(sym != 0) continue; // do it 1 time if(table_insert(state, closure_set[j].lookahead, (struct action){ ACTION_REDUCE, closure_set[j].prod_idx})) return 1; continue; } if(p->RHS[dot] == sym) { if(ngoto >= GOTO_SET_CAP) { fprintf(stderr, "ERROR: GOTO_SET_CAP exceeded\n"); return 1; } goto_set[ngoto] = closure_set[j]; goto_set[ngoto++].dot++; } } if(ngoto == 0) continue; if(symbol_is_input_end(sym)) { if(table_insert(state, sym, (struct action){ACTION_ACCEPT, 0})) return 1; continue; } size_t new_state = itemset_handle(goto_set, ngoto); if(table_insert(state, sym, (struct action){ symbol_is_terminal(sym) ? ACTION_SHIFT : ACTION_GOTO, new_state})) return 1; } return 0; } static size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout_max) { size_t nout = nin; if(nout > nout_max) return nout; for(size_t i = 0; i < nin; i++) out_set[i] = in_set[i]; int **is_in_closure = xcalloc(total_productions, sizeof(*is_in_closure)); for(size_t i = 0; i < total_productions; i++) is_in_closure[i] = xcalloc(total_symbols, sizeof(**is_in_closure)); #define add_item(prod_idx, lookahead, ...) do { \ if(is_in_closure[prod_idx][lookahead]) break; \ is_in_closure[prod_idx][lookahead] = 1; \ if(nout++ >= nout_max) goto cleanup; \ out_set[nout-1] = __VA_ARGS__; \ } while(0) for(size_t i = 0; i < nout; i++) { struct production *p = &grammar[out_set[i].prod_idx]; if(out_set[i].dot == p->nRHS) continue; symbol sym = p->RHS[out_set[i].dot]; for(size_t j = 0; j < total_productions; j++) if(grammar[j].LHS == sym) { if(out_set[i].dot + 1 == p->nRHS) { add_item(j, out_set[i].lookahead, (struct item){j, 0, out_set[i].lookahead}); continue; } symbol next_symbol = p->RHS[out_set[i].dot+1]; for(size_t terminal = 0; terminal < total_symbols; terminal++) { if(!symbol_is_terminal(terminal)) continue; if(first[next_symbol][terminal]) add_item(j, terminal, (struct item){j, 0, terminal}); } } } cleanup: for(size_t i = 0; i < total_productions; i++) free(is_in_closure[i]); free(is_in_closure); return nout; } static int table_insert(size_t state, symbol sym, struct action a) { if(table[state][sym].type != ACTION_NOT_SET) #ifdef _LAZY_LALR if(table[state][sym].type != a.type && table[state][sym].arg != a.arg) #endif { fprintf(stderr, "TABLE COLLISION on state '%zu' sym '%d'\n", state, sym); fprintf(stderr, "\t{%d %zu} vs {%d %zu}\n", table[state][sym].type, table[state][sym].arg, a.type, a.arg); return 1; } table[state][sym] = a; return 0; } IMPLEMENT_FUNCPTR(int, table_fill, ()) { table_allocate(); // Possible bug: wrong lookahead for kernel item of state, // but it may not really matter int r = setjmp(fail_jmpbuf); if(r == 0) itemset_handle((struct item[]){{0, 0, 0}}, 1); return r; } IMPLEMENT_FUNCPTR(void, table_free, ()) { seen_sets_free(); table_deallocate(); } #ifdef _CLR_TABLE_STANDALONE #ifndef CHOOSE_GRAMMAR #define CHOOSE_GRAMMAR 1 // 0 or 1 #endif // implement symbol.h enum symbol { #if (CHOOSE_GRAMMAR == 0) ID, EQUAL, STAR, END_INPUT, EP, E, L, R, SYMBOLS_END, #else SC, SD, END_INPUT, EP, E, C, SYMBOLS_END #endif }; size_t total_symbols = SYMBOLS_END; IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } // implement grammar.h #define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} static struct production _grammar[] = { #if (CHOOSE_GRAMMAR == 0) PROD(EP, ->, E, END_INPUT), PROD(E, -->, L, EQUAL, R), PROD(E, -->, R), PROD(L, -->, STAR, R), PROD(L, -->, ID), PROD(R, -->, L), #else PROD(EP, ->, E, END_INPUT), PROD(E, -->, C, C), PROD(C, -->, SC, C), PROD(C, -->, SD), #endif }; struct production *grammar = _grammar; size_t total_productions = sizeof(_grammar)/sizeof(*_grammar); // implement util-tables.h #include "util-tables.c" int main(void) { int r = 0; util_tables_fill(); if((r = table_fill())) goto cleanup; table_print(); cleanup: table_free(); util_tables_free(); return r; } #endif /* +----------------------------+ * | CLR | * +---+------------------------+ * | - | 0 1 2 3 4 5 | * +---+------------------------+ * | 0 | s1 s2 g4 g5 | * | 1 | s1 s2 g3 | * | 2 | r3 r3 | * | 3 | r2 r2 | * | 4 | a | * | 5 | s6 s7 g9 | * | 6 | s6 s7 g8 | * | 7 | r3 | * | 8 | r2 | * | 9 | r1 | * +---+------------------------+ * * +----------------------------+ * | LALR | * +---+------------------------+ * | - | 0 1 2 3 4 5 | * +---+------------------------+ * | 0 | s1 s2 g4 g5 | * | 1 | s1 s2 g3 | * | 2 | r3 r3 r3 | * | 3 | r2 r2 r2 | * | 4 | a | * | 5 | s1 s2 g6 | * | 6 | r1 | * +---+------------------------+ */