From 7743cb4f8a06ab79a521c4346aac74b47c8ce224 Mon Sep 17 00:00:00 2001 From: kartofen Date: Mon, 30 Jun 2025 19:51:44 +0300 Subject: minimal lr parser --- build.sh | 8 +++- clr-table.c | 2 +- fusion.c | 60 +++++++++++++++++++++++++ lr-parser.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ slr-table.c | 7 +++ 5 files changed, 219 insertions(+), 3 deletions(-) create mode 100644 fusion.c create mode 100644 lr-parser.c diff --git a/build.sh b/build.sh index 855d574..2680884 100755 --- a/build.sh +++ b/build.sh @@ -18,11 +18,15 @@ function leak # cc recursive/recursive-ascent-descent # cc util-tables -D_UTIL_TABLES_STANDALONE # cc slr-table -D_SLR_TABLE_STANDALONE -cc clr-table -D_CLR_TABLE_STANDALONE +# cc clr-table -D_CLR_TABLE_STANDALONE +# cc lr-parser -D_LR_PARSER_STANDALONE -std=gnu99 +cc fusion # leak lexer # leak recursive-ascent # leak recursive-ascent-descent # leak util-tables # leak slr-table -leak clr-table +# leak clr-table +# leak lr-parser +leak fusion diff --git a/clr-table.c b/clr-table.c index 92e415f..b6bd47b 100644 --- a/clr-table.c +++ b/clr-table.c @@ -144,7 +144,7 @@ int itemset_insert(size_t state, struct item *initial_set, size_t ninitial) size_t dot = closure_set[j].dot; if(dot == p->nRHS) { - if(sym != 0) continue; + if(sym != 0) continue; // do it 1 time if(table_insert(state, closure_set[j].lookahead, (struct action) { ACTION_REDUCE, closure_set[j].prod_idx})) return 1; diff --git a/fusion.c b/fusion.c new file mode 100644 index 0000000..1e4579f --- /dev/null +++ b/fusion.c @@ -0,0 +1,60 @@ +#include +#include + +#include "util-tables.c" +#include "slr-table.c" +#include "lr-parser.c" + +enum symbol { + PLUS = 0, + MINUS, + LPAREN, + RPAREN, + N0, N1, + END_INPUT, + + EP, E, T, N, + SYMBOLS_END, +}; + +const size_t total_symbols = SYMBOLS_END; + +int symbol_is_terminal(symbol s) { return s < EP; } +int symbol_is_input_end(symbol s) { return s == END_INPUT; } +int symbol_is_valid(symbol s) { return s < SYMBOLS_END; } + +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +struct production grammar[] = { + PROD(EP, ->, E, END_INPUT), + PROD(E, -->, E, PLUS, T), + PROD(E, -->, E, MINUS, T), + PROD(E, -->, T), + PROD(T, -->, LPAREN, E, RPAREN), + PROD(T, -->, N), + PROD(N, -->, N0), + PROD(N, -->, N1), +}; + +const size_t total_productions = sizeof(grammar)/sizeof(*grammar); + +symbol toklist[] = {N0, PLUS, N1, MINUS, N0, N0, END_INPUT}; +symbol *tok = toklist; + +symbol toklist_eat() { return *(tok++); } +symbol toklist_peek() { return *tok; } + +int main(void) +{ + util_tables_fill(); + table_allocate(); + + itemset_handle((struct item[]){{0, 0}}, 1); + + lr_parser() && (exit(1), 1); + + seen_sets_free(); + table_free(); + util_tables_free(); + + return r; +} diff --git a/lr-parser.c b/lr-parser.c new file mode 100644 index 0000000..3b0ecb7 --- /dev/null +++ b/lr-parser.c @@ -0,0 +1,145 @@ +#include +#include + +typedef unsigned int symbol; +// extern const size_t total_symbols; + +// extern int symbol_is_terminal(symbol s); +// extern int symbol_is_input_end(symbol s); +extern int symbol_is_valid(symbol s); + +extern struct production { + symbol LHS; + symbol *RHS; + size_t nRHS; +} grammar[]; +extern const size_t total_productions; +// extern void grammar_print(); + +struct action { + enum action_type { + ACTION_NOT_SET = 0, ACTION_SHIFT, + ACTION_GOTO, ACTION_REDUCE, + ACTION_ACCEPT + } type; + size_t arg; +}; + +extern struct action *table[]; +extern size_t table_states; +// extern void table_print(); + +extern symbol toklist_eat(); +extern symbol toklist_peek(); + +typedef int stack_item; + +#define STACK_CAP 128 +stack_item stack_bottom[STACK_CAP]; +stack_item *stack_head = stack_bottom; + +int lr_parser() +{ +#define push(item) ({ if(++stack_head - stack_bottom < STACK_CAP ) (*stack_head = item); else { fprintf(stderr, "ERROR: STACK_CAP exceeded"); return 1; }}) +#define pop() (--stack_head) +#define eat() toklist_eat() +#define peek() ({ symbol s = toklist_peek(); if(!symbol_is_valid(s)) return 1; s; }) + + while(1) { + struct action a = table[(size_t)*stack_head][peek()]; + + switch(a.type) { + case ACTION_SHIFT: + push(eat()); + push(a.arg); + break; + case ACTION_REDUCE: + for(size_t i = 0; i < 2*grammar[a.arg].nRHS; i++) pop(); + + symbol lhs = grammar[a.arg].LHS; + struct action a_goto = table[(size_t)*stack_head][lhs]; + if(a_goto.type != ACTION_GOTO) { + fprintf(stderr, + "ERROR: Expected goto action for symbol %d at state %zu", + lhs, (size_t)*stack_head); + return 1; + } + + push(lhs); + push(a_goto.arg); + break; + case ACTION_ACCEPT: + return 0; + case ACTION_NOT_SET: + default: + fprintf(stderr, + "ERROR: Unexpected error symbol %d at state %zu", + peek(), (size_t)*stack_head); + return 1; + } + } + + return 1; +} + +#ifdef _LR_PARSER_STANDALONE + +enum symbol { + PLUS = 0, + MINUS, + LPAREN, + RPAREN, + N0, N1, + END_INPUT, + + EP, E, T, N, + SYMBOLS_END, +}; + +const size_t total_symbols = SYMBOLS_END; + +int symbol_is_valid(symbol s) { return s < SYMBOLS_END; } + +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +struct production grammar[] = { + PROD(EP, ->, E, END_INPUT), + PROD(E, -->, E, PLUS, T), + PROD(E, -->, E, MINUS, T), + PROD(E, -->, T), + PROD(T, -->, LPAREN, E, RPAREN), + PROD(T, -->, N), + PROD(N, -->, N0), + PROD(N, -->, N1), +}; + +const size_t total_productions = sizeof(grammar)/sizeof(*grammar); + +struct action *table[] = { + (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 12},{2, 11},{2, 7},}, + (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 4},{2, 11},{2, 7},}, + (struct action[]){{3, 6},{3, 6},{0, 0},{3, 6},{0, 0},{0, 0},{3, 6},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{3, 7},{3, 7},{0, 0},{3, 7},{0, 0},{0, 0},{3, 7},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{1, 5},{1, 8},{0, 0},{1, 10},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{0, 0},{2, 6},{2, 7},}, + (struct action[]){{3, 1},{3, 1},{0, 0},{3, 1},{0, 0},{0, 0},{3, 1},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{3, 5},{3, 5},{0, 0},{3, 5},{0, 0},{0, 0},{3, 5},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{0, 0},{2, 9},{2, 7},}, + (struct action[]){{3, 2},{3, 2},{0, 0},{3, 2},{0, 0},{0, 0},{3, 2},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{3, 4},{3, 4},{0, 0},{3, 4},{0, 0},{0, 0},{3, 4},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{3, 3},{3, 3},{0, 0},{3, 3},{0, 0},{0, 0},{3, 3},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{1, 5},{1, 8},{0, 0},{0, 0},{0, 0},{0, 0},{4, 0},{0, 0},{0, 0},{0, 0},{0, 0},}, +}; +size_t table_states = 13; + +symbol toklist[] = {N0, PLUS, N1, END_INPUT}; +symbol *tok = toklist; + +symbol toklist_eat() { return *(tok++); } +symbol toklist_peek() { return *tok; } + +int main(void) +{ + return lr_parser(); +} + +#endif diff --git a/slr-table.c b/slr-table.c index 2454a65..39e95ae 100644 --- a/slr-table.c +++ b/slr-table.c @@ -235,6 +235,13 @@ void table_print() else printf(" "); printf("\n"); } + + // for(size_t i = 0; i < table_states; i++) { + // printf("{"); + // for(size_t sym = 0; sym < total_symbols; sym++) + // printf("{%d, %d},", table[i][sym].type, table[i][sym].arg); + // printf("},\n"); + // } } #ifdef _SLR_TABLE_STANDALONE -- cgit v1.2.3