From 5064a7ebce75a26d0405c92040f1a40187fcc7e3 Mon Sep 17 00:00:00 2001 From: kartofen Date: Wed, 2 Jul 2025 22:55:08 +0300 Subject: turn clr into lalr and first steps for generating a parser --- build.sh | 16 +++++++-- clr-table.c | 87 +++++++++++++++++++++++++++-------------------- demos/generate-parser.c | 25 ++++++++++++++ demos/instant-parser.c | 61 +++++++++++++++++++++++++++++++++ demos/sample-files/defs.c | 23 +++++++++++++ fusion.c | 61 --------------------------------- lr-parser.c | 12 +++---- parts/symbol.h | 2 +- parts/table.h | 2 +- parts/toklist.h | 2 +- parts/util-tables.h | 3 ++ slr-table.c | 9 ++--- util-tables.c | 18 +++++----- 13 files changed, 199 insertions(+), 122 deletions(-) create mode 100644 demos/generate-parser.c create mode 100644 demos/instant-parser.c create mode 100644 demos/sample-files/defs.c delete mode 100644 fusion.c diff --git a/build.sh b/build.sh index 464d86a..d7b6f33 100755 --- a/build.sh +++ b/build.sh @@ -5,7 +5,13 @@ set -xe function cc { mkdir -p bin - gcc -Wall -Wextra -Wpedantic -g $2 $1.c -o bin/$(basename $1) + gcc -Wall -Wextra -Wpedantic -I. -g $2 $1.c -o bin/$(basename $1) +} + +function shared +{ + mkdir -p bin + gcc -Wall -Wextra -Wpedantic -I. -shared -fPIC $2 $1.c -o bin/$(basename $1).so } function leak @@ -19,8 +25,11 @@ function leak # cc util-tables -D_UTIL_TABLES_STANDALONE # cc slr-table -D_SLR_TABLE_STANDALONE # cc clr-table -D_CLR_TABLE_STANDALONE +# cc clr-table "-D_CLR_TABLE_STANDALONE -D_LAZY_LALR" # cc lr-parser -D_LR_PARSER_STANDALONE -cc fusion +# cc demos/instant-parser +cc demos/generate-parser +shared demos/sample-files/defs # leak lexer # leak recursive-ascent @@ -29,4 +38,5 @@ cc fusion # leak slr-table # leak clr-table # leak lr-parser -leak fusion +# leak instant-parser +leak "generate-parser bin/defs.so" diff --git a/clr-table.c b/clr-table.c index 68ce3a4..37b2cd6 100644 --- a/clr-table.c +++ b/clr-table.c @@ -13,11 +13,12 @@ extern void *xcalloc(size_t n, size_t size); #include "parts/grammar.h" #include "parts/util-tables.h" -// Implements +// Implements #include "parts/table.h" #define TABLE_CAP 64 -struct action *table[TABLE_CAP]; +static struct action *__table[TABLE_CAP]; +struct action **table = __table; size_t table_states = 0; int table_fill(); @@ -33,7 +34,11 @@ struct item { symbol lookahead; }; +#ifndef _LAZY_LALR static int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx && i1->lookahead == i2->lookahead) ? 1 : 0; } +#else +static int item_core_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx) ? 1 : 0; } +#endif #define SEEN_SETS_CAP 64 static struct { @@ -60,6 +65,7 @@ static size_t itemset_handle(struct item *set, size_t nset) { // 1. is set in seen_sets for(size_t i = 0; i < nseen_sets; i++) { +#ifndef _LAZY_LALR if(seen_sets[i].nitems != nset) continue; int _seen = 0; @@ -70,6 +76,15 @@ static size_t itemset_handle(struct item *set, size_t nset) if(!_seen) break; } if(_seen) return seen_sets[i].state; +#else + int _same_core = 1; + for(size_t j = 0; j < nset; j++) { + for(size_t k = 0; k < seen_sets[i].nitems; k++) + if(!item_core_eq(&seen_sets[i].items[k], &set[j])) _same_core = 0; + if(!_same_core) break; + } + if(_same_core) return seen_sets[i].state; +#endif } // 2. add set to seen_sets @@ -120,7 +135,7 @@ static int itemset_insert(size_t state, struct item *initial_set, size_t ninitia if(dot == p->nRHS) { if(sym != 0) continue; // do it 1 time - if(table_insert(state, closure_set[j].lookahead, (struct action) { + if(table_insert(state, closure_set[j].lookahead, (struct action){ ACTION_REDUCE, closure_set[j].prod_idx})) return 1; continue; @@ -137,7 +152,7 @@ static int itemset_insert(size_t state, struct item *initial_set, size_t ninitia } if(ngoto == 0) continue; - + if(symbol_is_input_end(sym)) { if(table_insert(state, sym, (struct action){ACTION_ACCEPT, 0})) return 1; @@ -164,13 +179,13 @@ static size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_ int **is_in_closure = xcalloc(total_productions, sizeof(*is_in_closure)); for(size_t i = 0; i < total_productions; i++) is_in_closure[i] = xcalloc(total_symbols, sizeof(**is_in_closure)); - + #define add_item(prod_idx, lookahead, ...) do { \ if(is_in_closure[prod_idx][lookahead]) break; \ is_in_closure[prod_idx][lookahead] = 1; \ if(nout++ >= nout_max) goto cleanup; \ out_set[nout-1] = __VA_ARGS__; \ - } while(0) + } while(0) for(size_t i = 0; i < nout; i++) { @@ -185,7 +200,7 @@ static size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_ (struct item){j, 0, out_set[i].lookahead}); continue; } - + symbol next_symbol = p->RHS[out_set[i].dot+1]; for(size_t terminal = 0; terminal < total_symbols; terminal++) { if(!symbol_is_terminal(terminal)) continue; @@ -233,12 +248,12 @@ void table_free() #ifdef _CLR_TABLE_STANDALONE #ifndef CHOOSE_GRAMMAR -#define CHOOSE_GRAMMAR 0 // 0 or 1 +#define CHOOSE_GRAMMAR 1 // 0 or 1 #endif // implement symbol.h enum symbol { -#if (CHOOSE_GRAMMAR == 0) +#if (CHOOSE_GRAMMAR == 0) ID, EQUAL, STAR, END_INPUT, EP, E, L, R, @@ -251,7 +266,7 @@ enum symbol { #endif }; -const size_t total_symbols = SYMBOLS_END; +size_t total_symbols = SYMBOLS_END; int symbol_is_terminal(symbol s) { return s < EP; } int symbol_is_input_end(symbol s) { return s == END_INPUT; } @@ -292,34 +307,34 @@ int main(void) #endif -/* +---+------------------------+ +/* +----------------------------+ + * | CLR | + * +---+------------------------+ * | - | 0 1 2 3 4 5 | * +---+------------------------+ - * | 0 | s1 s2 g4 g5 | - * | 1 | s1 s2 g3 | - * | 2 | r3 r3 | - * | 3 | r2 r2 | - * | 4 | a | - * | 5 | s6 s7 g9 | - * | 6 | s6 s7 g8 | - * | 7 | r3 | - * | 8 | r2 | + * | 0 | s1 s2 g4 g5 | + * | 1 | s1 s2 g3 | + * | 2 | r3 r3 | + * | 3 | r2 r2 | + * | 4 | a | + * | 5 | s6 s7 g9 | + * | 6 | s6 s7 g8 | + * | 7 | r3 | + * | 8 | r2 | * | 9 | r1 | * +---+------------------------+ - - 0 1 2 3 4 5 6 7 - 0 s1 s2 g5 g6 g13 - 1 r4 r4 - 2 s1 s2 g3 g4 - 3 r5 r5 - 4 r3 r3 - 5 a - 6 s7 r5 - 7 s8 s9 g10 g12 - 8 r4 - 9 s8 s9 g10 g11 -10 r5 -11 r3 -12 r1 -13 r2 + * + * +----------------------------+ + * | LALR | + * +---+------------------------+ + * | - | 0 1 2 3 4 5 | + * +---+------------------------+ + * | 0 | s1 s2 g4 g5 | + * | 1 | s1 s2 g3 | + * | 2 | r3 r3 | + * | 3 | r2 r2 | + * | 4 | a | + * | 5 | s1 s2 g6 | + * | 6 | r1 | + * +---+------------------------+ */ diff --git a/demos/generate-parser.c b/demos/generate-parser.c new file mode 100644 index 0000000..b0a769d --- /dev/null +++ b/demos/generate-parser.c @@ -0,0 +1,25 @@ +#include +#include + +#include + +struct action **table; +size_t table_states; +size_t total_symbols; + +int main(int argc, char **argv) +{ + if(argc != 2) return 1; + + void *handle = dlopen(argv[1], RTLD_LAZY); + if(!handle) { puts(dlerror()); return 1; } + + table = *(typeof(&table))dlsym(handle, "table"); + table_states = *(typeof(&table_states))dlsym(handle, "table_states"); + total_symbols = *(typeof(&total_symbols))dlsym(handle, "total_symbols"); + + table_print(); + + dlclose(handle); + return 0; +} diff --git a/demos/instant-parser.c b/demos/instant-parser.c new file mode 100644 index 0000000..3a82b07 --- /dev/null +++ b/demos/instant-parser.c @@ -0,0 +1,61 @@ +#include +#include + +#include "parts/symbol.h" +enum symbol { + PLUS = 0, + MINUS, + LPAREN, + RPAREN, + N0, N1, + END_INPUT, + + EP, E, T, N, + SYMBOLS_END, +}; + +size_t total_symbols = SYMBOLS_END; + +int symbol_is_terminal(symbol s) { return s < EP; } +int symbol_is_input_end(symbol s) { return s == END_INPUT; } +int symbol_is_valid(symbol s) { return s < SYMBOLS_END; } + +#include "parts/grammar.h" +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +struct production grammar[] = { + PROD(EP, ->, E, END_INPUT), + PROD(E, -->, E, PLUS, T), + PROD(E, -->, E, MINUS, T), + PROD(E, -->, T), + PROD(T, -->, LPAREN, E, RPAREN), + PROD(T, -->, N), + PROD(N, -->, N0), + PROD(N, -->, N1), +}; + +const size_t total_productions = sizeof(grammar)/sizeof(*grammar); + +#include "parts/toklist.h" +static symbol toklist[] = {N0, PLUS, N1, MINUS, N0, END_INPUT}; +static symbol *tok = toklist; + +symbol toklist_eat() { return *(tok++); } // unsafe +symbol toklist_peek() { return *tok; } // unsafe + +#include "slr-table.c" +#include "util-tables.c" +#include "lr-parser.c" + +int main(void) +{ + util_tables_fill(); + table_fill(); + + int r = 0; + r = lr_parser(); + + table_free(); + util_tables_free(); + + return r; +} diff --git a/demos/sample-files/defs.c b/demos/sample-files/defs.c new file mode 100644 index 0000000..797c86d --- /dev/null +++ b/demos/sample-files/defs.c @@ -0,0 +1,23 @@ +#include +#include + +#include + +struct action **table = (struct action *([])){ + (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 12},{2, 11},{2, 7},}, + (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 4},{2, 11},{2, 7},}, + (struct action[]){{3, 6},{3, 6},{0, 0},{3, 6},{0, 0},{0, 0},{3, 6},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{3, 7},{3, 7},{0, 0},{3, 7},{0, 0},{0, 0},{3, 7},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{1, 5},{1, 8},{0, 0},{1, 10},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{0, 0},{2, 6},{2, 7},}, + (struct action[]){{3, 1},{3, 1},{0, 0},{3, 1},{0, 0},{0, 0},{3, 1},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{3, 5},{3, 5},{0, 0},{3, 5},{0, 0},{0, 0},{3, 5},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{0, 0},{2, 9},{2, 7},}, + (struct action[]){{3, 2},{3, 2},{0, 0},{3, 2},{0, 0},{0, 0},{3, 2},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{3, 4},{3, 4},{0, 0},{3, 4},{0, 0},{0, 0},{3, 4},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{3, 3},{3, 3},{0, 0},{3, 3},{0, 0},{0, 0},{3, 3},{0, 0},{0, 0},{0, 0},{0, 0},}, + (struct action[]){{1, 5},{1, 8},{0, 0},{0, 0},{0, 0},{0, 0},{4, 0},{0, 0},{0, 0},{0, 0},{0, 0},}, +}; + +size_t total_symbols = 10; +size_t table_states = 13; diff --git a/fusion.c b/fusion.c deleted file mode 100644 index 3dc7dba..0000000 --- a/fusion.c +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include - -#include "parts/symbol.h" -enum symbol { - PLUS = 0, - MINUS, - LPAREN, - RPAREN, - N0, N1, - END_INPUT, - - EP, E, T, N, - SYMBOLS_END, -}; - -const size_t total_symbols = SYMBOLS_END; - -int symbol_is_terminal(symbol s) { return s < EP; } -int symbol_is_input_end(symbol s) { return s == END_INPUT; } -int symbol_is_valid(symbol s) { return s < SYMBOLS_END; } - -#include "parts/grammar.h" -#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} -struct production grammar[] = { - PROD(EP, ->, E, END_INPUT), - PROD(E, -->, E, PLUS, T), - PROD(E, -->, E, MINUS, T), - PROD(E, -->, T), - PROD(T, -->, LPAREN, E, RPAREN), - PROD(T, -->, N), - PROD(N, -->, N0), - PROD(N, -->, N1), -}; - -const size_t total_productions = sizeof(grammar)/sizeof(*grammar); - -#include "parts/toklist.h" -static symbol toklist[] = {N0, PLUS, N1, MINUS, N0, END_INPUT}; -static symbol *tok = toklist; - -symbol toklist_eat() { return *(tok++); } // unsafe -symbol toklist_peek() { return *tok; } // unsafe - -#include "slr-table.c" -#include "util-tables.c" -#include "lr-parser.c" - -int main(void) -{ - util_tables_fill(); - table_fill(); - - int r = 0; - r = lr_parser(); - - table_free(); - util_tables_free(); - - return r; -} diff --git a/lr-parser.c b/lr-parser.c index 41cc45b..2358164 100644 --- a/lr-parser.c +++ b/lr-parser.c @@ -19,10 +19,10 @@ int lr_parser() #define pop() (--stack_head) #define eat() toklist_eat() #define peek() ({ symbol s = toklist_peek(); if(!symbol_is_valid(s)) return 1; s; }) - + while(1) { struct action a = table[(size_t)*stack_head][peek()]; - + switch(a.type) { case ACTION_SHIFT: push(eat()); @@ -30,7 +30,7 @@ int lr_parser() break; case ACTION_REDUCE: for(size_t i = 0; i < 2*grammar[a.arg].nRHS; i++) pop(); - + symbol lhs = grammar[a.arg].LHS; struct action a_goto = table[(size_t)*stack_head][lhs]; if(a_goto.type != ACTION_GOTO) { @@ -39,7 +39,7 @@ int lr_parser() lhs, (size_t)*stack_head); return 1; } - + push(lhs); push(a_goto.arg); break; @@ -72,7 +72,7 @@ enum symbol { SYMBOLS_END, }; -const size_t total_symbols = SYMBOLS_END; +size_t total_symbols = SYMBOLS_END; int symbol_is_valid(symbol s) { return s < SYMBOLS_END; } @@ -92,7 +92,7 @@ struct production grammar[] = { const size_t total_productions = sizeof(grammar)/sizeof(*grammar); // implement table.h -struct action *table[] = { +struct action **table = (struct action *([])){ (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 12},{2, 11},{2, 7},}, (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 4},{2, 11},{2, 7},}, (struct action[]){{3, 6},{3, 6},{0, 0},{3, 6},{0, 0},{0, 0},{3, 6},{0, 0},{0, 0},{0, 0},{0, 0},}, diff --git a/parts/symbol.h b/parts/symbol.h index d3cb5cd..5f865ec 100644 --- a/parts/symbol.h +++ b/parts/symbol.h @@ -2,7 +2,7 @@ #define SYMBOL_H typedef unsigned int symbol; -extern const size_t total_symbols; +extern size_t total_symbols; // extern char *symbol_to_str[] ... extern int symbol_is_terminal(symbol s); diff --git a/parts/table.h b/parts/table.h index 3b54312..23c61dc 100644 --- a/parts/table.h +++ b/parts/table.h @@ -8,7 +8,7 @@ extern struct action { ACTION_ACCEPT } type; size_t arg; -} *table[]; +} **table; extern size_t table_states; diff --git a/parts/toklist.h b/parts/toklist.h index b6fd10d..9a7b8ce 100644 --- a/parts/toklist.h +++ b/parts/toklist.h @@ -5,5 +5,5 @@ extern symbol toklist_eat(); extern symbol toklist_peek(); - + #endif diff --git a/parts/util-tables.h b/parts/util-tables.h index a6d788a..66ecab5 100644 --- a/parts/util-tables.h +++ b/parts/util-tables.h @@ -5,4 +5,7 @@ extern int **follow; extern int **first; +extern void util_tables_fill(); +extern void util_tables_free(); + #endif diff --git a/slr-table.c b/slr-table.c index 76399d7..d5f4505 100644 --- a/slr-table.c +++ b/slr-table.c @@ -17,7 +17,8 @@ extern void *xcalloc(size_t n, size_t size); #include "parts/table.h" #define TABLE_CAP 64 -struct action *table[TABLE_CAP]; +static struct action *__table[TABLE_CAP]; +struct action **table = __table; size_t table_states = 0; int table_fill(); @@ -224,7 +225,7 @@ enum symbol { SYMBOLS_END, }; -const size_t total_symbols = SYMBOLS_END; +size_t total_symbols = SYMBOLS_END; int symbol_is_terminal(symbol s) { return s < EP; } int symbol_is_input_end(symbol s) { return s == END_INPUT; } @@ -251,9 +252,9 @@ int main(void) { util_tables_fill(); table_fill(); - + table_print(); - + table_free(); util_tables_free(); } diff --git a/util-tables.c b/util-tables.c index 83a015c..09df914 100644 --- a/util-tables.c +++ b/util-tables.c @@ -27,20 +27,20 @@ void util_tables_fill() follow[i] = xcalloc(total_symbols, sizeof(*follow[i])); first[i] = xcalloc(total_symbols, sizeof(*follow[i])); } - + for(size_t sym = 0; sym < total_symbols; sym++) { first[sym][sym] = 1; } - + for(size_t i = 0; i < total_productions; i++) { struct production *p = &grammar[i]; - + first[p->LHS][p->RHS[0]] = 1; - + for(size_t j = 1; j < p->nRHS; j++) follow[p->RHS[j-1]][p->RHS[j]] = 1; } - + #define set(e) if((e) != 1) changed = (e) = 1 int changed; @@ -66,7 +66,7 @@ void util_tables_free() free(follow[i]); free(first[i]); } - + free(follow); free(first); } @@ -75,7 +75,7 @@ void util_tables_print() { char *s1 = "-- FIRST --"; printf(" %*s%s\n", (int)((total_symbols*3) - strlen(s1))/2, "", s1); - + printf(" "); for(size_t i = 0; i < total_symbols; i++) printf("%2zu ", i); printf("\n"); @@ -88,7 +88,7 @@ void util_tables_print() } printf("\n"); - + char *s2 = "-- FOLLOW --"; printf(" %*s%s\n", (int)((total_symbols*3) - strlen(s2))/2, "", s2); @@ -119,7 +119,7 @@ enum symbol { SYMBOLS_END, }; -const size_t total_symbols = SYMBOLS_END; +size_t total_symbols = SYMBOLS_END; int symbol_is_terminal(symbol s) { return s < E; } int symbol_is_nonterminal(symbol s) { return s >= E; } -- cgit v1.2.3