diff options
author | kartofen <mladenovnasko0@gmail.com> | 2025-07-06 21:18:28 +0300 |
---|---|---|
committer | kartofen <mladenovnasko0@gmail.com> | 2025-07-06 21:18:28 +0300 |
commit | b4261ac4a79651bd8fd1bd03d38bbf49ee89b615 (patch) | |
tree | 3b86e7afe5d16899d691122c3271944dc41d324e | |
parent | b65dd53885eabb8f39a3115039563edc08efb2b4 (diff) |
modular table building
-rwxr-xr-x | build.sh | 37 | ||||
-rw-r--r-- | clr-table.c | 38 | ||||
-rw-r--r-- | demos/generate-parser.c | 89 | ||||
-rw-r--r-- | demos/instant-parser.c | 6 | ||||
-rw-r--r-- | demos/sample-files/arithmetic-defs.c (renamed from demos/sample-files/defs.c) | 6 | ||||
-rw-r--r-- | demos/sample-files/lalr-defs.c | 35 | ||||
-rw-r--r-- | lr-parser.c | 9 | ||||
-rw-r--r-- | parts/symbol.h | 7 | ||||
-rw-r--r-- | parts/table.h | 5 | ||||
-rw-r--r-- | slr-table.c | 39 | ||||
-rw-r--r-- | util-tables.c | 4 |
11 files changed, 186 insertions, 89 deletions
@@ -1,22 +1,30 @@ -#!/bin/sh +#!/bin/sh -set -xe +set -e + +function log +{ + >&2 echo "-> $@" + "$@" +} function cc { mkdir -p bin - gcc -Wall -Wextra -Wpedantic -I. -g $2 $1.c -o bin/$(basename $1) + [ -n "$3" ] && NAME=$3 || NAME=$(basename $1) + log gcc -Wall -Wextra -Wpedantic -I. -g $2 $1.c -o "bin/$NAME" } function shared { mkdir -p bin - gcc -Wall -Wextra -Wpedantic -I. -shared -fPIC $2 $1.c -o bin/$(basename $1).so + [ -n "$3" ] && NAME=$3 || NAME=$(basename $1) + log gcc -Wall -Wextra -Wpedantic -I. -g -shared -fPIC $2 $1.c -o "bin/$NAME.so" } function leak { - valgrind --leak-check=full --show-leak-kinds=all -s bin/$1 $2 + log valgrind --leak-check=full --show-leak-kinds=all -s bin/$1 $2 } # cc lexer -D_LEXER_STANDALONE @@ -25,7 +33,7 @@ function leak # cc util-tables -D_UTIL_TABLES_STANDALONE # cc slr-table -D_SLR_TABLE_STANDALONE # cc clr-table -D_CLR_TABLE_STANDALONE -# cc clr-table "-D_CLR_TABLE_STANDALONE -D_LAZY_LALR" +# cc clr-table "-D_CLR_TABLE_STANDALONE -D_LAZY_LALR" lalr-table # cc lr-parser -D_LR_PARSER_STANDALONE # cc demos/instant-parser @@ -36,14 +44,21 @@ function leak # leak util-tables # leak slr-table # leak clr-table +# leak lalr-table # leak lr-parser # leak instant-parser #--------------------------------------------------------------------------------------------------# -cc demos/generate-parser -shared demos/sample-files/defs +cc demos/generate-parser "-rdynamic" + +shared demos/sample-files/lalr-defs +shared demos/sample-files/arithmetic-defs + +shared slr-table +shared clr-table +shared clr-table -D_LAZY_LALR lalr-table -leak "generate-parser bin/defs.so" > bin/generated.c -cc demos/sample-files/parser-skeleton # this includes bin/generated.c -leak parser-skeleton "0-1+(1+0)-1+0" +leak "generate-parser -t slr-table bin/arithmetic-defs.so" > bin/generated.c +cc demos/sample-files/parser-skeleton "" parser # this includes bin/generated.c +leak parser "0-1+(1+0)-1+0" diff --git a/clr-table.c b/clr-table.c index fb68826..ca23799 100644 --- a/clr-table.c +++ b/clr-table.c @@ -1,9 +1,9 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> +#include <setjmp.h> -// TODO: idk but a good parser should just exit(1) -// like in itemset_handle() +// TODO: handle conflicts (itemset_insert returns 2 on table problem) #ifndef XCALLOC_IMPLEMENTED #define XCALLOC_IMPLEMENTED @@ -25,8 +25,7 @@ static struct action *__table[TABLE_CAP]; struct action **table = __table; size_t table_states = 0; -int table_fill(); -void table_free(); +static jmp_buf fail_jmpbuf; static void table_allocate() { for(size_t i = 0; i < TABLE_CAP; i++) table[i] = xcalloc(total_symbols, sizeof(*table[i])); } static void table_deallocate() { for(size_t i = 0; i < TABLE_CAP; i++) free(table[i]); } @@ -98,7 +97,7 @@ static size_t itemset_handle(struct item *set, size_t nset) // 2. add set to seen_sets if(nseen_sets >= SEEN_SETS_CAP) { fprintf(stderr, "ERROR: SEEN_SET_CAP exceeded\n"); - exit(1); + longjmp(fail_jmpbuf, 1); } seen_sets[nseen_sets].items = xcalloc(nset, sizeof(*set)); @@ -114,12 +113,12 @@ static size_t itemset_handle(struct item *set, size_t nset) #endif if(new_state >= TABLE_CAP) { fprintf(stderr, "ERROR: TABLE_CAP exceeded\n"); - exit(1); + longjmp(fail_jmpbuf, 1); } if(itemset_insert(new_state, set, nset)) { fprintf(stderr, "ERROR: itemset_insert failed\n"); - exit(1); + longjmp(fail_jmpbuf, 1); } return new_state; @@ -247,16 +246,18 @@ static int table_insert(size_t state, symbol sym, struct action a) return 0; } -int table_fill() +IMPLEMENT_FUNCPTR(int, table_fill, ()) { table_allocate(); + // Possible bug: wrong lookahead for kernel item of state, // but it may not really matter - itemset_handle((struct item[]){{0, 0, 0}}, 1); - return 0; + int r = setjmp(fail_jmpbuf); + if(r == 0) itemset_handle((struct item[]){{0, 0, 0}}, 1); + return r; } -void table_free() +IMPLEMENT_FUNCPTR(void, table_free, ()) { seen_sets_free(); table_deallocate(); @@ -285,8 +286,8 @@ enum symbol { size_t total_symbols = SYMBOLS_END; -IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s), { return s < EP; }) -IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s), { return s == END_INPUT; }) +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } // implement grammar.h #define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} @@ -314,18 +315,17 @@ size_t total_productions = sizeof(_grammar)/sizeof(*_grammar); int main(void) { + int r = 0; + util_tables_fill(); - table_fill(); + if((r = table_fill())) goto cleanup; table_print(); - for(size_t i = 0; i < nseen_sets; i++) - { - printf("\nSTATE: %zu\n", i); - itemset_print(seen_sets[i].items, seen_sets[i].nitems); - } +cleanup: table_free(); util_tables_free(); + return r; } #endif diff --git a/demos/generate-parser.c b/demos/generate-parser.c index d32a697..5766223 100644 --- a/demos/generate-parser.c +++ b/demos/generate-parser.c @@ -2,30 +2,35 @@ #include <stdlib.h> #include <dlfcn.h> +#define DEFUALT_PATH "./bin" +#define DEFUALT_TYPE "lalr-table" + #include "parts/symbol.h" -#include "parts/table.h" size_t total_symbols; int (*symbol_is_terminal)(symbol s); int (*symbol_is_input_end)(symbol s); int (*symbol_is_valid)(symbol s); #include "parts/grammar.h" - struct production *grammar; size_t total_productions; char **semantic_action_str; -#include "util-tables.c" +#include "parts/table.h" +struct action **table; +size_t table_states; +int (*table_fill)(); +void (*table_free)(); -#define _LAZY_LALR -#include "clr-table.c" +#include "util-tables.c" void *xdlsym(void *handle, char *sym) { void *r = dlsym(handle, sym); if(!r) { fprintf(stderr, "ERROR: Symbol '%s' was not found\n", sym); + dlclose(handle); exit(1); } return r; @@ -34,26 +39,60 @@ void *xdlsym(void *handle, char *sym) #define GET_VARIABLE(var, handle) \ var = *(typeof(&var))xdlsym(handle, #var) -int main(int argc, char **argv) + +char *modpath(char *name) { - if(argc != 2) return 1; + static char fullpath[128]; + char *path = getenv("GENERATE_PARSER_PATH"); + if(!path) path = DEFUALT_PATH; - void *handle = dlopen(argv[1], RTLD_LAZY); - if(!handle) { puts(dlerror()); return 1; } + snprintf(fullpath, 128, "%s/%s.so", path, name); + return fullpath; +} - GET_VARIABLE(total_symbols, handle); - GET_VARIABLE(symbol_is_terminal, handle); - GET_VARIABLE(symbol_is_input_end, handle); - GET_VARIABLE(symbol_is_valid, handle); - GET_VARIABLE(grammar, handle); - GET_VARIABLE(total_productions, handle); - GET_VARIABLE(semantic_action_str, handle); +int main(int argc, char **argv) +{ + if(argc < 2) return -1; + + void *table_handle; + void *def_handle; + + if(argc == 2) { + table_handle = dlopen(modpath(DEFUALT_TYPE), RTLD_LAZY); + if(!table_handle) { fputs(dlerror(), stderr); return 1; } + def_handle = dlopen(argv[1], RTLD_LAZY); + if(!def_handle) { fputs(dlerror(), stderr); return 1; } + } else if(argc == 4 && + argv[1][0] == '-' && argv[1][1] == 't') { + table_handle = dlopen(modpath(argv[2]), RTLD_LAZY); + if(!table_handle) { fputs(dlerror(), stderr); return 1; } + def_handle = dlopen(argv[3], RTLD_LAZY); + if(!def_handle) { fputs(dlerror(), stderr); return 1; } + } else return -1; + + GET_VARIABLE(table, table_handle); + GET_VARIABLE(table_states, table_handle); + GET_VARIABLE(table_fill, table_handle); + GET_VARIABLE(table_free, table_handle); + + GET_VARIABLE(total_symbols, def_handle); + GET_VARIABLE(symbol_is_terminal, def_handle); + GET_VARIABLE(symbol_is_input_end, def_handle); + GET_VARIABLE(symbol_is_valid, def_handle); + GET_VARIABLE(grammar, def_handle); + GET_VARIABLE(total_productions, def_handle); + GET_VARIABLE(semantic_action_str, def_handle); util_tables_fill(); - table_fill() && (fprintf(stderr, "ERROR: Table couldn't be generated\n"), exit(1), 1); + + int r = 0; + if((r = table_fill())) { + fprintf(stderr, "ERROR: Table couldn't be generated\n"); + goto cleanup; + } printf("size_t total_symbols = %zu;\n", total_symbols); - printf("IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s), {return s < total_symbols;})\n"); + printf("IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) {return s < total_symbols;}\n"); printf("struct production _grammar[] = {\n"); grammar_print_cstyle(); @@ -68,25 +107,25 @@ int main(int argc, char **argv) printf("size_t table_states = %zu;\n", table_states); for(size_t i = 0; i < total_productions; i++) { - printf("#define A(n) (*(stack_head-3*%d+3*n-1))\n", grammar[i].nRHS-1); - printf("int __prod%d_action(int *stack_head)\n", i); + printf("#define A(n) (*(stack_head-3*%zu+3*n-1))\n", grammar[i].nRHS-1); + printf("int __prod%zu_action(int *stack_head)\n", i); printf("{ int v;\n"); printf(semantic_action_str[i]); printf("return v; }\n"); printf("#undef A\n"); } - printf("typedef int (*semantic_action_fn)(int *stack_head);\n"); printf("semantic_action_fn *semantic_actions = (semantic_action_fn[]){\n"); for(size_t i = 0; i < total_productions; i++) - printf("__prod%d_action, ", i); + printf("__prod%zu_action, ", i); printf("};"); +cleanup: table_free(); util_tables_free(); - - dlclose(handle); - return 0; + dlclose(table_handle); + dlclose(def_handle); + return r; } diff --git a/demos/instant-parser.c b/demos/instant-parser.c index 57d1f2f..06990f6 100644 --- a/demos/instant-parser.c +++ b/demos/instant-parser.c @@ -16,9 +16,9 @@ enum symbol { size_t total_symbols = SYMBOLS_END; -IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s), { return s < EP; }) -IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s), { return s == END_INPUT; }) -IMPLEMENT_FUNCPTR(int, symbol_is_valid,(symbol s), { return s < SYMBOLS_END; }) +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } +IMPLEMENT_FUNCPTR(int, symbol_is_valid,(symbol s)) { return s < SYMBOLS_END; } #include "parts/grammar.h" #define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} diff --git a/demos/sample-files/defs.c b/demos/sample-files/arithmetic-defs.c index 76a0534..9a0db22 100644 --- a/demos/sample-files/defs.c +++ b/demos/sample-files/arithmetic-defs.c @@ -16,9 +16,9 @@ enum symbol { size_t total_symbols = SYMBOLS_END; -IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s), { return s < EP; }) -IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s), { return s == END_INPUT; }) -IMPLEMENT_FUNCPTR(int, symbol_is_valid,(symbol s), { return s < SYMBOLS_END; }) +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } +IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } #include "parts/grammar.h" #define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} diff --git a/demos/sample-files/lalr-defs.c b/demos/sample-files/lalr-defs.c new file mode 100644 index 0000000..6147661 --- /dev/null +++ b/demos/sample-files/lalr-defs.c @@ -0,0 +1,35 @@ +#include <stdio.h> +#include <stddef.h> + +#include "parts/symbol.h" +enum symbol { + ID, EQUAL, STAR, + END_INPUT, + EP, E, L, R, + SYMBOLS_END, +}; + +size_t total_symbols = SYMBOLS_END; + +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } +IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } + +#include "parts/grammar.h" +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +static struct production _grammar[] = { + PROD(EP, ->, E, END_INPUT), + PROD(E, -->, L, EQUAL, R), + PROD(E, -->, R), + PROD(L, -->, STAR, R), + PROD(L, -->, ID), + PROD(R, -->, L), +}; + +struct production *grammar = _grammar; +size_t total_productions = sizeof(_grammar)/sizeof(*_grammar); + +// #include "???.h" +char **semantic_action_str = (char *([])){ + "", "", "", "", "", "", +}; diff --git a/lr-parser.c b/lr-parser.c index f7197c5..0d244fd 100644 --- a/lr-parser.c +++ b/lr-parser.c @@ -1,8 +1,9 @@ #include <stdio.h> #include <stdlib.h> -// TODO: check erros and fail safely and -// see connection with the lexer +// TODO: - check erros and fail safely and +// see connection with the lexer +// - standardize this // Requirements #include "parts/symbol.h" @@ -86,7 +87,7 @@ enum symbol { size_t total_symbols = SYMBOLS_END; -IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s), { return s < SYMBOLS_END; }) +IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } // implement grammar.h #define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} @@ -130,7 +131,7 @@ static symbol *tok = toklist; symbol toklist_eat() { return *(tok++); } // unsafe symbol toklist_peek() { return *tok; } // unsafe -int none(int *stack_head) {(void)stack_head; return 0;}; +int none(int *stack_head) {(void)stack_head; return 0;} semantic_action_fn *semantic_actions = (semantic_action_fn[]){none, none, none, none, none, none, none, none}; int main(void) diff --git a/parts/symbol.h b/parts/symbol.h index 2e9c30c..dea457e 100644 --- a/parts/symbol.h +++ b/parts/symbol.h @@ -10,8 +10,9 @@ extern int (*symbol_is_input_end)(symbol s); extern int (*symbol_is_valid)(symbol s); // helper macro -#define IMPLEMENT_FUNCPTR(type, name, args, ...) \ - type __##name args __VA_ARGS__ \ - type (*name) args = __##name; +#define IMPLEMENT_FUNCPTR(type, name, args) \ + type __##name args; \ + type (*name) args = __##name; \ + type __##name args #endif diff --git a/parts/table.h b/parts/table.h index 44d1e12..486466f 100644 --- a/parts/table.h +++ b/parts/table.h @@ -12,8 +12,9 @@ extern struct action { extern size_t table_states; -/*extern*/ int table_fill(); -/*extern*/ void table_free(); +extern int (*table_fill)(); +extern void (*table_free)(); + void table_print(); void table_print_cstyle(); diff --git a/slr-table.c b/slr-table.c index fde8e78..4e12efd 100644 --- a/slr-table.c +++ b/slr-table.c @@ -1,8 +1,8 @@ #include <stdio.h> #include <stdlib.h> +#include <setjmp.h> -// TODO: idk but a good parser shouldn't just exit(1) -// like in itemset_handle() +// TODO: handle conflicts (itemset_insert returns 2 on table problem) #ifndef XCALLOC_IMPLEMENTED #define XCALLOC_IMPLEMENTED @@ -24,8 +24,7 @@ static struct action *__table[TABLE_CAP]; struct action **table = __table; size_t table_states = 0; -int table_fill(); -void table_free(); +static jmp_buf fail_jmpbuf; static void table_allocate() { for(size_t i = 0; i < TABLE_CAP; i++) table[i] = xcalloc(total_symbols, sizeof(*table[i])); } static void table_deallocate() { for(size_t i = 0; i < TABLE_CAP; i++) free(table[i]); } @@ -78,7 +77,7 @@ static size_t itemset_handle(struct item *set, size_t nset) // 2. add set to seen_sets if(nseen_sets >= SEEN_SETS_CAP) { fprintf(stderr, "ERROR: SEEN_SET_CAP exceeded\n"); - exit(1); + longjmp(fail_jmpbuf, 1); } seen_sets[nseen_sets].items = xcalloc(nset, sizeof(*set)); @@ -90,12 +89,12 @@ static size_t itemset_handle(struct item *set, size_t nset) size_t new_state = seen_sets[nseen_sets++].state = table_states++; if(new_state >= TABLE_CAP) { fprintf(stderr, "ERROR: TABLE_CAP exceeded\n"); - exit(1); + longjmp(fail_jmpbuf, 1); } if(itemset_insert(new_state, set, nset)) { fprintf(stderr, "ERROR: itemset_insert failed\n"); - exit(1); + longjmp(fail_jmpbuf, 1); } return new_state; @@ -125,7 +124,7 @@ static int itemset_insert(size_t state, struct item *initial_set, size_t ninitia if(!follow[p->LHS][sym]) continue; if(table_insert(state, sym, (struct action){ ACTION_REDUCE, closure_set[j].prod_idx})) - return 1; + return 2; continue; } @@ -143,7 +142,7 @@ static int itemset_insert(size_t state, struct item *initial_set, size_t ninitia if(symbol_is_input_end(sym)) { if(table_insert(state, sym, (struct action){ACTION_ACCEPT, 0})) - return 1; + return 2; continue; } @@ -151,7 +150,7 @@ static int itemset_insert(size_t state, struct item *initial_set, size_t ninitia if(table_insert(state, sym, (struct action){ symbol_is_terminal(sym) ? ACTION_SHIFT : ACTION_GOTO, - new_state})) return 1; + new_state})) return 2; } return 0; @@ -200,14 +199,16 @@ static int table_insert(size_t state, symbol sym, struct action a) return 0; } -int table_fill() +IMPLEMENT_FUNCPTR(int, table_fill, ()) { table_allocate(); - itemset_handle((struct item[]){{0, 0}}, 1); - return 0; + + int r = setjmp(fail_jmpbuf); + if(r == 0) itemset_handle((struct item[]){{0, 0}}, 1); + return r; } -void table_free() +IMPLEMENT_FUNCPTR(void, table_free, ()) { seen_sets_free(); table_deallocate(); @@ -230,8 +231,8 @@ enum symbol { size_t total_symbols = SYMBOLS_END; -IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s), { return s < EP; }) -IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s), { return s == END_INPUT; }) +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } // implement grammar.h #define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} @@ -254,13 +255,17 @@ size_t total_productions = sizeof(_grammar)/sizeof(*_grammar); int main(void) { + int r = 0; + util_tables_fill(); - table_fill(); + if((r = table_fill())) goto cleanup; table_print(); +cleanup: table_free(); util_tables_free(); + return r; } #endif diff --git a/util-tables.c b/util-tables.c index 507153d..cfebf68 100644 --- a/util-tables.c +++ b/util-tables.c @@ -21,8 +21,8 @@ int **first; void util_tables_fill() { - follow = calloc(total_symbols, sizeof(*follow)); - first = calloc(total_symbols, sizeof(*follow)); + follow = xcalloc(total_symbols, sizeof(*follow)); + first = xcalloc(total_symbols, sizeof(*follow)); for(size_t i = 0; i < total_symbols; i++) { follow[i] = xcalloc(total_symbols, sizeof(*follow[i])); first[i] = xcalloc(total_symbols, sizeof(*follow[i])); |