diff options
| author | kartofen <kartofen.mail.0@protonmail.com> | 2025-07-20 01:32:24 +0300 |
|---|---|---|
| committer | kartofen <kartofen.mail.0@protonmail.com> | 2025-07-20 01:32:24 +0300 |
| commit | 34357640c0676f33ad13aac1fe28effc6f6e47c7 (patch) | |
| tree | d656ee61da7d7a0b133aa57311266653ef100569 /demos | |
| parent | 174e9b35ce3b6e99e500907f1bb24c6f31f481bf (diff) | |
start of grammar parsing
Diffstat (limited to 'demos')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | demos/generate-parser.c | 48 |
| -rw-r--r-- | demos/sample-files/calc-defs.c | 41 |
| -rw-r--r-- | demos/sample-files/calc-skeleton.c | 91 |
| -rw-r--r-- | demos/sample-files/gram-defs.c | 65 |
| -rw-r--r-- | demos/sample-files/gram-skeleton.c | 149 |
5 files changed, 324 insertions, 70 deletions
diff --git a/demos/generate-parser.c b/demos/generate-parser.c index 23201fa..48fa48c 100644 --- a/demos/generate-parser.c +++ b/demos/generate-parser.c @@ -5,12 +5,13 @@ #include <unistd.h> // getopt #include <assert.h> -#define DEFUALT_PATH "./bin" -#define DEFUALT_TYPE "lalr-table" #define DEFAULT_OUTPUT "bin/a" +#define DEFUALT_MODPATH "./bin" +#define DEFUALT_TYPE "lalr-table" #include "parts/symbol.h" size_t total_symbols; +char **symbol_to_str; int (*symbol_is_terminal)(symbol s); int (*symbol_is_input_end)(symbol s); int (*symbol_is_valid)(symbol s); @@ -42,8 +43,7 @@ void (*table_free)(); #include "util-tables.c" - -void *xdlsym(void *handle, char *sym) +static void *xdlsym(void *handle, char *sym) { void *r = dlsym(handle, sym); if(!r) { @@ -58,18 +58,18 @@ void *xdlsym(void *handle, char *sym) var = *(typeof(&var))xdlsym(handle, #var) -char *modpath(char *name) +static char *modpath(char *name) { static char fullpath[128]; // TODO: search the GENERATE_PARSER_PATH env var - char *path = DEFUALT_PATH; + char *path = DEFUALT_MODPATH; assert(snprintf(fullpath, 128, "%s/%s.so", path, name) < 128); return fullpath; } -char *add_extension(char *str, char *ext) +static char *add_extension(char *str, char *ext) { static char full[128]; assert((strlen(str) + strlen(ext) + 1) <= 128); @@ -77,7 +77,7 @@ char *add_extension(char *str, char *ext) return strcat(full, ext); } -void set_stdout(char *filename) +static void set_stdout(char *filename) { if(!filename) filename = "/dev/tty"; assert(freopen(filename, "w", stdout)); @@ -114,6 +114,7 @@ int main(int argc, char **argv) GET_VARIABLE(table_free, table_handle); GET_VARIABLE(total_symbols, def_handle); + GET_VARIABLE(symbol_to_str, def_handle); GET_VARIABLE(symbol_is_terminal, def_handle); GET_VARIABLE(symbol_is_input_end, def_handle); GET_VARIABLE(symbol_is_valid, def_handle); @@ -132,8 +133,13 @@ int main(int argc, char **argv) goto cleanup; } + table_print(); + set_stdout(add_extension(output_path, ".c")); 
printf("size_t total_symbols = %zu;\n", total_symbols); + printf("char **symbol_to_string = (char *([])){\n"); + for(size_t i = 0; i < total_symbols; i++) printf("\"%s\", ", symbol_to_str[i]); + printf("};\n"); printf("IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) {return s < total_symbols;}\n"); printf("struct production _grammar[] = {\n"); @@ -157,19 +163,33 @@ int main(int argc, char **argv) for(size_t i = 0; i < total_productions; i++) { printf("#define A(n) (*(stack_head-3*%zu+3*n-1))\n", grammar[i].nRHS-1); - printf("int __prod%zu_action(int *stack_head)\n", i); - printf("{ int v;\n"); - printf(semantic_action_str[i]); + printf("intptr_t __prod%zu_action(intmax_t *stack_head)\n", i); + printf("{ intptr_t v;\n"); + puts(semantic_action_str[i]); printf("return v; }\n"); printf("#undef A\n"); } - printf("typedef int (*semantic_action_fn)(int *stack_head);\n"); + printf("typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head);\n"); printf("semantic_action_fn *semantic_actions = (semantic_action_fn[]){\n"); for(size_t i = 0; i < total_productions; i++) printf("__prod%zu_action, ", i); printf("};"); + + set_stdout(add_extension(output_path, ".h")); + printf("#ifndef GENERATED_H\n"); + printf("#define GENERATED_H\n"); + printf("#include \"parts/symbol.h\"\n"); + printf("enum symbol {\n"); + for(size_t i = 0; i < total_symbols; i++) printf("%s, ", symbol_to_str[i]); + printf("};\n"); + printf("#include \"parts/grammar.h\"\n"); + printf("#include \"parts/table.h\"\n"); + printf("#include <stdint.h>\n"); + printf("typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head);\n"); + printf("extern semantic_action_fn *semantic_actions;\n"); + printf("#endif\n"); set_stdout(NULL); cleanup: @@ -189,10 +209,10 @@ void precedence_tables_fill() for(size_t i = 0; i < nprecedence_defs; i++) for(size_t j = 0; j < precedence_defs[i].nlist; j++) - if(precedence_defs[i].flag >= 0) + if(precedence_defs[i].list[j] >= 0) precedence_symbol[precedence_defs[i].list[j]] = 
PRECEDENCE_SET(precedence_defs[i].flag, i+1); else - precedence_production[precedence_defs[i].list[j]] = PRECEDENCE_SET(~precedence_defs[i].flag, i+1); + precedence_production[~precedence_defs[i].list[j]] = PRECEDENCE_SET(precedence_defs[i].flag, i+1); for(size_t i = 0; i < total_productions; i++) { if(precedence_production[i]) continue; diff --git a/demos/sample-files/calc-defs.c b/demos/sample-files/calc-defs.c index 7321a88..103b69e 100644 --- a/demos/sample-files/calc-defs.c +++ b/demos/sample-files/calc-defs.c @@ -1,17 +1,19 @@ -#include <stddef.h> // size_t +#include "util/util.h" +#define SYMBOLS(X) \ + X(PLUS) X(MINUS) X(TIMES) X(MORE) X(LESS) X(EQUA) \ + X(LPAREN) X(RPAREN) \ + X(QMARK) X(COLON) \ + X(NUM) X(END_INPUT) \ + \ + X(EP) X(E) \ + X(SYMBOLS_END) #include "parts/symbol.h" -enum symbol { - PLUS, MINUS, TIMES, - LPAREN, RPAREN, - NUM, END_INPUT, - - EP, E, T, - SYMBOLS_END, -}; - +enum symbol { SYMBOLS(X_TO_ENUM) }; size_t total_symbols = SYMBOLS_END; +extern char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) }; + IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } @@ -24,6 +26,12 @@ static struct production _grammar[] = { PROD(E, -->, E, MINUS, E), PROD(E, -->, E, TIMES, E), PROD(E, -->, LPAREN, E, RPAREN), + PROD(E, -->, MINUS, E), + PROD(E, -->, E, QMARK, E, COLON, E), + PROD(E, -->, E, QMARK, E), + PROD(E, -->, E, MORE, E), + PROD(E, -->, E, LESS, E), + PROD(E, -->, E, EQUA, E), PROD(E, -->, NUM), }; @@ -37,20 +45,29 @@ char **semantic_action_str = (char *([])){ "v = A(0) - A(2);", "v = A(0) * A(2);", "v = A(1);", + "v = - A(1);", + "v = A(0) ? A(2) : A(4);", + "v = A(0) ? 
A(2) : 0;", + "v = A(0) > A(1);", + "v = A(0) < A(1);", + "v = A(0) = A(1);", "v = A(0);", }; #include "parts/precedence.h" - struct precedence_def { int flag; int *list; size_t nlist; }; #define PREC(f, ...) {f, (int[]){__VA_ARGS__}, sizeof((int[]){__VA_ARGS__})/sizeof(int)} +#define USE_PROD(n) (~(n)) struct precedence_def _precedence_defs[] = { + PREC(0, QMARK), + PREC(0, USE_PROD(7)), + PREC(PRECEDENCE_LEFT_ASSOC, MORE, LESS, EQUA), PREC(PRECEDENCE_LEFT_ASSOC, MINUS, PLUS), - PREC(PRECEDENCE_LEFT_ASSOC, TIMES), + PREC(PRECEDENCE_LEFT_ASSOC, TIMES, USE_PROD(5)), PREC(PRECEDENCE_LEFT_ASSOC, LPAREN, RPAREN), }; diff --git a/demos/sample-files/calc-skeleton.c b/demos/sample-files/calc-skeleton.c index b0cbf00..6e5d2d5 100644 --- a/demos/sample-files/calc-skeleton.c +++ b/demos/sample-files/calc-skeleton.c @@ -1,30 +1,57 @@ #include <stdio.h> #include <string.h> +#include <stdint.h> #include <ctype.h> -#include "lr-parser.c" -#include "bin/a.c" // generated - -// these should come from a generated -// header file by the parser generator -#include "parts/symbol.h" -enum symbol { - PLUS, MINUS, TIMES, - LPAREN, RPAREN, - NUM, END_INPUT, - - EP, E, T, - SYMBOLS_END, -}; +// generated +#include "bin/calc.h" +#include "bin/calc.c" +#include "parts/toklist.h" static struct token { symbol s; int v; } tok; +static char *next_token(char *str); + +symbol token_sym(struct token *t) { return t->s; } +intptr_t token_val(struct token *t) { return (intptr_t)t->v; } + +static char *input; + +struct token *toklist_eat() +{ + static struct token t; + t = tok; + input = next_token(input); + return &t; +} + +struct token *toklist_peek() { return &tok; } + +#include "lr-parser.c" + +int main(int argc, char **argv) +{ + if(argc != 2) return 1; + + input = next_token(argv[1]); + + intptr_t value; + if(lr_parser(&value)) return 1; + + printf("INPUT: '%s'\n", argv[1]); + printf("OUTPUT: %jd\n", value); + + return 0; +} + +// LEXER + static inline int issep(char c) { - return isspace(c) || c 
== '\0' || c == '(' || c == ')' || c == '+' || c == '-' || c == '*';; + return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '>' || c == '<' || c == '=' || c == '?' || c == ':'; } static inline int tillsep(char *str) @@ -59,6 +86,11 @@ static char *next_token(char *str) case '-': tok.s = MINUS; break; case '+': tok.s = PLUS; break; case '*': tok.s = TIMES; break; + case '>': tok.s = MORE; break; + case '<': tok.s = LESS; break; + case '=': tok.s = EQUA; break; + case '?': tok.s = QMARK; break; + case ':': tok.s = COLON; break; } } else if(c0 >= '0' && c0 <= '9') { // num tok.s = NUM; @@ -68,32 +100,3 @@ static char *next_token(char *str) return str+off; } - -static char *input; - -symbol token_sym(struct token *t) { return t->s; } -int token_val(struct token *t) { return t->v; } - -struct token *toklist_eat() -{ - static struct token t; - t = tok; - input = next_token(input); - return &t; -} -struct token *toklist_peek() { return &tok; } - -int main(int argc, char **argv) -{ - if(argc != 2) return 1; - - input = next_token(argv[1]); - - int value; - if(lr_parser(&value)) return 1; - - printf("INPUT: '%s'\n", argv[1]); - printf("OUTPUT: %d\n", value); - - return 0; -} diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c new file mode 100644 index 0000000..733a866 --- /dev/null +++ b/demos/sample-files/gram-defs.c @@ -0,0 +1,65 @@ +#include "util/util.h" +#define SYMBOLS(X) \ + X(COLON) X(PIPE) X(SEMICOL) X(DOT) \ + X(D_LEFT) X(D_RIGHT) X(D_TERMINAL) X(D_NONTERM) \ + X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \ + \ + X(Sp) X(S) X(Slist) X(Prod) X(Prec) \ + X(Prodlist) X(Idenlist) X(IorN) X(IorNlist) \ + X(SYMBOLS_END) \ + +#include "parts/symbol.h" +enum symbol { SYMBOLS(X_TO_ENUM) }; +size_t total_symbols = SYMBOLS_END; + +char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) }; + +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < Sp; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, 
(symbol s)) { return s == END_INPUT; } +IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } + +#include "parts/grammar.h" +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +#define GRAMMAR_ACTION_DEF(X) \ + X(PROD(Sp, ->, Slist, END_INPUT), "v = 0;") \ + X(PROD(Slist, -->, S, SEMICOL, Slist), "v = 0;") \ + X(PROD(Slist, -->, S, DOT), "v = 0;") \ + X(PROD(S, -->, Prod), "v = 0;") \ + X(PROD(S, -->, Prec), "v = 0;") \ + X(PROD(Idenlist, -->, IDEN, Idenlist), "v = 0;") \ + X(PROD(Idenlist, -->, IDEN), "v = 0;") \ + X(PROD(Prod, -->, IDEN, COLON, Prodlist), "v = 0;") \ + X(PROD(Prodlist, -->, Idenlist, ACTION, PIPE, Prodlist), "printf(\"ACTION: '%s'\\n\", A(1));") \ + X(PROD(Prodlist, -->, Idenlist, ACTION), "printf(\"ACTION: '%s'\\n\", A(1));") \ + X(PROD(Prec, -->, D_TERMINAL, Idenlist), "v = 0;") \ + X(PROD(Prec, -->, D_NONTERM, Idenlist), "v = 0;") \ + X(PROD(Prec, -->, D_LEFT, IorNlist), "v = 0;") \ + X(PROD(Prec, -->, D_RIGHT, IorNlist), "v = 0;") \ + X(PROD(IorNlist, -->, IorN, IorNlist), "v = 0;") \ + X(PROD(IorNlist, -->, IorN), "v = 0;") \ + X(PROD(IorN, -->, IDEN), "v = 0;") \ + X(PROD(IorN, -->, NUM), "v = 0;") + +#define X_GRAMMAR(G, A) G, +#define X_ACTION(G, A) A, + +static struct production _grammar[] = { + GRAMMAR_ACTION_DEF(X_GRAMMAR) +}; + +struct production *grammar = _grammar; +size_t total_productions = sizeof(_grammar) / sizeof(*_grammar); + +// #include "???.h" +char **semantic_action_str = (char *([])){ + GRAMMAR_ACTION_DEF(X_ACTION) +}; + +#include "parts/precedence.h" +struct precedence_def { + int flag; + int *list; + size_t nlist; +}; +struct precedence_def *precedence_defs = NULL; +size_t nprecedence_defs = 0; diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c new file mode 100644 index 0000000..89ef6b4 --- /dev/null +++ b/demos/sample-files/gram-skeleton.c @@ -0,0 +1,149 @@ +#include <stdio.h> +#include <stdlib.h> 
+#include <string.h> +#include <ctype.h> + +#define ARENA_IMPLEMENTATION +#include "util/arena.h" + +static char buf[1024]; +static struct arena_ctx global_arena = ARENA_CTX_INIT(buf, sizeof(buf)); +static void *xalloc(size_t sz) { + void *addr = arena_allocate(&global_arena, sz); + if(!addr) { + fprintf(stderr, "ERROR: Arena empty\n"); exit(1); + } + + return addr; +} + +// generated +#include "bin/gram.h" +#include "bin/gram.c" + +#include "parts/toklist.h" +struct token { + symbol s; + intptr_t v; +} tok; + +static char *next_token(char *str); + +symbol token_sym(struct token *t) { return t->s; } +intptr_t token_val(struct token *t) { return t->v; } + +static char *input = (char []){ + "-left B;" + "-right C;" + "-left D;" + "" + "A: B {a}" + " | C N {d}." +}; + +struct token *toklist_eat() +{ + static struct token t; + t = tok; + input = next_token(input); + return &t; +} + +struct token *toklist_peek() { return &tok; } + +#include "lr-parser.c" + +int main(void) +{ + input = next_token(input); + + intptr_t value; + if(lr_parser(&value)) { + return 1; + } + + printf("OUTPUT: %jd\n", value); + return 0; +} + +// STR UTIL + +#define strdup(...) _strdup(__VA_ARGS__) +static inline char *_strdup(char *str) +{ + return memcpy(xalloc(strlen(str) + 1), str, strlen(str)+1); +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +// LEXER + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == ':' || c == '|' || c == ';' || c == '.' 
|| c == '-' || c == '{'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static char *next_token(char *str) +{ + if(!str) return str; + + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') tok.s = END_INPUT; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case ':': tok.s = COLON; break; + case '|': tok.s = PIPE; break; + case ';': tok.s = SEMICOL; break; + case '.': tok.s = DOT; break; + case '-': + off = tillsep(++str); + char *s = substring(str, off); + if(strcmp(s, "left") == 0) tok.s = D_LEFT; + else if(strcmp(s, "right") == 0) tok.s = D_RIGHT; + else if(strcmp(s, "terminal") == 0) tok.s = D_TERMINAL; + else if(strcmp(s, "nonterminal") == 0) tok.s = D_NONTERM; + else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; } + break; + case '{': + for(int c = 1; c != 0; off++) + if(str[off] == '\0') { fprintf(stderr, "ERROR: No closing '{'\n"); goto fail; } + else if(str[off] == '{') c++; + else if(str[off] == '}') c--; + tok.s = ACTION; + tok.v = (intptr_t)strdup(substring(str, off)); + break; + } + } else if(isalpha(c0)) { // iden or named symbol + tok.s = IDEN; + tok.v = (intptr_t)strdup(substring(str, off)); + } else if(c0 >= '0' && c0 <= '9') { // num + tok.s = NUM; + tok.v = (intptr_t)atoi(substring(str, off)); + } + } + + return str+off; + +fail: + tok.s = END_INPUT; + return NULL; +} |
