diff options
| author | kartofen <kartofen.mail.0@protonmail.com> | 2025-07-20 01:32:24 +0300 |
|---|---|---|
| committer | kartofen <kartofen.mail.0@protonmail.com> | 2025-07-20 01:32:24 +0300 |
| commit | 34357640c0676f33ad13aac1fe28effc6f6e47c7 (patch) | |
| tree | d656ee61da7d7a0b133aa57311266653ef100569 /demos/sample-files | |
| parent | 174e9b35ce3b6e99e500907f1bb24c6f31f481bf (diff) | |
start of grammar parsing
Diffstat (limited to 'demos/sample-files')
| -rw-r--r-- | demos/sample-files/calc-defs.c | 41 | ||||
| -rw-r--r-- | demos/sample-files/calc-skeleton.c | 91 | ||||
| -rw-r--r-- | demos/sample-files/gram-defs.c | 65 | ||||
| -rw-r--r-- | demos/sample-files/gram-skeleton.c | 149 |
4 files changed, 290 insertions, 56 deletions
diff --git a/demos/sample-files/calc-defs.c b/demos/sample-files/calc-defs.c index 7321a88..103b69e 100644 --- a/demos/sample-files/calc-defs.c +++ b/demos/sample-files/calc-defs.c @@ -1,17 +1,19 @@ -#include <stddef.h> // size_t +#include "util/util.h" +#define SYMBOLS(X) \ + X(PLUS) X(MINUS) X(TIMES) X(MORE) X(LESS) X(EQUA) \ + X(LPAREN) X(RPAREN) \ + X(QMARK) X(COLON) \ + X(NUM) X(END_INPUT) \ + \ + X(EP) X(E) \ + X(SYMBOLS_END) #include "parts/symbol.h" -enum symbol { - PLUS, MINUS, TIMES, - LPAREN, RPAREN, - NUM, END_INPUT, - - EP, E, T, - SYMBOLS_END, -}; - +enum symbol { SYMBOLS(X_TO_ENUM) }; size_t total_symbols = SYMBOLS_END; +extern char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) }; + IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } @@ -24,6 +26,12 @@ static struct production _grammar[] = { PROD(E, -->, E, MINUS, E), PROD(E, -->, E, TIMES, E), PROD(E, -->, LPAREN, E, RPAREN), + PROD(E, -->, MINUS, E), + PROD(E, -->, E, QMARK, E, COLON, E), + PROD(E, -->, E, QMARK, E), + PROD(E, -->, E, MORE, E), + PROD(E, -->, E, LESS, E), + PROD(E, -->, E, EQUA, E), PROD(E, -->, NUM), }; @@ -37,20 +45,29 @@ char **semantic_action_str = (char *([])){ "v = A(0) - A(2);", "v = A(0) * A(2);", "v = A(1);", + "v = - A(1);", + "v = A(0) ? A(2) : A(4);", + "v = A(0) ? A(2) : 0;", + "v = A(0) > A(1);", + "v = A(0) < A(1);", + "v = A(0) = A(1);", "v = A(0);", }; #include "parts/precedence.h" - struct precedence_def { int flag; int *list; size_t nlist; }; #define PREC(f, ...) {f, (int[]){__VA_ARGS__}, sizeof((int[]){__VA_ARGS__})/sizeof(int)} +#define USE_PROD(n) (~(n)) struct precedence_def _precedence_defs[] = { + PREC(0, QMARK), + PREC(0, USE_PROD(7)), + PREC(PRECEDENCE_LEFT_ASSOC, MORE, LESS, EQUA), PREC(PRECEDENCE_LEFT_ASSOC, MINUS, PLUS), - PREC(PRECEDENCE_LEFT_ASSOC, TIMES), + PREC(PRECEDENCE_LEFT_ASSOC, TIMES, USE_PROD(5)), PREC(PRECEDENCE_LEFT_ASSOC, LPAREN, RPAREN), }; diff --git a/demos/sample-files/calc-skeleton.c b/demos/sample-files/calc-skeleton.c index b0cbf00..6e5d2d5 100644 --- a/demos/sample-files/calc-skeleton.c +++ b/demos/sample-files/calc-skeleton.c @@ -1,30 +1,57 @@ #include <stdio.h> #include <string.h> +#include <stdint.h> #include <ctype.h> -#include "lr-parser.c" -#include "bin/a.c" // generated - -// these should come from a generated -// header file by the parser generator -#include "parts/symbol.h" -enum symbol { - PLUS, MINUS, TIMES, - LPAREN, RPAREN, - NUM, END_INPUT, - - EP, E, T, - SYMBOLS_END, -}; +// generated +#include "bin/calc.h" +#include "bin/calc.c" +#include "parts/toklist.h" static struct token { symbol s; int v; } tok; +static char *next_token(char *str); + +symbol token_sym(struct token *t) { return t->s; } +intptr_t token_val(struct token *t) { return (intptr_t)t->v; } + +static char *input; + +struct token *toklist_eat() +{ + static struct token t; + t = tok; + input = next_token(input); + return &t; +} + +struct token *toklist_peek() { return &tok; } + +#include "lr-parser.c" + +int main(int argc, char **argv) +{ + if(argc != 2) return 1; + + input = next_token(argv[1]); + + intptr_t value; + if(lr_parser(&value)) return 1; + + printf("INPUT: '%s'\n", argv[1]); + printf("OUTPUT: %jd\n", value); + + return 0; +} + +// LEXER + static inline int issep(char c) { - return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-' || c == '*';; + return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '>' || c == '<' || c == '=' || c == '?' || c == ':'; } static inline int tillsep(char *str) @@ -59,6 +86,11 @@ static char *next_token(char *str) case '-': tok.s = MINUS; break; case '+': tok.s = PLUS; break; case '*': tok.s = TIMES; break; + case '>': tok.s = MORE; break; + case '<': tok.s = LESS; break; + case '=': tok.s = EQUA; break; + case '?': tok.s = QMARK; break; + case ':': tok.s = COLON; break; } } else if(c0 >= '0' && c0 <= '9') { // num tok.s = NUM; @@ -68,32 +100,3 @@ static char *next_token(char *str) return str+off; } - -static char *input; - -symbol token_sym(struct token *t) { return t->s; } -int token_val(struct token *t) { return t->v; } - -struct token *toklist_eat() -{ - static struct token t; - t = tok; - input = next_token(input); - return &t; -} -struct token *toklist_peek() { return &tok; } - -int main(int argc, char **argv) -{ - if(argc != 2) return 1; - - input = next_token(argv[1]); - - int value; - if(lr_parser(&value)) return 1; - - printf("INPUT: '%s'\n", argv[1]); - printf("OUTPUT: %d\n", value); - - return 0; -} diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c new file mode 100644 index 0000000..733a866 --- /dev/null +++ b/demos/sample-files/gram-defs.c @@ -0,0 +1,65 @@ +#include "util/util.h" +#define SYMBOLS(X) \ + X(COLON) X(PIPE) X(SEMICOL) X(DOT) \ + X(D_LEFT) X(D_RIGHT) X(D_TERMINAL) X(D_NONTERM) \ + X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \ + \ + X(Sp) X(S) X(Slist) X(Prod) X(Prec) \ + X(Prodlist) X(Idenlist) X(IorN) X(IorNlist) \ + X(SYMBOLS_END) \ + +#include "parts/symbol.h" +enum symbol { SYMBOLS(X_TO_ENUM) }; +size_t total_symbols = SYMBOLS_END; + +char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) }; + +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < Sp; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } +IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } + +#include "parts/grammar.h" +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +#define GRAMMAR_ACTION_DEF(X) \ + X(PROD(Sp, ->, Slist, END_INPUT), "v = 0;") \ + X(PROD(Slist, -->, S, SEMICOL, Slist), "v = 0;") \ + X(PROD(Slist, -->, S, DOT), "v = 0;") \ + X(PROD(S, -->, Prod), "v = 0;") \ + X(PROD(S, -->, Prec), "v = 0;") \ + X(PROD(Idenlist, -->, IDEN, Idenlist), "v = 0;") \ + X(PROD(Idenlist, -->, IDEN), "v = 0;") \ + X(PROD(Prod, -->, IDEN, COLON, Prodlist), "v = 0;") \ + X(PROD(Prodlist, -->, Idenlist, ACTION, PIPE, Prodlist), "printf(\"ACTION: '%s'\\n\", A(1));") \ + X(PROD(Prodlist, -->, Idenlist, ACTION), "printf(\"ACTION: '%s'\\n\", A(1));") \ + X(PROD(Prec, -->, D_TERMINAL, Idenlist), "v = 0;") \ + X(PROD(Prec, -->, D_NONTERM, Idenlist), "v = 0;") \ + X(PROD(Prec, -->, D_LEFT, IorNlist), "v = 0;") \ + X(PROD(Prec, -->, D_RIGHT, IorNlist), "v = 0;") \ + X(PROD(IorNlist, -->, IorN, IorNlist), "v = 0;") \ + X(PROD(IorNlist, -->, IorN), "v = 0;") \ + X(PROD(IorN, -->, IDEN), "v = 0;") \ + X(PROD(IorN, -->, NUM), "v = 0;") + +#define X_GRAMMAR(G, A) G, +#define X_ACTION(G, A) A, + +static struct production _grammar[] = { + GRAMMAR_ACTION_DEF(X_GRAMMAR) +}; + +struct production *grammar = _grammar; +size_t total_productions = sizeof(_grammar) / sizeof(*_grammar); + +// #include "???.h" +char **semantic_action_str = (char *([])){ + GRAMMAR_ACTION_DEF(X_ACTION) +}; + +#include "parts/precedence.h" +struct precedence_def { + int flag; + int *list; + size_t nlist; +}; +struct precedence_def *precedence_defs = NULL; +size_t nprecedence_defs = 0; diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c new file mode 100644 index 0000000..89ef6b4 --- /dev/null +++ b/demos/sample-files/gram-skeleton.c @@ -0,0 +1,149 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#define ARENA_IMPLEMENTATION +#include "util/arena.h" + +static char buf[1024]; +static struct arena_ctx global_arena = ARENA_CTX_INIT(buf, sizeof(buf)); +static void *xalloc(size_t sz) { + void *addr = arena_allocate(&global_arena, sz); + if(!addr) { + fprintf(stderr, "ERROR: Arena empty\n"); exit(1); + } + + return addr; +} + +// generated +#include "bin/gram.h" +#include "bin/gram.c" + +#include "parts/toklist.h" +struct token { + symbol s; + intptr_t v; +} tok; + +static char *next_token(char *str); + +symbol token_sym(struct token *t) { return t->s; } +intptr_t token_val(struct token *t) { return t->v; } + +static char *input = (char []){ + "-left B;" + "-right C;" + "-left D;" + "" + "A: B {a}" + " | C N {d}." +}; + +struct token *toklist_eat() +{ + static struct token t; + t = tok; + input = next_token(input); + return &t; +} + +struct token *toklist_peek() { return &tok; } + +#include "lr-parser.c" + +int main(void) +{ + input = next_token(input); + + intptr_t value; + if(lr_parser(&value)) { + return 1; + } + + printf("OUTPUT: %jd\n", value); + return 0; +} + +// STR UTIL + +#define strdup(...) _strdup(__VA_ARGS__) +static inline char *_strdup(char *str) +{ + return memcpy(xalloc(strlen(str) + 1), str, strlen(str)+1); +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +// LEXER + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == ':' || c == '|' || c == ';' || c == '.' || c == '-' || c == '{'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static char *next_token(char *str) +{ + if(!str) return str; + + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') tok.s = END_INPUT; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case ':': tok.s = COLON; break; + case '|': tok.s = PIPE; break; + case ';': tok.s = SEMICOL; break; + case '.': tok.s = DOT; break; + case '-': + off = tillsep(++str); + char *s = substring(str, off); + if(strcmp(s, "left") == 0) tok.s = D_LEFT; + else if(strcmp(s, "right") == 0) tok.s = D_RIGHT; + else if(strcmp(s, "terminal") == 0) tok.s = D_TERMINAL; + else if(strcmp(s, "nonterminal") == 0) tok.s = D_NONTERM; + else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; } + break; + case '{': + for(int c = 1; c != 0; off++) + if(str[off] == '\0') { fprintf(stderr, "ERROR: No closing '{'\n"); goto fail; } + else if(str[off] == '{') c++; + else if(str[off] == '}') c--; + tok.s = ACTION; + tok.v = (intptr_t)strdup(substring(str, off)); + break; + } + } else if(isalpha(c0)) { // iden or named symbol + tok.s = IDEN; + tok.v = (intptr_t)strdup(substring(str, off)); + } else if(c0 >= '0' && c0 <= '9') { // num + tok.s = NUM; + tok.v = (intptr_t)atoi(substring(str, off)); + } + } + + return str+off; + +fail: + tok.s = END_INPUT; + return NULL; +} |
