From d028cc9c04cf46256166434bdea68d5f5c6d310f Mon Sep 17 00:00:00 2001 From: kartofen Date: Tue, 8 Jul 2025 18:25:29 +0300 Subject: simple calculator example --- build.sh | 51 +++++++++----- demos/generate-parser.c | 2 +- demos/lexer.c | 116 +++++++++++++++++++++++++++++++ demos/sample-files/arithmetic-skeleton.c | 62 +++++++++++++++++ demos/sample-files/calc-defs.c | 44 ++++++++++++ demos/sample-files/calc-skeleton.c | 100 ++++++++++++++++++++++++++ demos/sample-files/parser-skeleton.c | 52 -------------- lexer.c | 85 ---------------------- lr-parser.c | 60 +++++++++++----- parts/table.h | 2 +- parts/toklist.h | 8 +-- 11 files changed, 401 insertions(+), 181 deletions(-) create mode 100644 demos/lexer.c create mode 100644 demos/sample-files/arithmetic-skeleton.c create mode 100644 demos/sample-files/calc-defs.c create mode 100644 demos/sample-files/calc-skeleton.c delete mode 100644 demos/sample-files/parser-skeleton.c delete mode 100644 lexer.c diff --git a/build.sh b/build.sh index 91c7fdc..9f1899b 100755 --- a/build.sh +++ b/build.sh @@ -27,40 +27,53 @@ function leak log valgrind --leak-check=full --show-leak-kinds=all -s bin/$1 $2 } -cc dict -D_DICT_STANDALONE -cc lexer -D_LEXER_STANDALONE -# cc recursive/recursive-ascent -# cc recursive/recursive-ascent-descent -# cc util-tables -D_UTIL_TABLES_STANDALONE -# cc slr-table -D_SLR_TABLE_STANDALONE -# cc clr-table -D_CLR_TABLE_STANDALONE -# cc clr-table "-D_CLR_TABLE_STANDALONE -D_LAZY_LALR" lalr-table -# cc lr-parser -D_LR_PARSER_STANDALONE -# cc demos/instant-parser +# cc dict -D_DICT_STANDALONE +# leak dict +# cc demos/lexer -D_LEXER_STANDALONE +# leak lexer -leak lexer -leak dict # leak recursive-ascent +# cc recursive/recursive-ascent + # leak recursive-ascent-descent +# cc recursive/recursive-ascent-descent + +# cc util-tables -D_UTIL_TABLES_STANDALONE # leak util-tables + +# cc slr-table -D_SLR_TABLE_STANDALONE # leak slr-table + +# cc clr-table -D_CLR_TABLE_STANDALONE # leak clr-table + +# cc clr-table "-D_CLR_TABLE_STANDALONE -D_LAZY_LALR" lalr-table # leak lalr-table + +# cc lr-parser -D_LR_PARSER_STANDALONE # leak lr-parser -# leak instant-parser + +# cc demos/instant-parser # not working +# leak instant-parser # not working #--------------------------------------------------------------------------------------------------# cc demos/generate-parser "-rdynamic" -shared demos/sample-files/lalr-defs -shared demos/sample-files/arithmetic-defs - shared slr-table shared clr-table shared clr-table -D_LAZY_LALR lalr-table +shared demos/sample-files/lalr-defs + +# --- Arithemitc example --- +# shared demos/sample-files/arithmetic-defs +# leak "generate-parser -t lalr-table bin/arithmetic-defs.so" +# cc demos/sample-files/arithmetic-skeleton "" parser +# leak parser "0-1+(1+0)-1+0" -leak "generate-parser -t lalr-table bin/arithmetic-defs.so" -cc demos/sample-files/parser-skeleton "" parser # this includes bin/generated.c -leak parser "0-1+(1+0)-1+0" +# --- Calc example --- +shared demos/sample-files/calc-defs +leak "generate-parser -t lalr-table bin/calc-defs.so" +cc demos/sample-files/calc-skeleton "" parser +leak parser "4237-100+(17498+70)-1321+82910" diff --git a/demos/generate-parser.c b/demos/generate-parser.c index 7856db6..fad3b93 100644 --- a/demos/generate-parser.c +++ b/demos/generate-parser.c @@ -66,7 +66,7 @@ char *add_extension(char *str, char *ext) void set_stdout(char *filename) { if(!filename) filename = "/dev/tty"; - assert(freopen(filename, "a+", stdout)); + assert(freopen(filename, "w", stdout)); } int main(int argc, char **argv) diff --git a/demos/lexer.c b/demos/lexer.c new file mode 100644 index 0000000..a206066 --- /dev/null +++ b/demos/lexer.c @@ -0,0 +1,116 @@ +#include +#include +#include +#include + +#include "dict.c" + +struct token { + enum symbol { + LPAREN, RPAREN, STRING, IDEN, NUM, + HERE, THERE, WOW, TEST + } sym; + + union { + char *iden; + int i; + char *str; + }; +}; + +const struct string_token strings[] = { + {"here", HERE}, + {"there", THERE}, + {"wow", WOW}, + {"test", TEST}, +}; +const size_t nstrings = sizeof(strings)/sizeof(*strings); + +const uint8_t char_to_bit[256] = { + ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7, + ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13, + ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19, + ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25, + ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 +}; + +static struct token tok; + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == '}' || c == '{' || c == '"'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +static char *next_token(char *str) +{ + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') return NULL; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case '{': tok.sym = LPAREN; break; + case '}': tok.sym = RPAREN; break; + case '"': + while(str[off++] != '"') if(str[off] == '\0') return NULL; + tok.sym = STRING; + tok.str = strdup(substring(str+1, off-2)); + } + } else if(isalpha(c0)) { // iden or named symbol + char *substr = substring(str, off); + if((tok.sym = dict_check(substr)) == -1) { + tok.sym = IDEN; + tok.iden = strdup(substr); + } + } else if(c0 >= '0' && c0 <= '9') { // num + tok.sym = NUM; + tok.i = atoi(substring(str, off)); + } + } + + return str+off; +} + +int main(void) +{ + dict_compile(); + + char *str = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher} fdsfj\" here {therern{there{tok {wow} {"; + while((str = next_token(str))) + switch(tok.sym) { + case LPAREN: printf("{ "); break; + case RPAREN: printf("} "); break; + case STRING: printf("\"%s\" ", tok.str); free(tok.str); break; + case IDEN: printf("'%s' ", tok.iden); free(tok.iden); break; + case NUM: printf("%d ", tok.i); break; + case HERE: printf("HERE "); break; + case THERE: printf("THERE "); break; + case WOW: printf("WOW "); break; + case TEST: printf("TEST "); break; + default: printf("WHAT??%d??", tok.sym); break; + } + + printf("\n"); + + dict_free(); + return 0; +} diff --git a/demos/sample-files/arithmetic-skeleton.c b/demos/sample-files/arithmetic-skeleton.c new file mode 100644 index 0000000..ef5ec2f --- /dev/null +++ b/demos/sample-files/arithmetic-skeleton.c @@ -0,0 +1,62 @@ +#include ae +#include + +#include "lr-parser.c" +#include "bin/generated.c" + +#include "parts/toklist.h" + +enum symbol { + PLUS = 0, + MINUS, + LPAREN, + RPAREN, + N0, N1, + END_INPUT, + + EP, E, T, N, + SYMBOLS_END, +}; + +struct token { + symbol s; +}; + +static inline struct token *char_to_token(char c) +{ + static struct token t; + + switch(c) { + case '+': t = (struct token){PLUS}; break; + case '-': t = (struct token){MINUS}; break; + case '(': t = (struct token){LPAREN}; break; + case ')': t = (struct token){RPAREN}; break; + case '0': t = (struct token){N0}; break; + case '1': t = (struct token){N1}; break; + case 0 : t = (struct token){END_INPUT}; break; + default: fprintf(stderr, "ERROR: Unknown character '%c'\n", c); exit(1); + } + + return &t; +} + +static char *input; + +symbol token_sym(struct token *t) { return t->s; } +int token_val(struct token *t) { return 0; } +struct token *toklist_eat() { return char_to_token(*(input++)); } // unsafe +struct token *toklist_peek() { return char_to_token(*input); } // unsafe + +int main(int argc, char **argv) +{ + if(argc != 2) { + fprintf(stderr, "ERROR: Not enough arguments\n"); + return 1; + } + + input = argv[1]; + + printf("INPUT: '%s'\n", input); + printf("OUTPUT: %d\n", lr_parser()); + return 0; +} diff --git a/demos/sample-files/calc-defs.c b/demos/sample-files/calc-defs.c new file mode 100644 index 0000000..de1f705 --- /dev/null +++ b/demos/sample-files/calc-defs.c @@ -0,0 +1,44 @@ +#include + +#include "parts/symbol.h" +enum symbol { + PLUS = 0, + MINUS, + LPAREN, + RPAREN, + NUM, + END_INPUT, + + EP, E, T, + SYMBOLS_END, +}; + +size_t total_symbols = SYMBOLS_END; + +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } +IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } + +#include "parts/grammar.h" +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +static struct production _grammar[] = { + PROD(EP, ->, E, END_INPUT), + PROD(E, -->, E, PLUS, T), + PROD(E, -->, E, MINUS, T), + PROD(E, -->, T), + PROD(T, -->, LPAREN, E, RPAREN), + PROD(T, -->, NUM), +}; + +struct production *grammar = _grammar; +size_t total_productions = sizeof(_grammar)/sizeof(*_grammar); + +// #include "???.h" +char **semantic_action_str = (char *([])){ + "v = A(0);", + "v = A(0) + A(2);", + "v = A(0) - A(2);", + "v = A(0);", + "v = A(1);", + "v = A(0);", +}; diff --git a/demos/sample-files/calc-skeleton.c b/demos/sample-files/calc-skeleton.c new file mode 100644 index 0000000..29f181b --- /dev/null +++ b/demos/sample-files/calc-skeleton.c @@ -0,0 +1,100 @@ +#include +#include +#include + +#include "lr-parser.c" +#include "bin/a.c" + +// from calc-defs.c +#include "parts/symbol.h" +enum symbol { + PLUS = 0, + MINUS, + LPAREN, + RPAREN, + NUM, + END_INPUT, + + EP, E, T, + SYMBOLS_END, +}; + +static struct token { + symbol s; + int v; +} tok; + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +static char *next_token(char *str) +{ + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') tok.s = END_INPUT; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case '(': tok.s = LPAREN; break; + case ')': tok.s = RPAREN; break; + case '-': tok.s = MINUS; break; + case '+': tok.s = PLUS; break; + } + } else if(c0 >= '0' && c0 <= '9') { // num + tok.s = NUM; + tok.v = atoi(substring(str, off)); + } + } + + return str+off; +} + +static char *input; + +symbol token_sym(struct token *t) { return t->s; } +int token_val(struct token *t) { return t->v; } + +struct token *toklist_eat() +{ + static struct token t; + t = tok; + input = next_token(input); + return &t; +} +struct token *toklist_peek() { return &tok; } + +int main(int argc, char **argv) +{ + if(argc != 2) return 1; + + input = next_token(argv[1]); + + int value; + if(lr_parser(&value)) return 1; + + printf("INPUT: '%s'\n", argv[1]); + printf("OUTPUT: %d\n", value); + + return 0; +} diff --git a/demos/sample-files/parser-skeleton.c b/demos/sample-files/parser-skeleton.c deleted file mode 100644 index f601369..0000000 --- a/demos/sample-files/parser-skeleton.c +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include - -#include "lr-parser.c" -#include "bin/generated.c" - -#include "parts/toklist.h" - -enum symbol { - PLUS = 0, - MINUS, - LPAREN, - RPAREN, - N0, N1, - END_INPUT, - - EP, E, T, N, - SYMBOLS_END, -}; - -static inline symbol char_to_token(char c) -{ - switch(c) { - case '+': return PLUS; - case '-': return MINUS; - case '(': return LPAREN; - case ')': return RPAREN; - case '0': return N0; - case '1': return N1; - case 0 : return END_INPUT; - default: fprintf(stderr, "ERROR: Unknown character '%c'\n", c); exit(1); - } -} - -static char *input; - -symbol toklist_eat() { return char_to_token(*(input++)); } // unsafe -symbol toklist_peek() { return char_to_token(*input); } // unsafe - -int main(int argc, char **argv) -{ - if(argc != 2) { - fprintf(stderr, "ERROR: Not enough arguments\n"); - return 1; - } - - input = argv[1]; - - printf("INPUT: '%s'\n", input); - printf("OUTPUT: %d\n", lr_parser()); - return 0; -} diff --git a/lexer.c b/lexer.c deleted file mode 100644 index 7ebf2e7..0000000 --- a/lexer.c +++ /dev/null @@ -1,85 +0,0 @@ -#include -#include -#include -#include - -struct token { - enum symbol { - LPAREN, RPAREN, STRING, IDEN, NUM - } sym; - - union { - char *iden; - int i; - char *str; - }; -}; - -static struct token tok; - -static inline int issep(char c) -{ - return isspace(c) || c == '\0' || c == '}' || c == '{' || c == '"'; -} - -static inline int tillsep(char *str) -{ - size_t i = 0; - while(!issep(str[i++])); - return i-1; -} - -static inline char *substring(char *str, size_t sub_end) -{ - static char sub[128]; - if(sub_end+1 > sizeof(sub)) return NULL; - - sub[sub_end+1] = '\0'; - return memcpy(sub, str, sub_end); -} - -static char *next_token(char *str) -{ - size_t off = 0; - char c0 = str[0]; - - if(c0 == '\0') return NULL; - if(isspace(c0)) return next_token(str+1); - else { - off = tillsep(str); - if(off == 0) { // sep - switch(str[off++]) { - case '{': tok.sym = LPAREN; break; - case '}': tok.sym = RPAREN; break; - case '"': - while(str[off] != '"') if(str[off++] == '\0') return NULL; - tok.sym = STRING; - tok.str = strdup(substring(str, off)); - } - } else if(isalpha(c0)) { // iden - tok.sym = IDEN; - tok.iden = strdup(substring(str, off)); - } else if(c0 >= '0' && c0 <= '9') { // num - tok.sym = NUM; - tok.i = atoi(substring(str, off)); - } - } - - return str+off; -} - -int main(void) -{ - char *str = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher} fdsfj\" here { {tok {"; - while((str = next_token(str))) - switch(tok.sym) { - case LPAREN: printf("{ "); break; - case RPAREN: printf("} "); break; - case STRING: printf("\"%s\" ", tok.str); free(tok.str); break; - case IDEN: printf("'%s' ", tok.iden); free(tok.iden); break; - case NUM: printf("%d ", tok.i); break; - } - - printf("\n"); - return 0; -} diff --git a/lr-parser.c b/lr-parser.c index 0d244fd..ad7dae5 100644 --- a/lr-parser.c +++ b/lr-parser.c @@ -20,23 +20,25 @@ typedef int stack_item; static stack_item stack_bottom[STACK_CAP]; static stack_item *stack_head = stack_bottom; -int lr_parser() +int lr_parser(int *value) { -#define push(item) ({ if(++stack_head - stack_bottom < STACK_CAP ) (*stack_head = item); else { fprintf(stderr, "ERROR: STACK_CAP exceeded\n"); return 1; }}) +#define push(item) do { \ + if(++stack_head - stack_bottom < STACK_CAP ) *stack_head = item; \ + else { fprintf(stderr, "ERROR: STACK_CAP exceeded\n"); return 1; } \ + } while(0) #define pop() (--stack_head) #define eat() toklist_eat() -#define peek() ({ \ - symbol s = toklist_peek(); \ - if(!symbol_is_valid(s)) { fprintf(stderr, "ERROR: Unknown token '%d'\n", s); return 1;} \ - s; }) +#define peek() toklist_peek() while(1) { - struct action a = table[(size_t)*stack_head][peek()]; + struct action a = table[(size_t)*stack_head][token_sym(peek())]; switch(a.type) { - case ACTION_SHIFT: - push(eat()); - push(-10000000); // the semantic action + case ACTION_SHIFT:; + struct token *t = eat(); + + push(token_sym(t)); + push(token_val(t)); push(a.arg); break; case ACTION_REDUCE: @@ -57,12 +59,13 @@ int lr_parser() push(a_goto.arg); break; case ACTION_ACCEPT: - return *(stack_head-1); + *value = *(stack_head-1); + return 0; case ACTION_NOT_SET: default: fprintf(stderr, - "ERROR: Unexpected token '%d' at state %zu\n", - peek(), (size_t)*stack_head); + "ERROR: Unexpected symbol '%d' at state %zu\n", + token_sym(peek()), (size_t)*stack_head); return 1; } } @@ -124,19 +127,38 @@ struct action **table = (struct action *([])){ size_t table_states = 13; -// implement toklist -static symbol toklist[] = {N0, PLUS, N1, END_INPUT}; -static symbol *tok = toklist; +// implement toklist.h +struct token { + symbol s; + int v; +}; + +static struct token toklist[] = {{N0}, {PLUS}, {N1}, {END_INPUT}}; +static const size_t ntoklist = sizeof(toklist)/sizeof(*toklist); -symbol toklist_eat() { return *(tok++); } // unsafe -symbol toklist_peek() { return *tok; } // unsafe +static size_t tok; + +struct token *toklist_eat() +{ + if(tok == ntoklist) return toklist + ntoklist-1; + return toklist + tok++; +} +struct token *toklist_peek() { return toklist + tok; } + +symbol token_sym(struct token *t) { return t->s; } +int token_val(struct token *t) { return t->v; } int none(int *stack_head) {(void)stack_head; return 0;} semantic_action_fn *semantic_actions = (semantic_action_fn[]){none, none, none, none, none, none, none, none}; int main(void) { - return lr_parser(); + int value; + + if(lr_parser(&value)) return 1; + printf("%d\n", value); + + return 0; } #endif diff --git a/parts/table.h b/parts/table.h index 486466f..fc63488 100644 --- a/parts/table.h +++ b/parts/table.h @@ -12,7 +12,7 @@ extern struct action { extern size_t table_states; -extern int (*table_fill)(); +extern int (*table_fill)(); // non-zero on fail extern void (*table_free)(); void table_print(); diff --git a/parts/toklist.h b/parts/toklist.h index f32cd25..760846f 100644 --- a/parts/toklist.h +++ b/parts/toklist.h @@ -5,10 +5,10 @@ struct token; -// /*extern*/ struct token *toklist_eat(); -// /*extern*/ struct token *toklist_peek(); +symbol token_sym(struct token *t); // UB for NULL +int token_val(struct token *t); // UB for NULL -symbol toklist_eat(); -symbol toklist_peek(); +struct token *toklist_eat(); // always non-NULL +struct token *toklist_peek(); // always non-NULL #endif -- cgit v1.2.3