From 245f4b65cc695bf3d0dd430e2c728f2a3a51e3e5 Mon Sep 17 00:00:00 2001 From: kartofen Date: Tue, 17 Jun 2025 15:15:21 +0300 Subject: add recursive ascent-descent --- build.sh | 2 + recursive-ascent-descent.c | 197 ++++++++++++++++++++++++ recursive-ascent.c | 367 +++++++++++++++++++++++++++++---------------- 3 files changed, 436 insertions(+), 130 deletions(-) create mode 100644 recursive-ascent-descent.c diff --git a/build.sh b/build.sh index 0a69ce7..52da4b0 100755 --- a/build.sh +++ b/build.sh @@ -6,4 +6,6 @@ gcc -Wall -Wextra -g lexer.c -o lexer gcc -Wall -Wextra -g recursive-ascent.c -o recursive-ascent gcc -Wall -Wextra -g recursive-ascent-descent.c -o recursive-ascent-descent +valgrind --leak-check=full --show-leak-kinds=all -s ./lexer +valgrind --leak-check=full --show-leak-kinds=all -s ./recursive-ascent valgrind --leak-check=full --show-leak-kinds=all -s ./recursive-ascent-descent diff --git a/recursive-ascent-descent.c b/recursive-ascent-descent.c new file mode 100644 index 0000000..628540d --- /dev/null +++ b/recursive-ascent-descent.c @@ -0,0 +1,197 @@ +#include // printf(char *, ...) +#include // exit(int) + +/* + * For grammar: (https://en.wikipedia.org/wiki/Recursive_ascent_parser) + * (R1) expr : expr _ '+' term { $$ = $1 + $3; } + * (R2) | expr _ '-' term { $$ = $1 - $3; } + * (R3) | _ term { $$ = $1; } + * ; + * + * (R4) term : _ '(' expr ')' { $$ = $2; } + * (R5) | _ num { $$ = $1; } + * ; + * + * (R6) num : _ '0' { $$ = 0; } + * (R7) | _ '1' { $$ = 1; } + * ; + */ + +#pragma GCC diagnostic ignored "-Wunused-value" +#pragma GCC diagnostic ignored "-Wunused-parameter" + +#define DIE(...) (printf("ERROR %s:%d\n", __FILE__, __LINE__), __VA_OPT__(exit(__VA_ARGS__),) exit(1), 1) + +enum nonterminal { INVAILID, EXPR, TERM, NUM }; +struct result { enum nonterminal nonterm; int value; int depth; char *s; }; + +struct args { int args[2]; }; +#define new_arg(...) (struct args){{__VA_ARGS__}} +#define get_arg(a, n) ((a).args[n]) + +// top-down +struct result rule0(char *s, struct args a); +struct result rule1(char *s, struct args a); +struct result rule2(char *s, struct args a); +struct result rule3(char *s, struct args a); +struct result rule4(char *s, struct args a); +struct result rule5(char *s, struct args a); +struct result rule6(char *s, struct args a); +struct result rule7(char *s, struct args a); + +// bottom-up +struct result accept(char *s, struct args a); +struct result expr(char *s, struct args a); +struct result expr2(char *s, struct args a); +struct result term(char *s, struct args a); +struct result num(char *s, struct args a); + +int main(void) +{ + char *s = "1-((0-1)+1+1+1+1+1)-0+1-(((1)))+1+1+1-1+0+(1-0+1)-1-0"; + struct result r = accept(s, new_arg(0)); + printf("%s = %d\n", s, r.value); + return 0; +} + +char *eat(char *s, char c) +{ + if(*s == c) + return s + 1; + else DIE(); +} + +struct result rule0(char *s, struct args a) +{ + struct result r = {0, 0, 0, s}; + r = expr(r.s, a); + r.s = eat(r.s, '\0'); + return r; +} + +struct result rule1(char *s, struct args a) +{ + struct result r = {0, 0, 0, s}; + r.s = eat(r.s, '+'); + r = term(r.s, a); + return (struct result){EXPR, get_arg(a, 0) + r.value, 0, r.s}; +} + +struct result rule2(char *s, struct args a) +{ + struct result r = {0, 0, 0, s}; + r.s = eat(r.s, '-'); + r = term(r.s, a); + return (struct result){EXPR, get_arg(a, 0) - r.value, 0, r.s}; +} + +struct result rule3(char *s, struct args a) +{ + struct result r = {0, 0, 0, s}; + r = term(r.s, a); + return (struct result){EXPR, r.value, 0, r.s}; +} + +struct result rule4(char *s, struct args a) +{ + struct result r = {0, 0, 0, s}; + r.s = eat(r.s, '('); + r = expr(r.s, a); + r.s = eat(r.s, ')'); + return (struct result){EXPR, r.value, 1, r.s}; +} + +struct result rule5(char *s, struct args a) +{ + struct result r = {0, 0, 0, s}; + r = num(r.s, a); + return (struct result){TERM, r.value, 0, r.s}; +} + +struct result rule6(char *s, struct args a) +{ + struct result r = {0, 0, 0, s}; + r.s = eat(r.s, '0'); + return (struct result){NUM, 0, 0, r.s}; +} + +struct result rule7(char *s, struct args a) +{ + struct result r = {0, 0, 0, s}; + r.s = eat(r.s, '1'); + return (struct result){NUM, 1, 0, r.s}; +} + +struct result accept(char *s, struct args a) +{ + struct result r = {0}; + switch(*s) { + case '(': + case '0': + case '1': r = rule0(s, a); break; + default: DIE(); + } + + return r; +} + +struct result expr(char *s, struct args a) +{ + struct result r = {0}; + switch(*s) { + case '(': + case '0': + case '1': r = rule3(s, a); break; + default: DIE(); + } + + struct result r2 = r; + while(r2.depth == 0) { + r = r2; + switch(r.nonterm) { + case EXPR: r2 = expr2(r.s, new_arg(r.value)); break; + default: DIE(); + } + } + + r.depth--; + return r; +} + +struct result expr2(char *s, struct args a) +{ + struct result r = {0}; + switch(*s) { + case '+': r = rule1(s, a); break; + case '-': r = rule2(s, a); break; + default: r.depth = 1; + } + + return r; +} + +struct result term(char *s, struct args a) +{ + struct result r = {0}; + switch(*s) { + case '(': r = rule4(s, a); break; + case '0': r = rule5(s, a); break; + case '1': r = rule5(s, a); break; + default: DIE(); + } + + return r; +} + +struct result num(char *s, struct args a) +{ + struct result r = {0}; + switch(*s) { + case '0': r = rule6(s, a); break; + case '1': r = rule7(s, a); break; + default: DIE(); + } + + return r; +} + diff --git a/recursive-ascent.c b/recursive-ascent.c index 8020f7e..7f69e57 100644 --- a/recursive-ascent.c +++ b/recursive-ascent.c @@ -1,115 +1,155 @@ -// file: rad_mixed_parser.c -// Recursive Ascent–Descent parser for: -// -// expr : expr '+' term -// | expr '-' term -// | term -// -// term : '(' expr ')' -// | num -// -// num : '0' | '1' -// -// We implement: -// - A miniature LALR(1) ascent machine *just* for expr (+ and –). -// - A recursive‑descent helper for term/num. -// - No hand‑unrolling of left recursion! - #include #include -#define DIE() do { \ - fprintf(stderr, "Parse error at %s:%d\n", __FILE__, __LINE__); \ - exit(1); \ -} while (0) +/* + * For grammar: (https://en.wikipedia.org/wiki/Recursive_ascent_parser) + * expr : expr '+' term { $$ = $1 + $3; } + * | expr '-' term { $$ = $1 - $3; } + * | term { $$ = $1; } + * ; + * + * term : '(' expr ')' { $$ = $2; } + * | num { $$ = $1; } + * ; + * + * num : '0' { $$ = 0; } + * | '1' { $$ = 1; } + * ; + */ -enum nonterminal { EXPR, TERM, NUM }; +#pragma GCC diagnostic ignored "-Wunused-value" -struct result { - enum nonterminal nonterm; - int value; - int depth; - char *s; -}; +#define DIE(...) (printf("ERROR %s:%d\n", __FILE__, __LINE__), __VA_OPT__(exit(__VA_ARGS__),) exit(1), 1) -struct args { int args[2]; }; -#define arg(...) (struct args){{__VA_ARGS__}} +enum nonterminal { EXPR, TERM, NUM }; +struct result { enum nonterminal nonterm; int value; int depth; char *s; }; + +struct args { int args[2]; }; +#define new_arg(...) (struct args){{__VA_ARGS__}} +#define get_arg(a, n) ((a).args[n]) -// forward declarations of ascent states for expr struct result state0(char *s, struct args a); +struct result state1(char *s, struct args a); +struct result state2(char *s, struct args a); +struct result state3(char *s, struct args a); struct result state4(char *s, struct args a); struct result state5(char *s, struct args a); +struct result state6(char *s, struct args a); struct result state7(char *s, struct args a); struct result state8(char *s, struct args a); struct result state9(char *s, struct args a); struct result state10(char *s, struct args a); +struct result state11(char *s, struct args a); struct result state12(char *s, struct args a); struct result state13(char *s, struct args a); -// --- Recursive‑descent helper for term/num ------------------------------ +int main(void) +{ + char *s = "1-((0-1)+1+1+1+1+1)-0+1-(((1)))+1+1+1-1+0+(1-0+1)-1-0"; + struct result r = state0(s, new_arg(0)); + printf("%s = %d\n", s, r.value); + return 0; +} -// Recognize exactly one `term` ( '(' expr ')' or '0' | '1' ). -// Return the updated pointer and write the semantic value into *out. -static char * -descend_term(char *s, int *out) +/* + * 0 $accept: . expr $end + * + * '(' shift, and go to state 1 + * '0' shift, and go to state 2 + * '1' shift, and go to state 3 + * + * expr go to state 4 + * term go to state 5 + * num go to state 6 + */ +struct result state0(char *s, struct args a) { - if (*s == '(') { - s++; // consume '(' - // call back into the ascent machine for the full expr - struct result r = state0(s, arg(0)); - // drive that machine to its final EXPR reduction - while (r.depth == 0) { - switch (r.nonterm) { - case EXPR: r = state4(r.s, arg(r.value)); break; - default: DIE(); - } - } - // now r.nonterm==EXPR, r.depth==2, r.s points just past all of expr - r.depth--; // pop the final accept - if (*r.s != ')') DIE(); - *out = r.value; - return r.s + 1; - } - else if (*s == '0' || *s == '1') { - *out = (*s - '0'); - return s + 1; + struct result r = {0}; + switch(*s) { + case '(': r = state1(s+1, a); break; + case '0': r = state2(s+1, a); break; + case '1': r = state3(s+1, a); break; + default: DIE(); } - else { - DIE(); + + while(r.depth == 0) { + switch(r.nonterm) { + case EXPR: r = state4(r.s, new_arg(r.value)); break; + case TERM: r = state5(r.s, new_arg(r.value)); break; + case NUM: r = state6(r.s, new_arg(r.value)); break; + default: DIE(); + } } - return s; // unreachable -} -// --- Ascent state machine for expr (“+” and “–” only) ------------------ + r.depth--; + return r; +} -// state0: $accept: . expr $end -// dispatch to shift into the left‑recursive expr machine -struct result state0(char *s, struct args a) +/* + * 4 term: '(' . expr ')' + * + * '(' shift, and go to state 1 + * '0' shift, and go to state 2 + * '1' shift, and go to state 3 + * + * expr go to state 7 + * term go to state 5 + * num go to state 6 + */ +struct result state1(char *s, struct args a) { struct result r = {0}; - // Instead of shifting '(' or '0','1' directly, - // we “descend” for the initial term, then reduce to EXPR: - { - int t; - s = descend_term(s, &t); - r = (struct result){ TERM, t, 0, s }; + switch(*s) { + case '(': r = state1(s+1, a); break; + case '0': r = state2(s+1, a); break; + case '1': r = state3(s+1, a); break; + default: DIE(); } - // now fold up into EXPR until we reach a non‑term reduction - while (r.depth == 0) { - switch (r.nonterm) { - case EXPR: r = state4(r.s, arg(r.value)); break; - case TERM: r = state5(r.s, arg(r.value)); break; - default: DIE(); + + while(r.depth == 0) { + switch(r.nonterm) { + case EXPR: r = state7(r.s, new_arg(r.value)); break; + case TERM: r = state5(r.s, new_arg(r.value)); break; + case NUM: r = state6(r.s, new_arg(r.value)); break; + default: DIE(); } } + r.depth--; return r; } -// state4: 0 $accept: expr . $end -// 1 expr: expr . '+' term -// 2 expr: expr . '-' term -// on '+' → state9, on '-' → state10, on '\0' → state8 +/* + * 6 num: '0' . + * + * $default reduce using rule 6 (num) + */ +struct result state2(char *s, struct args a) +{ + (void)a; + return (struct result){NUM, 0, 0, s}; +} + +/* + * 7 num: '1' . + * + * $default reduce using rule 7 (num) + */ +struct result state3(char *s, struct args a) +{ + (void)a; + return (struct result){NUM, 1, 0, s}; +} + +/* + * 0 $accept: expr . $end + * 1 expr: expr . '+' term + * 2 | expr . '-' term + * + * $end shift, and go to state 8 + * '+' shift, and go to state 9 + * '-' shift, and go to state 10 + */ struct result state4(char *s, struct args a) { struct result r = {0}; @@ -117,93 +157,160 @@ struct result state4(char *s, struct args a) case '\0': r = state8(s+1, a); break; case '+': r = state9(s+1, a); break; case '-': r = state10(s+1, a); break; - default: DIE(); + default: DIE(); } - if (r.depth == 0) DIE(); + + (r.depth == 0) && DIE(); + r.depth--; return r; } -// state5: 3 expr: term . -// reduce term→expr +/* + * 3 expr: term . + * + * $default reduce using rule 3 (expr) + */ struct result state5(char *s, struct args a) { - return (struct result){ EXPR, a.args[0], 0, s }; + return (struct result){EXPR, get_arg(a, 0), 0, s}; } -// state7: after a nested expr in parentheses -// same as state4 but also handles ')' +/* + * 5 term: num . + * + * $default reduce using rule 5 (term) + */ +struct result state6(char *s, struct args a) +{ + return (struct result){TERM, get_arg(a, 0), 0, s}; +} + +/* + * 1 expr: expr . '+' term + * 2 | expr . '-' term + * 4 term: '(' expr . ')' + * + * '+' shift, and go to state 9 + * '-' shift, and go to state 10 + * ')' shift, and go to state 11 + */ struct result state7(char *s, struct args a) { struct result r = {0}; switch(*s) { case '+': r = state9(s+1, a); break; case '-': r = state10(s+1, a); break; - case ')': r = state8(s+1, a); break; // fold up to expr→term inside () - default: DIE(); + case ')': r = state11(s+1, a); break; + default: DIE(); } - if (r.depth == 0) DIE(); + + (r.depth == 0) && DIE(); + r.depth--; return r; } -// state8: 0 $accept: expr $end . -// accept +/* + * 0 $accept: expr $end . + * + * $default accept + */ struct result state8(char *s, struct args a) { - return (struct result){ EXPR, a.args[0], 2, s }; + return (struct result){EXPR, a.args[0], 2, s}; } -// state9: 1 expr: expr '+' . term -// shift '+' then descend for *one* term, then reduce via state12 +/* + * 1 expr: expr '+' . term + * + * '(' shift, and go to state 1 + * '0' shift, and go to state 2 + * '1' shift, and go to state 3 + * + * term go to state 12 + * num go to state 6 + */ struct result state9(char *s, struct args a) { - int t; - s = descend_term(s, &t); - return state12(s, arg(a.args[0], t)); + struct result r = {0}; + switch(*s) { + case '(': r = state1(s+1, a); break; + case '0': r = state2(s+1, a); break; + case '1': r = state3(s+1, a); break; + default: DIE(); + } + + while(r.depth == 0) { + switch(r.nonterm) { + case TERM: r = state12(r.s, new_arg(r.value, get_arg(a, 0))); break; + case NUM: r = state6(r.s, new_arg(r.value)); break; + default: DIE(); + } + } + + r.depth--; + return r; } -// state10: 2 expr: expr '-' . term -// same as state9 but for '-' +/* + * 2 expr: expr '-' . term + * + * '(' shift, and go to state 1 + * '0' shift, and go to state 2 + * '1' shift, and go to state 3 + * + * term go to state 13 + * num go to state 6 + */ struct result state10(char *s, struct args a) { - int t; - s = descend_term(s, &t); - return state13(s, arg(a.args[0], t)); + struct result r = {0}; + switch(*s) { + case '(': r = state1(s+1, a); break; + case '0': r = state2(s+1, a); break; + case '1': r = state3(s+1, a); break; + default: DIE(); + } + + while(r.depth == 0) { + switch(r.nonterm) { + case TERM: r = state13(r.s, new_arg(get_arg(a, 0), r.value)); break; + case NUM: r = state6(r.s, new_arg(r.value)); break; + default: DIE(); + } + } + + r.depth--; + return r; } -// state12: 1 expr: expr '+' term . -// reduce -struct result state12(char *s, struct args a) +/* + * 4 term: '(' expr ')' . + * + * $default reduce using rule 4 (term) + */ +struct result state11(char *s, struct args a) { - return (struct result){ EXPR, a.args[0] + a.args[1], 2, s }; + return (struct result){TERM, get_arg(a, 0), 2, s}; } -// state13: 2 expr: expr '-' term . -// reduce -struct result state13(char *s, struct args a) +/* + * 1 expr: expr '+' term . + * + * $default reduce using rule 1 (expr) + */ +struct result state12(char *s, struct args a) { - return (struct result){ EXPR, a.args[0] - a.args[1], 2, s }; + return (struct result){EXPR, get_arg(a, 0) + get_arg(a, 1), 2, s}; } -// --- main entry point --------------------------------------------------- - -int main(void) +/* + * 2 expr: expr '-' term . + * + * $default reduce using rule 2 (expr) + */ +struct result state13(char *s, struct args a) { - const char *input = "1+((0-1)+1)"; - struct result r = state0((char*)input, arg(0)); - - // finish driving the ascent machine if needed - while (r.depth == 0) { - switch (r.nonterm) { - case EXPR: r = state4(r.s, arg(r.value)); break; - default: DIE(); - } - } - if (r.nonterm != EXPR || r.depth != 2 || *r.s != '\0') { - DIE(); - } - - printf("result = %d\n", r.value); - return 0; + return (struct result){EXPR, get_arg(a, 0) - get_arg(a, 1), 2, s}; } -- cgit v1.2.3