diff options
Diffstat (limited to 'demos')
-rw-r--r-- | demos/generate-parser.c | 2 | ||||
-rw-r--r-- | demos/lexer.c | 116 | ||||
-rw-r--r-- | demos/sample-files/arithmetic-skeleton.c | 62 | ||||
-rw-r--r-- | demos/sample-files/calc-defs.c | 44 | ||||
-rw-r--r-- | demos/sample-files/calc-skeleton.c | 100 | ||||
-rw-r--r-- | demos/sample-files/parser-skeleton.c | 52 |
6 files changed, 323 insertions, 53 deletions
diff --git a/demos/generate-parser.c b/demos/generate-parser.c index 7856db6..fad3b93 100644 --- a/demos/generate-parser.c +++ b/demos/generate-parser.c @@ -66,7 +66,7 @@ char *add_extension(char *str, char *ext) void set_stdout(char *filename) { if(!filename) filename = "/dev/tty"; - assert(freopen(filename, "a+", stdout)); + assert(freopen(filename, "w", stdout)); } int main(int argc, char **argv) diff --git a/demos/lexer.c b/demos/lexer.c new file mode 100644 index 0000000..a206066 --- /dev/null +++ b/demos/lexer.c @@ -0,0 +1,116 @@ +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <string.h> + +#include "dict.c" + +struct token { + enum symbol { + LPAREN, RPAREN, STRING, IDEN, NUM, + HERE, THERE, WOW, TEST + } sym; + + union { + char *iden; + int i; + char *str; + }; +}; + +const struct string_token strings[] = { + {"here", HERE}, + {"there", THERE}, + {"wow", WOW}, + {"test", TEST}, +}; +const size_t nstrings = sizeof(strings)/sizeof(*strings); + +const uint8_t char_to_bit[256] = { + ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7, + ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13, + ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19, + ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25, + ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 +}; + +static struct token tok; + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == '}' || c == '{' || c == '"'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +static char *next_token(char *str) +{ + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') return NULL; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case '{': tok.sym = LPAREN; break; + case '}': tok.sym = RPAREN; break; + case '"': + while(str[off++] != '"') if(str[off] == '\0') return NULL; + tok.sym = STRING; + tok.str = strdup(substring(str+1, off-2)); + } + } else if(isalpha(c0)) { // iden or named symbol + char *substr = substring(str, off); + if((tok.sym = dict_check(substr)) == -1) { + tok.sym = IDEN; + tok.iden = strdup(substr); + } + } else if(c0 >= '0' && c0 <= '9') { // num + tok.sym = NUM; + tok.i = atoi(substring(str, off)); + } + } + + return str+off; +} + +int main(void) +{ + dict_compile(); + + char *str = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher} fdsfj\" here {therern{there{tok {wow} {"; + while((str = next_token(str))) + switch(tok.sym) { + case LPAREN: printf("{ "); break; + case RPAREN: printf("} "); break; + case STRING: printf("\"%s\" ", tok.str); free(tok.str); break; + case IDEN: printf("'%s' ", tok.iden); free(tok.iden); break; + case NUM: printf("%d ", tok.i); break; + case HERE: printf("HERE "); break; + case THERE: printf("THERE "); break; + case WOW: printf("WOW "); break; + case TEST: printf("TEST "); break; + default: printf("WHAT??%d??", tok.sym); break; + } + + printf("\n"); + + dict_free(); + return 0; +} diff --git a/demos/sample-files/arithmetic-skeleton.c b/demos/sample-files/arithmetic-skeleton.c new file mode 100644 index 0000000..ef5ec2f --- /dev/null +++ b/demos/sample-files/arithmetic-skeleton.c @@ -0,0 +1,62 @@ +#include <stdio.h>ae +#include <stdlib.h> + +#include "lr-parser.c" +#include "bin/generated.c" + +#include "parts/toklist.h" + +enum symbol { + PLUS = 0, + MINUS, + LPAREN, + RPAREN, + N0, N1, + END_INPUT, + + EP, E, T, N, + SYMBOLS_END, +}; + +struct token { + symbol s; +}; + +static inline struct token *char_to_token(char c) +{ + static struct token t; + + switch(c) { + case '+': t = (struct token){PLUS}; break; + case '-': t = (struct token){MINUS}; break; + case '(': t = (struct token){LPAREN}; break; + case ')': t = (struct token){RPAREN}; break; + case '0': t = (struct token){N0}; break; + case '1': t = (struct token){N1}; break; + case 0 : t = (struct token){END_INPUT}; break; + default: fprintf(stderr, "ERROR: Unknown character '%c'\n", c); exit(1); + } + + return &t; +} + +static char *input; + +symbol token_sym(struct token *t) { return t->s; } +int token_val(struct token *t) { return 0; } +struct token *toklist_eat() { return char_to_token(*(input++)); } // unsafe +struct token *toklist_peek() { return char_to_token(*input); } // unsafe + +int main(int argc, char **argv) +{ + if(argc != 2) { + fprintf(stderr, "ERROR: Not enough arguments\n"); + return 1; + } + + input = argv[1]; + + printf("INPUT: '%s'\n", input); + printf("OUTPUT: %d\n", lr_parser()); + return 0; +} diff --git a/demos/sample-files/calc-defs.c b/demos/sample-files/calc-defs.c new file mode 100644 index 0000000..de1f705 --- /dev/null +++ b/demos/sample-files/calc-defs.c @@ -0,0 +1,44 @@ +#include <stdio.h> + +#include "parts/symbol.h" +enum symbol { + PLUS = 0, + MINUS, + LPAREN, + RPAREN, + NUM, + END_INPUT, + + EP, E, T, + SYMBOLS_END, +}; + +size_t total_symbols = SYMBOLS_END; + +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } +IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } + +#include "parts/grammar.h" +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +static struct production _grammar[] = { + PROD(EP, ->, E, END_INPUT), + PROD(E, -->, E, PLUS, T), + PROD(E, -->, E, MINUS, T), + PROD(E, -->, T), + PROD(T, -->, LPAREN, E, RPAREN), + PROD(T, -->, NUM), +}; + +struct production *grammar = _grammar; +size_t total_productions = sizeof(_grammar)/sizeof(*_grammar); + +// #include "???.h" +char **semantic_action_str = (char *([])){ + "v = A(0);", + "v = A(0) + A(2);", + "v = A(0) - A(2);", + "v = A(0);", + "v = A(1);", + "v = A(0);", +}; diff --git a/demos/sample-files/calc-skeleton.c b/demos/sample-files/calc-skeleton.c new file mode 100644 index 0000000..29f181b --- /dev/null +++ b/demos/sample-files/calc-skeleton.c @@ -0,0 +1,100 @@ +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +#include "lr-parser.c" +#include "bin/a.c" + +// from calc-defs.c +#include "parts/symbol.h" +enum symbol { + PLUS = 0, + MINUS, + LPAREN, + RPAREN, + NUM, + END_INPUT, + + EP, E, T, + SYMBOLS_END, +}; + +static struct token { + symbol s; + int v; +} tok; + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +static char *next_token(char *str) +{ + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') tok.s = END_INPUT; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case '(': tok.s = LPAREN; break; + case ')': tok.s = RPAREN; break; + case '-': tok.s = MINUS; break; + case '+': tok.s = PLUS; break; + } + } else if(c0 >= '0' && c0 <= '9') { // num + tok.s = NUM; + tok.v = atoi(substring(str, off)); + } + } + + return str+off; +} + +static char *input; + +symbol token_sym(struct token *t) { return t->s; } +int token_val(struct token *t) { return t->v; } + +struct token *toklist_eat() +{ + static struct token t; + t = tok; + input = next_token(input); + return &t; +} +struct token *toklist_peek() { return &tok; } + +int main(int argc, char **argv) +{ + if(argc != 2) return 1; + + input = next_token(argv[1]); + + int value; + if(lr_parser(&value)) return 1; + + printf("INPUT: '%s'\n", argv[1]); + printf("OUTPUT: %d\n", value); + + return 0; +} diff --git a/demos/sample-files/parser-skeleton.c b/demos/sample-files/parser-skeleton.c deleted file mode 100644 index f601369..0000000 --- a/demos/sample-files/parser-skeleton.c +++ /dev/null @@ -1,52 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> - -#include "lr-parser.c" -#include "bin/generated.c" - -#include "parts/toklist.h" - -enum symbol { - PLUS = 0, - MINUS, - LPAREN, - RPAREN, - N0, N1, - END_INPUT, - - EP, E, T, N, - SYMBOLS_END, -}; - -static inline symbol char_to_token(char c) -{ - switch(c) { - case '+': return PLUS; - case '-': return MINUS; - case '(': return LPAREN; - case ')': return RPAREN; - case '0': return N0; - case '1': return N1; - case 0 : return END_INPUT; - default: fprintf(stderr, "ERROR: Unknown character '%c'\n", c); exit(1); - } -} - -static char *input; - -symbol toklist_eat() { return char_to_token(*(input++)); } // unsafe -symbol toklist_peek() { return char_to_token(*input); } // unsafe - -int main(int argc, char **argv) -{ - if(argc != 2) { - fprintf(stderr, "ERROR: Not enough arguments\n"); - return 1; - } - - input = argv[1]; - - printf("INPUT: '%s'\n", input); - printf("OUTPUT: %d\n", lr_parser()); - return 0; -} |