diff options
Diffstat (limited to 'demos/lexer.c')
-rw-r--r-- | demos/lexer.c | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/demos/lexer.c b/demos/lexer.c new file mode 100644 index 0000000..a206066 --- /dev/null +++ b/demos/lexer.c @@ -0,0 +1,116 @@ +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <string.h> + +#include "dict.c" + +struct token { + enum symbol { + LPAREN, RPAREN, STRING, IDEN, NUM, + HERE, THERE, WOW, TEST + } sym; + + union { + char *iden; + int i; + char *str; + }; +}; + +const struct string_token strings[] = { + {"here", HERE}, + {"there", THERE}, + {"wow", WOW}, + {"test", TEST}, +}; +const size_t nstrings = sizeof(strings)/sizeof(*strings); + +const uint8_t char_to_bit[256] = { + ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7, + ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13, + ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19, + ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25, + ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 +}; + +static struct token tok; + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == '}' || c == '{' || c == '"'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +static char *next_token(char *str) +{ + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') return NULL; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case '{': tok.sym = LPAREN; break; + case '}': tok.sym = RPAREN; break; + case '"': + while(str[off++] != '"') if(str[off] == '\0') return NULL; + tok.sym = STRING; + tok.str = strdup(substring(str+1, off-2)); + } + } else if(isalpha(c0)) { // iden or named symbol + char *substr = substring(str, off); + if((tok.sym = dict_check(substr)) == -1) { + tok.sym = IDEN; + tok.iden = strdup(substr); + } + } else if(c0 >= '0' && c0 <= '9') { // num + tok.sym = NUM; + tok.i = atoi(substring(str, off)); + } + } + + return str+off; +} + +int main(void) +{ + dict_compile(); + + char *str = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher} fdsfj\" here {therern{there{tok {wow} {"; + while((str = next_token(str))) + switch(tok.sym) { + case LPAREN: printf("{ "); break; + case RPAREN: printf("} "); break; + case STRING: printf("\"%s\" ", tok.str); free(tok.str); break; + case IDEN: printf("'%s' ", tok.iden); free(tok.iden); break; + case NUM: printf("%d ", tok.i); break; + case HERE: printf("HERE "); break; + case THERE: printf("THERE "); break; + case WOW: printf("WOW "); break; + case TEST: printf("TEST "); break; + default: printf("WHAT??%d??", tok.sym); break; + } + + printf("\n"); + + dict_free(); + return 0; +} |