about summary refs log tree commit diff
path: root/demos/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'demos/lexer.c')
-rw-r--r-- demos/lexer.c 116
1 files changed, 116 insertions, 0 deletions
diff --git a/demos/lexer.c b/demos/lexer.c
new file mode 100644
index 0000000..a206066
--- /dev/null
+++ b/demos/lexer.c
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "dict.c"
+
+struct token { /* One lexed token: `sym` is its kind, the anonymous union carries its payload. */
+ enum symbol {
+ LPAREN, RPAREN, STRING, IDEN, NUM, /* kinds assigned directly by next_token(); LPAREN/RPAREN stand for '{' and '}' */
+ HERE, THERE, WOW, TEST /* named symbols; paired with their spellings in strings[] */
+ } sym;
+
+ union { /* which member is meaningful depends on sym; LPAREN/RPAREN and the named symbols set none */
+ char *iden; /* IDEN: strdup'd copy of the word; main() frees it after printing */
+ int i; /* NUM: numeric value of the token */
+ char *str; /* STRING: strdup'd copy of the text between the quotes; main() frees it after printing */
+ };
+};
+
+const struct string_token strings[] = { /* keyword spelling paired with its enum symbol value */
+ {"here", HERE},
+ {"there", THERE},
+ {"wow", WOW},
+ {"test", TEST},
+}; /* struct string_token is not declared in this file; presumably it comes from dict.c, which also appears to consume this table -- TODO confirm */
+const size_t nstrings = sizeof(strings)/sizeof(*strings); /* number of entries in strings[] */
+
+const uint8_t char_to_bit[256] = { /* per-byte code: 'a'..'z' -> 2..27, NUL and ' ' -> 1, every other byte -> 0 (implicit zero-init) */
+ ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7,
+ ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13,
+ ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19,
+ ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25,
+ ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1
+}; /* not read anywhere in this file; presumably used by dict.c -- TODO confirm */
+
+static struct token tok; /* most recently lexed token: written by next_token(), read by main() */
+
+/*
+ * Return nonzero if `c` ends a bare word: any whitespace character, the
+ * string terminator, a brace, or a double quote.
+ */
+static inline int issep(char c)
+{
+    /* <ctype.h> functions take an int whose value must be representable as
+     * unsigned char (or be EOF). A plain char may be negative, so convert
+     * before calling isspace(). */
+    return isspace((unsigned char)c) || c == '\0' || c == '}' || c == '{' || c == '"';
+}
+
+/*
+ * Count the characters at the start of `str` that precede the first
+ * separator, as decided by issep(). The result is 0 when str[0] is itself
+ * a separator. The scan always stops at the terminating NUL, because
+ * issep() treats '\0' as a separator.
+ */
+static inline int tillsep(char *str)
+{
+    size_t len;
+
+    for(len = 0; !issep(str[len]); len++)
+        ;
+
+    return len;
+}
+
+/*
+ * Copy the first `sub_end` bytes of `str` into a function-local static
+ * buffer, NUL-terminate the copy and return the buffer.
+ *
+ * Returns NULL when the copy plus its terminator would not fit, i.e. when
+ * `sub_end` is 128 or more. The buffer is shared by all calls, so the
+ * result is only valid until the next call to substring().
+ */
+static inline char *substring(char *str, size_t sub_end)
+{
+    static char sub[128];
+
+    /* Compare without adding to sub_end: `sub_end + 1` wraps to 0 when
+     * sub_end is SIZE_MAX, which would let the oversized request through. */
+    if(sub_end >= sizeof(sub)) return NULL;
+
+    memcpy(sub, str, sub_end);
+    sub[sub_end] = '\0';
+    return sub;
+}
+
+/*
+ * Lex one token from the start of `str` into the file-scope `tok`.
+ *
+ * Leading whitespace is skipped. Recognised tokens:
+ *   '{' / '}'   -> LPAREN / RPAREN
+ *   "..."       -> STRING; tok.str is a strdup'd copy of the quoted text
+ *   word        -> whatever symbol dict_check() reports for it, otherwise
+ *                  IDEN with tok.iden a strdup'd copy of the word
+ *   number      -> NUM; tok.i holds its value
+ * A word or number runs up to the next separator (see issep()).
+ *
+ * Returns a pointer just past the token. Returns NULL at end of input or on
+ * a lexing error: an unterminated string, a token too long for substring(),
+ * a character that cannot start a token, or a failed allocation. When NULL
+ * is returned `tok` must not be used. The caller owns tok.str / tok.iden
+ * and is responsible for freeing them.
+ */
+static char *next_token(char *str)
+{
+    size_t off;
+    char *text;
+    int id;
+
+    /* <ctype.h> functions need unsigned char values; plain char may be negative. */
+    while(isspace((unsigned char)*str)) str++;
+    if(*str == '\0') return NULL;
+
+    off = tillsep(str);
+    if(off == 0) { // str[0] is itself a separator
+        switch(str[off++]) {
+        case '{': tok.sym = LPAREN; break;
+        case '}': tok.sym = RPAREN; break;
+        case '"':
+            // Test each byte before stepping over it, so the scan can
+            // never move past the terminating NUL.
+            while(str[off] != '"') {
+                if(str[off] == '\0') return NULL; // unterminated string
+                off++;
+            }
+            off++; // consume the closing quote
+
+            // off now counts both quotes plus the contents.
+            if(!(text = substring(str+1, off-2))) return NULL;
+            if(!(text = strdup(text))) return NULL;
+            tok.sym = STRING;
+            tok.str = text;
+            break;
+        default:
+            return NULL;
+        }
+    } else if(isalpha((unsigned char)*str)) { // iden or named symbol
+        if(!(text = substring(str, off))) return NULL;
+
+        // Hold the lookup result in an int before storing it: whether an
+        // enum object can represent or compare equal to -1 depends on the
+        // underlying type the compiler picked for it.
+        // NOTE(review): assumes dict_check() returns an integer with -1
+        // meaning "not found", as the original comparison implied.
+        id = dict_check(text);
+        if(id != -1) {
+            tok.sym = id;
+        } else {
+            if(!(text = strdup(text))) return NULL;
+            tok.sym = IDEN;
+            tok.iden = text;
+        }
+    } else if(*str >= '0' && *str <= '9') { // num
+        if(!(text = substring(str, off))) return NULL;
+        tok.sym = NUM;
+        // strtol() has defined behaviour on out-of-range input; atoi() does not.
+        tok.i = (int)strtol(text, NULL, 10);
+    } else {
+        // No token starts with this character. Stop here rather than
+        // return with `tok` still describing the previous token, whose
+        // string the caller may already have freed.
+        return NULL;
+    }
+
+    return str+off;
+}
+
+/*
+ * Demo driver: build the dictionary, lex a fixed sample string one token at
+ * a time, print each token as it is produced, then release the dictionary.
+ * STRING and IDEN payloads are freed right after they are printed.
+ */
+int main(void)
+{
+    char *cursor = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher} fdsfj\" here {therern{there{tok {wow} {";
+
+    dict_compile();
+
+    for(cursor = next_token(cursor); cursor != NULL; cursor = next_token(cursor)) {
+        switch(tok.sym) {
+        case LPAREN:
+            fputs("{ ", stdout);
+            break;
+        case RPAREN:
+            fputs("} ", stdout);
+            break;
+        case STRING:
+            printf("\"%s\" ", tok.str);
+            free(tok.str);
+            break;
+        case IDEN:
+            printf("'%s' ", tok.iden);
+            free(tok.iden);
+            break;
+        case NUM:
+            printf("%d ", tok.i);
+            break;
+        case HERE:
+            fputs("HERE ", stdout);
+            break;
+        case THERE:
+            fputs("THERE ", stdout);
+            break;
+        case WOW:
+            fputs("WOW ", stdout);
+            break;
+        case TEST:
+            fputs("TEST ", stdout);
+            break;
+        default:
+            printf("WHAT??%d??", tok.sym);
+            break;
+        }
+    }
+
+    putchar('\n');
+
+    dict_free();
+    return 0;
+}