/*
 * path: root/demos/lexer.c
 * blob: a2060660c00348843b7dc69ffa0bcdc2a453c7e6
 */
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dict.c"

// Token classes produced by the lexer. The enumerators rely on their
// implicit values (LPAREN = 0 ... TEST = 8), so keep the order intact.
enum symbol {
    LPAREN,  // '{'
    RPAREN,  // '}'
    STRING,  // double-quoted text
    IDEN,    // word that is not a reserved name
    NUM,     // decimal number
    HERE,    // reserved word "here"
    THERE,   // reserved word "there"
    WOW,     // reserved word "wow"
    TEST     // reserved word "test"
};

// A single lexed token: its class plus the payload for that class.
// Only one union member is meaningful at a time:
//   IDEN   -> iden
//   NUM    -> i
//   STRING -> str
// The remaining classes carry no payload.
struct token {
    enum symbol sym;

    union {
        char *iden;
        int i;
        char *str;
    };
};

// Reserved words and the symbol each one lexes to.
// NOTE(review): struct string_token is not declared in this file; it
// presumably comes from dict.c as a (text, symbol) pair - confirm there.
const struct string_token strings[] = {
    { "here",  HERE  },
    { "there", THERE },
    { "wow",   WOW   },
    { "test",  TEST  }
};

// Number of entries in strings[].
const size_t nstrings = sizeof strings / sizeof strings[0];

// Per-byte lookup table: NUL and space map to 1, 'a'..'z' map to 2..27,
// and every other byte is left at 0 by the designated initializers.
// NOTE(review): nothing in this file reads the table; presumably dict.c
// uses the values as bit/slot indices - confirm there.
const uint8_t char_to_bit[256] = {
    ['\0'] = 1,
    [' ']  = 1,

    ['a'] = 2,  ['b'] = 3,  ['c'] = 4,  ['d'] = 5,
    ['e'] = 6,  ['f'] = 7,  ['g'] = 8,  ['h'] = 9,
    ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13,
    ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17,
    ['q'] = 18, ['r'] = 19, ['s'] = 20, ['t'] = 21,
    ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25,
    ['y'] = 26, ['z'] = 27
};

// The most recently lexed token: next_token() fills it in and main() reads
// it after each successful call.
static struct token tok;

// Report whether c ends a bare token.
//
// Separators are any whitespace character (per isspace), the string
// terminator '\0', the braces '{' and '}', and the double quote '"'.
//
// Returns non-zero for a separator and 0 otherwise.
static inline int issep(char c)
{
    // <ctype.h> functions are only defined for values representable as
    // unsigned char (or EOF); a plain char may be negative for bytes >= 0x80.
    return isspace((unsigned char)c) || c == '\0' || c == '}' || c == '{' || c == '"';
}

// Count the characters at the start of str that come before the first
// separator (as defined by issep). Because '\0' is itself a separator the
// scan always stops at or before the end of the string.
//
// Returns that count, i.e. the index of the first separator in str.
static inline int tillsep(char *str)
{
    size_t len = 0;

    for(;;) {
        if(issep(str[len]))
            break;
        len++;
    }

    return len;
}

// Copy the first sub_end bytes of str into a shared static buffer and
// NUL-terminate the copy.
//
// str      source text; exactly sub_end bytes are read from it.
// sub_end  number of bytes to copy (exclusive end index into str).
//
// Returns a pointer to the static buffer, or NULL when sub_end bytes plus
// the terminator do not fit in it. The buffer is overwritten by every call,
// so the result must be consumed or duplicated before the next call.
static inline char *substring(char *str, size_t sub_end)
{
    static char sub[128];

    // Compare without adding 1: sub_end + 1 wraps to 0 when sub_end is
    // SIZE_MAX, which would slip past the check and write outside sub.
    if(sub_end >= sizeof(sub)) return NULL;

    memcpy(sub, str, sub_end);
    sub[sub_end] = '\0';
    return sub;
}

// Lex one token from the front of str into the file-scope `tok`.
//
// Leading whitespace is skipped, then the token is classified:
//   '{' / '}'              -> LPAREN / RPAREN
//   "..."                  -> STRING; tok.str is a strdup'd copy of the text
//                             between the quotes
//   starts with a letter   -> the symbol dict_check() reports, or IDEN with
//                             a strdup'd tok.iden when it reports -1
//   starts with a digit    -> NUM; tok.i is the leading decimal value,
//                             saturated at INT_MAX
//
// Returns a pointer just past the consumed token, or NULL when no token was
// produced: end of input, an unterminated string, a token too long for
// substring()'s buffer, a failed allocation, or a token whose first
// character fits none of the classes above. `tok` is only meaningful when
// the return value is non-NULL. The caller owns tok.str / tok.iden and must
// free them.
static char *next_token(char *str)
{
    // Skip whitespace with a loop rather than one recursive call per
    // character. The cast keeps the <ctype.h> call defined for bytes that
    // are negative as plain char.
    while(isspace((unsigned char)*str)) str++;
    if(*str == '\0') return NULL;

    size_t off = tillsep(str);

    if(off == 0) { // sep
        switch(*str) {
        case '{': tok.sym = LPAREN; return str+1;
        case '}': tok.sym = RPAREN; return str+1;
        case '"': {
            char *body = str+1;
            // strchr stops at the terminator, so an input that ends right
            // after the opening quote is never read past its end.
            char *close = strchr(body, '"');
            if(close == NULL) return NULL; // unterminated string

            char *text = substring(body, (size_t)(close - body));
            if(text == NULL) return NULL;  // longer than substring()'s buffer
            char *copy = strdup(text);
            if(copy == NULL) return NULL;  // out of memory

            tok.sym = STRING;
            tok.str = copy;
            return close+1;
        }
        default:
            // Whitespace and '\0' were handled above, so no other separator
            // should reach this point; fail instead of leaving tok stale.
            return NULL;
        }
    }

    unsigned char c0 = (unsigned char)*str;
    int is_word = isalpha(c0);
    int is_num  = (c0 >= '0' && c0 <= '9');

    // Leaving `tok` untouched and reporting success would hand the caller
    // the previous token again, including a tok.str / tok.iden pointer it
    // has already freed.
    if(!is_word && !is_num) return NULL;

    char *text = substring(str, off);
    if(text == NULL) return NULL; // longer than substring()'s buffer

    if(is_word) { // iden or named symbol
        // Hold the lookup result in an int: comparing an enum that was
        // assigned -1 against -1 depends on the enum's implementation-defined
        // underlying type.
        // NOTE(review): assumes dict_check() returns an int-compatible value
        // with -1 meaning "not a reserved word" - confirm in dict.c.
        int sym = dict_check(text);
        if(sym == -1) {
            char *copy = strdup(text);
            if(copy == NULL) return NULL; // out of memory
            tok.sym = IDEN;
            tok.iden = copy;
        } else {
            tok.sym = sym;
        }
    } else { // num
        // strtol reports overflow by saturating, where atoi's behaviour is
        // undefined. The text starts with a digit, so the value is >= 0.
        long value = strtol(text, NULL, 10);
        tok.sym = NUM;
        tok.i = value > INT_MAX ? INT_MAX : (int)value;
    }

    return str+off;
}

int main(void)
{
    dict_compile();

    char *str = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher}  fdsfj\" here {therern{there{tok {wow} {";
    while((str = next_token(str)))
        switch(tok.sym) {
        case LPAREN: printf("{ "); break;
        case RPAREN: printf("} "); break;
        case STRING: printf("\"%s\" ", tok.str); free(tok.str); break;
        case IDEN:   printf("'%s' ", tok.iden); free(tok.iden); break;
        case NUM:    printf("%d ", tok.i); break;
        case HERE: printf("HERE "); break;
        case THERE: printf("THERE "); break;
        case WOW: printf("WOW "); break;
        case TEST: printf("TEST "); break;
        default: printf("WHAT??%d??", tok.sym); break;
        }

    printf("\n");

    dict_free();
    return 0;
}