aboutsummaryrefslogtreecommitdiff
path: root/lexer.c
blob: 7ebf2e75effe46d4e7099d0282910e92b4655e4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

/* Kinds of token the lexer can report. */
enum symbol {
    LPAREN,
    RPAREN,
    STRING,
    IDEN,
    NUM
};

/*
 * A single lexed token: its kind plus an optional payload.
 * The payload members share storage, so only the one matching
 * sym is meaningful at any time.
 */
struct token {
    enum symbol sym;    /* which kind of token this is */

    union {
        char *iden;     /* identifier text, for IDEN tokens */
        int i;          /* numeric value, for NUM tokens */
        char *str;      /* string text, for STRING tokens */
    };
};

/* Most recently lexed token; filled in by next_token() and read by main(). */
static struct token tok;

/*
 * Reports whether c is a separator: any whitespace character, the
 * terminating NUL, a brace ('{' or '}'), or a double quote.
 *
 * Returns nonzero for a separator, 0 otherwise.
 */
static inline int issep(char c)
{
    /*
     * The <ctype.h> functions are only defined for values representable
     * as unsigned char (or EOF); a plain char may be negative, so convert
     * before classifying.
     */
    return isspace((unsigned char)c) || c == '\0' || c == '}' || c == '{' || c == '"';
}
    
/*
 * Counts the characters at the start of str that precede the first
 * separator (as decided by issep). The NUL terminator is itself a
 * separator, so the scan always stops at the end of the string.
 *
 * Returns that count; 0 means str begins with a separator.
 */
static inline int tillsep(char *str)
{
    size_t len = 0;

    while (!issep(str[len])) {
        len++;
    }

    return len;
}

/*
 * Copies the first sub_end characters of str into a static buffer and
 * NUL-terminates the copy.
 *
 * Returns a pointer to the static buffer, whose contents are overwritten
 * by the next call, or NULL if sub_end characters plus the terminator do
 * not fit in the buffer.
 */
static inline char *substring(char *str, size_t sub_end)
{
    static char sub[128];

    /* Need room for sub_end characters and the terminating NUL. */
    if (sub_end >= sizeof(sub)) return NULL;

    memcpy(sub, str, sub_end);
    /*
     * Terminate directly after the copied characters; terminating one
     * slot later would leave a stale byte from an earlier call inside
     * the result and could write past the end of the buffer.
     */
    sub[sub_end] = '\0';
    return sub;
}

/*
 * Heap-copies the first len characters of start.
 * Returns NULL if substring() rejects the span or the allocation fails.
 */
static char *dup_span(char *start, size_t len)
{
    char *text = substring(start, len);

    return text ? strdup(text) : NULL;
}

/*
 * Lexes one token from the front of str and stores it in the file-scope
 * tok.
 *
 * Leading whitespace is skipped. '{' and '}' yield LPAREN and RPAREN.
 * A double quote starts a STRING that runs to the next double quote;
 * tok.str receives the text between the quotes. A word starting with a
 * letter yields IDEN (tok.iden) and one starting with a digit yields NUM
 * (tok.i); both extend up to the next separator. tok.str and tok.iden
 * are allocated with strdup and must be freed by the caller.
 *
 * Returns a pointer just past the consumed token, or NULL when the input
 * is exhausted or no token can be produced: an unterminated string, a
 * word starting with an unrecognised character, a token that substring()
 * rejects, or an allocation failure. tok is only updated when a token is
 * returned.
 */
static char *next_token(char *str)
{
    size_t off;
    char *text;

    /* Skip leading whitespace in a loop rather than recursing per character. */
    while (isspace((unsigned char)*str)) {
        str++;
    }
    if (*str == '\0') {
        return NULL;
    }

    off = tillsep(str);

    if (off == 0) {
        /* str[0] is a separator; whitespace and NUL were handled above. */
        switch (str[0]) {
        case '{':
            tok.sym = LPAREN;
            return str + 1;
        case '}':
            tok.sym = RPAREN;
            return str + 1;
        case '"': {
            char *body = str + 1;   /* text starts after the opening quote */
            size_t len = 0;

            while (body[len] != '"') {
                if (body[len] == '\0') return NULL;   /* unterminated string */
                len++;
            }

            text = dup_span(body, len);
            if (text == NULL) return NULL;

            tok.sym = STRING;
            tok.str = text;
            /* Step over the closing quote so it does not open a new string. */
            return body + len + 1;
        }
        default:
            return NULL;
        }
    }

    if (isalpha((unsigned char)str[0])) {           /* identifier */
        text = dup_span(str, off);
        if (text == NULL) return NULL;

        tok.sym = IDEN;
        tok.iden = text;
    } else if (isdigit((unsigned char)str[0])) {    /* number */
        text = substring(str, off);
        if (text == NULL) return NULL;

        tok.sym = NUM;
        tok.i = atoi(text);
    } else {
        /*
         * Unrecognised leading character: stop instead of reporting the
         * previous token again, whose text the caller may already have
         * freed.
         */
        return NULL;
    }

    return str + off;
}

/*
 * Demo driver: lexes a fixed sample string token by token and prints each
 * token followed by a space, releasing the heap text of STRING and IDEN
 * tokens once printed. Ends the output with a newline.
 */
int main(void)
{
    char *cursor = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher}  fdsfj\" here {  {tok {";

    for (cursor = next_token(cursor); cursor != NULL; cursor = next_token(cursor)) {
        if (tok.sym == LPAREN) {
            fputs("{ ", stdout);
        } else if (tok.sym == RPAREN) {
            fputs("} ", stdout);
        } else if (tok.sym == STRING) {
            printf("\"%s\" ", tok.str);
            free(tok.str);
        } else if (tok.sym == IDEN) {
            printf("'%s' ", tok.iden);
            free(tok.iden);
        } else if (tok.sym == NUM) {
            printf("%d ", tok.i);
        }
    }

    putchar('\n');
    return 0;
}