6 files changed, 323 insertions, 53 deletions
diff --git a/demos/generate-parser.c b/demos/generate-parser.c
index 7856db6..fad3b93 100644
--- a/demos/generate-parser.c
+++ b/demos/generate-parser.c
@@ -66,7 +66,7 @@ char *add_extension(char *str, char *ext)
 void set_stdout(char *filename)
 {
     if(!filename) filename = "/dev/tty";
-    assert(freopen(filename, "a+", stdout));
+    FILE *out = freopen(filename, "w", stdout); assert(out); (void)out; // call kept outside assert(): with NDEBUG the whole assert expression, redirect included, is compiled away
 }
 
 int main(int argc, char **argv)
diff --git a/demos/lexer.c b/demos/lexer.c
new file mode 100644
index 0000000..a206066
--- /dev/null
+++ b/demos/lexer.c
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "dict.c"
+
+struct token { // one lexed token: its kind plus an optional payload
+    enum symbol {
+        LPAREN, RPAREN, STRING, IDEN, NUM, // kinds recognised directly by next_token()
+        HERE, THERE, WOW, TEST // keyword kinds, paired with their spellings in strings[]
+    } sym;
+
+    union { // payload; the member in use depends on sym
+        char *iden; // IDEN: strdup'd copy of the identifier, freed by the consumer
+        int i; // NUM: value parsed with atoi
+        char *str; // STRING: strdup'd copy of the text between the quotes, freed by the consumer
+    };
+};
+
+const struct string_token strings[] = { // keyword spelling -> symbol pairs; presumably read by dict.c (not visible here) -- confirm
+    {"here", HERE},
+    {"there", THERE},
+    {"wow", WOW},
+    {"test", TEST},
+};
+const size_t nstrings = sizeof(strings)/sizeof(*strings); // number of entries in strings[]
+
+const uint8_t char_to_bit[256] = { // byte value -> small index; presumably consumed by dict.c -- confirm
+    ['a'] = 2,  ['b'] = 3,  ['c'] = 4,  ['d'] = 5,  ['e'] = 6,  ['f'] = 7,
+    ['g'] = 8,  ['h'] = 9,  ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13,
+    ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19,
+    ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25,
+    ['y'] = 26, ['z'] = 27, [ 0 ] = 1,  [' '] = 1 // 'a'..'z' map to 2..27, NUL and space share 1
+}; // every byte not listed above is zero-initialised
+
+static struct token tok; // most recent token written by next_token(); read by main()
+
+static inline int issep(char c) // nonzero when c ends a word: whitespace, NUL, a brace or a double quote
+{
+    return c == '\0' || c == '{' || c == '}' || c == '"' || isspace((unsigned char)c); // cast: <ctype.h> functions are undefined for negative char values
+}
+
+static inline int tillsep(char *str) // number of chars before the first separator in str (0 when str starts on one)
+{
+    size_t len = 0;
+    while(!issep(str[len])) len++; // always stops: the terminating NUL counts as a separator
+    return len;
+}
+
+static inline char *substring(char *str, size_t sub_end) // copies the first sub_end chars of str into a static buffer; returns NULL when they do not fit
+{
+    static char sub[128]; // shared by every call: copy the result before calling again
+    if(sub_end >= sizeof(sub)) return NULL; // written without "+1" so a huge sub_end cannot wrap around and slip past the check
+    memcpy(sub, str, sub_end);
+    sub[sub_end] = '\0';
+    return sub;
+}
+
+static char *next_token(char *str) // lexes one token into `tok`; returns the position just past it, or NULL at end of input or on any lexing error
+{
+    while(isspace((unsigned char)*str)) str++; // skip whitespace; the cast keeps <ctype.h> defined for negative char values
+    if(*str == '\0') return NULL;
+    unsigned char c0 = (unsigned char)str[0];
+    size_t off = tillsep(str); // 0 when str starts on a separator
+    if(off == 0) { // sep
+        switch(str[off++]) {
+        case '{': tok.sym = LPAREN; break;
+        case '}': tok.sym = RPAREN; break;
+        case '"':
+            for(; str[off] != '"'; off++) if(str[off] == '\0') return NULL; // unterminated string; each char is tested before it is stepped over
+            off++; // step past the closing quote
+            tok.str = substring(str+1, off-2); // NULL when the literal does not fit substring()'s buffer
+            if(!tok.str || !(tok.str = strdup(tok.str))) return NULL;
+            tok.sym = STRING; break;
+        }
+    } else if(isalpha(c0)) { // iden or named symbol
+        char *text = substring(str, off);
+        if(!text) return NULL; // token does not fit substring()'s buffer
+        int kw = dict_check(text); // kept in an int: comparing the enum itself with -1 is not portable
+        if(kw != -1) tok.sym = kw;
+        else if((tok.iden = strdup(text))) tok.sym = IDEN;
+        else return NULL; // out of memory
+    } else if(c0 >= '0' && c0 <= '9') { // num
+        char *digits = substring(str, off);
+        if(!digits) return NULL;
+        tok.sym = NUM; tok.i = atoi(digits);
+    } else return NULL; // unrecognised token: stop rather than hand the caller a stale tok (whose pointer may already be freed)
+
+    return str+off;
+}
+
+int main(void) // lexes a fixed sample string and prints every token on one line
+{
+    dict_compile();
+
+    char *cursor = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher}  fdsfj\" here {therern{there{tok {wow} {";
+    for(cursor = next_token(cursor); cursor != NULL; cursor = next_token(cursor)) {
+        switch(tok.sym) {
+        case LPAREN: printf("{ "); break;
+        case RPAREN: printf("} "); break;
+        case STRING: printf("\"%s\" ", tok.str); free(tok.str); break; // payload was strdup'd by next_token()
+        case IDEN:   printf("'%s' ", tok.iden); free(tok.iden); break; // payload was strdup'd by next_token()
+        case NUM:    printf("%d ", tok.i); break;
+        case HERE:   printf("HERE "); break;
+        case THERE:  printf("THERE "); break;
+        case WOW:    printf("WOW "); break;
+        case TEST:   printf("TEST "); break;
+        default:     printf("WHAT??%d??", tok.sym); break;
+        }
+    }
+    printf("\n");
+
+    dict_free();
+    return 0;
+}
diff --git a/demos/sample-files/arithmetic-skeleton.c b/demos/sample-files/arithmetic-skeleton.c
new file mode 100644
index 0000000..ef5ec2f
--- /dev/null
+++ b/demos/sample-files/arithmetic-skeleton.c
@@ -0,0 +1,62 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lr-parser.c"
+#include "bin/generated.c"
+
+#include "parts/toklist.h"
+
+enum symbol { // grammar symbols used by this skeleton
+    PLUS = 0, // '+'
+    MINUS, // '-'
+    LPAREN, // '('
+    RPAREN, // ')'
+    N0, N1, // the characters '0' and '1'
+    END_INPUT, // the terminating NUL of the input string
+
+    EP, E, T, N, // never produced by char_to_token(); presumably the grammar's nonterminals -- confirm against the grammar definition
+    SYMBOLS_END, // last enumerator; presumably the symbol count -- confirm
+};
+
+struct token { // token handed to the parser
+    symbol s; // symbol kind; the `symbol` type is declared outside this file
+};
+
+static inline struct token *char_to_token(char c) // maps one input character to a token; prints an error and exits on anything else
+{
+    static struct token t; // single shared slot: each call overwrites the previous result
+
+    switch(c) {
+    case '+':  t.s = PLUS;      break;
+    case '-':  t.s = MINUS;     break;
+    case '(':  t.s = LPAREN;    break;
+    case ')':  t.s = RPAREN;    break;
+    case '0':  t.s = N0;        break;
+    case '1':  t.s = N1;        break;
+    case '\0': t.s = END_INPUT; break;
+    default: fprintf(stderr, "ERROR: Unknown character '%c'\n", c); exit(1);
+    }
+
+    return &t;
+}
+
+static char *input; // read cursor into the input string; main() points it at argv[1]
+
+symbol token_sym(struct token *t) { return t->s; } // symbol kind of a token
+int token_val(struct token *t) { return 0; } // tokens carry no value in this skeleton, so this is always 0
+struct token *toklist_eat() { return char_to_token(*(input++)); } // unsafe: returns the token at the cursor, then advances with nothing stopping it at the NUL
+struct token *toklist_peek() { return char_to_token(*input); }    // unsafe: returns the token at the cursor without advancing; result lives in char_to_token()'s shared static slot
+
+int main(int argc, char **argv) // runs the generated LR parser over argv[1] and prints the result
+{
+    if(argc != 2) {
+        fprintf(stderr, "ERROR: Expected exactly one argument, got %d\n", argc - 1); // argc != 2 also covers too many arguments, so "not enough" would mislead
+        return 1;
+    }
+
+    input = argv[1]; // toklist_eat()/toklist_peek() read through this cursor
+
+    printf("INPUT: '%s'\n", input);
+    printf("OUTPUT: %d\n", lr_parser()); // NOTE(review): calc-skeleton.c calls lr_parser(&value) -- confirm which signature lr-parser.c provides
+    return 0;
+}
diff --git a/demos/sample-files/calc-defs.c b/demos/sample-files/calc-defs.c
new file mode 100644
index 0000000..de1f705
--- /dev/null
+++ b/demos/sample-files/calc-defs.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+
+#include "parts/symbol.h"
+enum symbol { // grammar symbols for the calculator; terminals are declared first
+    PLUS = 0,
+    MINUS,
+    LPAREN,
+    RPAREN,
+    NUM,
+    END_INPUT, // the value symbol_is_input_end() tests for
+
+    EP, E, T, // nonterminals: symbol_is_terminal() is defined as s < EP
+    SYMBOLS_END, // one past the last symbol; also the initial value of total_symbols
+};
+
+size_t total_symbols = SYMBOLS_END; // number of symbols, terminals and nonterminals together
+
+IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } // everything declared before EP is a terminal
+IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } // true only for the end-of-input marker
+IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } // checks the upper bound only
+
+#include "parts/grammar.h"
+#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} // expands to {LHS, RHS array, RHS length}; the second argument (the arrow) is discarded
+static struct production _grammar[] = { // one entry per production; semantic_action_str holds one action per entry
+    PROD(EP, ->, E, END_INPUT),
+    PROD(E, -->, E, PLUS, T),
+    PROD(E, -->, E, MINUS, T),
+    PROD(E, -->, T),
+    PROD(T, -->, LPAREN, E, RPAREN),
+    PROD(T, -->, NUM),
+};
+
+struct production *grammar = _grammar; // non-static pointer to the table above
+size_t total_productions = sizeof(_grammar)/sizeof(*_grammar); // number of entries in _grammar
+
+// #include "???.h"
+char **semantic_action_str = (char *([])){ // one action per production, in _grammar order; A(n) presumably reads the value of the n-th right-hand-side symbol -- confirm in the generator
+    "v = A(0);", // EP -> E END_INPUT
+    "v = A(0) + A(2);", // E -> E PLUS T
+    "v = A(0) - A(2);", // E -> E MINUS T
+    "v = A(0);", // E -> T
+    "v = A(1);", // T -> LPAREN E RPAREN
+    "v = A(0);", // T -> NUM
+};
diff --git a/demos/sample-files/calc-skeleton.c b/demos/sample-files/calc-skeleton.c
new file mode 100644
index 0000000..29f181b
--- /dev/null
+++ b/demos/sample-files/calc-skeleton.c
@@ -0,0 +1,100 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "lr-parser.c"
+#include "bin/a.c"
+
+// from calc-defs.c
+#include "parts/symbol.h"
+enum symbol { // same symbols, in the same order, as the copy in calc-defs.c; presumably the two must stay in sync -- confirm
+    PLUS = 0,
+    MINUS,
+    LPAREN,
+    RPAREN,
+    NUM, // carries a value in tok.v
+    END_INPUT, // produced by next_token() when it reaches the terminating NUL
+
+    EP, E, T, // never produced by next_token()
+    SYMBOLS_END,
+};
+
+static struct token { // token handed to the parser
+    symbol s; // symbol kind
+    int v; // numeric value; next_token() sets it only for NUM
+} tok; // lookahead token: written by next_token(), returned by toklist_peek()
+
+static inline int issep(char c) // nonzero when c ends a number: whitespace, NUL, a parenthesis, '+' or '-'
+{
+    return c == '\0' || c == '(' || c == ')' || c == '+' || c == '-' || isspace((unsigned char)c); // cast: <ctype.h> functions are undefined for negative char values
+}
+
+static inline int tillsep(char *str) // number of chars before the first separator in str (0 when str starts on one)
+{
+    size_t len = 0;
+    while(!issep(str[len])) len++; // always stops: the terminating NUL counts as a separator
+    return len;
+}
+
+static inline char *substring(char *str, size_t sub_end) // copies the first sub_end chars of str into a static buffer; returns NULL when they do not fit
+{
+    static char sub[128]; // shared by every call: use the result before calling again
+    if(sub_end >= sizeof(sub)) return NULL; // written without "+1" so a huge sub_end cannot wrap around and slip past the check
+    memcpy(sub, str, sub_end);
+    sub[sub_end] = '\0';
+    return sub;
+}
+
+static char *next_token(char *str) // lexes one token into `tok`; returns the position to resume lexing from
+{
+    while(isspace((unsigned char)*str)) str++; // skip whitespace; the cast keeps <ctype.h> defined for negative char values
+    if(*str == '\0') { tok.s = END_INPUT; return str; } // stay on the NUL: later calls keep yielding END_INPUT instead of walking past the string
+
+    size_t off = tillsep(str); // 0 when str starts on a separator
+    if(off == 0) { // sep
+        switch(str[off++]) {
+        case '(': tok.s = LPAREN; break;
+        case ')': tok.s = RPAREN; break;
+        case '-': tok.s = MINUS; break;
+        case '+': tok.s = PLUS; break;
+        }
+        return str+off;
+    }
+
+    char *text = substring(str, off); // NULL when the token does not fit substring()'s buffer
+    if(!text || *str < '0' || *str > '9') { // not a number: fail loudly rather than hand the parser a stale tok
+        fprintf(stderr, "ERROR: Unknown token at '%s'\n", str);
+        exit(1);
+    }
+    tok.s = NUM; tok.v = atoi(text);
+    return str+off;
+}
+
+static char *input; // where lexing resumes: always the last value returned by next_token()
+
+symbol token_sym(struct token *t) { return t->s; } // symbol kind of a token
+int token_val(struct token *t) { return t->v; } // value of a token; next_token() sets it only for NUM
+
+struct token *toklist_eat() // hands out the current lookahead token and lexes the next one
+{
+    static struct token eaten; // static so the copy outlives the call
+    eaten = tok; // snapshot first: next_token() is about to overwrite tok
+    input = next_token(input);
+    return &eaten;
+}
+struct token *toklist_peek() { return &tok; } // returns the lookahead token without consuming it
+
+int main(int argc, char **argv) // parses argv[1] with lr_parser() and prints the resulting value
+{
+    int result;
+    char *expr = (argc == 2) ? argv[1] : NULL;
+    if(!expr) return 1; // exactly one argument is required
+
+    input = next_token(expr); // lex the first token so toklist_peek() has a lookahead
+    if(lr_parser(&result) != 0) return 1;
+
+    printf("INPUT: '%s'\n", expr);
+    printf("OUTPUT: %d\n", result);
+
+    return 0;
+}
diff --git a/demos/sample-files/parser-skeleton.c b/demos/sample-files/parser-skeleton.c
deleted file mode 100644
index f601369..0000000
--- a/demos/sample-files/parser-skeleton.c
+++ /dev/null
@@ -1,52 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "lr-parser.c"
-#include "bin/generated.c"
-
-#include "parts/toklist.h"
-
-enum symbol {
-    PLUS = 0,
-    MINUS,
-    LPAREN,
-    RPAREN,
-    N0, N1,
-    END_INPUT,
-
-    EP, E, T, N,
-    SYMBOLS_END,
-};
-
-static inline symbol char_to_token(char c)
-{
-    switch(c) {
-    case '+': return PLUS;
-    case '-': return MINUS;
-    case '(': return LPAREN;
-    case ')': return RPAREN;
-    case '0': return N0;
-    case '1': return N1;
-    case  0 : return END_INPUT;
-    default: fprintf(stderr, "ERROR: Unknown character '%c'\n", c); exit(1);
-    }
-}
-
-static char *input;
-
-symbol toklist_eat() { return char_to_token(*(input++)); } // unsafe
-symbol toklist_peek() { return char_to_token(*input); }    // unsafe
-
-int main(int argc, char **argv)
-{
-    if(argc != 2) {
-        fprintf(stderr, "ERROR: Not enough arguments\n");
-        return 1;
-    }
-
-    input = argv[1];
-
-    printf("INPUT: '%s'\n", input);
-    printf("OUTPUT: %d\n", lr_parser());
-    return 0;
-}