aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbuild.sh51
-rw-r--r--demos/generate-parser.c2
-rw-r--r--demos/lexer.c (renamed from lexer.c)57
-rw-r--r--demos/sample-files/arithmetic-skeleton.c62
-rw-r--r--demos/sample-files/calc-defs.c44
-rw-r--r--demos/sample-files/calc-skeleton.c100
-rw-r--r--demos/sample-files/parser-skeleton.c52
-rw-r--r--lr-parser.c60
-rw-r--r--parts/table.h2
-rw-r--r--parts/toklist.h8
10 files changed, 329 insertions, 109 deletions
diff --git a/build.sh b/build.sh
index 91c7fdc..9f1899b 100755
--- a/build.sh
+++ b/build.sh
@@ -27,40 +27,53 @@ function leak
log valgrind --leak-check=full --show-leak-kinds=all -s bin/$1 $2
}
-cc dict -D_DICT_STANDALONE
-cc lexer -D_LEXER_STANDALONE
-# cc recursive/recursive-ascent
-# cc recursive/recursive-ascent-descent
-# cc util-tables -D_UTIL_TABLES_STANDALONE
-# cc slr-table -D_SLR_TABLE_STANDALONE
-# cc clr-table -D_CLR_TABLE_STANDALONE
-# cc clr-table "-D_CLR_TABLE_STANDALONE -D_LAZY_LALR" lalr-table
-# cc lr-parser -D_LR_PARSER_STANDALONE
-# cc demos/instant-parser
+# cc dict -D_DICT_STANDALONE
+# leak dict
+# cc demos/lexer -D_LEXER_STANDALONE
+# leak lexer
-leak lexer
-leak dict
# leak recursive-ascent
+# cc recursive/recursive-ascent
+
# leak recursive-ascent-descent
+# cc recursive/recursive-ascent-descent
+
+# cc util-tables -D_UTIL_TABLES_STANDALONE
# leak util-tables
+
+# cc slr-table -D_SLR_TABLE_STANDALONE
# leak slr-table
+
+# cc clr-table -D_CLR_TABLE_STANDALONE
# leak clr-table
+
+# cc clr-table "-D_CLR_TABLE_STANDALONE -D_LAZY_LALR" lalr-table
# leak lalr-table
+
+# cc lr-parser -D_LR_PARSER_STANDALONE
# leak lr-parser
-# leak instant-parser
+
+# cc demos/instant-parser # not working
+# leak instant-parser # not working
#--------------------------------------------------------------------------------------------------#
cc demos/generate-parser "-rdynamic"
-shared demos/sample-files/lalr-defs
-shared demos/sample-files/arithmetic-defs
-
shared slr-table
shared clr-table
shared clr-table -D_LAZY_LALR lalr-table
+shared demos/sample-files/lalr-defs
+
+# --- Arithemitc example ---
+# shared demos/sample-files/arithmetic-defs
+# leak "generate-parser -t lalr-table bin/arithmetic-defs.so"
+# cc demos/sample-files/arithmetic-skeleton "" parser
+# leak parser "0-1+(1+0)-1+0"
-leak "generate-parser -t lalr-table bin/arithmetic-defs.so"
-cc demos/sample-files/parser-skeleton "" parser # this includes bin/generated.c
-leak parser "0-1+(1+0)-1+0"
+# --- Calc example ---
+shared demos/sample-files/calc-defs
+leak "generate-parser -t lalr-table bin/calc-defs.so"
+cc demos/sample-files/calc-skeleton "" parser
+leak parser "4237-100+(17498+70)-1321+82910"
diff --git a/demos/generate-parser.c b/demos/generate-parser.c
index 7856db6..fad3b93 100644
--- a/demos/generate-parser.c
+++ b/demos/generate-parser.c
@@ -66,7 +66,7 @@ char *add_extension(char *str, char *ext)
void set_stdout(char *filename)
{
if(!filename) filename = "/dev/tty";
- assert(freopen(filename, "a+", stdout));
+ assert(freopen(filename, "w", stdout));
}
int main(int argc, char **argv)
diff --git a/lexer.c b/demos/lexer.c
index 7ebf2e7..a206066 100644
--- a/lexer.c
+++ b/demos/lexer.c
@@ -3,11 +3,14 @@
#include <ctype.h>
#include <string.h>
+#include "dict.c"
+
struct token {
enum symbol {
- LPAREN, RPAREN, STRING, IDEN, NUM
+ LPAREN, RPAREN, STRING, IDEN, NUM,
+ HERE, THERE, WOW, TEST
} sym;
-
+
union {
char *iden;
int i;
@@ -15,13 +18,29 @@ struct token {
};
};
+const struct string_token strings[] = {
+ {"here", HERE},
+ {"there", THERE},
+ {"wow", WOW},
+ {"test", TEST},
+};
+const size_t nstrings = sizeof(strings)/sizeof(*strings);
+
+const uint8_t char_to_bit[256] = {
+ ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7,
+ ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13,
+ ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19,
+ ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25,
+ ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1
+};
+
static struct token tok;
static inline int issep(char c)
{
return isspace(c) || c == '\0' || c == '}' || c == '{' || c == '"';
}
-
+
static inline int tillsep(char *str)
{
size_t i = 0;
@@ -33,8 +52,8 @@ static inline char *substring(char *str, size_t sub_end)
{
static char sub[128];
if(sub_end+1 > sizeof(sub)) return NULL;
-
- sub[sub_end+1] = '\0';
+
+ sub[sub_end] = '\0';
return memcpy(sub, str, sub_end);
}
@@ -52,25 +71,30 @@ static char *next_token(char *str)
case '{': tok.sym = LPAREN; break;
case '}': tok.sym = RPAREN; break;
case '"':
- while(str[off] != '"') if(str[off++] == '\0') return NULL;
+ while(str[off++] != '"') if(str[off] == '\0') return NULL;
tok.sym = STRING;
- tok.str = strdup(substring(str, off));
+ tok.str = strdup(substring(str+1, off-2));
+ }
+ } else if(isalpha(c0)) { // iden or named symbol
+ char *substr = substring(str, off);
+ if((tok.sym = dict_check(substr)) == -1) {
+ tok.sym = IDEN;
+ tok.iden = strdup(substr);
}
- } else if(isalpha(c0)) { // iden
- tok.sym = IDEN;
- tok.iden = strdup(substring(str, off));
} else if(c0 >= '0' && c0 <= '9') { // num
tok.sym = NUM;
tok.i = atoi(substring(str, off));
}
}
-
+
return str+off;
}
int main(void)
{
- char *str = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher} fdsfj\" here { {tok {";
+ dict_compile();
+
+ char *str = "blah 0 1 443 test{here}13}{1\"fdlkfjakl{fher} fdsfj\" here {therern{there{tok {wow} {";
while((str = next_token(str)))
switch(tok.sym) {
case LPAREN: printf("{ "); break;
@@ -78,8 +102,15 @@ int main(void)
case STRING: printf("\"%s\" ", tok.str); free(tok.str); break;
case IDEN: printf("'%s' ", tok.iden); free(tok.iden); break;
case NUM: printf("%d ", tok.i); break;
+ case HERE: printf("HERE "); break;
+ case THERE: printf("THERE "); break;
+ case WOW: printf("WOW "); break;
+ case TEST: printf("TEST "); break;
+ default: printf("WHAT??%d??", tok.sym); break;
}
-
+
printf("\n");
+
+ dict_free();
return 0;
}
diff --git a/demos/sample-files/arithmetic-skeleton.c b/demos/sample-files/arithmetic-skeleton.c
new file mode 100644
index 0000000..ef5ec2f
--- /dev/null
+++ b/demos/sample-files/arithmetic-skeleton.c
@@ -0,0 +1,62 @@
+#include <stdio.h>ae
+#include <stdlib.h>
+
+#include "lr-parser.c"
+#include "bin/generated.c"
+
+#include "parts/toklist.h"
+
+enum symbol {
+ PLUS = 0,
+ MINUS,
+ LPAREN,
+ RPAREN,
+ N0, N1,
+ END_INPUT,
+
+ EP, E, T, N,
+ SYMBOLS_END,
+};
+
+struct token {
+ symbol s;
+};
+
+static inline struct token *char_to_token(char c)
+{
+ static struct token t;
+
+ switch(c) {
+ case '+': t = (struct token){PLUS}; break;
+ case '-': t = (struct token){MINUS}; break;
+ case '(': t = (struct token){LPAREN}; break;
+ case ')': t = (struct token){RPAREN}; break;
+ case '0': t = (struct token){N0}; break;
+ case '1': t = (struct token){N1}; break;
+ case 0 : t = (struct token){END_INPUT}; break;
+ default: fprintf(stderr, "ERROR: Unknown character '%c'\n", c); exit(1);
+ }
+
+ return &t;
+}
+
+static char *input;
+
+symbol token_sym(struct token *t) { return t->s; }
+int token_val(struct token *t) { return 0; }
+struct token *toklist_eat() { return char_to_token(*(input++)); } // unsafe
+struct token *toklist_peek() { return char_to_token(*input); } // unsafe
+
+int main(int argc, char **argv)
+{
+ if(argc != 2) {
+ fprintf(stderr, "ERROR: Not enough arguments\n");
+ return 1;
+ }
+
+ input = argv[1];
+
+ printf("INPUT: '%s'\n", input);
+ printf("OUTPUT: %d\n", lr_parser());
+ return 0;
+}
diff --git a/demos/sample-files/calc-defs.c b/demos/sample-files/calc-defs.c
new file mode 100644
index 0000000..de1f705
--- /dev/null
+++ b/demos/sample-files/calc-defs.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+
+#include "parts/symbol.h"
+enum symbol {
+ PLUS = 0,
+ MINUS,
+ LPAREN,
+ RPAREN,
+ NUM,
+ END_INPUT,
+
+ EP, E, T,
+ SYMBOLS_END,
+};
+
+size_t total_symbols = SYMBOLS_END;
+
+IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; }
+IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; }
+IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; }
+
+#include "parts/grammar.h"
+#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
+static struct production _grammar[] = {
+ PROD(EP, ->, E, END_INPUT),
+ PROD(E, -->, E, PLUS, T),
+ PROD(E, -->, E, MINUS, T),
+ PROD(E, -->, T),
+ PROD(T, -->, LPAREN, E, RPAREN),
+ PROD(T, -->, NUM),
+};
+
+struct production *grammar = _grammar;
+size_t total_productions = sizeof(_grammar)/sizeof(*_grammar);
+
+// #include "???.h"
+char **semantic_action_str = (char *([])){
+ "v = A(0);",
+ "v = A(0) + A(2);",
+ "v = A(0) - A(2);",
+ "v = A(0);",
+ "v = A(1);",
+ "v = A(0);",
+};
diff --git a/demos/sample-files/calc-skeleton.c b/demos/sample-files/calc-skeleton.c
new file mode 100644
index 0000000..29f181b
--- /dev/null
+++ b/demos/sample-files/calc-skeleton.c
@@ -0,0 +1,100 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "lr-parser.c"
+#include "bin/a.c"
+
+// from calc-defs.c
+#include "parts/symbol.h"
+enum symbol {
+ PLUS = 0,
+ MINUS,
+ LPAREN,
+ RPAREN,
+ NUM,
+ END_INPUT,
+
+ EP, E, T,
+ SYMBOLS_END,
+};
+
+static struct token {
+ symbol s;
+ int v;
+} tok;
+
+static inline int issep(char c)
+{
+ return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-';
+}
+
+static inline int tillsep(char *str)
+{
+ size_t i = 0;
+ while(!issep(str[i++]));
+ return i-1;
+}
+
+static inline char *substring(char *str, size_t sub_end)
+{
+ static char sub[128];
+ if(sub_end+1 > sizeof(sub)) return NULL;
+
+ sub[sub_end] = '\0';
+ return memcpy(sub, str, sub_end);
+}
+
+static char *next_token(char *str)
+{
+ size_t off = 0;
+ char c0 = str[0];
+
+ if(c0 == '\0') tok.s = END_INPUT;
+ if(isspace(c0)) return next_token(str+1);
+ else {
+ off = tillsep(str);
+ if(off == 0) { // sep
+ switch(str[off++]) {
+ case '(': tok.s = LPAREN; break;
+ case ')': tok.s = RPAREN; break;
+ case '-': tok.s = MINUS; break;
+ case '+': tok.s = PLUS; break;
+ }
+ } else if(c0 >= '0' && c0 <= '9') { // num
+ tok.s = NUM;
+ tok.v = atoi(substring(str, off));
+ }
+ }
+
+ return str+off;
+}
+
+static char *input;
+
+symbol token_sym(struct token *t) { return t->s; }
+int token_val(struct token *t) { return t->v; }
+
+struct token *toklist_eat()
+{
+ static struct token t;
+ t = tok;
+ input = next_token(input);
+ return &t;
+}
+struct token *toklist_peek() { return &tok; }
+
+int main(int argc, char **argv)
+{
+ if(argc != 2) return 1;
+
+ input = next_token(argv[1]);
+
+ int value;
+ if(lr_parser(&value)) return 1;
+
+ printf("INPUT: '%s'\n", argv[1]);
+ printf("OUTPUT: %d\n", value);
+
+ return 0;
+}
diff --git a/demos/sample-files/parser-skeleton.c b/demos/sample-files/parser-skeleton.c
deleted file mode 100644
index f601369..0000000
--- a/demos/sample-files/parser-skeleton.c
+++ /dev/null
@@ -1,52 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "lr-parser.c"
-#include "bin/generated.c"
-
-#include "parts/toklist.h"
-
-enum symbol {
- PLUS = 0,
- MINUS,
- LPAREN,
- RPAREN,
- N0, N1,
- END_INPUT,
-
- EP, E, T, N,
- SYMBOLS_END,
-};
-
-static inline symbol char_to_token(char c)
-{
- switch(c) {
- case '+': return PLUS;
- case '-': return MINUS;
- case '(': return LPAREN;
- case ')': return RPAREN;
- case '0': return N0;
- case '1': return N1;
- case 0 : return END_INPUT;
- default: fprintf(stderr, "ERROR: Unknown character '%c'\n", c); exit(1);
- }
-}
-
-static char *input;
-
-symbol toklist_eat() { return char_to_token(*(input++)); } // unsafe
-symbol toklist_peek() { return char_to_token(*input); } // unsafe
-
-int main(int argc, char **argv)
-{
- if(argc != 2) {
- fprintf(stderr, "ERROR: Not enough arguments\n");
- return 1;
- }
-
- input = argv[1];
-
- printf("INPUT: '%s'\n", input);
- printf("OUTPUT: %d\n", lr_parser());
- return 0;
-}
diff --git a/lr-parser.c b/lr-parser.c
index 0d244fd..ad7dae5 100644
--- a/lr-parser.c
+++ b/lr-parser.c
@@ -20,23 +20,25 @@ typedef int stack_item;
static stack_item stack_bottom[STACK_CAP];
static stack_item *stack_head = stack_bottom;
-int lr_parser()
+int lr_parser(int *value)
{
-#define push(item) ({ if(++stack_head - stack_bottom < STACK_CAP ) (*stack_head = item); else { fprintf(stderr, "ERROR: STACK_CAP exceeded\n"); return 1; }})
+#define push(item) do { \
+ if(++stack_head - stack_bottom < STACK_CAP ) *stack_head = item; \
+ else { fprintf(stderr, "ERROR: STACK_CAP exceeded\n"); return 1; } \
+ } while(0)
#define pop() (--stack_head)
#define eat() toklist_eat()
-#define peek() ({ \
- symbol s = toklist_peek(); \
- if(!symbol_is_valid(s)) { fprintf(stderr, "ERROR: Unknown token '%d'\n", s); return 1;} \
- s; })
+#define peek() toklist_peek()
while(1) {
- struct action a = table[(size_t)*stack_head][peek()];
+ struct action a = table[(size_t)*stack_head][token_sym(peek())];
switch(a.type) {
- case ACTION_SHIFT:
- push(eat());
- push(-10000000); // the semantic action
+ case ACTION_SHIFT:;
+ struct token *t = eat();
+
+ push(token_sym(t));
+ push(token_val(t));
push(a.arg);
break;
case ACTION_REDUCE:
@@ -57,12 +59,13 @@ int lr_parser()
push(a_goto.arg);
break;
case ACTION_ACCEPT:
- return *(stack_head-1);
+ *value = *(stack_head-1);
+ return 0;
case ACTION_NOT_SET:
default:
fprintf(stderr,
- "ERROR: Unexpected token '%d' at state %zu\n",
- peek(), (size_t)*stack_head);
+ "ERROR: Unexpected symbol '%d' at state %zu\n",
+ token_sym(peek()), (size_t)*stack_head);
return 1;
}
}
@@ -124,19 +127,38 @@ struct action **table = (struct action *([])){
size_t table_states = 13;
-// implement toklist
-static symbol toklist[] = {N0, PLUS, N1, END_INPUT};
-static symbol *tok = toklist;
+// implement toklist.h
+struct token {
+ symbol s;
+ int v;
+};
+
+static struct token toklist[] = {{N0}, {PLUS}, {N1}, {END_INPUT}};
+static const size_t ntoklist = sizeof(toklist)/sizeof(*toklist);
-symbol toklist_eat() { return *(tok++); } // unsafe
-symbol toklist_peek() { return *tok; } // unsafe
+static size_t tok;
+
+struct token *toklist_eat()
+{
+ if(tok == ntoklist) return toklist + ntoklist-1;
+ return toklist + tok++;
+}
+struct token *toklist_peek() { return toklist + tok; }
+
+symbol token_sym(struct token *t) { return t->s; }
+int token_val(struct token *t) { return t->v; }
int none(int *stack_head) {(void)stack_head; return 0;}
semantic_action_fn *semantic_actions = (semantic_action_fn[]){none, none, none, none, none, none, none, none};
int main(void)
{
- return lr_parser();
+ int value;
+
+ if(lr_parser(&value)) return 1;
+ printf("%d\n", value);
+
+ return 0;
}
#endif
diff --git a/parts/table.h b/parts/table.h
index 486466f..fc63488 100644
--- a/parts/table.h
+++ b/parts/table.h
@@ -12,7 +12,7 @@ extern struct action {
extern size_t table_states;
-extern int (*table_fill)();
+extern int (*table_fill)(); // non-zero on fail
extern void (*table_free)();
void table_print();
diff --git a/parts/toklist.h b/parts/toklist.h
index f32cd25..760846f 100644
--- a/parts/toklist.h
+++ b/parts/toklist.h
@@ -5,10 +5,10 @@
struct token;
-// /*extern*/ struct token *toklist_eat();
-// /*extern*/ struct token *toklist_peek();
+symbol token_sym(struct token *t); // UB for NULL
+int token_val(struct token *t); // UB for NULL
-symbol toklist_eat();
-symbol toklist_peek();
+struct token *toklist_eat(); // always non-NULL
+struct token *toklist_peek(); // always non-NULL
#endif