aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbuild.sh4
-rw-r--r--clr-table.c56
-rw-r--r--demos/generate-parser.c12
-rw-r--r--demos/sample-files/parser-skeleton.c30
-rw-r--r--lr-parser.c11
-rw-r--r--slr-table.c3
6 files changed, 81 insertions, 35 deletions
diff --git a/build.sh b/build.sh
index 7b39c8c..ac98b68 100755
--- a/build.sh
+++ b/build.sh
@@ -16,7 +16,7 @@ function shared
function leak
{
- valgrind --leak-check=full --show-leak-kinds=all -s bin/$1
+ valgrind --leak-check=full --show-leak-kinds=all -s bin/$1 $2
}
# cc lexer -D_LEXER_STANDALONE
@@ -46,4 +46,4 @@ shared demos/sample-files/defs
leak "generate-parser bin/defs.so" > bin/generated.c
cc demos/sample-files/parser-skeleton # this includes bin/generated.c
-leak parser-skeleton
+leak parser-skeleton "0-1+(1+0)-1+0"
diff --git a/clr-table.c b/clr-table.c
index 82f553d..23345ec 100644
--- a/clr-table.c
+++ b/clr-table.c
@@ -1,5 +1,9 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
+
+// TODO: idk but a good parser should just exit(1)
+// like in itemset_handle()
#ifndef XCALLOC_IMPLEMENTED
#define XCALLOC_IMPLEMENTED
@@ -34,9 +38,9 @@ struct item {
symbol lookahead;
};
-#ifndef _LAZY_LALR
static int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx && i1->lookahead == i2->lookahead) ? 1 : 0; }
-#else
+
+#ifdef _LAZY_LALR
static int item_core_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx) ? 1 : 0; }
#endif
@@ -63,27 +67,31 @@ static void itemset_print(struct item *set, size_t nset)
static size_t itemset_handle(struct item *set, size_t nset)
{
+#ifdef _LAZY_LALR
+ int use_state = SIZE_MAX;
+#endif
+
// 1. is set in seen_sets
for(size_t i = 0; i < nseen_sets; i++) {
-#ifndef _LAZY_LALR
- if(seen_sets[i].nitems != nset) continue;
-
- int _seen = 0;
- for(size_t j = 0; j < nset; j++) {
- _seen = 0;
- for(size_t k = 0; k < nset; k++)
- if(item_eq(&seen_sets[i].items[k], &set[j])) _seen = 1;
- if(!_seen) break;
+ if(seen_sets[i].nitems == nset) {
+ int _seen = 0;
+ for(size_t j = 0; j < nset; j++) {
+ _seen = 0;
+ for(size_t k = 0; k < nset; k++)
+ if(item_eq(&seen_sets[i].items[k], &set[j])) _seen = 1;
+ if(!_seen) break;
+ }
+ if(_seen) return seen_sets[i].state;
}
- if(_seen) return seen_sets[i].state;
-#else
+
+#ifdef _LAZY_LALR
int _same_core = 1;
for(size_t j = 0; j < nset; j++) {
for(size_t k = 0; k < seen_sets[i].nitems; k++)
if(!item_core_eq(&seen_sets[i].items[k], &set[j])) _same_core = 0;
if(!_same_core) break;
}
- if(_same_core) return seen_sets[i].state;
+ if(_same_core) use_state = seen_sets[i].state;
#endif
}
@@ -99,7 +107,11 @@ static size_t itemset_handle(struct item *set, size_t nset)
seen_sets[nseen_sets].items[i] = set[i];
// 3. insert new state
+#ifdef _LAZY_LALR
+ size_t new_state = seen_sets[nseen_sets++].state = (use_state != SIZE_MAX) ? use_state : table_states++;
+#else
size_t new_state = seen_sets[nseen_sets++].state = table_states++;
+#endif
if(new_state >= TABLE_CAP) {
fprintf(stderr, "ERROR: TABLE_CAP exceeded\n");
exit(1);
@@ -218,7 +230,12 @@ cleanup:
static int table_insert(size_t state, symbol sym, struct action a)
{
- if(table[state][sym].type != ACTION_NOT_SET) {
+ if(table[state][sym].type != ACTION_NOT_SET)
+#ifdef _LAZY_LALR
+ if(table[state][sym].type != a.type &&
+ table[state][sym].arg != a.arg)
+#endif
+ {
fprintf(stderr, "TABLE COLLISION on state '%zu' sym '%d'\n", state, sym);
fprintf(stderr, "\t{%d %zu} vs {%d %zu}\n",
table[state][sym].type, table[state][sym].arg,
@@ -301,6 +318,11 @@ int main(void)
table_fill();
table_print();
+ for(size_t i = 0; i < nseen_sets; i++)
+ {
+ printf("\nSTATE: %zu\n", i);
+ itemset_print(seen_sets[i].items, seen_sets[i].nitems);
+ }
table_free();
util_tables_free();
@@ -332,8 +354,8 @@ int main(void)
* +---+------------------------+
* | 0 | s1 s2 g4 g5 |
* | 1 | s1 s2 g3 |
- * | 2 | r3 r3 |
- * | 3 | r2 r2 |
+ * | 2 | r3 r3 r3 |
+ * | 3 | r2 r2 r2 |
* | 4 | a |
* | 5 | s1 s2 g6 |
* | 6 | r1 |
diff --git a/demos/generate-parser.c b/demos/generate-parser.c
index fc022be..8d50ad6 100644
--- a/demos/generate-parser.c
+++ b/demos/generate-parser.c
@@ -16,20 +16,19 @@ size_t total_productions;
#include "util-tables.c"
-// _LAZY_LALR on clr does not work??
-#include "slr-table.c"
+#define _LAZY_LALR
+#include "clr-table.c"
void *xdlsym(void *handle, char *sym)
{
void *r = dlsym(handle, sym);
if(!r) {
- fprintf(stderr, "ERROR: Symbol \"%s\" was not found\n", sym);
+ fprintf(stderr, "ERROR: Symbol '%s' was not found\n", sym);
exit(1);
}
return r;
}
-
#define GET_VARIABLE(var, handle) \
var = *(typeof(&var))xdlsym(handle, #var)
@@ -46,12 +45,11 @@ int main(int argc, char **argv)
GET_VARIABLE(symbol_is_valid, handle);
GET_VARIABLE(grammar, handle);
GET_VARIABLE(total_productions, handle);
-
util_tables_fill();
- table_fill() && (exit(1), 1);
+ table_fill() && (fprintf(stderr, "ERROR: Table couldn't be generated\n"), exit(1), 1);
printf("size_t total_symbols = %zu;\n", total_symbols);
- printf("IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s), {return s >= 0 && s < total_symbols;})\n");
+ printf("IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s), {return s < total_symbols;})\n");
printf("struct production _grammar[] = {\n");
grammar_print_cstyle();
diff --git a/demos/sample-files/parser-skeleton.c b/demos/sample-files/parser-skeleton.c
index facbc1b..031d829 100644
--- a/demos/sample-files/parser-skeleton.c
+++ b/demos/sample-files/parser-skeleton.c
@@ -18,13 +18,33 @@ enum symbol {
SYMBOLS_END,
};
-static symbol toklist[] = {N0, PLUS, N1, END_INPUT};
-static symbol *tok = toklist;
+static inline symbol char_to_token(char c)
+{
+ switch(c) {
+ case '+': return PLUS;
+ case '-': return MINUS;
+ case '(': return LPAREN;
+ case ')': return RPAREN;
+ case '0': return N0;
+ case '1': return N1;
+ case 0 : return END_INPUT;
+ default: fprintf(stderr, "ERROR: Unknown character '%c'\n", c); exit(1);
+ }
+}
-symbol toklist_eat() { return *(tok++); } // unsafe
-symbol toklist_peek() { return *tok; } // unsafe
+static char *input;
-int main(void)
+symbol toklist_eat() { return char_to_token(*(input++)); } // unsafe
+symbol toklist_peek() { return char_to_token(*input); } // unsafe
+
+int main(int argc, char **argv)
{
+ if(argc != 2) {
+ fprintf(stderr, "ERROR: Not enough arguments\n");
+ return 1;
+ }
+
+ input = argv[1];
+
return lr_parser();
}
diff --git a/lr-parser.c b/lr-parser.c
index 9f7ea24..3ab9af4 100644
--- a/lr-parser.c
+++ b/lr-parser.c
@@ -18,10 +18,13 @@ static stack_item *stack_head = stack_bottom;
int lr_parser()
{
-#define push(item) ({ if(++stack_head - stack_bottom < STACK_CAP ) (*stack_head = item); else { fprintf(stderr, "ERROR: STACK_CAP exceeded"); return 1; }})
+#define push(item) ({ if(++stack_head - stack_bottom < STACK_CAP ) (*stack_head = item); else { fprintf(stderr, "ERROR: STACK_CAP exceeded\n"); return 1; }})
#define pop() (--stack_head)
#define eat() toklist_eat()
-#define peek() ({ symbol s = toklist_peek(); if(!symbol_is_valid(s)) return 1; s; })
+#define peek() ({ \
+ symbol s = toklist_peek(); \
+ if(!symbol_is_valid(s)) { fprintf(stderr, "ERROR: Unknown token '%d'\n", s); return 1;} \
+ s; })
while(1) {
struct action a = table[(size_t)*stack_head][peek()];
@@ -38,7 +41,7 @@ int lr_parser()
struct action a_goto = table[(size_t)*stack_head][lhs];
if(a_goto.type != ACTION_GOTO) {
fprintf(stderr,
- "ERROR: Expected goto action for symbol %d at state %zu",
+ "ERROR: Expected goto action for token '%d' at state %zu\n",
lhs, (size_t)*stack_head);
return 1;
}
@@ -51,7 +54,7 @@ int lr_parser()
case ACTION_NOT_SET:
default:
fprintf(stderr,
- "ERROR: Unexpected error symbol %d at state %zu",
+ "ERROR: Unexpected token '%d' at state %zu\n",
peek(), (size_t)*stack_head);
return 1;
}
diff --git a/slr-table.c b/slr-table.c
index 9bcdc3e..fde8e78 100644
--- a/slr-table.c
+++ b/slr-table.c
@@ -1,6 +1,9 @@
#include <stdio.h>
#include <stdlib.h>
+// TODO: idk but a good parser shouldn't just exit(1)
+// like in itemset_handle()
+
#ifndef XCALLOC_IMPLEMENTED
#define XCALLOC_IMPLEMENTED
void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); }