aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkartofen <mladenovnasko0@gmail.com>2025-07-02 22:55:08 +0300
committerkartofen <mladenovnasko0@gmail.com>2025-07-02 22:55:08 +0300
commit5064a7ebce75a26d0405c92040f1a40187fcc7e3 (patch)
tree7a41182cf329e77ebb760969e3f220f60079c187
parenta67266ff72280b85fed7ec498967a855a5735639 (diff)
turn clr into lalr and first steps for generating a parser
-rwxr-xr-xbuild.sh16
-rw-r--r--clr-table.c87
-rw-r--r--demos/generate-parser.c25
-rw-r--r--demos/instant-parser.c (renamed from fusion.c)2
-rw-r--r--demos/sample-files/defs.c23
-rw-r--r--lr-parser.c12
-rw-r--r--parts/symbol.h2
-rw-r--r--parts/table.h2
-rw-r--r--parts/toklist.h2
-rw-r--r--parts/util-tables.h3
-rw-r--r--slr-table.c9
-rw-r--r--util-tables.c18
12 files changed, 139 insertions, 62 deletions
diff --git a/build.sh b/build.sh
index 464d86a..d7b6f33 100755
--- a/build.sh
+++ b/build.sh
@@ -5,7 +5,13 @@ set -xe
function cc
{
mkdir -p bin
- gcc -Wall -Wextra -Wpedantic -g $2 $1.c -o bin/$(basename $1)
+ gcc -Wall -Wextra -Wpedantic -I. -g $2 $1.c -o bin/$(basename $1)
+}
+
+function shared
+{
+ mkdir -p bin
+ gcc -Wall -Wextra -Wpedantic -I. -shared -fPIC $2 $1.c -o bin/$(basename $1).so
}
function leak
@@ -19,8 +25,11 @@ function leak
# cc util-tables -D_UTIL_TABLES_STANDALONE
# cc slr-table -D_SLR_TABLE_STANDALONE
# cc clr-table -D_CLR_TABLE_STANDALONE
+# cc clr-table "-D_CLR_TABLE_STANDALONE -D_LAZY_LALR"
# cc lr-parser -D_LR_PARSER_STANDALONE
-cc fusion
+# cc demos/instant-parser
+cc demos/generate-parser
+shared demos/sample-files/defs
# leak lexer
# leak recursive-ascent
@@ -29,4 +38,5 @@ cc fusion
# leak slr-table
# leak clr-table
# leak lr-parser
-leak fusion
+# leak instant-parser
+leak "generate-parser bin/defs.so"
diff --git a/clr-table.c b/clr-table.c
index 68ce3a4..37b2cd6 100644
--- a/clr-table.c
+++ b/clr-table.c
@@ -13,11 +13,12 @@ extern void *xcalloc(size_t n, size_t size);
#include "parts/grammar.h"
#include "parts/util-tables.h"
-// Implements
+// Implements
#include "parts/table.h"
#define TABLE_CAP 64
-struct action *table[TABLE_CAP];
+static struct action *__table[TABLE_CAP];
+struct action **table = __table;
size_t table_states = 0;
int table_fill();
@@ -33,7 +34,11 @@ struct item {
symbol lookahead;
};
+#ifndef _LAZY_LALR
static int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx && i1->lookahead == i2->lookahead) ? 1 : 0; }
+#else
+static int item_core_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx) ? 1 : 0; }
+#endif
#define SEEN_SETS_CAP 64
static struct {
@@ -60,6 +65,7 @@ static size_t itemset_handle(struct item *set, size_t nset)
{
// 1. is set in seen_sets
for(size_t i = 0; i < nseen_sets; i++) {
+#ifndef _LAZY_LALR
if(seen_sets[i].nitems != nset) continue;
int _seen = 0;
@@ -70,6 +76,15 @@ static size_t itemset_handle(struct item *set, size_t nset)
if(!_seen) break;
}
if(_seen) return seen_sets[i].state;
+#else
+ int _same_core = 1;
+ for(size_t j = 0; j < nset; j++) {
+ for(size_t k = 0; k < seen_sets[i].nitems; k++)
+ if(!item_core_eq(&seen_sets[i].items[k], &set[j])) _same_core = 0;
+ if(!_same_core) break;
+ }
+ if(_same_core) return seen_sets[i].state;
+#endif
}
// 2. add set to seen_sets
@@ -120,7 +135,7 @@ static int itemset_insert(size_t state, struct item *initial_set, size_t ninitia
if(dot == p->nRHS) {
if(sym != 0) continue; // do it 1 time
- if(table_insert(state, closure_set[j].lookahead, (struct action) {
+ if(table_insert(state, closure_set[j].lookahead, (struct action){
ACTION_REDUCE, closure_set[j].prod_idx}))
return 1;
continue;
@@ -137,7 +152,7 @@ static int itemset_insert(size_t state, struct item *initial_set, size_t ninitia
}
if(ngoto == 0) continue;
-
+
if(symbol_is_input_end(sym)) {
if(table_insert(state, sym, (struct action){ACTION_ACCEPT, 0}))
return 1;
@@ -164,13 +179,13 @@ static size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_
int **is_in_closure = xcalloc(total_productions, sizeof(*is_in_closure));
for(size_t i = 0; i < total_productions; i++)
is_in_closure[i] = xcalloc(total_symbols, sizeof(**is_in_closure));
-
+
#define add_item(prod_idx, lookahead, ...) do { \
if(is_in_closure[prod_idx][lookahead]) break; \
is_in_closure[prod_idx][lookahead] = 1; \
if(nout++ >= nout_max) goto cleanup; \
out_set[nout-1] = __VA_ARGS__; \
- } while(0)
+ } while(0)
for(size_t i = 0; i < nout; i++)
{
@@ -185,7 +200,7 @@ static size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_
(struct item){j, 0, out_set[i].lookahead});
continue;
}
-
+
symbol next_symbol = p->RHS[out_set[i].dot+1];
for(size_t terminal = 0; terminal < total_symbols; terminal++) {
if(!symbol_is_terminal(terminal)) continue;
@@ -233,12 +248,12 @@ void table_free()
#ifdef _CLR_TABLE_STANDALONE
#ifndef CHOOSE_GRAMMAR
-#define CHOOSE_GRAMMAR 0 // 0 or 1
+#define CHOOSE_GRAMMAR 1 // 0 or 1
#endif
// implement symbol.h
enum symbol {
-#if (CHOOSE_GRAMMAR == 0)
+#if (CHOOSE_GRAMMAR == 0)
ID, EQUAL, STAR,
END_INPUT,
EP, E, L, R,
@@ -251,7 +266,7 @@ enum symbol {
#endif
};
-const size_t total_symbols = SYMBOLS_END;
+size_t total_symbols = SYMBOLS_END;
int symbol_is_terminal(symbol s) { return s < EP; }
int symbol_is_input_end(symbol s) { return s == END_INPUT; }
@@ -292,34 +307,34 @@ int main(void)
#endif
-/* +---+------------------------+
+/* +----------------------------+
+ * | CLR |
+ * +---+------------------------+
* | - | 0 1 2 3 4 5 |
* +---+------------------------+
- * | 0 | s1 s2 g4 g5 |
- * | 1 | s1 s2 g3 |
- * | 2 | r3 r3 |
- * | 3 | r2 r2 |
- * | 4 | a |
- * | 5 | s6 s7 g9 |
- * | 6 | s6 s7 g8 |
- * | 7 | r3 |
- * | 8 | r2 |
+ * | 0 | s1 s2 g4 g5 |
+ * | 1 | s1 s2 g3 |
+ * | 2 | r3 r3 |
+ * | 3 | r2 r2 |
+ * | 4 | a |
+ * | 5 | s6 s7 g9 |
+ * | 6 | s6 s7 g8 |
+ * | 7 | r3 |
+ * | 8 | r2 |
* | 9 | r1 |
* +---+------------------------+
-
- 0 1 2 3 4 5 6 7
- 0 s1 s2 g5 g6 g13
- 1 r4 r4
- 2 s1 s2 g3 g4
- 3 r5 r5
- 4 r3 r3
- 5 a
- 6 s7 r5
- 7 s8 s9 g10 g12
- 8 r4
- 9 s8 s9 g10 g11
-10 r5
-11 r3
-12 r1
-13 r2
+ *
+ * +----------------------------+
+ * | LALR |
+ * +---+------------------------+
+ * | - | 0 1 2 3 4 5 |
+ * +---+------------------------+
+ * | 0 | s1 s2 g4 g5 |
+ * | 1 | s1 s2 g3 |
+ * | 2 | r3 r3 |
+ * | 3 | r2 r2 |
+ * | 4 | a |
+ * | 5 | s1 s2 g6 |
+ * | 6 | r1 |
+ * +---+------------------------+
*/
diff --git a/demos/generate-parser.c b/demos/generate-parser.c
new file mode 100644
index 0000000..b0a769d
--- /dev/null
+++ b/demos/generate-parser.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <dlfcn.h>
+
+#include <parts/table.h>
+
+struct action **table;
+size_t table_states;
+size_t total_symbols;
+
+int main(int argc, char **argv)
+{
+ if(argc != 2) return 1;
+
+ void *handle = dlopen(argv[1], RTLD_LAZY);
+ if(!handle) { puts(dlerror()); return 1; }
+
+ table = *(typeof(&table))dlsym(handle, "table");
+ table_states = *(typeof(&table_states))dlsym(handle, "table_states");
+ total_symbols = *(typeof(&total_symbols))dlsym(handle, "total_symbols");
+
+ table_print();
+
+ dlclose(handle);
+ return 0;
+}
diff --git a/fusion.c b/demos/instant-parser.c
index 3dc7dba..3a82b07 100644
--- a/fusion.c
+++ b/demos/instant-parser.c
@@ -14,7 +14,7 @@ enum symbol {
SYMBOLS_END,
};
-const size_t total_symbols = SYMBOLS_END;
+size_t total_symbols = SYMBOLS_END;
int symbol_is_terminal(symbol s) { return s < EP; }
int symbol_is_input_end(symbol s) { return s == END_INPUT; }
diff --git a/demos/sample-files/defs.c b/demos/sample-files/defs.c
new file mode 100644
index 0000000..797c86d
--- /dev/null
+++ b/demos/sample-files/defs.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include <stddef.h>
+
+#include <parts/table.h>
+
+struct action **table = (struct action *([])){
+ (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 12},{2, 11},{2, 7},},
+ (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 4},{2, 11},{2, 7},},
+ (struct action[]){{3, 6},{3, 6},{0, 0},{3, 6},{0, 0},{0, 0},{3, 6},{0, 0},{0, 0},{0, 0},{0, 0},},
+ (struct action[]){{3, 7},{3, 7},{0, 0},{3, 7},{0, 0},{0, 0},{3, 7},{0, 0},{0, 0},{0, 0},{0, 0},},
+ (struct action[]){{1, 5},{1, 8},{0, 0},{1, 10},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},},
+ (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{0, 0},{2, 6},{2, 7},},
+ (struct action[]){{3, 1},{3, 1},{0, 0},{3, 1},{0, 0},{0, 0},{3, 1},{0, 0},{0, 0},{0, 0},{0, 0},},
+ (struct action[]){{3, 5},{3, 5},{0, 0},{3, 5},{0, 0},{0, 0},{3, 5},{0, 0},{0, 0},{0, 0},{0, 0},},
+ (struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{0, 0},{2, 9},{2, 7},},
+ (struct action[]){{3, 2},{3, 2},{0, 0},{3, 2},{0, 0},{0, 0},{3, 2},{0, 0},{0, 0},{0, 0},{0, 0},},
+ (struct action[]){{3, 4},{3, 4},{0, 0},{3, 4},{0, 0},{0, 0},{3, 4},{0, 0},{0, 0},{0, 0},{0, 0},},
+ (struct action[]){{3, 3},{3, 3},{0, 0},{3, 3},{0, 0},{0, 0},{3, 3},{0, 0},{0, 0},{0, 0},{0, 0},},
+ (struct action[]){{1, 5},{1, 8},{0, 0},{0, 0},{0, 0},{0, 0},{4, 0},{0, 0},{0, 0},{0, 0},{0, 0},},
+};
+
+size_t total_symbols = 10;
+size_t table_states = 13;
diff --git a/lr-parser.c b/lr-parser.c
index 41cc45b..2358164 100644
--- a/lr-parser.c
+++ b/lr-parser.c
@@ -19,10 +19,10 @@ int lr_parser()
#define pop() (--stack_head)
#define eat() toklist_eat()
#define peek() ({ symbol s = toklist_peek(); if(!symbol_is_valid(s)) return 1; s; })
-
+
while(1) {
struct action a = table[(size_t)*stack_head][peek()];
-
+
switch(a.type) {
case ACTION_SHIFT:
push(eat());
@@ -30,7 +30,7 @@ int lr_parser()
break;
case ACTION_REDUCE:
for(size_t i = 0; i < 2*grammar[a.arg].nRHS; i++) pop();
-
+
symbol lhs = grammar[a.arg].LHS;
struct action a_goto = table[(size_t)*stack_head][lhs];
if(a_goto.type != ACTION_GOTO) {
@@ -39,7 +39,7 @@ int lr_parser()
lhs, (size_t)*stack_head);
return 1;
}
-
+
push(lhs);
push(a_goto.arg);
break;
@@ -72,7 +72,7 @@ enum symbol {
SYMBOLS_END,
};
-const size_t total_symbols = SYMBOLS_END;
+size_t total_symbols = SYMBOLS_END;
int symbol_is_valid(symbol s) { return s < SYMBOLS_END; }
@@ -92,7 +92,7 @@ struct production grammar[] = {
const size_t total_productions = sizeof(grammar)/sizeof(*grammar);
// implement table.h
-struct action *table[] = {
+struct action **table = (struct action *([])){
(struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 12},{2, 11},{2, 7},},
(struct action[]){{0, 0},{0, 0},{1, 1},{0, 0},{1, 2},{1, 3},{0, 0},{0, 0},{2, 4},{2, 11},{2, 7},},
(struct action[]){{3, 6},{3, 6},{0, 0},{3, 6},{0, 0},{0, 0},{3, 6},{0, 0},{0, 0},{0, 0},{0, 0},},
diff --git a/parts/symbol.h b/parts/symbol.h
index d3cb5cd..5f865ec 100644
--- a/parts/symbol.h
+++ b/parts/symbol.h
@@ -2,7 +2,7 @@
#define SYMBOL_H
typedef unsigned int symbol;
-extern const size_t total_symbols;
+extern size_t total_symbols;
// extern char *symbol_to_str[] ...
extern int symbol_is_terminal(symbol s);
diff --git a/parts/table.h b/parts/table.h
index 3b54312..23c61dc 100644
--- a/parts/table.h
+++ b/parts/table.h
@@ -8,7 +8,7 @@ extern struct action {
ACTION_ACCEPT
} type;
size_t arg;
-} *table[];
+} **table;
extern size_t table_states;
diff --git a/parts/toklist.h b/parts/toklist.h
index b6fd10d..9a7b8ce 100644
--- a/parts/toklist.h
+++ b/parts/toklist.h
@@ -5,5 +5,5 @@
extern symbol toklist_eat();
extern symbol toklist_peek();
-
+
#endif
diff --git a/parts/util-tables.h b/parts/util-tables.h
index a6d788a..66ecab5 100644
--- a/parts/util-tables.h
+++ b/parts/util-tables.h
@@ -5,4 +5,7 @@
extern int **follow;
extern int **first;
+extern void util_tables_fill();
+extern void util_tables_free();
+
#endif
diff --git a/slr-table.c b/slr-table.c
index 76399d7..d5f4505 100644
--- a/slr-table.c
+++ b/slr-table.c
@@ -17,7 +17,8 @@ extern void *xcalloc(size_t n, size_t size);
#include "parts/table.h"
#define TABLE_CAP 64
-struct action *table[TABLE_CAP];
+static struct action *__table[TABLE_CAP];
+struct action **table = __table;
size_t table_states = 0;
int table_fill();
@@ -224,7 +225,7 @@ enum symbol {
SYMBOLS_END,
};
-const size_t total_symbols = SYMBOLS_END;
+size_t total_symbols = SYMBOLS_END;
int symbol_is_terminal(symbol s) { return s < EP; }
int symbol_is_input_end(symbol s) { return s == END_INPUT; }
@@ -251,9 +252,9 @@ int main(void)
{
util_tables_fill();
table_fill();
-
+
table_print();
-
+
table_free();
util_tables_free();
}
diff --git a/util-tables.c b/util-tables.c
index 83a015c..09df914 100644
--- a/util-tables.c
+++ b/util-tables.c
@@ -27,20 +27,20 @@ void util_tables_fill()
follow[i] = xcalloc(total_symbols, sizeof(*follow[i]));
first[i] = xcalloc(total_symbols, sizeof(*follow[i]));
}
-
+
for(size_t sym = 0; sym < total_symbols; sym++) {
first[sym][sym] = 1;
}
-
+
for(size_t i = 0; i < total_productions; i++) {
struct production *p = &grammar[i];
-
+
first[p->LHS][p->RHS[0]] = 1;
-
+
for(size_t j = 1; j < p->nRHS; j++)
follow[p->RHS[j-1]][p->RHS[j]] = 1;
}
-
+
#define set(e) if((e) != 1) changed = (e) = 1
int changed;
@@ -66,7 +66,7 @@ void util_tables_free()
free(follow[i]);
free(first[i]);
}
-
+
free(follow);
free(first);
}
@@ -75,7 +75,7 @@ void util_tables_print()
{
char *s1 = "-- FIRST --";
printf(" %*s%s\n", (int)((total_symbols*3) - strlen(s1))/2, "", s1);
-
+
printf(" ");
for(size_t i = 0; i < total_symbols; i++) printf("%2zu ", i);
printf("\n");
@@ -88,7 +88,7 @@ void util_tables_print()
}
printf("\n");
-
+
char *s2 = "-- FOLLOW --";
printf(" %*s%s\n", (int)((total_symbols*3) - strlen(s2))/2, "", s2);
@@ -119,7 +119,7 @@ enum symbol {
SYMBOLS_END,
};
-const size_t total_symbols = SYMBOLS_END;
+size_t total_symbols = SYMBOLS_END;
int symbol_is_terminal(symbol s) { return s < E; }
int symbol_is_nonterminal(symbol s) { return s >= E; }