aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkartofen <mladenovnasko0@gmail.com>2025-06-30 00:26:51 +0300
committerkartofen <mladenovnasko0@gmail.com>2025-06-30 00:26:51 +0300
commit7f796bc571941a9c14eeb3a65d349d628f022275 (patch)
treefa38a0c2da26d6635f8306649caab2086a27a873
parent1cbaa39e3cc7e8948cbd5be5f2d18170fcdebfe0 (diff)
CLR yesss
-rwxr-xr-xbuild.sh33
-rw-r--r--clr-table.c319
-rw-r--r--recursive/recursive-ascent-descent.c (renamed from recursive-ascent-descent.c)0
-rw-r--r--recursive/recursive-ascent.c (renamed from recursive-ascent.c)0
-rw-r--r--slr-table.c (renamed from lr0-table.c)78
-rw-r--r--util-tables.c151
6 files changed, 514 insertions, 67 deletions
diff --git a/build.sh b/build.sh
index d69fa83..855d574 100755
--- a/build.sh
+++ b/build.sh
@@ -2,12 +2,27 @@
set -xe
-# gcc -Wall -Wextra -g -D_LEXER_STANDALONE lexer.c -o lexer
-# gcc -Wall -Wextra -g recursive-ascent.c -o recursive-ascent
-# gcc -Wall -Wextra -g recursive-ascent-descent.c -o recursive-ascent-descent
-gcc -Wall -Wextra -g lr0-table.c -D_LR0_TABLE_STANDALONE -o lr0-table
-
-# valgrind --leak-check=full --show-leak-kinds=all -s ./lexer
-# valgrind --leak-check=full --show-leak-kinds=all -s ./recursive-ascent
-# valgrind --leak-check=full --show-leak-kinds=all -s ./recursive-ascent-descent
-valgrind --leak-check=full --show-leak-kinds=all -s ./lr0-table
+function cc
+{
+ mkdir -p bin
+ gcc -Wall -Wextra -Wpedantic -g $2 $1.c -o bin/$(basename $1)
+}
+
+function leak
+{
+ valgrind --leak-check=full --show-leak-kinds=all -s bin/$1
+}
+
+# cc lexer -D_LEXER_STANDALONE
+# cc recursive/recursive-ascent
+# cc recursive/recursive-ascent-descent
+# cc util-tables -D_UTIL_TABLES_STANDALONE
+# cc slr-table -D_SLR_TABLE_STANDALONE
+cc clr-table -D_CLR_TABLE_STANDALONE
+
+# leak lexer
+# leak recursive-ascent
+# leak recursive-ascent-descent
+# leak util-tables
+# leak slr-table
+leak clr-table
diff --git a/clr-table.c b/clr-table.c
new file mode 100644
index 0000000..92e415f
--- /dev/null
+++ b/clr-table.c
@@ -0,0 +1,319 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef XCALLOC_IMPLEMENTED
+#define XCALLOC_IMPLEMENTED
+void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); }
+#else
+extern void *xcalloc(size_t n, size_t size);
+#endif
+
+typedef unsigned int symbol;
+extern const size_t total_symbols;
+
+extern int symbol_is_terminal(symbol s);
+extern int symbol_is_input_end(symbol s);
+
+extern struct production {
+ symbol LHS;
+ symbol *RHS;
+ size_t nRHS;
+} grammar[];
+extern const size_t total_productions;
+
+void grammar_print()
+{
+ for(size_t i = 0; i < total_productions; i++) {
+ printf("%d --> ", grammar[i].LHS);
+ for(size_t j = 0; j < grammar[i].nRHS; j++)
+ printf("%d ", grammar[i].RHS[j]);
+ printf("\n");
+ }
+}
+
+extern int **first;
+extern int **follow;
+
+struct action {
+ enum action_type {
+ ACTION_NOT_SET = 0, ACTION_SHIFT,
+ ACTION_GOTO, ACTION_REDUCE,
+ ACTION_ACCEPT
+ } type;
+ size_t arg;
+};
+
+#define TABLE_CAP 64
+struct action *table[TABLE_CAP];
+size_t table_states = 0;
+
+void table_allocate() { for(size_t i = 0; i < TABLE_CAP; i++) table[i] = xcalloc(total_symbols, sizeof(*table[i])); }
+void table_free() { for(size_t i = 0; i < TABLE_CAP; i++) free(table[i]); }
+int table_insert(size_t state, symbol sym, struct action a);
+void table_print();
+
+struct item {
+ size_t prod_idx;
+ size_t dot;
+ symbol lookahead;
+};
+
+int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx && i1->lookahead == i2->lookahead) ? 1 : 0; }
+
+#define SEEN_SETS_CAP 64
+static struct {
+ struct item *items;
+ size_t nitems;
+ size_t state;
+} seen_sets[SEEN_SETS_CAP];
+static size_t nseen_sets;
+
+void seen_sets_free() { for(size_t i = 0; i < nseen_sets; i++) free(seen_sets[i].items);}
+
+size_t itemset_handle(struct item *set, size_t nset);
+int itemset_insert(size_t state, struct item *initial_set, size_t ninitial);
+size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout);
+void itemset_print(struct item *set, size_t nset)
+{
+ printf("{");
+ for(size_t i = 0; i < nset; i++)
+ printf("{%zu, %zu, %d}, ", set[i].prod_idx, set[i].dot, set[i].lookahead);
+ printf("}\n");
+}
+
+size_t itemset_handle(struct item *set, size_t nset)
+{
+ // 1. is set in seen_sets
+ for(size_t i = 0; i < nseen_sets; i++) {
+ if(seen_sets[i].nitems != nset) continue;
+
+ int _seen = 0;
+ for(size_t j = 0; j < nset; j++) {
+ _seen = 0;
+ for(size_t k = 0; k < nset; k++)
+ if(item_eq(&seen_sets[i].items[k], &set[j])) _seen = 1;
+ if(!_seen) break;
+ }
+ if(_seen) return seen_sets[i].state;
+ }
+
+ // 2. add set to seen_sets
+ if(nseen_sets >= SEEN_SETS_CAP) {
+ fprintf(stderr, "ERROR: SEEN_SET_CAP exceeded\n");
+ exit(1);
+ }
+
+ seen_sets[nseen_sets].items = xcalloc(nset, sizeof(*set));
+ seen_sets[nseen_sets].nitems = nset;
+ for(size_t i = 0; i < nset; i++)
+ seen_sets[nseen_sets].items[i] = set[i];
+
+ // 3. insert new state
+ size_t new_state = seen_sets[nseen_sets++].state = table_states++;
+ if(new_state >= TABLE_CAP) {
+ fprintf(stderr, "ERROR: TABLE_CAP exceeded\n");
+ exit(1);
+ }
+
+ if(itemset_insert(new_state, set, nset)) {
+ fprintf(stderr, "ERROR: itemset_insert failed\n");
+ exit(1);
+ }
+
+ return new_state;
+}
+
+#define CLOSURE_SET_CAP 64
+#define GOTO_SET_CAP 32
+int itemset_insert(size_t state, struct item *initial_set, size_t ninitial)
+{
+ struct item closure_set[CLOSURE_SET_CAP];
+
+ size_t nclosure = itemset_closure(initial_set, ninitial, closure_set, CLOSURE_SET_CAP);
+ if(nclosure > CLOSURE_SET_CAP) {
+ fprintf(stderr, "ERROR: CLOSURE_SET_CAP exceeded\n");
+ return 1;
+ }
+
+ for(size_t sym = 0; sym < total_symbols; sym++) {
+ struct item goto_set[GOTO_SET_CAP];
+ size_t ngoto = 0;
+
+ for(size_t j = 0; j < nclosure; j++) {
+ struct production *p = &grammar[closure_set[j].prod_idx];
+ size_t dot = closure_set[j].dot;
+
+ if(dot == p->nRHS) {
+ if(sym != 0) continue;
+ if(table_insert(state, closure_set[j].lookahead, (struct action) {
+ ACTION_REDUCE, closure_set[j].prod_idx}))
+ return 1;
+ continue;
+ }
+
+ if(p->RHS[dot] == sym) {
+ if(ngoto >= GOTO_SET_CAP) {
+ fprintf(stderr, "ERROR: GOTO_SET_CAP exceeded\n");
+ return 1;
+ }
+ goto_set[ngoto] = closure_set[j];
+ goto_set[ngoto++].dot++;
+ }
+ }
+
+ if(ngoto == 0) continue;
+
+ if(symbol_is_input_end(sym)) {
+ if(table_insert(state, sym, (struct action){ACTION_ACCEPT, 0}))
+ return 1;
+ continue;
+ }
+
+ size_t new_state = itemset_handle(goto_set, ngoto);
+
+ if(table_insert(state, sym, (struct action){
+ symbol_is_terminal(sym) ? ACTION_SHIFT : ACTION_GOTO,
+ new_state})) return 1;
+ }
+
+ return 0;
+}
+
+size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout_max)
+{
+ size_t nout = nin;
+
+ if(nout > nout_max) return nout;
+ for(size_t i = 0; i < nin; i++) out_set[i] = in_set[i];
+
+ int **is_in_closure = xcalloc(total_productions, sizeof(*is_in_closure));
+ for(size_t i = 0; i < total_productions; i++)
+ is_in_closure[i] = xcalloc(total_symbols, sizeof(**is_in_closure));
+
+#define add_item(prod_idx, lookahead, ...) do { \
+ if(is_in_closure[prod_idx][lookahead]) break; \
+ is_in_closure[prod_idx][lookahead] = 1; \
+ if(nout++ >= nout_max) goto cleanup; \
+ out_set[nout-1] = __VA_ARGS__; \
+ } while(0)
+
+ for(size_t i = 0; i < nout; i++)
+ {
+ struct production *p = &grammar[out_set[i].prod_idx];
+ if(out_set[i].dot == p->nRHS) continue;
+ symbol sym = p->RHS[out_set[i].dot];
+
+ for(size_t j = 0; j < total_productions; j++)
+ if(grammar[j].LHS == sym) {
+ if(out_set[i].dot + 1 == p->nRHS) {
+ add_item(j, out_set[i].lookahead,
+ (struct item){j, 0, out_set[i].lookahead});
+ continue;
+ }
+
+ symbol next_symbol = p->RHS[out_set[i].dot+1];
+ for(size_t terminal = 0; terminal < total_symbols; terminal++) {
+ if(!symbol_is_terminal(terminal)) continue;
+ if(first[next_symbol][terminal])
+ add_item(j, terminal, (struct item){j, 0, terminal});
+ }
+ }
+ }
+
+cleanup:
+ for(size_t i = 0; i < total_productions; i++) free(is_in_closure[i]);
+ free(is_in_closure);
+ return nout;
+}
+
+int table_insert(size_t state, symbol sym, struct action a)
+{
+ if(table[state][sym].type != ACTION_NOT_SET) {
+ fprintf(stderr, "TABLE COLLISION on state '%zu' sym '%d'\n", state, sym);
+ fprintf(stderr, "\t{%d %zu} vs {%d %zu}\n",
+ table[state][sym].type, table[state][sym].arg,
+ a.type, a.arg);
+ return 1;
+ }
+
+ table[state][sym] = a;
+ return 0;
+}
+
+void table_print()
+{
+ printf(" ");
+ for(size_t sym = 0; sym < total_symbols; sym++) printf("%2zu ", sym);
+ printf("\n");
+
+ char action_to_char[] = {[ACTION_SHIFT] = 's', [ACTION_REDUCE] = 'r', [ACTION_GOTO] = 'g'};
+ for(size_t i = 0; i < table_states; i++) {
+ printf("%2zu ", i);
+ for(size_t sym = 0; sym < total_symbols; sym++)
+ if(table[i][sym].type == ACTION_ACCEPT) printf(" a ");
+ else if(table[i][sym].type) printf("%c%-2zu ", action_to_char[table[i][sym].type], table[i][sym].arg);
+ else printf(" ");
+ printf("\n");
+ }
+}
+
+#ifdef _CLR_TABLE_STANDALONE
+
+#include "util-tables.c"
+
+#ifndef CHOOSE_GRAMMAR
+#define CHOOSE_GRAMMAR 1 // 0 or 1
+#endif
+
+enum symbol {
+#if (CHOOSE_GRAMMAR == 0)
+ ID, EQUAL, STAR,
+ END_INPUT,
+ EP, E, L, R,
+ SYMBOLS_END,
+#else
+ SC, SD,
+ END_INPUT,
+ EP, E, C,
+ SYMBOLS_END
+#endif
+};
+
+const size_t total_symbols = SYMBOLS_END;
+
+int symbol_is_terminal(symbol s) { return s < EP; }
+int symbol_is_input_end(symbol s) { return s == END_INPUT; }
+
+#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
+struct production grammar[] = {
+#if (CHOOSE_GRAMMAR == 0)
+ PROD(EP, ->, E, END_INPUT),
+ PROD(E, -->, L, EQUAL, R),
+ PROD(E, -->, R),
+ PROD(L, -->, STAR, R),
+ PROD(L, -->, ID),
+ PROD(R, -->, L),
+#else
+ PROD(EP, ->, E, END_INPUT),
+ PROD(E, -->, C, C),
+ PROD(C, -->, SC, C),
+ PROD(C, -->, SD),
+#endif
+};
+
+const size_t total_productions = sizeof(grammar)/sizeof(*grammar);
+
+int main(void)
+{
+ util_tables_fill();
+ table_allocate();
+
+ itemset_handle((struct item[]){{0, 0, END_INPUT}}, 1);
+ table_print();
+
+ seen_sets_free();
+ table_free();
+ util_tables_free();
+}
+
+#endif
diff --git a/recursive-ascent-descent.c b/recursive/recursive-ascent-descent.c
index 628540d..628540d 100644
--- a/recursive-ascent-descent.c
+++ b/recursive/recursive-ascent-descent.c
diff --git a/recursive-ascent.c b/recursive/recursive-ascent.c
index 7f69e57..7f69e57 100644
--- a/recursive-ascent.c
+++ b/recursive/recursive-ascent.c
diff --git a/lr0-table.c b/slr-table.c
index 721bdb5..2454a65 100644
--- a/lr0-table.c
+++ b/slr-table.c
@@ -1,21 +1,17 @@
#include <stdio.h>
#include <stdlib.h>
-#ifndef MAX_SYMBOLS
-#define MAX_SYMBOLS 64
-#endif
-
-#ifdef XCALLOC_IMPLEMENTED
-extern void *xcalloc(size_t n, size_t size);
-#else
+#ifndef XCALLOC_IMPLEMENTED
+#define XCALLOC_IMPLEMENTED
void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); }
+#else
+extern void *xcalloc(size_t n, size_t size);
#endif
typedef unsigned int symbol;
-
extern const size_t total_symbols;
+
extern int symbol_is_terminal(symbol s);
-extern int symbol_is_nonterminal(symbol s);
extern int symbol_is_input_end(symbol s);
extern struct production {
@@ -35,10 +31,7 @@ void grammar_print()
}
}
-static int follow[MAX_SYMBOLS][MAX_SYMBOLS];
-
-void follow_table_fill();
-void follow_table_print();
+extern int **follow;
struct action {
enum action_type {
@@ -50,9 +43,11 @@ struct action {
};
#define TABLE_CAP 64
-static struct action table[TABLE_CAP][MAX_SYMBOLS]; // *(table[total_symbols]);
-static size_t table_states;
+struct action *table[TABLE_CAP];
+size_t table_states = 0;
+void table_allocate() { for(size_t i = 0; i < TABLE_CAP; i++) table[i] = xcalloc(total_symbols, sizeof(*table[i])); }
+void table_free() { for(size_t i = 0; i < TABLE_CAP; i++) free(table[i]); }
int table_insert(size_t state, symbol sym, struct action a);
void table_print();
@@ -211,49 +206,11 @@ cleanup:
return nout;
}
-void follow_table_fill()
-{
- for(size_t i = 0; i < total_productions; i++) {
- struct production *p = &grammar[i];
- for(size_t j = 1; j < p->nRHS; j++)
- follow[p->RHS[j-1]][p->RHS[j]] = 1;
- }
-
-#define set(e) if((e) != 1) changed = (e) = 1
-
- int changed;
- do {
- changed = 0;
- for(size_t i = 0; i < total_productions; i++)
- for(size_t sym = 0; sym < total_symbols; sym++) {
- struct production *p = &grammar[i];
- if(follow[p->LHS][sym])
- set(follow[p->RHS[p->nRHS-1]][sym]);
- if(follow[sym][p->LHS])
- set(follow[sym][p->RHS[0]]);
- }
- } while(changed);
-}
-
-void follow_table_print()
-{
- printf(" ");
- for(size_t i = 0; i < total_symbols; i++) printf("%2zu ", i);
- printf("\n");
-
- for(size_t i = 0; i < total_symbols; i++) {
- printf("%2zu ", i);
- for(size_t j = 0; j < total_symbols; j++)
- printf(" %s ", follow[i][j] ? "x" : " ");
- printf("\n");
- }
-}
-
int table_insert(size_t state, symbol sym, struct action a)
{
if(table[state][sym].type != ACTION_NOT_SET) {
fprintf(stderr, "TABLE COLLISION on state '%zu' sym '%d'\n", state, sym);
- fprintf(stderr, "\t{%d %zu} vs {%d %zu}",
+ fprintf(stderr, "\t{%d %zu} vs {%d %zu}\n",
table[state][sym].type, table[state][sym].arg,
a.type, a.arg);
return 1;
@@ -280,7 +237,9 @@ void table_print()
}
}
-#ifdef _LR0_TABLE_STANDALONE
+#ifdef _SLR_TABLE_STANDALONE
+
+#include "util-tables.c"
enum symbol {
PLUS = 0,
@@ -293,10 +252,10 @@ enum symbol {
EP, E, T, N,
SYMBOLS_END,
};
+
const size_t total_symbols = SYMBOLS_END;
-int symbol_is_terminal(symbol s) { return s < E; }
-int symbol_is_nonterminal(symbol s) { return s >= E; }
+int symbol_is_terminal(symbol s) { return s < EP; }
int symbol_is_input_end(symbol s) { return s == END_INPUT; }
#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
@@ -315,12 +274,15 @@ const size_t total_productions = sizeof(grammar)/sizeof(*grammar);
int main(void)
{
- follow_table_fill();
+ util_tables_fill();
+ table_allocate();
itemset_handle((struct item[]){{0, 0}}, 1);
table_print();
seen_sets_free();
+ table_free();
+ util_tables_free();
}
#endif
diff --git a/util-tables.c b/util-tables.c
new file mode 100644
index 0000000..0bd227d
--- /dev/null
+++ b/util-tables.c
@@ -0,0 +1,151 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef XCALLOC_IMPLEMENTED
+#define XCALLOC_IMPLEMENTED
+void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); }
+#else
+extern void *xcalloc(size_t n, size_t size);
+#endif
+
+typedef unsigned int symbol;
+extern const size_t total_symbols;
+
+extern struct production {
+ symbol LHS;
+ symbol *RHS;
+ size_t nRHS;
+} grammar[];
+extern const size_t total_productions;
+
+int **follow;
+int **first;
+// int *nullable;
+
+void util_tables_fill()
+{
+ follow = calloc(total_symbols, sizeof(*follow));
+ first = calloc(total_symbols, sizeof(*follow));
+ for(size_t i = 0; i < total_symbols; i++) {
+ follow[i] = xcalloc(total_symbols, sizeof(*follow[i]));
+ first[i] = xcalloc(total_symbols, sizeof(*follow[i]));
+ }
+
+ for(size_t sym = 0; sym < total_symbols; sym++) {
+ first[sym][sym] = 1;
+ }
+
+ for(size_t i = 0; i < total_productions; i++) {
+ struct production *p = &grammar[i];
+
+ first[p->LHS][p->RHS[0]] = 1;
+
+ for(size_t j = 1; j < p->nRHS; j++)
+ follow[p->RHS[j-1]][p->RHS[j]] = 1;
+ }
+
+#define set(e) if((e) != 1) changed = (e) = 1
+
+ int changed;
+ do {
+ changed = 0;
+ for(size_t i = 0; i < total_productions; i++)
+ for(size_t sym = 0; sym < total_symbols; sym++) {
+ struct production *p = &grammar[i];
+ if(first[sym][p->LHS])
+ for(size_t j = 0; j < total_symbols; j++)
+ if(first[p->LHS][j]) set(first[sym][j]);
+ if(follow[p->LHS][sym])
+ set(follow[p->RHS[p->nRHS-1]][sym]);
+ if(follow[sym][p->LHS])
+ set(follow[sym][p->RHS[0]]);
+ }
+ } while(changed);
+}
+
+void util_tables_free()
+{
+ for(size_t i = 0; i < total_symbols; i++) {
+ free(follow[i]);
+ free(first[i]);
+ }
+
+ free(follow);
+ free(first);
+}
+
+void util_tables_print()
+{
+ char *s1 = "-- FIRST --";
+ printf(" %*s%s\n", (int)((total_symbols*3) - strlen(s1))/2, "", s1);
+
+ printf(" ");
+ for(size_t i = 0; i < total_symbols; i++) printf("%2zu ", i);
+ printf("\n");
+
+ for(size_t i = 0; i < total_symbols; i++) {
+ printf("%2zu ", i);
+ for(size_t j = 0; j < total_symbols; j++)
+ printf(" %s ", first[i][j] ? "x" : " ");
+ printf("\n");
+ }
+
+ printf("\n");
+
+ char *s2 = "-- FOLLOW --";
+ printf(" %*s%s\n", (int)((total_symbols*3) - strlen(s2))/2, "", s2);
+
+ printf(" ");
+ for(size_t i = 0; i < total_symbols; i++) printf("%2zu ", i);
+ printf("\n");
+
+ for(size_t i = 0; i < total_symbols; i++) {
+ printf("%2zu ", i);
+ for(size_t j = 0; j < total_symbols; j++)
+ printf(" %s ", follow[i][j] ? "x" : " ");
+ printf("\n");
+ }
+}
+
+#ifdef _UTIL_TABLES_STANDALONE
+
+enum symbol {
+ PLUS = 0,
+ MINUS,
+ LPAREN,
+ RPAREN,
+ N0, N1,
+ END_INPUT,
+
+ EP, E, T, N,
+ SYMBOLS_END,
+};
+const size_t total_symbols = SYMBOLS_END;
+
+int symbol_is_terminal(symbol s) { return s < E; }
+int symbol_is_nonterminal(symbol s) { return s >= E; }
+int symbol_is_input_end(symbol s) { return s == END_INPUT; }
+
+#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
+struct production grammar[] = {
+ PROD(EP, ->, E, END_INPUT),
+ PROD(E, -->, E, PLUS, T),
+ PROD(E, -->, E, MINUS, T),
+ PROD(E, -->, T),
+ PROD(T, -->, LPAREN, E, RPAREN),
+ PROD(T, -->, N),
+ PROD(N, -->, N0),
+ PROD(N, -->, N1),
+};
+const size_t total_productions = sizeof(grammar)/sizeof(*grammar);
+
+int main(void)
+{
+ util_tables_fill();
+ util_tables_print();
+ util_tables_free();
+ return 0;
+}
+
+#endif