aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbuild.sh6
-rw-r--r--lexer.c88
-rw-r--r--lr0-table.c (renamed from lr0.c)284
3 files changed, 204 insertions, 174 deletions
diff --git a/build.sh b/build.sh
index e1367ca..d69fa83 100755
--- a/build.sh
+++ b/build.sh
@@ -2,12 +2,12 @@
set -xe
-# gcc -Wall -Wextra -g lexer.c -o lexer
+# gcc -Wall -Wextra -g -D_LEXER_STANDALONE lexer.c -o lexer
# gcc -Wall -Wextra -g recursive-ascent.c -o recursive-ascent
# gcc -Wall -Wextra -g recursive-ascent-descent.c -o recursive-ascent-descent
-gcc -Wall -Wextra -g lr0.c -o lr0
+gcc -Wall -Wextra -g lr0-table.c -D_LR0_TABLE_STANDALONE -o lr0-table
# valgrind --leak-check=full --show-leak-kinds=all -s ./lexer
# valgrind --leak-check=full --show-leak-kinds=all -s ./recursive-ascent
# valgrind --leak-check=full --show-leak-kinds=all -s ./recursive-ascent-descent
-valgrind --leak-check=full --show-leak-kinds=all -s ./lr0
+valgrind --leak-check=full --show-leak-kinds=all -s ./lr0-table
diff --git a/lexer.c b/lexer.c
index 6df77c6..2fca9a7 100644
--- a/lexer.c
+++ b/lexer.c
@@ -10,12 +10,14 @@
// and more characters
// - add easier way to write chars to bits (maybe a singe string)
+#define ARR_LEN(arr) (sizeof(arr) / sizeof(*arr))
-#define ARR_LENGTH(arr) (sizeof(arr)/sizeof(*arr))
+typedef int token;
+extern const char *const token_to_string[];
+extern const struct string_token { char *s; token t;} strings[];
+extern const token separators[];
-#define popcount(x) (__builtin_popcount(x))
-
-#define MAPPED_CHARS 32
+#ifdef _LEXER_STANDALONE
#define TOKENS(X) \
X(TOKEN_NONE) \
@@ -33,13 +35,13 @@ enum token {
TOKENS(TOKEN_ENUM)
};
-const char * const to_string[] = {
+const char * const token_to_string[] = {
TOKENS(TOKEN_STRING)
};
-const struct {
+const struct string_token {
char *s;
- enum token t;
+ token t;
} strings[] = {
{"test", TOKEN_TEST},
{"retarded", TOKEN_RETARDED},
@@ -47,6 +49,10 @@ const struct {
{"tits", TOKEN_TITS},
};
+const token separators[] = {['{'] = TOKEN_LPAREN, ['}'] = TOKEN_RPAREN};
+
+#endif
+
const uint8_t char_to_bit[256] = {
['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7,
['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13,
@@ -55,26 +61,24 @@ const uint8_t char_to_bit[256] = {
['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1
};
-const enum token separators[] = {
- ['{'] = TOKEN_LPAREN,
- ['}'] = TOKEN_RPAREN
-};
-
struct level {
uint64_t bit_mask;
uint64_t *token_masks;
};
-struct level start_level = {0};
-struct level *bit_to_ptr[MAPPED_CHARS] = {0};
+#define MAPPED_CHARS 32
+static struct level start_level = {0};
+static struct level *bit_to_ptr[MAPPED_CHARS] = {0};
static size_t num_levels;
#define CHAR_TO_PTR(c) (bit_to_ptr[char_to_bit[c]])
-int compile_tables(void)
+#define popcount(x) (__builtin_popcount(x))
+
+int compile_lextables(void)
{
// max number of levels
- for(size_t i = 0; i < ARR_LENGTH(strings); i++)
+ for(size_t i = 0; i < ARR_LEN(strings); i++)
if(strlen(strings[i].s) > num_levels) num_levels = strlen(strings[i].s);
// allocated levels
@@ -86,11 +90,11 @@ int compile_tables(void)
// BUG: everything is repeated for the start_level
// populate bit_masks
- for(size_t i = 0; i < ARR_LENGTH(strings); i++) {
+ for(size_t i = 0; i < ARR_LEN(strings); i++) {
struct level *l = &start_level;
for(size_t j = 0; j < strlen(strings[i].s)+1; j++) {
uint8_t bit = char_to_bit[strings[i].s[j]];
-
+
l->bit_mask |= 1 << bit;
l = &bit_to_ptr[bit][j];
}
@@ -105,50 +109,50 @@ int compile_tables(void)
* sizeof(*l->token_masks));
memset(l->token_masks, 0, popcount(l->bit_mask)
* sizeof(*l->token_masks));
-
+
l = &bit_to_ptr[i][j];
}
}
// populate token_masks
- for(size_t i = 0; i < ARR_LENGTH(strings); i++) {
+ for(size_t i = 0; i < ARR_LEN(strings); i++) {
struct level *l = &start_level;
for(size_t j = 0; j < strlen(strings[i].s)+1; j++) {
- uint8_t bit = char_to_bit[strings[i].s[j]];
+ uint8_t bit = char_to_bit[strings[i].s[j]];
uint8_t idx = popcount(l->bit_mask & ((1 << bit) - 1));
l->token_masks[idx] |= 1 << strings[i].t;
l = &bit_to_ptr[bit][j];
}
}
-
+
return 0;
}
-void print_tables(void)
+void print_lextables(void)
{
- for(size_t i = 0; i < 256; i++)
+ for(size_t i = 0; i < 256; i++)
for(size_t j = 0; j < num_levels; j++)
if(CHAR_TO_PTR(i)[j].bit_mask) {
- printf("%c, %d, %32b ", (char)i, j, CHAR_TO_PTR(i)[j].bit_mask);
+ printf("%c, %zu, %32lb ", (char)i, j, CHAR_TO_PTR(i)[j].bit_mask);
printf("{ ");
for(size_t k = 0; k < popcount(CHAR_TO_PTR(i)[j].bit_mask); k++)
- printf("%b ", CHAR_TO_PTR(i)[j].token_masks[k]);
+ printf("%lb ", CHAR_TO_PTR(i)[j].token_masks[k]);
printf(" }\n");
}
-
- printf(" %32b ", start_level.bit_mask);
+
+ printf(" %32lb ", start_level.bit_mask);
printf("{ ");
for(size_t k = 0; k < popcount(start_level.bit_mask); k++)
- printf("%b ", start_level.token_masks[k]);
+ printf("%lb ", start_level.token_masks[k]);
printf(" }\n");
printf(" %32s\n", "zyxwvutsrqponmlkjihgfedcbaE ");
}
-int tokenize_string(char *string, enum token *t, size_t t_len)
+int tokenize_string(char *string, token *t, size_t t_len)
{
size_t ntokens = 0;
size_t i = 0;
@@ -170,14 +174,14 @@ int tokenize_string(char *string, enum token *t, size_t t_len)
while(!separators[string[i]] && char_to_bit[string[i]] != 1) i++;
break;
}
-
+
uint64_t idx = popcount(l->bit_mask & ((1 << bit) - 1));
token_mask &= l->token_masks[idx];
if(bit == 1) break;
i++;
}
-
+
// BUG: not checking of ntokens is in t_len
if(token_mask)
t[ntokens++] = __builtin_ctz(token_mask);
@@ -188,11 +192,11 @@ int tokenize_string(char *string, enum token *t, size_t t_len)
off = ++i;
}
-
+
return ntokens;
}
-void free_tables(void)
+void free_lextables(void)
{
free(start_level.token_masks);
for(size_t i = 0; i < MAPPED_CHARS; i++) {
@@ -204,22 +208,26 @@ void free_tables(void)
}
}
+#ifdef _LEXER_STANDALONE
+
// exit with a given code, default is 1
#define DIE(...) (printf("ERROR %s:%d\n", __FILE__, __LINE__), __VA_OPT__(exit(__VA_ARGS__),) exit(1), 1)
int main(void)
{
- compile_tables() && DIE();
+ compile_lextables() && DIE();
- enum token t[120] = {0};
+ token t[120] = {0};
size_t ntokens = tokenize_string("tits tits2 retarded wow{tits}test test }}{{test", t, 120);
ntokens || DIE(10);
-
+
for(size_t i = 0; i < ntokens; i++)
- printf("%s\n", to_string[t[i]]);
+ printf("%s\n", token_to_string[t[i]]);
- print_tables();
- free_tables();
+ print_lextables();
+ free_lextables();
return 0;
}
+
+#endif
diff --git a/lr0.c b/lr0-table.c
index ecd12b5..721bdb5 100644
--- a/lr0.c
+++ b/lr0-table.c
@@ -1,45 +1,33 @@
#include <stdio.h>
#include <stdlib.h>
-#define ARR_LEN(...) (sizeof(__VA_ARGS__) / sizeof(*(__VA_ARGS__)))
+#ifndef MAX_SYMBOLS
+#define MAX_SYMBOLS 64
+#endif
-enum symbol {
- PLUS = 0,
- MINUS,
- LPAREN,
- RPAREN,
- N0, N1,
- END_INPUT,
-
- EP, E, T, N,
- SYMBOL_COUNT,
-};
-
-static inline int is_terminal(enum symbol s) { return s < E; }
-static inline int is_nonterminal(enum symbol s) { return s >= E; }
+#ifdef XCALLOC_IMPLEMENTED
+extern void *xcalloc(size_t n, size_t size);
+#else
+void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); }
+#endif
-struct production {
- enum symbol LHS;
- enum symbol *RHS;
- size_t nRHS;
-};
+typedef unsigned int symbol;
-#define PROD(LHS, _, ...) {LHS, (enum symbol[]){__VA_ARGS__}, sizeof((enum symbol[]){__VA_ARGS__})/sizeof(enum symbol)}
+extern const size_t total_symbols;
+extern int symbol_is_terminal(symbol s);
+extern int symbol_is_nonterminal(symbol s);
+extern int symbol_is_input_end(symbol s);
-static struct production grammar[] = {
- PROD(EP, ->, E, END_INPUT),
- PROD(E, -->, E, PLUS, T),
- PROD(E, -->, E, MINUS, T),
- PROD(E, -->, T),
- PROD(T, -->, LPAREN, E, RPAREN),
- PROD(T, -->, N),
- PROD(N, -->, N0),
- PROD(N, -->, N1),
-};
+extern struct production {
+ symbol LHS;
+ symbol *RHS;
+ size_t nRHS;
+} grammar[];
+extern const size_t total_productions;
-void print_grammar()
+void grammar_print()
{
- for(size_t i = 0; i < ARR_LEN(grammar); i++) {
+ for(size_t i = 0; i < total_productions; i++) {
printf("%d --> ", grammar[i].LHS);
for(size_t j = 0; j < grammar[i].nRHS; j++)
printf("%d ", grammar[i].RHS[j]);
@@ -47,21 +35,10 @@ void print_grammar()
}
}
-static int follow[SYMBOL_COUNT][SYMBOL_COUNT];
-void fill_follow_table();
-void print_follow_table()
-{
- printf(" ");
- for(size_t i = 0; i < SYMBOL_COUNT; i++) printf("%2zu ", i);
- printf("\n");
-
- for(size_t i = 0; i < SYMBOL_COUNT; i++) {
- printf("%2zu ", i);
- for(size_t j = 0; j < SYMBOL_COUNT; j++)
- printf(" %s ", follow[i][j] ? "x" : " ");
- printf("\n");
- }
-}
+static int follow[MAX_SYMBOLS][MAX_SYMBOLS];
+
+void follow_table_fill();
+void follow_table_print();
struct action {
enum action_type {
@@ -72,45 +49,19 @@ struct action {
size_t arg;
};
-// THIS IS BAD, DONT DO IT LIKE THAT
#define TABLE_CAP 64
-static struct action table[TABLE_CAP][SYMBOL_COUNT];
-// DO IT LIKE THAT
-// static struct action *(table[SYMBOL_COUNT]);
-static size_t states;
-
-int table_insert(size_t state, enum symbol sym, struct action a);
-void print_table()
-{
- printf(" ");
- for(size_t sym = 0; sym < SYMBOL_COUNT; sym++) printf("%2zu ", sym);
- printf("\n");
-
- char action_to_char[] = {[ACTION_SHIFT] = 's', [ACTION_REDUCE] = 'r', [ACTION_GOTO] = 'g'};
- for(size_t i = 0; i < states; i++) {
- printf("%2zu ", i);
- for(size_t sym = 0; sym < SYMBOL_COUNT; sym++)
- if(table[i][sym].type == ACTION_ACCEPT) printf(" a ");
- else if(table[i][sym].type) printf("%c%-2zu ", action_to_char[table[i][sym].type], table[i][sym].arg);
- else printf(" ");
- printf("\n");
- }
-}
+static struct action table[TABLE_CAP][MAX_SYMBOLS]; // *(table[total_symbols]);
+static size_t table_states;
+int table_insert(size_t state, symbol sym, struct action a);
+void table_print();
struct item {
size_t prod_idx;
size_t dot;
};
-int items_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx) ? 1 : 0; }
-void print_set(struct item *set, size_t nset)
-{
- printf("{");
- for(size_t i = 0; i < nset; i++)
- printf("{%zu, %zu}, ", set[i].prod_idx, set[i].dot);
- printf("}\n");
-}
+int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx) ? 1 : 0; }
#define SEEN_SETS_CAP 64
static struct {
@@ -120,35 +71,30 @@ static struct {
} seen_sets[SEEN_SETS_CAP];
static size_t nseen_sets;
-void free_seen_sets() { for(size_t i = 0; i < nseen_sets; i++) free(seen_sets[i].items);}
-
-size_t handle_set(struct item *set, size_t nset);
-int insert_set(size_t state, struct item *initial_set, size_t ninitial);
-size_t closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout);
-
-void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); }
+void seen_sets_free() { for(size_t i = 0; i < nseen_sets; i++) free(seen_sets[i].items);}
-int main(void)
+size_t itemset_handle(struct item *set, size_t nset);
+int itemset_insert(size_t state, struct item *initial_set, size_t ninitial);
+size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout);
+void itemset_print(struct item *set, size_t nset)
{
- fill_follow_table();
-
- handle_set((struct item[]){{0, 0}}, 1);
- print_table();
-
- free_seen_sets();
+ printf("{");
+ for(size_t i = 0; i < nset; i++)
+ printf("{%zu, %zu}, ", set[i].prod_idx, set[i].dot);
+ printf("}\n");
}
-size_t handle_set(struct item *set, size_t nset)
+size_t itemset_handle(struct item *set, size_t nset)
{
// 1. is set in seen_sets
for(size_t i = 0; i < nseen_sets; i++) {
if(seen_sets[i].nitems != nset) continue;
-
+
int _seen = 0;
for(size_t j = 0; j < nset; j++) {
_seen = 0;
for(size_t k = 0; k < nset; k++)
- if(items_eq(&seen_sets[i].items[k], &set[j])) _seen = 1;
+ if(item_eq(&seen_sets[i].items[k], &set[j])) _seen = 1;
if(!_seen) break;
}
if(_seen) return seen_sets[i].state;
@@ -157,49 +103,49 @@ size_t handle_set(struct item *set, size_t nset)
// 2. add set to seen_sets
if(nseen_sets >= SEEN_SETS_CAP) {
fprintf(stderr, "ERROR: SEEN_SET_CAP exceeded\n");
- exit(1);
+ exit(1);
}
-
+
seen_sets[nseen_sets].items = xcalloc(nset, sizeof(*set));
seen_sets[nseen_sets].nitems = nset;
for(size_t i = 0; i < nset; i++)
seen_sets[nseen_sets].items[i] = set[i];
// 3. insert new state
- size_t new_state = seen_sets[nseen_sets++].state = states++;
+ size_t new_state = seen_sets[nseen_sets++].state = table_states++;
if(new_state >= TABLE_CAP) {
fprintf(stderr, "ERROR: TABLE_CAP exceeded\n");
exit(1);
}
- if(insert_set(new_state, set, nset)) {
- fprintf(stderr, "ERROR: insert_set failed\n");
+ if(itemset_insert(new_state, set, nset)) {
+ fprintf(stderr, "ERROR: itemset_insert failed\n");
exit(1);
}
-
+
return new_state;
}
#define CLOSURE_SET_CAP 64
#define GOTO_SET_CAP 32
-int insert_set(size_t state, struct item *initial_set, size_t ninitial)
+int itemset_insert(size_t state, struct item *initial_set, size_t ninitial)
{
struct item closure_set[CLOSURE_SET_CAP];
-
- size_t nclosure = closure(initial_set, ninitial, closure_set, CLOSURE_SET_CAP);
+
+ size_t nclosure = itemset_closure(initial_set, ninitial, closure_set, CLOSURE_SET_CAP);
if(nclosure > CLOSURE_SET_CAP) {
fprintf(stderr, "ERROR: CLOSURE_SET_CAP exceeded\n");
return 1;
}
-
- for(size_t sym = 0; sym < SYMBOL_COUNT; sym++) {
+
+ for(size_t sym = 0; sym < total_symbols; sym++) {
struct item goto_set[GOTO_SET_CAP];
- size_t ngoto = 0;
-
+ size_t ngoto = 0;
+
for(size_t j = 0; j < nclosure; j++) {
struct production *p = &grammar[closure_set[j].prod_idx];
size_t dot = closure_set[j].dot;
-
+
if(dot == p->nRHS) {
if(!follow[p->LHS][sym]) continue;
if(table_insert(state, sym, (struct action){
@@ -207,50 +153,50 @@ int insert_set(size_t state, struct item *initial_set, size_t ninitial)
return 1;
continue;
}
-
+
if(p->RHS[dot] == sym) {
if(ngoto >= GOTO_SET_CAP) {
fprintf(stderr, "ERROR: GOTO_SET_CAP exceeded\n");
return 1;
- }
+ }
goto_set[ngoto] = closure_set[j];
goto_set[ngoto++].dot++;
}
}
if(ngoto == 0) continue;
-
- if(sym == END_INPUT) {
+
+ if(symbol_is_input_end(sym)) {
if(table_insert(state, sym, (struct action){ACTION_ACCEPT, 0}))
return 1;
continue;
}
-
- size_t new_state = handle_set(goto_set, ngoto);
+
+ size_t new_state = itemset_handle(goto_set, ngoto);
if(table_insert(state, sym, (struct action){
- is_terminal(sym) ? ACTION_SHIFT : ACTION_GOTO,
+ symbol_is_terminal(sym) ? ACTION_SHIFT : ACTION_GOTO,
new_state})) return 1;
}
-
+
return 0;
}
-size_t closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout_max)
+size_t itemset_closure(struct item *in_set, size_t nin, struct item *out_set, size_t nout_max)
{
size_t nout = nin;
-
+
if(nout > nout_max) return nout;
for(size_t i = 0; i < nin; i++) out_set[i] = in_set[i];
- int *is_in_closure = xcalloc(ARR_LEN(grammar), sizeof(int));
+ int *is_in_closure = xcalloc(total_productions, sizeof(int));
for(size_t i = 0; i < nout; i++)
{
struct production *p = &grammar[out_set[i].prod_idx];
if(out_set[i].dot == p->nRHS) continue;
- enum symbol sym = p->RHS[out_set[i].dot];
-
- for(size_t j = 0; j < ARR_LEN(grammar); j++)
+ symbol sym = p->RHS[out_set[i].dot];
+
+ for(size_t j = 0; j < total_productions; j++)
if(grammar[j].LHS == sym) {
if(is_in_closure[j]) continue;
is_in_closure[j] = 1;
@@ -265,21 +211,21 @@ cleanup:
return nout;
}
-void fill_follow_table()
+void follow_table_fill()
{
- for(size_t i = 0; i < ARR_LEN(grammar); i++) {
+ for(size_t i = 0; i < total_productions; i++) {
struct production *p = &grammar[i];
for(size_t j = 1; j < p->nRHS; j++)
follow[p->RHS[j-1]][p->RHS[j]] = 1;
}
#define set(e) if((e) != 1) changed = (e) = 1
-
+
int changed;
do {
changed = 0;
- for(size_t i = 0; i < ARR_LEN(grammar); i++)
- for(size_t sym = 0; sym < SYMBOL_COUNT; sym++) {
+ for(size_t i = 0; i < total_productions; i++)
+ for(size_t sym = 0; sym < total_symbols; sym++) {
struct production *p = &grammar[i];
if(follow[p->LHS][sym])
set(follow[p->RHS[p->nRHS-1]][sym]);
@@ -289,7 +235,21 @@ void fill_follow_table()
} while(changed);
}
-int table_insert(size_t state, enum symbol sym, struct action a)
+void follow_table_print()
+{
+ printf(" ");
+ for(size_t i = 0; i < total_symbols; i++) printf("%2zu ", i);
+ printf("\n");
+
+ for(size_t i = 0; i < total_symbols; i++) {
+ printf("%2zu ", i);
+ for(size_t j = 0; j < total_symbols; j++)
+ printf(" %s ", follow[i][j] ? "x" : " ");
+ printf("\n");
+ }
+}
+
+int table_insert(size_t state, symbol sym, struct action a)
{
if(table[state][sym].type != ACTION_NOT_SET) {
fprintf(stderr, "TABLE COLLISION on state '%zu' sym '%d'\n", state, sym);
@@ -298,7 +258,69 @@ int table_insert(size_t state, enum symbol sym, struct action a)
a.type, a.arg);
return 1;
}
-
+
table[state][sym] = a;
return 0;
}
+
+void table_print()
+{
+ printf(" ");
+ for(size_t sym = 0; sym < total_symbols; sym++) printf("%2zu ", sym);
+ printf("\n");
+
+ char action_to_char[] = {[ACTION_SHIFT] = 's', [ACTION_REDUCE] = 'r', [ACTION_GOTO] = 'g'};
+ for(size_t i = 0; i < table_states; i++) {
+ printf("%2zu ", i);
+ for(size_t sym = 0; sym < total_symbols; sym++)
+ if(table[i][sym].type == ACTION_ACCEPT) printf(" a ");
+ else if(table[i][sym].type) printf("%c%-2zu ", action_to_char[table[i][sym].type], table[i][sym].arg);
+ else printf(" ");
+ printf("\n");
+ }
+}
+
+#ifdef _LR0_TABLE_STANDALONE
+
+enum symbol {
+ PLUS = 0,
+ MINUS,
+ LPAREN,
+ RPAREN,
+ N0, N1,
+ END_INPUT,
+
+ EP, E, T, N,
+ SYMBOLS_END,
+};
+const size_t total_symbols = SYMBOLS_END;
+
+int symbol_is_terminal(symbol s) { return s < E; }
+int symbol_is_nonterminal(symbol s) { return s >= E; }
+int symbol_is_input_end(symbol s) { return s == END_INPUT; }
+
+#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
+struct production grammar[] = {
+ PROD(EP, ->, E, END_INPUT),
+ PROD(E, -->, E, PLUS, T),
+ PROD(E, -->, E, MINUS, T),
+ PROD(E, -->, T),
+ PROD(T, -->, LPAREN, E, RPAREN),
+ PROD(T, -->, N),
+ PROD(N, -->, N0),
+ PROD(N, -->, N1),
+};
+
+const size_t total_productions = sizeof(grammar)/sizeof(*grammar);
+
+int main(void)
+{
+ follow_table_fill();
+
+ itemset_handle((struct item[]){{0, 0}}, 1);
+ table_print();
+
+ seen_sets_free();
+}
+
+#endif