#include #include #include #include #include // TODO: - lr parser is bad for debugging // - deal with errors #define INPUT_CAP 4096 #define ARENA_CAP 8192 //4096 #define ARENA_IMPLEMENTATION #include "util/arena.h" static char buf[ARENA_CAP]; static struct arena_ctx global_arena; static void *xalloc(size_t size) { void *addr = arena_allocate(&global_arena, size); if(!addr) { fprintf(stderr, "ERROR: Arena empty\n"); exit(1); } return addr; } static void *xcalloc(size_t nmemb, size_t size) { return xalloc(nmemb * size); } static void xfree(void *ptr) { (void)ptr; return; } // macros to create the g_ variant of the function // where everything is cast to and from intptr_t // so it plays nicely with the lr parser // #define EMPTY() // #define DEFER(m) m EMPTY() // #define EVAL3(...) EVAL2(EVAL2(EVAL2(EVAL2(__VA_ARGS__)))) // #define EVAL2(...) EVAL1(EVAL1(EVAL1(EVAL1(__VA_ARGS__)))) // #define EVAL1(...) __VA_ARGS__ // #define A1(type, name) type name // #define A2(type, name) intptr_t name // #define A3(type, name) (type)name // #define _map2() map2 // #define map2(F, type, name, ...) F(type, name) __VA_OPT__(, DEFER(_map2)()(F, __VA_ARGS__)) // #define map(a, ...) DEFER(_map2)()(a, EVAL1 __VA_ARGS__) // #define g(ret, name, args) EVAL3( \ ret name(map(A1, args)); \ intptr_t g_##name(map(A2, args)) \ { return (intptr_t)name(map(A3, args)); } \ ret name(map(A1, args))) #include "util/list.h" struct ast_strlist { char *str; struct list_head list; }; struct ast_expr { enum { AST_NUMBER, AST_VARIABLE, AST_FIELDLIST, AST_DECLARATION, AST_DEFINITION, AST_OPERATION, AST_PARENLIST } type; union { int number; struct ast_vrbl { int is_atom; char *iden; } variable; struct ast_fiel { struct ast_vrbl variable; struct list_head *fields_strlist; } fieldlist; struct ast_decl { struct ast_vrbl variable; int typed; } declaration; struct ast_defn { struct ast_decl declartion; struct list_head *block_exprlist; } definition; struct ast_oprn { enum { AST_OP_AND, AST_OP_OR } optype; struct list_head *left_exprlist; struct list_head *right_exprlist; } operation; struct list_head *paren_exprlist; }; struct list_head list; }; #define NEW(t, ...) ((struct ast_##t){__VA_ARGS__}) #define g_NEW(t, ...) ((stack_item){.t = (struct ast_##t){__VA_ARGS__}}) #define LST(v) ({ typeof(v) *r = xalloc(sizeof(v)); *r = v; LIST_EMPTY(&r->list); &r->list; }) #define g_LST(v) ((stack_item){.list = LST(v)}) void ast_vrbl_print(struct ast_vrbl *vrbl); void ast_fiel_print(struct ast_fiel *fiel); void ast_decl_print(struct ast_decl *decl); void ast_defn_print(struct ast_defn *defn); void ast_oprn_print(struct ast_oprn *oprn); void ast_expr_print(struct ast_expr *expr); void ast_exprlist_print(struct list_head *list); void ast_exprlist_free(struct list_head *list); // generated #include "bin/lbp.h" #include "bin/lbp.c" #include "util/dict.h" static struct dict types_dict; static struct string_token types_strings[] = { {"int", T_INT}, {"enum", T_ENUM}, {"struct", T_STRUCT}, {"function", ST_FUNCTION}, {"big", ST_BIG}, {"little", ST_LITTLE}, {"native", ST_NATIVE}, }; static size_t ntypes_strings = sizeof(types_strings)/sizeof(*types_strings); static uint8_t dict_lowercase_char_to_bit[256] = { ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7, ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13, ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19, ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25, ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 }; #include "parts/toklist.h" struct token { symbol s; stack_item v; }; #define TOKEN_INIT(sym, val) (struct token){ .s = sym, .v = val } static void print_token(struct token *t); symbol token_sym(struct token *t) { return t->s; } intptr_t token_val(struct token *t) { return (intptr_t)&t->v; } static char *input; static size_t line = 1; static size_t active_region; static char *next_token(char *str); static inline char *substring(char *str, size_t sub_end); static inline char *linestart(char *strstart, char *pos); static inline size_t tillch(char *str, size_t len, char ch); #define strdup(...) _strdup(__VA_ARGS__) static inline char *_strdup(char *str); #include "util/queue.h" QUEUE_GENERATE(tokbuf, struct token, 16) struct token *toklist_eat() { static struct token t; tokbuf_dequeue(&t); // err not checked if(tokbuf_empty()) input = next_token(input); return &t; } struct token *toklist_peek() { static struct token t; tokbuf_peek(&t); // err not checked return &t; } // #define _LR_PARSER_DEBUG #include "lr-parser.c" int main(void) { char *filename = "stdin"; static char input_buf[INPUT_CAP]; if(fread(input_buf, INPUT_CAP, 1, stdin) == INPUT_CAP) { fprintf(stderr, "INPUT_CAP reached\n"); return 1; } global_arena = ARENA_CTX_INIT(buf, ARENA_CAP); types_dict = DICT_INIT(types_strings, ntypes_strings, dict_lowercase_char_to_bit); // DICT_SET_ALLOCATOR(&types_dict, xcalloc, xfree); dict_compile(&types_dict); input = next_token(input_buf); // while(1) { // struct token *tok = toklist_eat(); // print_token(tok); // if(token_sym(tok) == END_INPUT) break; // } return 0; stack_item value; struct lr_errinfo *errinfo; if((errinfo = lr_parser(&value))) { char *l = linestart(input_buf, input); fprintf(stderr, "%s:%zu:%zu: ERROR: %s\n", filename, line, input - l - active_region+1, lr_err_str(errinfo)); size_t indent = fprintf(stderr, " %zu ", line); fprintf(stderr, "| %s\n", substring(l, tillch(l, strlen(l), '\n'))); fprintf(stderr, "%*s| %*s", indent, "", input - l - active_region, ""); if(active_region == 0) active_region = 1; fprintf(stderr, "^"); for(size_t i = 0; i < active_region-1; i++) fprintf(stderr, "~"); fprintf(stderr, "\n\n"); goto cleanup; } ast_exprlist_print(value.list); // ast_exprlist_free(value.list); cleanup: dict_free(&types_dict); return 0; } static void print_token(struct token *tok) { // TODO: unfinished function printf("%s\n", symbol_to_str[token_sym(tok)]); switch(token_sym(tok)) { case IDEN: case ATOM: printf(" %s\n", (char *)token_val(tok)); break; default: break; } } // STR UTIL #define strdup(...) _strdup(__VA_ARGS__) static inline char *_strdup(char *str) { return memcpy(xalloc(strlen(str) + 1), str, strlen(str)+1); } static inline char *substring(char *str, size_t sub_end) { static char sub[128]; if(!str) return sub; if(sub_end+1 > sizeof(sub)) return NULL; sub[sub_end] = '\0'; return memcpy(sub, str, sub_end); } static inline char *linestart(char *strstart, char *pos) { while(pos-- > strstart) if(*pos == '\n') return pos+1; return strstart; } static inline size_t tillch(char *str, size_t len, char ch) { for(size_t i = 0; i < len; i++) if(str[i] == ch) return i; return len; } // LEXER static inline int issep(char c) { return isspace(c) || c == '\0' || c == '/' || c == ',' || c == ';' || c == '.' || c == '(' || c == ')' || c == '{' || c == '}'; } static inline int tillsep(char *str) { size_t i = 0; while(!issep(str[i++])); return i-1; } static char *typelist_tokenize(char *str) { size_t off = 0; while(!issep(str[off]) && str[off] != '-') off++; if(off > 0) { int s = dict_check(&types_dict, substring(str, off)); if(s < 0) { fprintf(stderr, "ERROR: Unknown type or subtype %s\n", substring(NULL, 0)); } else { tokbuf_enqueue(&TOKEN_INIT(s, { .num = s })); } } str += off; switch(str[0]) { case '-': return typelist_tokenize(str+1); case '(': size_t depth = 0; while((str = next_token(str))) if(str[0] == '(') depth++; else if(*(str-1) == ')') { if(depth > 0) depth--; else if(str[0] == '-') return typelist_tokenize(str+1); else return str; } return NULL; default: return str; } } static char *next_token(char *str) { if(!str) return str; struct token tok = {0}; size_t off = 0; char c0 = str[0]; if(c0 == '\n') line++; if(isspace(c0)) return next_token(str+1); if(c0 == '\0') tok.s = END_INPUT; else { off = tillsep(str); if(off == 0) { // sep switch(str[off++]) { case ',': tok.s = COMMA; break; case ';': tok.s = SEMICOL; break; case '.': tok.s = DOT; break; case '(': tok.s = LPAREN; break; case ')': tok.s = RPAREN; break; case '{': tok.s = LBRACE; break; case '}': tok.s = RBRACE; break; case '/': char *s2 = str; tok.s = TYPELIST_START; tokbuf_enqueue(&tok); if(!(s2 = typelist_tokenize(s2+off))) goto fail; tok.s = TYPELIST_END; tokbuf_enqueue(&tok); active_region = s2 - str; return s2; default: fprintf(stderr, "ERROR: Unknown sep '%c'\n", c0); break; } } else if(c0 >= '0' && c0 <= '9') { // num tok.s = NUM; tok.v = (stack_item){ .num = atoi(substring(str, off)) }; // not really } else { // iden or atom (possibly with fields) active_region = off; int hasfield = 0; size_t sub_off; do { sub_off = tillch(str + 1, off - 1, ':') + 1; if(hasfield) tokbuf_enqueue(&TOKEN_INIT(COLON, { .num=0 })); tokbuf_enqueue( &TOKEN_INIT((!hasfield && str[0] == ':') ? ATOM : IDEN, { .str = strdup(substring(str+hasfield, sub_off-hasfield))})); } while(hasfield = 1, str += sub_off, off -= sub_off, off > 0); return str; } } tokbuf_enqueue(&tok); active_region = off; return str+off; fail: exit(14); // tok.s = END_INPUT; tokbuf_enqueue(&tok); // return NULL; } // ast printing void ast_vrbl_print(struct ast_vrbl *vrbl) { printf("%s%s", vrbl->is_atom ? ":" : "", vrbl->iden); } void ast_fiel_print(struct ast_fiel *fiel) { ast_vrbl_print(&fiel->variable); list_for_each_entry(struct ast_strlist, entry, list, fiel->fields_strlist) { printf(":%s", entry->str); } } void ast_decl_print(struct ast_decl *decl) { // TODO: implement } void ast_defn_print(struct ast_defn *defn) { // TODO: implement } void ast_oprn_print(struct ast_oprn *oprn) { ast_exprlist_print(oprn->left_exprlist); if(oprn->optype == AST_OP_AND) printf(",\n"); else printf(";\n"); ast_exprlist_print(oprn->right_exprlist); } void ast_expr_print(struct ast_expr *expr) { ast_exprlist_print(&expr->list); } void ast_exprlist_print(struct list_head *list) { list_for_each_entry(struct ast_expr, entry, list, list) { switch(entry->type) { case AST_NUMBER: printf("%d", entry->number); break; case AST_VARIABLE: ast_vrbl_print(&entry->variable); break; case AST_FIELDLIST: ast_fiel_print(&entry->fieldlist); break; case AST_DECLARATION: ast_decl_print(&entry->declaration); break; case AST_OPERATION: ast_oprn_print(&entry->operation); break; case AST_PARENLIST: printf("("); ast_exprlist_print(entry->paren_exprlist); printf(")"); break; default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type); } printf(" "); } printf("\n"); } void ast_exprlist_free(struct list_head *list) { list_for_each_safe(l, list) { struct ast_expr *entry = list_entry(l, typeof(*entry), list); switch(entry->type) { case AST_NUMBER: break; case AST_VARIABLE: break; case AST_FIELDLIST: list_for_each_safe(l, entry->fieldlist.fields_strlist) free(list_entry(l, struct ast_strlist, list)); break; case AST_DECLARATION: break; case AST_DEFINITION: break; case AST_OPERATION: ast_exprlist_free(entry->operation.left_exprlist); ast_exprlist_free(entry->operation.right_exprlist); break; case AST_PARENLIST: ast_exprlist_free(entry->paren_exprlist); break; default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type); } free(entry); } }