#include #include #include #include #include #include // TODO: - lr parser is bad for debugging (now its better) // - deal with errors (the token queue for example)!!!! // - ast should show the specific operation (match, assignment, etc) // - merge the debuginfo for each reduction ! #define MIN(a, b) ((a) > (b) ? (b) : (a)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define INPUT_CAP 4096 #define ARENA_CAP 4096 * 3 #define ARENA_IMPLEMENTATION #include "util/arena.h" static char buf[ARENA_CAP]; static struct arena_ctx global_arena; static void *xalloc(size_t size) { void *addr = arena_allocate(&global_arena, size); if(!addr) { fprintf(stderr, "ERROR: Arena empty\n"); exit(1); } return addr; } static void *xcalloc(size_t nmemb, size_t size) { return xalloc(nmemb * size); } static void xfree(void *ptr) { (void)ptr; return; } // macros to create the g_ variant of the function // where everything is cast to and from intptr_t // so it plays nicely with the lr parser // #define EMPTY() // #define DEFER(m) m EMPTY() // #define EVAL3(...) EVAL2(EVAL2(EVAL2(EVAL2(__VA_ARGS__)))) // #define EVAL2(...) EVAL1(EVAL1(EVAL1(EVAL1(__VA_ARGS__)))) // #define EVAL1(...) __VA_ARGS__ // #define A1(type, name) type name // #define A2(type, name) intptr_t name // #define A3(type, name) (type)name // #define _map2() map2 // #define map2(F, type, name, ...) F(type, name) __VA_OPT__(, DEFER(_map2)()(F, __VA_ARGS__)) // #define map(a, ...) DEFER(_map2)()(a, EVAL1 __VA_ARGS__) // #define g(ret, name, args) EVAL3( \ ret name(map(A1, args)); \ intptr_t g_##name(map(A2, args)) \ { return (intptr_t)name(map(A3, args)); } \ ret name(map(A1, args))) #include "util/list.h" struct datatype { int typeflags; // TODO: add ptr char *iden; struct list_head *function_exprlist; }; struct strlist { char *str; struct list_head list; }; // TODO: // - type of each each exprlist, be it assignement, match, function call, etc // - revise expr types, etc // struct ast_exprlist { // enum { AST_OP_ASSIGNMENT, AST_OP_TYPE, AST_OP_CALL } type; // struct list_head *exprlist; // } // struct ast_exprlist { // enum { AST_OP_ASSIGNMENT, AST_OP_IMPLEMENT, AST_OP_MATCH, AST_OP_EQUAL, AST_OP_CALL } type; // struct list_head *exprlist; // }; // struct ast_expr { // enum { AST_LITERAL, AST_IDENTIFIER, AST_DECLARATION, AST_SUB } type; // struct datatype datatype; // struct list_head list; // } struct ast_expr { enum { AST_NUMBER, AST_VARIABLE, AST_DECLARATION, AST_FIELDLIST, AST_OPERATION, AST_PARENLIST, AST_BRACELIST } type; union { int number; struct ast_vrbl { int is_atom; char *iden; } variable; struct ast_fiel { struct ast_vrbl variable; struct list_head *fields_strlist; } fieldlist; struct ast_oprn { enum { AST_OP_AND, AST_OP_OR } optype; struct list_head *left_exprlist; struct list_head *right_exprlist; } operation; struct list_head *exprlist; }; struct datatype datatype; struct list_head list; }; #define AST(t, ...) ((struct ast_##t){__VA_ARGS__}) #define g_AST(t, ...) ((stack_item){.t = {__VA_ARGS__}}) #define NEW(t, ...) ((struct t){__VA_ARGS__}) #define g_NEW(t, ...) ((stack_item){.t = {__VA_ARGS__}}) #define LST(v) ({ typeof(v) *r = malloc(sizeof(v)); *r = v; LIST_EMPTY(&r->list); &r->list; }) #define g_LST(v) ((stack_item){.list = LST(v)}) // void ast_decide_datatype(struct list_head *list); void ast_print(struct list_head *list); void ast_free(struct list_head *list); // generated #include "bin/lbp.h" enum { DATATYPE_INT = 0, DATATYPE_STRUCT, DATATYPE_ENUM }; enum { SUBTYPE_FUNCTION = 1 << 0, SUBTYPE_LITTLE = 1 << 1, SUBTYPE_BIG = 1 << 2, SUBTYPE_NATIVE = 1 << 3 }; #define TYPEFLAGS(type, subtype) (((subtype) << 3) | (type)) #define flags2type(flags) ((flags) & 7) #define flags2subtype(flags) ((flags) >> 3) #include "bin/lbp.c" #include "util/dict.h" static struct dict types_dict; static struct string_token types_strings[] = { {"int", T_INT}, {"enum", T_ENUM}, {"struct", T_STRUCT}, {"function", ST_FUNCTION}, {"big", ST_BIG}, {"little", ST_LITTLE}, {"native", ST_NATIVE}, }; static size_t ntypes_strings = sizeof(types_strings)/sizeof(*types_strings); static uint8_t dict_lowercase_char_to_bit[256] = { ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7, ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13, ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19, ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25, ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 }; static inline char *substring(char *str, size_t sub_end); static inline char *linestart(char *strstart, char *pos); static inline size_t tillch(char *str, size_t len, char ch); #define strdup(...) _strdup(__VA_ARGS__) static inline char *_strdup(char *str); #include "parts/toklist.h" struct token { symbol s; stack_item v; }; #define TOKEN_INIT(sym, val) (struct token){ .s = sym, .v = val } symbol token_sym(struct token *t) { return t->s; } intptr_t token_val(struct token *t) { return (intptr_t)&t->v; } static void print_token(struct token *t); static size_t line = 1; static size_t active_region; static char *next_token(char *str); #include "util/queue.h" QUEUE_GENERATE(tokbuf, struct token, 16) static char *input; struct token *toklist_eat() { static struct token t; tokbuf_dequeue(&t); // err not checked if(tokbuf_empty()) input = next_token(input); return &t; } struct token *toklist_peek() { static struct token t; tokbuf_peek(&t); // err not checked return &t; } struct debuginfo { size_t line; size_t active_region; char *end_ptr; }; #define debuginfo_merge(out, ...) _debuginfo_merge(out, __VA_ARGS__, NULL); void _debuginfo_merge(struct debuginfo *out, ...); void errmsg_print(char *filename, char *input_buf, struct debuginfo *debuginfo, char *message); // #define _LR_PARSER_DEBUG #include "lr-parser.c" int main(void) { static char *filename = "stdin"; static char input_buf[INPUT_CAP]; if(fread(input_buf, INPUT_CAP, 1, stdin) == INPUT_CAP) { fprintf(stderr, "INPUT_CAP reached\n"); return 1; } global_arena = ARENA_CTX_INIT(buf, ARENA_CAP); types_dict = DICT_INIT(types_strings, ntypes_strings, dict_lowercase_char_to_bit); // DICT_SET_ALLOCATOR(&types_dict, xcalloc, xfree); dict_compile(&types_dict); input = next_token(input_buf); // while(1) { // struct token *tok = toklist_eat(); // print_token(tok); // if(token_sym(tok) == END_INPUT) break; // } return 0; struct lr_parseinfo parseinfo; void *value = lr_parser(&parseinfo); if(parseinfo.type == LR_ABORTED) { goto cleanup; } else if(parseinfo.type) { errmsg_print(filename, input_buf, NULL, lr_err_str(&parseinfo)); goto cleanup; } ast_print(((stack_item *)value)->list); ast_free(((stack_item *)value)->list); cleanup: dict_free(&types_dict); return 0; } static void print_token(struct token *tok) { // TODO: unfinished function printf("%s\n", symbol_to_str[token_sym(tok)]); switch(token_sym(tok)) { case IDEN: case ATOM: printf(" %s\n", (char *)token_val(tok)); break; default: break; } } void _debuginfo_merge(struct debuginfo *out, ...) { va_list ap; va_start(ap, out); *out = *va_arg(ap, struct debuginfo *); struct debuginfo *arg; while((arg = va_arg(ap, struct debuginfo *))) { intptr_t start = MIN((intptr_t)out->end_ptr - out->active_region, (intptr_t)arg->end_ptr - arg->active_region); intptr_t end = MAX((intptr_t)out->end_ptr, (intptr_t)arg->end_ptr); out->active_region = end - start; out->end_ptr = (char *)end; } va_end(ap); } void errmsg_print(char *filename, char *input_buf, struct debuginfo *debuginfo, char *message) { if(!debuginfo) debuginfo = &(struct debuginfo){.line = line, .active_region = active_region, .end_ptr = input}; char *l = linestart(input_buf, debuginfo->end_ptr); fprintf(stderr, "%s:%zu:%zu: ERROR: %s\n", filename, debuginfo->line, debuginfo->end_ptr - l - debuginfo->active_region+1, message); int indent = fprintf(stderr, " %zu ", debuginfo->line); fprintf(stderr, "| %s\n", substring(l, tillch(l, strlen(l), '\n'))); fprintf(stderr, "%*s| %*s", indent, "", debuginfo->end_ptr - l - debuginfo->active_region, ""); if(active_region == 0) debuginfo->active_region = 1; for(size_t i = 0; i < debuginfo->active_region; i++) fprintf(stderr, "~"); fprintf(stderr, "\n"); } // STR UTIL #define strdup(...) _strdup(__VA_ARGS__) static inline char *_strdup(char *str) { return memcpy(xalloc(strlen(str) + 1), str, strlen(str)+1); } static inline char *substring(char *str, size_t sub_end) { static char sub[128]; if(!str) return sub; if(sub_end+1 > sizeof(sub)) return NULL; sub[sub_end] = '\0'; return memcpy(sub, str, sub_end); } static inline char *linestart(char *strstart, char *pos) { while(pos-- > strstart) if(*pos == '\n') return pos+1; return strstart; } static inline size_t tillch(char *str, size_t len, char ch) { for(size_t i = 0; i < len; i++) if(str[i] == ch) return i; return len; } // LEXER static inline int issep(char c) { return isspace(c) || c == '\0' || c == '/' || c == ',' || c == ';' || c == '.' || c == '(' || c == ')' || c == '{' || c == '}'; } static inline int tillsep(char *str) { size_t i = 0; while(!issep(str[i++])); return i-1; } static char *typelist_tokenize(char *str) { size_t off = 0; while(!issep(str[off]) && str[off] != '-') off++; if(off > 0) { int s = dict_check(&types_dict, substring(str, off)); if(s < 0) { fprintf(stderr, "ERROR: Unknown type or subtype %s\n", substring(NULL, 0)); } else { tokbuf_enqueue(&TOKEN_INIT(s, { .num = s })); } } str += off; switch(str[0]) { case '-': return typelist_tokenize(str+1); case '(': size_t depth = 0; while((str = next_token(str))) if(str[0] == '(') depth++; else if(*(str-1) == ')') { if(depth > 0) depth--; else if(str[0] == '-') return typelist_tokenize(str+1); else return str; } return NULL; default: return str; } } static char *next_token(char *str) { if(!str) return str; struct token tok = {0}; size_t off = 0; char c0 = str[0]; if(c0 == '\n') line++; if(isspace(c0)) return next_token(str+1); if(c0 == '\0') tok.s = END_INPUT; else { off = tillsep(str); if(off == 0) { // sep switch(str[off++]) { case ',': tok.s = COMMA; break; case ';': tok.s = SEMICOL; break; case '.': tok.s = DOT; break; case '(': tok.s = LPAREN; break; case ')': tok.s = RPAREN; break; case '{': tok.s = LBRACE; break; case '}': tok.s = RBRACE; break; case '/': char *s2 = str; tok.s = TYPELIST_START; tokbuf_enqueue(&tok); if(!(s2 = typelist_tokenize(s2+off))) goto fail; tok.s = TYPELIST_END; tokbuf_enqueue(&tok); active_region = s2 - str; return s2; default: fprintf(stderr, "ERROR: Unknown sep '%c'\n", c0); break; } } else if(c0 >= '0' && c0 <= '9') { // num tok.s = NUM; tok.v = (stack_item){ .num = atoi(substring(str, off)) }; // not really } else { // iden or atom (possibly with fields) active_region = off; int hasfield = 0; size_t sub_off; do { sub_off = tillch(str + 1, off - 1, ':') + 1; if(hasfield) tokbuf_enqueue(&TOKEN_INIT(COLON, { .num=0 })); int skip = hasfield || str[0] == ':'; tokbuf_enqueue( &TOKEN_INIT((skip && !hasfield) ? ATOM : IDEN, { .str = strdup(substring(str+skip, sub_off-skip))})); } while(hasfield = 1, str += sub_off, off -= sub_off, off > 0); return str; } } tokbuf_enqueue(&tok); active_region = off; return str+off; fail: exit(14); // tok.s = END_INPUT; tokbuf_enqueue(&tok); // return NULL; } // ast printing int ident = 0; #define INDENT() printf("%*s", ident*2, ""); void ast_exprlist_print(struct list_head *list); void datatype_print(struct datatype *datatype) { // TOOD: fix, very messy switch(flags2type(datatype->typeflags)) { case DATATYPE_INT: printf("int"); break; case DATATYPE_ENUM: printf("enum"); break; case DATATYPE_STRUCT: printf("struct"); break; } if(datatype->iden) printf("(%s)", datatype->iden); for(int i = 0; i < 3; i++) switch(flags2subtype(datatype->typeflags) & (1 << i)) { case 0: break; case SUBTYPE_FUNCTION: printf("-function("); ast_exprlist_print(datatype->function_exprlist); printf(")"); break; default: printf("-%d", i); break; } } void ast_vrbl_print(struct ast_vrbl *vrbl) { printf("%s%s", vrbl->is_atom ? ":" : "", vrbl->iden); } void ast_fiel_print(struct ast_fiel *fiel) { ast_vrbl_print(&fiel->variable); list_for_each_entry(struct strlist, entry, list, fiel->fields_strlist) { printf(":%s", entry->str); } } void ast_decl_print(struct ast_vrbl *vrbl, struct datatype *datatype) { ast_vrbl_print(vrbl); printf("/"); datatype_print(datatype); } void ast_oprn_print(struct ast_oprn *oprn) { ast_exprlist_print(oprn->left_exprlist); if(oprn->optype == AST_OP_AND) printf(",\n"); else printf(";\n"); INDENT(); ast_exprlist_print(oprn->right_exprlist); } void ast_expr_print(struct ast_expr *expr) { ast_exprlist_print(&expr->list); } void ast_exprlist_print(struct list_head *list) { list_for_each_entry(struct ast_expr, entry, list, list) { switch(entry->type) { case AST_NUMBER: printf("%d", entry->number); break; case AST_VARIABLE: ast_vrbl_print(&entry->variable); break; case AST_FIELDLIST: ast_fiel_print(&entry->fieldlist); break; case AST_DECLARATION: ast_decl_print(&entry->variable, &entry->datatype); break; case AST_OPERATION: ast_oprn_print(&entry->operation); break; case AST_PARENLIST: printf("(\n"); ident++; INDENT(); ast_exprlist_print(entry->exprlist); printf("\n"); ident--; INDENT(); printf(")"); break; case AST_BRACELIST: printf("{\n"); ident++; INDENT(); ast_exprlist_print(entry->exprlist); printf(".\n"); ident--; INDENT(); printf("}"); break; default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type); } if(entry->list.next) printf(" "); } } void ast_print(struct list_head *list) { ast_exprlist_print(list); printf(".\n"); } void ast_free(struct list_head *list) { list_for_each_safe(l, list) { struct ast_expr *entry = list_entry(l, typeof(*entry), list); switch(entry->type) { case AST_NUMBER: break; case AST_VARIABLE: break; case AST_FIELDLIST: list_for_each_safe(l, entry->fieldlist.fields_strlist) free(list_entry(l, struct strlist, list)); break; case AST_DECLARATION: break; case AST_OPERATION: ast_free(entry->operation.left_exprlist); ast_free(entry->operation.right_exprlist); break; case AST_BRACELIST: case AST_PARENLIST: ast_free(entry->exprlist); break; default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type); } ast_free(entry->datatype.function_exprlist); free(entry); } }