diff options
| author | kartofen <kartofen.mail.0@protonmail.com> | 2025-08-26 01:17:10 +0300 |
|---|---|---|
| committer | kartofen <kartofen.mail.0@protonmail.com> | 2025-08-26 01:17:10 +0300 |
| commit | 46e786db9d1b48b8fbc3502e36f093b755f3e09f (patch) | |
| tree | 9e279216e68f3fe4b0849d1e07184fe674dc551f /demos/sample-files/lbp-skeleton.c | |
| parent | 1c83c514c8108fccfec9764da5e4563b98eb871b (diff) | |
grammar for the grammar and lexing and parsing of a new language lbp
Diffstat (limited to 'demos/sample-files/lbp-skeleton.c')
| -rw-r--r-- | demos/sample-files/lbp-skeleton.c | 258 |
1 files changed, 258 insertions, 0 deletions
diff --git a/demos/sample-files/lbp-skeleton.c b/demos/sample-files/lbp-skeleton.c new file mode 100644 index 0000000..ae0a17f --- /dev/null +++ b/demos/sample-files/lbp-skeleton.c @@ -0,0 +1,258 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <ctype.h> + +// TODO: lr parser is bad for debugging + +#define INPUT_CAP 4096 +#define ARENA_CAP 4096 + +#define ARENA_IMPLEMENTATION +#include "util/arena.h" + +static char buf[ARENA_CAP]; +static struct arena_ctx global_arena; +static void *xalloc(size_t sz) { + void *addr = arena_allocate(&global_arena, sz); + if(!addr) { + fprintf(stderr, "ERROR: Arena empty\n"); exit(1); + } + + return addr; +} + +// other things here +#include "util/list.h" +static inline struct list_head *list_new_head(struct list_head *head, struct list_head *new) +{ + if(head) list_add(new, head); + return new; +} + +#define list_new_head(head, new) (intptr_t)list_new_head((struct list_head *)head, (struct list_head *)new) + +// generated +#include "bin/lbp.h" +#include "bin/lbp.c" + +#include "util/dict.h" +static struct dict types_dict; +static struct string_token types_strings[] = { + {"int", T_INT}, + {"enum", T_ENUM}, + {"struct", T_STRUCT}, + {"function", ST_FUNCTION}, + {"big", ST_BIG}, + {"little", ST_LITTLE}, + {"native", ST_NATIVE}, +}; +static size_t ntypes_strings = sizeof(types_strings)/sizeof(*types_strings); +static uint8_t dict_lowercase_char_to_bit[256] = { + ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7, + ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13, + ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19, + ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25, + ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 +}; + + +#include "parts/toklist.h" +struct token { + symbol s; + intptr_t v; +}; + +#include "util/queue.h" +QUEUE_GENERATE(tokbuf, struct token, 16) + +symbol token_sym(struct token *t) { return t->s; } +intptr_t token_val(struct token *t) { return t->v; } + +static void print_token(struct token *t); +static char *next_token(char *str); + +static char *input; + +struct token *toklist_eat() +{ + static struct token t; + tokbuf_dequeue(&t); // err not checked + if(tokbuf_empty()) input = next_token(input); + return &t; +} + +struct token *toklist_peek() { + static struct token t; + tokbuf_peek(&t); // err not checked + return &t; +} + +// #define _LR_PARSER_DEBUG +#include "lr-parser.c" + +int main(void) +{ + static char input_buf[INPUT_CAP]; + if(fread(input_buf, INPUT_CAP, 1, stdin) == INPUT_CAP) { + fprintf(stderr, "INPUT_CAP reached\n"); + return 1; + } + + global_arena = ARENA_CTX_INIT(buf, ARENA_CAP); + + types_dict = DICT_INIT(types_strings, ntypes_strings, dict_lowercase_char_to_bit); + dict_compile(&types_dict); + + input = next_token(input_buf); + + // while(1) { + // struct token *tok = toklist_eat(); + // print_token(tok); + // if(token_sym(tok) == END_INPUT) break; + // } return 0; + + intptr_t value; + if(lr_parser(&value)) { + fprintf(stderr, input); + return 1; + } + + fprintf(stderr, "OUTPUT: %jd\n", value); + + dict_free(&types_dict); + return 0; +} + +static void print_token(struct token *tok) +{ + printf("%s\n", symbol_to_str[token_sym(tok)]); + if(token_sym(tok) == IDEN || token_sym(tok) == ATOM) printf(" %s\n", (char *)token_val(tok)); +} + +// STR UTIL + +#define strdup(...) _strdup(__VA_ARGS__) +static inline char *_strdup(char *str) +{ + return memcpy(xalloc(strlen(str) + 1), str, strlen(str)+1); +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(!str) return sub; + + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +static inline size_t tillch(char *str, size_t len, char ch) +{ + for(size_t i = 0; i < len; i++) if(str[i] == ch) return i; + return len; +} + +// LEXER + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == '/' || c == ',' || c == ';' || + c == '.' || c == '(' || c == ')' || c == '{' || c == '}'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static char *typelist_tokenize(char *str) +{ + size_t off = 0; + while(!issep(str[off]) && str[off] != '-') off++; + + if(off > 0) { + int s = dict_check(&types_dict, substring(str, off)); + if(s < 0) { + fprintf(stderr, "ERROR: Unknown type or subtype %s\n", substring(NULL, 0)); + return NULL; + } + + tokbuf_enqueue(&(struct token){.s = s, .v = s}); + } + + str += off; + + switch(str[0]) { + case '-': return typelist_tokenize(str+1); + case '(': + while((str = next_token(str))) + if(*(str-1)== ')') { // not really + if(str[0] == '-') return typelist_tokenize(str+1); + else return str; + } + return NULL; + default: return str; + } +} + +static char *next_token(char *str) +{ + if(!str) return str; + + struct token tok = {0}; + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') tok.s = END_INPUT; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case ',': tok.s = COMMA; break; + case ';': tok.s = SEMICOL; break; + case '.': tok.s = DOT; break; + case '(': tok.s = LPAREN; break; + case ')': tok.s = RPAREN; break; + case '{': tok.s = LBRACE; break; + case '}': tok.s = RBRACE; break; + case '/': + tok.s = TYPELIST_START; tokbuf_enqueue(&tok); + if(!(str = typelist_tokenize(str+off))) goto fail; + tok.s = TYPELIST_END; tokbuf_enqueue(&tok); + return str; + default: break; + } + } else if(c0 >= '0' && c0 <= '9') { // num + tok.s = NUM; + tok.v = (intptr_t)atoi(substring(str, off)); // not really + } else { // iden or atom (possibly with fields) + int hasfield = 0; + size_t sub_off; + + do { + sub_off = tillch(str + 1, off - 1, ':') + 1; + if(hasfield) + tokbuf_enqueue(&(struct token){.s = COLON, .v = 0}); + + tokbuf_enqueue(&(struct token){.s = (!hasfield && str[0] == ':') ? ATOM : IDEN, + .v = (intptr_t)strdup(substring(str+hasfield, sub_off-hasfield))}); + } while(hasfield = 1, str += sub_off, off -= sub_off, off > 0); + + return str; + } + } + + tokbuf_enqueue(&tok); + return str+off; + +fail: + tokbuf_enqueue(&(struct token){.s = END_INPUT}); + return NULL; +} |
