author    | kartofen <mladenovnasko0@gmail.com> | 2024-01-02 16:06:45 +0200
committer | kartofen <mladenovnasko0@gmail.com> | 2024-01-02 16:06:45 +0200
commit    | c837b2bb4ea71cedb434260b11c9f356e8b34e2d (patch)
tree      | d8d0b5bc5e6831b75e0d71395326e0e206c1bf43
parent    | 536f787c627e3a2b245e529345b948dc4631817f (diff)
things
-rw-r--r-- | files/test1.lisp |   7
-rw-r--r-- | src/common.h     |   1
-rw-r--r-- | src/eval.h       |  10
-rw-r--r-- | src/lexer.c      | 126
-rw-r--r-- | src/lexer.h      |  29
-rw-r--r-- | src/main.c       |  48
-rw-r--r-- | src/parser.c     |  90
-rw-r--r-- | src/parser.h     |  14
-rw-r--r-- | src/value.c      |   2
9 files changed, 174 insertions, 153 deletions
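At the API level this commit moves token storage out of struct lexer into a caller-owned toklist_t (capped at LEXER_TOK_CAP) and moves the AST root out of struct parser into a caller-owned ast_t, so main.c now allocates both and threads them through lexer_tokenize() and parser_parse_toklist(). Below is a minimal sketch of that calling sequence, using only the declarations visible in the diff that follows; the in-memory input string is illustrative and not part of the commit.

```c
#include <string.h>
#include "lexer.h"
#include "parser.h"

int main(void)
{
    toklist_t tokens = {0};   // caller-owned token storage (previously lived inside struct lexer)
    ast_t ast_root = {0};     // caller-owned AST root (previously struct parser's root)

    char *src = "('a (1 2))"; // illustrative input

    lexer_t lexer = lexer_create();           // no tokens_cap argument any more
    if(!lexer) return 1;

    toklist_reset(&tokens);
    if(lexer_tokenize(lexer, &tokens, src, strlen(src))) return 1;
    if(!lexer_has_finished(lexer)) return 1;  // unbalanced parens or an open string literal

    toklist_print(&tokens);

    parser_t parser = parser_create();
    if(!parser) return 1;

    ast_reset(&ast_root);                     // sexp_free + sexp_init on the caller's root
    if(parser_parse_toklist(parser, &tokens, &ast_root)) return 1;
    ast_print(&ast_root);

    ast_reset(&ast_root);
    toklist_reset(&tokens);
    parser_destroy(parser);
    lexer_destroy(lexer);
    return 0;
}
```

Note that lexer_destroy() no longer frees any tokens after this change; the toklist_t owns them, so the caller has to reset it (as main.c now does after printing).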
diff --git a/files/test1.lisp b/files/test1.lisp
index 41325dd..d4d21ba 100644
--- a/files/test1.lisp
+++ b/files/test1.lisp
@@ -15,3 +15,10 @@
 (6 7)
 ('1)
 1
+'a
+
+''''''a
+^
+(quote (quote (quote (quote (quote (quote a))))))
+
+'a <=> (quote a)
diff --git a/src/common.h b/src/common.h
index f67af22..935f79a 100644
--- a/src/common.h
+++ b/src/common.h
@@ -3,6 +3,7 @@

 #include <stdio.h>
 #include <stdlib.h>
+#include <stdbool.h>

 #define __RED__ "\033[0;31m"
 #define __GREEN__ "\033[0;32m"
diff --git a/src/eval.h b/src/eval.h
--- a/src/eval.h
+++ b/src/eval.h
@@ -3,14 +3,16 @@

 #include "parser.h"

-typedef struct eval *evaluator_t;
+typedef struct eval *eval_t;

 struct eval {
+    // symtbl_t root;
+    // symtbl_t *cur;
 };

 // TODO: add options for the evaluation
-evaluator_t eval_create();
-void eval_destroy(evaluator_t evaluator);
-int eval_ast(evaluator_t evaluator, parser_t ast);
+eval_t eval_create();
+void eval_destroy(eval_t eval);
+int eval_ast(eval_t eval, parser_t ast);

 #endif
diff --git a/src/lexer.c b/src/lexer.c
index 9659bb4..d71fd2f 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -11,27 +11,27 @@

 // saves a token with no data
 // returns the index of the saved token; < 0 on fail
-static int token_add(lexer_t lexer, enum token_enum type);
+static int token_add(toklist_t *tokens, enum token_enum type);

-// saves a token with the current identifier (lexer->iden)
+// saves a token with an identifier (lexer -> iden)
 // returns 0 on success
-static int token_add_iden(lexer_t lexer);
+static int token_add_iden(toklist_t *tokens, char *iden, size_t *iden_sz);

 // used for tokens that separate things
 // if type is TOKEN_TOKENS, then no empty token will be saved
 // returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden)
-static int on_generic_separator(lexer_t lexer, enum token_enum type);
-static int on_double_quote(lexer_t lexer);
+static int on_generic_separator(lexer_t lexer, toklist_t *tokens, enum token_enum type);
+static int on_double_quote(lexer_t lexer, toklist_t *tokens);

 #define EQ(ch) ch ==

-#define SEPARATOR_CALLBACK_TBL(X, lexer) \
+#define SEPARATOR_CALLBACK_TBL(X, ...) \
 /* X(test, what to execute if the test succeeds) */ \
-    X(EQ('('),  on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \
-    X(EQ(')'),  on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \
-    X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \
-    X(EQ('"'),  on_double_quote(lexer)) \
-    X(isspace,  on_generic_separator(lexer, TOKEN_TOKENS))
+    X(EQ('('),  on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_OPEN); lexer->scope_depth++) \
+    X(EQ(')'),  on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_CLOSE); lexer->scope_depth--) \
+    X(EQ('\''), on_generic_separator(__VA_ARGS__, TOKEN_SPECIAL_QUOTE)) \
+    X(EQ('"'),  on_double_quote(__VA_ARGS__)) \
+    X(isspace,  on_generic_separator(__VA_ARGS__, TOKEN_TOKENS))

 #define FN(fn, arg) "%s", fn(arg, buf, buf_sz)

@@ -58,13 +58,13 @@ static int on_double_quote(lexer_t lexer);
         } \
     } else

-int lexer_tokenize(lexer_t lexer, char *str, size_t len)
-{
+int lexer_tokenize(lexer_t lexer, toklist_t *toklist, char *str, size_t len)
+{
     int callback_ret = 0;

     for(size_t i = 0; i < len; i++)
     {
-        SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {}
+        SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer, toklist);

         if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated
             err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz);
@@ -78,17 +78,9 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)
     return 0;
 }

-lexer_t lexer_create(size_t tokens_cap)
+lexer_t lexer_create()
 {
     lexer_t lexer = xmalloc(sizeof(struct lexer));
-
-    lexer->tokens_cap = tokens_cap;
-    lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token));
-
-    for(size_t i = 0; i < tokens_cap; i++) {
-        lexer->tokens[i].type = TOKEN_TOKENS;
-    }
-
     lexer_reset(lexer);
     return lexer;
 }
@@ -98,29 +90,22 @@ lexer_t lexer_create(size_t tokens_cap)
 void lexer_destroy(lexer_t lexer)
 {
     if(!lexer) return;
-
-    if(lexer->tokens) {
-        for(size_t i = 0; i < lexer->ntokens; i++)
-        {
-            struct token *token = &lexer->tokens[i];
-
-            switch(lexer->tokens[i].type) {
-                MANAGE_TOKEN_TBL(CASE_FREE, token);
-            default:
-                err("lexer_reset: Unknown token type given");
-                break;
-            }
-        }
-        free(lexer->tokens);
-    }
-
     free(lexer);
 }

 void lexer_reset(lexer_t lexer)
 {
-    for(size_t i = 0; i < lexer->tokens_cap; i++) {
-        struct token *token = &lexer->tokens[i];
+    memset(lexer->iden, 0, LEXER_IDEN_CAP);
+    lexer->iden_sz = 0;
+
+    lexer->inside_string = false;
+    lexer->scope_depth = 0;
+}
+
+void toklist_reset(toklist_t *toklist)
+{
+    for(size_t i = 0; i < toklist->ntokens; i++) {
+        struct token *token = &toklist->tokens[i];

         switch(token->type) {
             MANAGE_TOKEN_TBL(CASE_FREE, token);
@@ -132,26 +117,21 @@ void lexer_reset(lexer_t lexer)
         token->type = TOKEN_TOKENS;
         token->value = NULL;
     }
-
-    lexer->ntokens = 0;
-
-    memset(lexer->iden, 0, LEXER_IDEN_CAP);
-    lexer->iden_sz = 0;
-
-    lexer->inside_string = 0;
+
+    toklist->ntokens = 0;
 }

 // print based on the given way to print
 #define CASE_PRINT(type, free_func, ...) \
     case type: info("\n\t" #type "\n\t" __VA_ARGS__); break;

-void lexer_print_tokens(lexer_t lexer)
+void toklist_print(toklist_t *toklist)
 {
     // for the printing (see MANAGE_TOKEN_TBL)
     char buf[LEXER_IDEN_CAP];
     size_t buf_sz = LEXER_IDEN_CAP;
-    for(size_t i = 0; i < lexer->ntokens; i++) {
-        struct token *token = &lexer->tokens[i];
+    for(size_t i = 0; i < toklist->ntokens; i++) {
+        struct token *token = &toklist->tokens[i];

         switch(token->type) {
             MANAGE_TOKEN_TBL(CASE_PRINT, token);
@@ -162,11 +142,17 @@ void lexer_print_tokens(lexer_t lexer)
     }
 }

+bool lexer_has_finished(lexer_t lexer)
+{
+    if(!lexer->inside_string && lexer->scope_depth == 0) return true;
+    return false;
+}
+
 // ---------- Callback Functions ----------- //

-static int on_double_quote(lexer_t lexer)
+static int on_double_quote(lexer_t lexer, toklist_t *toklist)
 {
-    int ret = on_generic_separator(lexer, TOKEN_TOKENS);
+    int ret = on_generic_separator(lexer, toklist, TOKEN_TOKENS);
     if(ret < 0) {
         return ret;
     } else if(ret == 0) {
@@ -174,7 +160,7 @@ static int on_double_quote(lexer_t lexer)
         return 1;
     }

-    if(token_add_iden(lexer)) {
+    if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) {
         err("token_add_iden: failed");
         return -1;
     }
@@ -183,19 +169,19 @@ static int on_double_quote(lexer_t lexer)
     return 0;
 }

-static int on_generic_separator(lexer_t lexer, enum token_enum type)
+static int on_generic_separator(lexer_t lexer, toklist_t *toklist, enum token_enum type)
 {
     if(lexer->inside_string) {
         return 1;
     }

-    if(token_add_iden(lexer)) {
+    if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) {
         err("token_add_iden: failed");
         return -1;
     }

     if(type != TOKEN_TOKENS) {
-        if(token_add(lexer, type) < 0) {
+        if(token_add(toklist, type) < 0) {
             err("token_add: failed");
             return -1;
         }
@@ -206,39 +192,39 @@ static int on_generic_separator(lexer_t lexer, enum token_enum type)

 // ---------- Token Functions ----------- //

-static int token_add(lexer_t lexer, enum token_enum type)
+static int token_add(toklist_t *toklist, enum token_enum type)
 {
-    if(lexer->ntokens >= lexer->tokens_cap) {
-        err("tokens_cap of %ld has been reached", lexer->tokens_cap);
+    if(toklist->ntokens >= LEXER_TOK_CAP) {
+        err("tokens_cap of %ld has been reached", toklist->ntokens);
         return -1;
     }

-    lexer->tokens[lexer->ntokens].type = type;
-    return lexer->ntokens++;
+    toklist->tokens[toklist->ntokens].type = type;
+    return toklist->ntokens++;
 }

-static int token_add_iden(lexer_t lexer)
+static int token_add_iden(toklist_t *toklist, char *iden, size_t *iden_sz)
 {
     int ret = 1;
-    if(!lexer->iden_sz) return 0;
+    if(*iden_sz == 0) return 0;

-    int i = token_add(lexer, TOKEN_VALUE);
+    int i = token_add(toklist, TOKEN_VALUE);
     if(i < 0) {
         err("token_add: failed");
         goto exit;
     }

-    value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret);
+    value_t value = value_create(VALUE_LITERAL, iden, &ret);
     if(ret > 0) {
-        value = value_create(VALUE_SYMBOL, lexer->iden, &ret);
+        value = value_create(VALUE_SYMBOL, iden, &ret);
     } else if(ret < 0) {
         err("value_create: failed");
         goto exit;
     }

-    lexer->tokens[i].value = value;
+    toklist->tokens[i].value = value;
 exit:
-    memset(lexer->iden, 0, lexer->iden_sz);
-    lexer->iden_sz = 0;
+    memset(iden, 0, *iden_sz);
+    *iden_sz = 0;
     return ret;
 }
diff --git a/src/lexer.h b/src/lexer.h
index fc13f24..e928430 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -6,8 +6,12 @@
 #ifndef LEXER_IDEN_CAP
 #define LEXER_IDEN_CAP 512
 #endif
+#ifndef LEXER_TOK_CAP
+#define LEXER_TOK_CAP 8192
+#endif

 typedef struct lexer *lexer_t;
+typedef struct toklist toklist_t;

 struct token {
     enum token_enum {
@@ -19,38 +23,39 @@ struct token {
     value_t value;
 };

-struct lexer {
-    struct token *tokens;
-    size_t tokens_cap;
+struct toklist {
+    struct token tokens[LEXER_TOK_CAP];
     size_t ntokens;
+};

-    // identifier
-    char iden[LEXER_IDEN_CAP];
+struct lexer {
+    char iden[LEXER_IDEN_CAP]; // identifier
     size_t iden_sz;

-    int inside_string;
+    bool inside_string;
+    int scope_depth;
 };

-// allocate a lexer with a maximum number of tokens_cap tokens
 // returns a lexer on success and NULL on fail
-lexer_t lexer_create(size_t tokens_cap);
+lexer_t lexer_create();

 // deallocate a lexer
 void lexer_destroy(lexer_t lexer);

-// reset a lexer to its default state without destroying it (faster)
+// reset to its default state without destroying it
 void lexer_reset(lexer_t lexer);
+void toklist_reset(toklist_t *toklist);

 // self explanatory
-void lexer_print_tokens(lexer_t lexer);
+void toklist_print(toklist_t *toklist);

 // turn the given non-null-terminated string str of lenght len
 // into into tokens
 // returns 0 on success
-int lexer_tokenize(lexer_t lexer, char *str, size_t len);
+int lexer_tokenize(lexer_t lexer, toklist_t *toklist, char *str, size_t len);

 // checks whether the lexer has finished (temp buffers like iden are empty)
 // returns 1 on finished, 0 on not finished
-int lexer_has_finished(lexer_t lexer);
+bool lexer_has_finished(lexer_t lexer);

 #endif
diff --git a/src/main.c b/src/main.c
--- a/src/main.c
+++ b/src/main.c
@@ -10,59 +10,71 @@
 // TODO: the lexer, parser, and eval functions should return -1 on fatal, and 1 on non fatal error

 #define READ_BUF_CAP 512
-#define DEFAULT_TOKENS_CAP 8192 // make it a command line arg

 lexer_t lexer = NULL;
 parser_t parser = NULL;
-// evaluator_t eval = NULL;
+// eval_t eval = NULL;

 int main(void)
 {
     int ret = 1;

+    toklist_t tokens = {0};
+    ast_t ast_root = {0};
+
     char *filename = "files/test1.lisp";
-
-    lexer = lexer_create(DEFAULT_TOKENS_CAP);
-    if(!lexer) {
-        err("lexer_create: failed");
-        goto fail;
-    }
-
     FILE *fp = fopen(filename, "r");
     if(!fp) {
         err("fopen: %s: %s", filename, strerror(errno));
         goto fail;
     }

-    // tokenize input
+    toklist_reset(&tokens);
+
+    lexer = lexer_create();
+    if(!lexer) {
+        err("lexer_create: failed");
+        goto fail;
+    }
+
     char buf[READ_BUF_CAP];
     size_t bytes = 0;
     while((bytes = fread(buf, sizeof(char), READ_BUF_CAP, fp)))
     {
-        if(lexer_tokenize(lexer, buf, bytes)) {
-            fclose(fp); goto fail;
+        if(lexer_tokenize(lexer, &tokens, buf, bytes)) {
+            toklist_reset(&tokens);
+            fclose(fp);
+            goto fail;
         }

         if(bytes < READ_BUF_CAP) break;
     }
-    lexer_print_tokens(lexer);
+    if(!lexer_has_finished(lexer)) {
+        err("tokenization is not complete");
+    }

     fclose(fp);

-    parser = parser_create();
+    ast_reset(&ast_root);
+
+    parser = parser_create();
     if(!parser) {
         err("parser_create: failed");
         goto fail;
     }

-    if(parser_parse_lexer(parser, lexer)) {
+    if(parser_parse_toklist(parser, &tokens, &ast_root)) {
         err("parser_parse_lexer: failed");
         goto fail;
     }
-    parser_print_ast(parser);
+    toklist_print(&tokens);
+    toklist_reset(&tokens);
+
+    ast_print(&ast_root);
+    ast_reset(&ast_root);

-    // evaluator = eval_create();
-    // if(!evaluator) {
+    // eval = eval_create();
+    // if(!eval) {
     //     err("eval_create: failed");
     //     goto fail;
     // }
diff --git a/src/parser.c b/src/parser.c
index 654cda4..1458018 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -21,15 +21,16 @@ static void quote_stack_push(struct quote_node **head, struct sexp *cur_sexp);
 static struct sexp *quote_stack_pop(struct quote_node **head, int peek);

 // returns 0 on success
-static int on_paren(parser_t parser, int paren_type); // 0 is open, 1 is close
+enum paren_type { P_OPEN, P_CLOSE };
+static int on_paren(parser_t parser, enum paren_type type); // 0 is open, 1 is close
 static int on_quote(parser_t parent);
 static int on_value(parser_t parent, value_t value);

-#define TOKEN_CALLBACK_TBL(X, parser, token) \
-/* X(test, execute on succes) */ \
-    X(TOKEN_PARENTHS_OPEN,  on_paren(parser, 0)) \
-    X(TOKEN_PARENTHS_CLOSE, on_paren(parser, 1)) \
-    X(TOKEN_SPECIAL_QUOTE,  on_quote(parser)) \
+#define TOKEN_CALLBACK_TBL(X, parser, token) \
+/* X(test, execute on succes) */ \
+    X(TOKEN_PARENTHS_OPEN,  on_paren(parser, P_OPEN)) \
+    X(TOKEN_PARENTHS_CLOSE, on_paren(parser, P_CLOSE)) \
+    X(TOKEN_SPECIAL_QUOTE,  on_quote(parser)) \
     X(TOKEN_VALUE,          on_value(parser, token->value))

 #define FN(fn, arg) "%s", fn(arg, buf, buf_sz)
@@ -47,30 +48,38 @@ static int on_value(parser_t parent, value_t value);
         return -1; \
     } break;

-int parser_parse_lexer(parser_t parser, lexer_t lexer)
-{
-    for(size_t i = 0; i < lexer->ntokens; i++) {
-        struct token *token = &lexer->tokens[i];
+int parser_parse_toklist(parser_t parser, toklist_t *toklist, ast_t *ast)
+{
+    if(parser->cur_sexp == NULL) {
+        size_t index = sexp_add(&ast->sexp, AST_VALUE);
+        ast->sexp.children[index].value = value_copy(parser->begin_symbol_value);
+
+        parser->cur_sexp = &ast->sexp;
+    }
+
+
+    for(size_t i = 0; i < toklist->ntokens; i++) {
+        struct token *token = &toklist->tokens[i];

-        switch(token->type) {
+        switch(token->type) {
             TOKEN_CALLBACK_TBL(CASE_TYPE, parser, token);
         default:
             err("parser_parse_lexer: Unknown token type given");
             break;
         }

-        if((token->type != TOKEN_SPECIAL_QUOTE) &&
-           (parser->cur_sexp == quote_stack_pop(&parser->quote_head, 1))) {
-            if(on_paren(parser, 1));
-            quote_stack_pop(&parser->quote_head, 0);
-        }
+        if(token->type != TOKEN_SPECIAL_QUOTE)
+            while(parser->cur_sexp == quote_stack_pop(&parser->quote_head, 1)) {
+                on_paren(parser, P_CLOSE);
+                quote_stack_pop(&parser->quote_head, 0);
+            }
     }

-    if(&parser->root == parser->cur_sexp) {
+    if(&ast->sexp == parser->cur_sexp) {
         return 0;
     } else {
         return 1;
-    }
+    }
 }

 parser_t parser_create()
@@ -88,7 +97,6 @@ parser_t parser_create()
     parser->begin_symbol_value = VALUE_CREATE("begin");
     parser->quote_symbol_value = VALUE_CREATE("quote");

-    parser->root.nchildren = 0;
     parser->quote_head = NULL;

     parser_reset(parser);
@@ -102,40 +110,35 @@ void parser_destroy(parser_t parser)
     value_destroy(parser->begin_symbol_value);
     value_destroy(parser->quote_symbol_value);

-    sexp_free(&parser->root);
+    parser_reset(parser);

     free(parser);
 }

 void parser_reset(parser_t parser)
 {
-    struct sexp *root = &parser->root;
-
-    for(size_t i = 0; i < root->nchildren; i++) {
-        ast_free(&root->children[i]);
-    }
-
-    sexp_init(root);
-    size_t index = sexp_add(root, AST_VALUE);
-
-    root->children[index].value = value_copy(parser->begin_symbol_value);
-    parser->cur_sexp = &parser->root;
-
+    parser->cur_sexp = NULL;
     while(quote_stack_pop(&parser->quote_head, 0) != NULL);
 }

+void ast_reset(ast_t *ast)
+{
+    sexp_free(&ast->sexp);
+    sexp_init(&ast->sexp);
+}
+
-void parser_print_ast(parser_t parser)
+void ast_print(ast_t *ast)
 {
-    sexp_print(&parser->root, 0);
+    sexp_print(&ast->sexp, 0);
 }

 // ---------- Callback Functions ---------- //

-static int on_paren(parser_t parser, int paren_type)
+static int on_paren(parser_t parser, enum paren_type type)
 {
-    if(paren_type) { // !0 closing paren
+    if(type == P_CLOSE) {
         parser->cur_sexp = parser->cur_sexp->prev;
-    } else { // 0 opening paren
+    } else if(type == P_OPEN) {
         size_t index = sexp_add(parser->cur_sexp, AST_SEXP);

         struct sexp *prev = parser->cur_sexp;
@@ -153,7 +156,7 @@ static int on_paren(parser_t parser, int paren_type)
 static int on_quote(parser_t parser)
 {
     // new sexp
-    on_paren(parser, 0);
+    on_paren(parser, P_OPEN);

     // add symbol to the sexp
     on_value(parser, parser->quote_symbol_value);
@@ -192,6 +195,8 @@ static void sexp_init(struct sexp *sexp)

 static void sexp_print(struct sexp *sexp, int indent)
 {
+    // (void)indent;
+    // printf("( ");
     for(size_t i = 0; i < sexp->nchildren; i++) {
         struct ast *child = &sexp->children[i];

@@ -202,9 +207,11 @@ static void sexp_print(struct sexp *sexp, int indent)

         char buf[LEXER_IDEN_CAP];
         size_t buf_sz = LEXER_IDEN_CAP;
-
-        info("%d %s", indent, value_string(child->value, buf, buf_sz));
+
+        for(int i = 0; i < indent; i++) printf(" ");
+        printf("%s\n", value_string(child->value, buf, buf_sz));
     }
+    // printf(")");
 }

 static void sexp_free(struct sexp *sexp)
@@ -212,7 +219,8 @@ static void sexp_print(struct sexp *sexp, int indent)
     for(size_t i = 0; i < sexp->nchildren; i++){
         ast_free(&sexp->children[i]);
     }
-    free(sexp->children);
+
+    if(sexp->children) free(sexp->children);
 }

 #define CASE_FREE(type, free_func, print_func) \
diff --git a/src/parser.h b/src/parser.h
index 8fc5d6c..9193ca1 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -5,6 +5,7 @@
 #include "lexer.h"

 typedef struct parser *parser_t;
+typedef struct ast ast_t;

 struct ast {
     enum ast_type {
@@ -26,7 +27,6 @@ struct ast {
 };

 struct parser {
-    struct sexp root;
     struct sexp *cur_sexp;

     struct quote_node {
@@ -45,16 +45,16 @@ parser_t parser_create();
 // deallocate a parser
 void parser_destroy(parser_t parser);

-// reset a parser to its default state without destroying it
+// reset to its default state without destroying it
 // returns 0 on success
 void parser_reset(parser_t parser);
+void ast_reset(ast_t *ast_root);

 // self explanatory
-void parser_print_ast(parser_t parser);
+void ast_print(ast_t *ast_root);

-// turn the given lexer (which has already has tokens) into an ast
-// returns 0 on success, > 0 when more tokens are needed,
-// and < 0 on a fatal error
-int parser_parse_lexer(parser_t parser, lexer_t lexer);
+// turn the given toklist into an ast
+// returns 0 on success, and < 0 on a fatal error
+int parser_parse_toklist(parser_t parser, toklist_t *tokens, ast_t *ast);

 #endif
diff --git a/src/value.c b/src/value.c
index 289aa0f..e2b9b32 100644
--- a/src/value.c
+++ b/src/value.c
@@ -133,7 +133,7 @@ static char *symbol_string(char *symbol, char *buf, size_t buf_sz)

 #define MANAGE_LITERAL_TBL(X, literal) \
 /* X(type, how to free how to print) */ \
-    X(LITERAL_STRING,    free(literal->string), EX("%s", literal->string)) \
+    X(LITERAL_STRING,    free(literal->string), EX("\"%s\"", literal->string)) \
     X(LITERAL_NUM_INT,   ;, EX("%ld", literal->num_int)) \
     X(LITERAL_NUM_FLOAT, ;, EX("%f", literal->num_float))
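The new scope_depth and inside_string fields behind lexer_has_finished() are what let main.c detect incomplete input after its fread() loop. The sketch below drives the same idea by hand, feeding one expression in two chunks; the helper name and the split input are made up for illustration, and only the lexer/toklist calls come from the headers in this commit.

```c
#include <string.h>
#include "lexer.h"

// Hypothetical helper, not part of the commit: tokenizes one expression that
// arrives in two pieces, the way main.c feeds READ_BUF_CAP-sized reads.
int tokenize_in_chunks(void)
{
    static toklist_t tokens;        // zero-initialized; struct toklist embeds LEXER_TOK_CAP tokens
    toklist_reset(&tokens);

    lexer_t lexer = lexer_create();
    if(!lexer) return -1;

    char *part1 = "(quote (a b";    // split mid-expression on purpose
    char *part2 = " c))";

    if(lexer_tokenize(lexer, &tokens, part1, strlen(part1))) goto fail;

    // Two '(' are still open here, so the lexer reports "not finished"
    // and the caller knows to keep feeding input.
    if(!lexer_has_finished(lexer)) {
        if(lexer_tokenize(lexer, &tokens, part2, strlen(part2))) goto fail;
    }
    if(!lexer_has_finished(lexer)) goto fail;   // still unbalanced: give up

    toklist_print(&tokens);

    toklist_reset(&tokens);
    lexer_destroy(lexer);
    return 0;

fail:
    toklist_reset(&tokens);
    lexer_destroy(lexer);
    return -1;
}
```

The finished check relies on the scope_depth counter maintained by the '(' and ')' entries of SEPARATOR_CALLBACK_TBL together with the existing inside_string flag; nothing else in the lexer state is inspected.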