diff options
author | kartofen <mladenovnasko0@gmail.com> | 2023-08-14 21:20:39 +0300 |
---|---|---|
committer | kartofen <mladenovnasko0@gmail.com> | 2023-08-14 21:20:39 +0300 |
commit | a7bb8ace49f5725e0f92336ab5af28b4c8900aff (patch) | |
tree | 5d00d7a5d159e9702b46c23542fffc09e591c271 | |
parent | f83187a830deff27ce0cdd4c175ffe2785461685 (diff) |
parser done
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | README.md | 18 | ||||
-rw-r--r-- | files/test1.lisp | 17 | ||||
-rw-r--r-- | src/ast.h | 38 | ||||
-rw-r--r-- | src/common.c | 33 | ||||
-rw-r--r-- | src/common.h | 10 | ||||
-rw-r--r-- | src/eval.h | 16 | ||||
-rw-r--r-- | src/lexer.c | 334 | ||||
-rw-r--r-- | src/lexer.h | 31 | ||||
-rw-r--r-- | src/main.c | 66 | ||||
-rw-r--r-- | src/parser.c | 258 | ||||
-rw-r--r-- | src/parser.h | 60 | ||||
-rw-r--r-- | src/value.c | 244 | ||||
-rw-r--r-- | src/value.h | 62 |
14 files changed, 878 insertions, 311 deletions
@@ -45,4 +45,4 @@ analyze: clean make leak: lispy - valgrind -s --leak-check=full $(BIND)/lispy + valgrind -s --leak-check=full --show-leak-kinds=all $(BIND)/lispy diff --git a/README.md b/README.md new file mode 100644 index 0000000..b844f7d --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +### Lispy + +A simple lisp/scheme interpreter + +### TODO + +* Support Quasiquote (and comma) +* Support macros +* Add FFI (Foreing Function Interface) (Maybe codegen whole c std lib) + +Remember: +* Need special procedures (procedures that don't evaluate their arguments like lambda, quote, if/cond) + +### Development Notes + +* Figure out how to deal with lexer and repl + * when the tokens aren + * diff --git a/files/test1.lisp b/files/test1.lisp index 3a44599..41325dd 100644 --- a/files/test1.lisp +++ b/files/test1.lisp @@ -1,4 +1,17 @@ -(define 'a 1) +(define a 1) (+ a 1) (+ a 0.1) -"string test () . '" +("string test () . '" hello blah) +(more blah (something)) + +(1 2 3) (list 1 2 3) (quote (1 2 3)) + +(1) +1 +'1 +1 + +'(2 3 4) +(6 7) +('1) +1 diff --git a/src/ast.h b/src/ast.h deleted file mode 100644 index bd2e628..0000000 --- a/src/ast.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef AST_H -#define AST_H - -#include "lexer.h" - -typedef struct node_t *ast_t; -struct ast_node { - enum { - NODE_SEXP, - NODE_SYMBOL, - NODE_LITERAL, - } type; - - union { - struct sexp { - struct ast_node **children; - size_t nchildren; - } sexp; - - char *symbol; - - union { - enum { - NODE_LITERAL_NUM, - NODE_LITERAL_STR, - } type; - - int number; - char *string; - } literal; - }; -}; - -ast_t ast_create(); -void ast_destroy(ast_t ast); -int ast_parse_lexer(ast_t ast, lexer_t lex); - -#endif diff --git a/src/common.c b/src/common.c new file mode 100644 index 0000000..084c584 --- /dev/null +++ b/src/common.c @@ -0,0 +1,33 @@ +#include <string.h> +#include <errno.h> +#include "common.h" + +void *xmalloc(size_t size) +{ + void *ret; + if(!(ret = malloc(size))) { + die("malloc: %s", strerror(errno)); + } + + return ret; +} + +void *xcalloc(size_t nmemb, size_t size) +{ + void *ret; + if(!(ret = calloc(nmemb, size))) { + die("calloc: %s", strerror(errno)); + } + + return ret; +} + +void *xrealloc(void *ptr, size_t size) +{ + void *ret; + if(!(ret = realloc(ptr, size))) { + die("realloc: %s", strerror(errno)); + } + + return ret; +} diff --git a/src/common.h b/src/common.h index a1daa03..f67af22 100644 --- a/src/common.h +++ b/src/common.h @@ -2,6 +2,7 @@ #define COMMON_H #include <stdio.h> +#include <stdlib.h> #define __RED__ "\033[0;31m" #define __GREEN__ "\033[0;32m" @@ -17,4 +18,13 @@ // #define info(...) printf(__VA_ARGS__); // #define err(...) printf(__VA_ARGS__); +#define die(...) do { \ + err(__VA_ARGS__); \ + abort(); \ + } while(0) + +void *xmalloc(size_t size); +void *xcalloc(size_t nmemb, size_t size); +void *xrealloc(void *ptr, size_t size); + #endif @@ -1,22 +1,16 @@ #ifndef EVAL_H #define EVAL_H -#include "ast.h" +#include "parser.h" -typedef struct eval *eval_t; - -// RunTime Tree -struct rtt { - -}; +typedef struct eval *evaluator_t; struct eval { - struct rtt *root; }; // TODO: add options for the evaluation -eval_t evaluator_create(); -void evaluator_destroy(eval_t evaluator); -int evaluator_eval_ast(eval_t evaluator, ast_t ast) +evaluator_t eval_create(); +void eval_destroy(evaluator_t evaluator); +int eval_ast(evaluator_t evaluator, parser_t ast); #endif diff --git a/src/lexer.c b/src/lexer.c index 71eed79..9659bb4 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -3,64 +3,51 @@ #include <ctype.h> #include <errno.h> -// TODO: handle escaped quotes #include "common.h" #include "lexer.h" +#include "value.h" + +// TODO: handle escaping // saves a token with no data // returns the index of the saved token; < 0 on fail -static int save_empty_token(lexer_t lexer, enum token_enum type); +static int token_add(lexer_t lexer, enum token_enum type); -// saves a token with data which is the current identifier (lexer->iden) +// saves a token with the current identifier (lexer->iden) // returns 0 on success -static int save_current_identifier(lexer_t lexer); +static int token_add_iden(lexer_t lexer); // used for tokens that separate things // if type is TOKEN_TOKENS, then no empty token will be saved // returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden) static int on_generic_separator(lexer_t lexer, enum token_enum type); -static int on_quote(lexer_t lexer); -static int on_dot(lexer_t lexer); - -// try to convert the identifier (lexer->iden) to a given type -// returns > 0 on sucess, 0 on fail (iden isnt the given type), -// and < 0 on error -static int try_str(lexer_t lexer); -static int try_int(lexer_t lexer); -static int try_float(lexer_t lexer); -static int try_symbol(lexer_t lexer); - -#define SEPARATOR_CALLBACK_TBL(X, lexer) \ - X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \ - X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \ - X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \ - X(EQ('.'), on_dot(lexer)) \ - X(EQ('"'), on_quote(lexer)) \ - X(FN(isspace), on_generic_separator(lexer, TOKEN_TOKENS)) - -// X(token type, what to free, how to print on screen) -#define TOKEN_TYPES_INFO(X, token) \ - X(TOKEN_PARENTHS_OPEN, NULL, "(") \ - X(TOKEN_PARENTHS_CLOSE, NULL, ")") \ - X(TOKEN_SPECIAL_QUOTE, NULL, "'") \ - X(TOKEN_SPECIAL_DOT, NULL, ".") \ - X(TOKEN_LITERAL_STRING, token->string, "'%s'", token->string) \ - X(TOKEN_LITERAL_NUM_INT, NULL, "'%ld'", token->num_int) \ - X(TOKEN_LITERAL_NUM_FLOAT, NULL, "'%f'", token->num_float) \ - X(TOKEN_SYMBOL, token->symbol, "'%s'", token->symbol) - -#define IDENTIFY_IDENTIFIER_LIST(X) \ - X(try_str) \ - X(try_int) \ - X(try_float) \ - X(try_symbol) +static int on_double_quote(lexer_t lexer); #define EQ(ch) ch == -#define FN(f) f + +#define SEPARATOR_CALLBACK_TBL(X, lexer) \ +/* X(test, what to execute if the test succeeds) */ \ + X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \ + X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \ + X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \ + X(EQ('"'), on_double_quote(lexer)) \ + X(isspace, on_generic_separator(lexer, TOKEN_TOKENS)) + +#define FN(fn, arg) "%s", fn(arg, buf, buf_sz) + +#define MANAGE_TOKEN_TBL(X, token) \ +/* X(type, how to free, how to print) */ \ + X(TOKEN_PARENTHS_OPEN, ;, "(") \ + X(TOKEN_PARENTHS_CLOSE, ;, ")") \ + X(TOKEN_SPECIAL_QUOTE, ;, "'") \ + X(TOKEN_VALUE, value_destroy(token->value), FN(value_string, token->value)) \ + X(TOKEN_TOKENS, ;, "") \ + +// ---------- Exported Functions ---------- // // makes an if-else chain to test the character -// agains the seperator callback table -#define CHECK_SEPERATOR_AND_CALLBACK(test_func, callback) \ +// agains the separator callback table +#define CHECK_SEPARATOR_AND_CALLBACK(test_func, callback) \ if(test_func(str[i])) { \ callback_ret = callback; \ if(callback_ret == 0) { \ @@ -77,7 +64,7 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len) for(size_t i = 0; i < len; i++) { - SEPARATOR_CALLBACK_TBL(CHECK_SEPERATOR_AND_CALLBACK, lexer) {} + SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {} if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz); @@ -93,49 +80,35 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len) lexer_t lexer_create(size_t tokens_cap) { - lexer_t lexer = malloc(sizeof(struct lexer)); - if(!lexer) { - err("malloc: %s", strerror(errno)); - goto fail; - } + lexer_t lexer = xmalloc(sizeof(struct lexer)); - lexer->tokens = calloc(tokens_cap, sizeof(struct token)); - if(!lexer->tokens) { - err("malloc %s", strerror(errno)); - goto fail; - } + lexer->tokens_cap = tokens_cap; + lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token)); for(size_t i = 0; i < tokens_cap; i++) { - lexer->tokens[i].symbol = NULL; + lexer->tokens[i].type = TOKEN_TOKENS; } - lexer->tokens_cap = tokens_cap; - lexer->ntokens = 0; - - memset(lexer->iden, 0, LEXER_IDEN_CAP); - lexer->iden_sz = 0; - - lexer->inside_string = 0; - + lexer_reset(lexer); return lexer; -fail: - lexer_destroy(lexer); - return NULL; } -#define CASE_FREE_TOKEN(type, data, ...) \ - case type: if(data != NULL) { free(data); } break; +#define CASE_FREE(type, free_func, ...) case type: free_func; break; void lexer_destroy(lexer_t lexer) { if(!lexer) return; if(lexer->tokens) { - for(size_t i = 0; i < lexer->ntokens; i++) { + for(size_t i = 0; i < lexer->ntokens; i++) + { struct token *token = &lexer->tokens[i]; - switch(token->type) { - TOKEN_TYPES_INFO(CASE_FREE_TOKEN, token) - default: break; + + switch(lexer->tokens[i].type) { + MANAGE_TOKEN_TBL(CASE_FREE, token); + default: + err("lexer_reset: Unknown token type given"); + break; } } free(lexer->tokens); @@ -144,18 +117,65 @@ void lexer_destroy(lexer_t lexer) free(lexer); } -// ------------------------------------------------- // +void lexer_reset(lexer_t lexer) +{ + for(size_t i = 0; i < lexer->tokens_cap; i++) { + struct token *token = &lexer->tokens[i]; + + switch(token->type) { + MANAGE_TOKEN_TBL(CASE_FREE, token); + default: + err("lexer_reset: Unknown token type given"); + break; + } + + token->type = TOKEN_TOKENS; + token->value = NULL; + } + + lexer->ntokens = 0; + + memset(lexer->iden, 0, LEXER_IDEN_CAP); + lexer->iden_sz = 0; + + lexer->inside_string = 0; +} + +// print based on the given way to print +#define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break; + +void lexer_print_tokens(lexer_t lexer) +{ + // for the printing (see MANAGE_TOKEN_TBL) + char buf[LEXER_IDEN_CAP]; + size_t buf_sz = LEXER_IDEN_CAP; + + for(size_t i = 0; i < lexer->ntokens; i++) { + struct token *token = &lexer->tokens[i]; + + switch(token->type) { + MANAGE_TOKEN_TBL(CASE_PRINT, token); + default: + err("lexer_print_tokens: Unknown token given"); + return; + } + } +} + +// ---------- Callback Functions ----------- // -static int on_quote(lexer_t lexer) +static int on_double_quote(lexer_t lexer) { int ret = on_generic_separator(lexer, TOKEN_TOKENS); - if(ret <= 0) { // it either failed or worked, both not inside a string - lexer->inside_string = 1; + if(ret < 0) { return ret; + } else if(ret == 0) { + lexer->inside_string = 1; + return 1; } - if(save_current_identifier(lexer)) { - err("save_current_identifier: failed"); + if(token_add_iden(lexer)) { + err("token_add_iden: failed"); return -1; } @@ -163,26 +183,20 @@ static int on_quote(lexer_t lexer) return 0; } -static int on_dot(lexer_t lexer) -{ - if(lexer->iden_sz != 0) return 1; - return on_generic_separator(lexer, TOKEN_SPECIAL_DOT); -} - static int on_generic_separator(lexer_t lexer, enum token_enum type) { if(lexer->inside_string) { return 1; } - if(save_current_identifier(lexer)) { - err("save_current_identifier: failed"); + if(token_add_iden(lexer)) { + err("token_add_iden: failed"); return -1; } if(type != TOKEN_TOKENS) { - if(save_empty_token(lexer, type) < 0) { - err("save_empty_token: failed"); + if(token_add(lexer, type) < 0) { + err("token_add: failed"); return -1; } } @@ -190,147 +204,41 @@ static int on_generic_separator(lexer_t lexer, enum token_enum type) return 0; } -static int save_empty_token(lexer_t lexer, enum token_enum type) +// ---------- Token Functions ----------- // + +static int token_add(lexer_t lexer, enum token_enum type) { if(lexer->ntokens >= lexer->tokens_cap) { err("tokens_cap of %ld has been reached", lexer->tokens_cap); return -1; } - lexer->tokens[lexer->ntokens++].type = type; - return lexer->ntokens - 1; + lexer->tokens[lexer->ntokens].type = type; + return lexer->ntokens++; } -#define CHECK_IDEN(func) \ - if((ret = func(lexer))) { \ - if(ret < 0) { \ - err(#func ": failed"); \ - goto exit; \ - } \ - } else - -static int save_current_identifier(lexer_t lexer) +static int token_add_iden(lexer_t lexer) { int ret = 1; + if(!lexer->iden_sz) return 0; - if(lexer->iden_sz != 0) { - IDENTIFY_IDENTIFIER_LIST(CHECK_IDEN) {} + int i = token_add(lexer, TOKEN_VALUE); + if(i < 0) { + err("token_add: failed"); + goto exit; } - ret = 0; + value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret); + if(ret > 0) { + value = value_create(VALUE_SYMBOL, lexer->iden, &ret); + } else if(ret < 0) { + err("value_create: failed"); + goto exit; + } + + lexer->tokens[i].value = value; exit: memset(lexer->iden, 0, lexer->iden_sz); lexer->iden_sz = 0; return ret; } - - -// ------------------------------------------------- // - -static int try_str(lexer_t lexer) -{ - if(!lexer->inside_string) return 0; - - int i = save_empty_token(lexer, TOKEN_LITERAL_STRING); - if(i < 0) { - err("save_empty_token: failed"); - return -1; - } - - lexer->tokens[i].string = malloc(lexer->iden_sz+1); - if(!lexer->tokens[i].string) { - err("malloc: %s", strerror(errno)); - return -1; - } - - memcpy(lexer->tokens[i].string, lexer->iden, lexer->iden_sz+1); - return 1; -} - -static int try_int(lexer_t lexer) -{ - errno = ERANGE + 1; // set errno to not ERANGE - - char *endptr; - long num = strtol(lexer->iden, &endptr, 10); - - if(*endptr != '\0') { // the whole string isn't a number - return 0; - } - - if(errno == ERANGE) { - warn("Given integer literal %s is outside the possible range", lexer->iden); - } - - int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_INT); - if(i < 0) { - err("save_empty_token: failed"); - return -1; - } - - lexer->tokens[i].num_int = num; - return 1; -} - -static int try_float(lexer_t lexer) -{ - errno = ERANGE + 1; // set errno to not ERANGE - - char *endptr; - float num = strtof(lexer->iden, &endptr); - - if(*endptr != '\0') { // the whole string isn't a number - return 0; - } - - if(errno == ERANGE) { - warn("Given float literal %s is outside the possible range", lexer->iden); - } - - int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_FLOAT); - if(i < 0) { - err("save_empty_token: failed"); - return -1; - } - - lexer->tokens[i].num_float = num; - return 1; -} - -static int try_symbol(lexer_t lexer) -{ - int i = save_empty_token(lexer, TOKEN_SYMBOL); - if(i < 0) { - err("save_empty_token: failed"); - return -1; - } - - lexer->tokens[i].symbol = malloc(lexer->iden_sz+1); - if(!lexer->tokens[i].symbol) { - err("malloc: %s", strerror(errno)); - return -1; - } - - memcpy(lexer->tokens[i].symbol, lexer->iden, lexer->iden_sz+1); - return 1; -} - -// ------------------------------------------------- // - -#ifdef DEBUG -#define CASE_PRINT(type, data, ...) case type: info("\t" __VA_ARGS__); break; - -void lexer_print_tokens(lexer_t lexer) -{ - for(size_t i = 0; i < lexer->ntokens; i++) { - struct token *token = &lexer->tokens[i]; - - info("Token %zu: %d", i, token->type); - - switch(token->type) { - TOKEN_TYPES_INFO(CASE_PRINT, token); - default: break; - } - } -} -#endif diff --git a/src/lexer.h b/src/lexer.h index 942be54..fc13f24 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -1,6 +1,8 @@ #ifndef LEXER_H #define LEXER_H +#include "value.h" + #ifndef LEXER_IDEN_CAP #define LEXER_IDEN_CAP 512 #endif @@ -9,19 +11,12 @@ typedef struct lexer *lexer_t; struct token { enum token_enum { - TOKEN_PARENTHS_OPEN, TOKEN_PARENTHS_CLOSE, - TOKEN_SPECIAL_DOT, TOKEN_SPECIAL_QUOTE, - TOKEN_LITERAL_NUM_INT, TOKEN_LITERAL_STRING, - TOKEN_LITERAL_NUM_FLOAT, TOKEN_SYMBOL, + TOKEN_PARENTHS_OPEN, TOKEN_PARENTHS_CLOSE, + TOKEN_SPECIAL_QUOTE, TOKEN_VALUE, TOKEN_TOKENS // number of token types } type; - union { - char *symbol; - char *string; - long num_int; - float num_float - }; + value_t value; }; struct lexer { @@ -37,19 +32,25 @@ struct lexer { }; // allocate a lexer with a maximum number of tokens_cap tokens -// returns a lexer on success, NULL on fail +// returns a lexer on success and NULL on fail lexer_t lexer_create(size_t tokens_cap); -// destroy a lexer +// deallocate a lexer void lexer_destroy(lexer_t lexer); +// reset a lexer to its default state without destroying it (faster) +void lexer_reset(lexer_t lexer); + +// self explanatory +void lexer_print_tokens(lexer_t lexer); + // turn the given non-null-terminated string str of lenght len // into into tokens // returns 0 on success int lexer_tokenize(lexer_t lexer, char *str, size_t len); -#ifdef DEBUG -void lexer_print_tokens(lexer_t lexer); -#endif +// checks whether the lexer has finished (temp buffers like iden are empty) +// returns 1 on finished, 0 on not finished +int lexer_has_finished(lexer_t lexer); #endif @@ -4,15 +4,17 @@ #include "common.h" #include "lexer.h" -// #include "ast.h" -// #include "eval.h" +#include "parser.h" +#include "eval.h" + +// TODO: the lexer, parser, and eval functions should return -1 on fatal, and 1 on non fatal error #define READ_BUF_CAP 512 #define DEFAULT_TOKENS_CAP 8192 // make it a command line arg lexer_t lexer = NULL; -// ast_t root = NULL; -// eval_t evaluator = NULL; +parser_t parser = NULL; +// evaluator_t eval = NULL; int main(void) { @@ -26,13 +28,13 @@ int main(void) goto fail; } - // tokenize input FILE *fp = fopen(filename, "r"); if(!fp) { err("fopen: %s: %s", filename, strerror(errno)); goto fail; } + // tokenize input char buf[READ_BUF_CAP]; size_t bytes = 0; while((bytes = fread(buf, sizeof(char), READ_BUF_CAP, fp))) { if(lexer_tokenize(lexer, buf, bytes)) { @@ -42,36 +44,38 @@ int main(void) if(bytes < READ_BUF_CAP) break; } - fclose(fp); lexer_print_tokens(lexer); - // -------------- - -// ast = ast_create(); -// if(!ast) { -// err("ast_create: failed"); -// goto fail; -// } - -// if(ast_parse_lexer(ast, lexer)) { -// err("ast_parse_lexer: failed"); -// goto fail; -// } - -// evaluator = evaluator_create(); -// if(!evaluator) { -// err("evaluator_create: failed"); -// goto fail; -// } - -// if(evaluator_eval_ast(evaluator, ast)) { -// err("evaluator_eval_ast: failed"); -// goto fail; -// } + + fclose(fp); + + parser = parser_create(); + if(!parser) { + err("parser_create: failed"); + goto fail; + } + + if(parser_parse_lexer(parser, lexer)) { + err("parser_parse_lexer: failed"); + goto fail; + } + + parser_print_ast(parser); + + // evaluator = eval_create(); + // if(!evaluator) { + // err("eval_create: failed"); + // goto fail; + // } + + // if(eval_ast(eval, ast)) { + // err("eval_ast: failed"); + // goto fail; + // } ret = 0; fail: -// evaluator_destroy(eval); -// ast_destroy(ast); + // eval_destroy(eval); + parser_destroy(parser); lexer_destroy(lexer); return ret; } diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..c4e37bd --- /dev/null +++ b/src/parser.c @@ -0,0 +1,258 @@ +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "common.h" +#include "parser.h" + +// TODO: Do all the todos + +// adds another child of type type to the sexp sexp +// returns the index of the added type and < 0 on fail +static size_t sexp_add(struct sexp *sexp, enum ast_type type); + +// self-explanatory +static void sexp_init(struct sexp *sexp); +static void sexp_print(struct sexp *sexp, int indent); +static void sexp_free(struct sexp *sexp); +static void ast_free(struct ast *ast); + +static void quote_stack_push(struct quote_node **head, struct sexp *cur_sexp); +static struct sexp *quote_stack_pop(struct quote_node **head, int peak); + +// returns 0 on success +static int on_paren(parser_t parser, int paren_type); // 0 is open, 1 is close +static int on_quote(parser_t parent); +static int on_value(parser_t parent, value_t value); + +#define TOKEN_CALLBACK_TBL(X, parser, token) \ +/* X(test, execute on succes) */ \ + X(TOKEN_PARENTHS_OPEN, on_paren(parser, 0)) \ + X(TOKEN_PARENTHS_CLOSE, on_paren(parser, 1)) \ + X(TOKEN_SPECIAL_QUOTE, on_quote(parser)) \ + X(TOKEN_VALUE, on_value(parser, token->value)) + +#define FN(fn, arg) "%s", fn(arg, buf, buf_sz) + +#define MANAGE_AST_TBL(X, ast) \ + X(AST_SEXP, sexp_free(&ast->sexp), "|") \ + X(AST_VALUE, value_destroy(ast->value), FN(value_string, ast->value)) \ + +// ---------- Exported Functions ---------- // + +#define CASE_TYPE(type, callback) \ + case type: \ + if(callback) { \ + err(#callback ": failed"); \ + return -1; \ + } break; + +int parser_parse_lexer(parser_t parser, lexer_t lexer) +{ + for(size_t i = 0; i < lexer->ntokens; i++) { + struct token *token = &lexer->tokens[i]; + + switch(token->type) { + TOKEN_CALLBACK_TBL(CASE_TYPE, parser, token); + default: + err("parser_parse_lexer: Unknown token type given"); + break; + } + + if((token->type != TOKEN_SPECIAL_QUOTE) && + (parser->cur_sexp == quote_stack_pop(&parser->quote_head, 1))) { + if(on_paren(parser, 1)); + quote_stack_pop(&parser->quote_head, 0); + } + } + + if(&parser->root == parser->cur_sexp) { + return 0; + } else { + return 1; + } +} + +parser_t parser_create() +{ + parser_t parser = xmalloc(sizeof(struct parser)); + +#define VALUE_CREATE(str) \ + value_create(VALUE_SYMBOL, str, &ret); \ + if(ret) { \ + err("value_create: failed"); \ + return NULL; \ + } + + int ret = 0; + parser->begin_symbol_value = VALUE_CREATE("begin"); + parser->quote_symbol_value = VALUE_CREATE("quote"); + + parser->root.nchildren = 0; + parser->quote_head = NULL; + parser_reset(parser); + + return parser; +} + +void parser_destroy(parser_t parser) +{ + if(!parser) return; + + value_destroy(parser->begin_symbol_value); + value_destroy(parser->quote_symbol_value); + + sexp_free(&parser->root); + free(parser); +} + +void parser_reset(parser_t parser) +{ + struct sexp *root = &parser->root; + + for(size_t i = 0; i < root->nchildren; i++) { + ast_free(&root->children[i]); + } + + sexp_init(root); + size_t index = sexp_add(root, AST_VALUE); + + root->children[index].value = value_copy(parser->begin_symbol_value); + parser->cur_sexp = &parser->root; + + while(quote_stack_pop(&parser->quote_head, 0) != NULL); +} + + +void parser_print_ast(parser_t parser) +{ + sexp_print(&parser->root, 0); +} + +// ---------- Callback Functions ---------- // + +static int on_paren(parser_t parser, int paren_type) +{ + if(paren_type) { // !0 closing paren + parser->cur_sexp = parser->cur_sexp->prev; + } else { // 0 opening paren + size_t index = sexp_add(parser->cur_sexp, AST_SEXP); + + struct sexp *prev = parser->cur_sexp; + struct sexp *new = &prev->children[index].sexp; + + sexp_init(new); + + parser->cur_sexp = new; + parser->cur_sexp->prev = prev; + } + + return 0; +} + +static int on_quote(parser_t parser) +{ + // new sexp + on_paren(parser, 0); + + // add symbol to the sexp + on_value(parser, parser->quote_symbol_value); + + // save the current quote sexp + quote_stack_push(&parser->quote_head, parser->cur_sexp); + return 0; +} + +static int on_value(parser_t parser, value_t value) +{ + size_t index = sexp_add(parser->cur_sexp, AST_VALUE); + + value_t copied = value_copy(value); + + parser->cur_sexp->children[index].value = copied; + return 0; +} + +// ---------- Parser Functions ---------- // + +static size_t sexp_add(struct sexp *sexp, enum ast_type type) +{ + sexp->children = xrealloc(sexp->children, sizeof(struct ast) * (sexp->nchildren+1)); + + sexp->children[sexp->nchildren].type = type; + return sexp->nchildren++; +} + +static void sexp_init(struct sexp *sexp) +{ + sexp->children = NULL; + sexp->nchildren = 0; + sexp->prev = NULL; +} + +static void sexp_print(struct sexp *sexp, int indent) +{ + for(size_t i = 0; i < sexp->nchildren; i++) { + struct ast *child = &sexp->children[i]; + + if(child->type == AST_SEXP) { + sexp_print(&child->sexp, indent + 2); + continue; + } + + char buf[LEXER_IDEN_CAP]; + size_t buf_sz = LEXER_IDEN_CAP; + + info("%d %s", indent, value_string(child->value, buf, buf_sz)); + } +} + +static void sexp_free(struct sexp *sexp) +{ + for(size_t i = 0; i < sexp->nchildren; i++){ + ast_free(&sexp->children[i]); + } + free(sexp->children); +} + +#define CASE_FREE(type, free_func, print_func) \ + case type: free_func; return; + +static void ast_free(struct ast *ast) +{ + if(!ast) return; + + switch(ast->type) { + MANAGE_AST_TBL(CASE_FREE, ast) + default: + err("ast_free: Unknown ast type given"); + break; + } +} + +static void quote_stack_push(struct quote_node **head, struct sexp *cur_sexp) +{ + struct quote_node *node = xmalloc(sizeof(struct quote_node)); + + node->prev = *head; + node->cur_sexp = cur_sexp; + *head = node; +} + +static struct sexp *quote_stack_pop(struct quote_node **head, int peak) +{ + if(*head == NULL) { + return NULL; + } + + struct sexp *sexp = (*head)->cur_sexp; + + if(!peak) { + struct quote_node *prev = (*head)->prev; + free(*head); + *head = prev; + } + + return sexp; +} + diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..8fc5d6c --- /dev/null +++ b/src/parser.h @@ -0,0 +1,60 @@ +#ifndef PARSER_H +#define PARSER_H + +#include "value.h" +#include "lexer.h" + +typedef struct parser *parser_t; + +struct ast { + enum ast_type { + AST_SEXP, + AST_VALUE, + AST_TYPES // number of types + } type; + + union { + struct sexp { + struct ast *children; + size_t nchildren; + + struct sexp *prev; + } sexp; + + value_t value; + }; +}; + +struct parser { + struct sexp root; + struct sexp *cur_sexp; + + struct quote_node { + struct sexp *cur_sexp; + struct quote_node *prev; + } *quote_head; + + value_t begin_symbol_value; + value_t quote_symbol_value; +}; + +// allocate a parser +// returns a parser on success and NULL on fail +parser_t parser_create(); + +// deallocate a parser +void parser_destroy(parser_t parser); + +// reset a parser to its default state without destroying it +// returns 0 on success +void parser_reset(parser_t parser); + +// self explanatory +void parser_print_ast(parser_t parser); + +// turn the given lexer (which has already has tokens) into an ast +// returns 0 on success, > 0 when more tokens are needed, +// and < 0 on a fatal error +int parser_parse_lexer(parser_t parser, lexer_t lexer); + +#endif diff --git a/src/value.c b/src/value.c new file mode 100644 index 0000000..289aa0f --- /dev/null +++ b/src/value.c @@ -0,0 +1,244 @@ +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "common.h" +#include "value.h" + +// TODO: add the other types + +// save data as a symbol +// return 0 on sucess, < 0 on error +static int symbol_save(char **symbol, char *data); +// same as value_string +static char *symbol_string(char *symbol, char *buf, size_t buf_sz); + +// try to save data as a literal +// returns 0 on sucess, > 0 on fail (data isnt a possible literal), +// and < 0 on error +static int literal_save(struct literal *literal, char *data); +static void literal_free(struct literal *literal); +// same as value_string +static char *literal_string(struct literal *literal, char *buf, size_t buf_sz); + +// for a function that saves string data as a value +#define S_FN(fn, arg) fn(arg, (char *)data) +// for a function that saves a value as a string in buf[buf_sz] +#define P_FN(fn, arg) fn(arg, buf, buf_sz) + +#define MANAGE_VALUE_TBL(X, value) \ +/* X(type, save function, \ + how to free, how to save to string) */ \ + X(VALUE_SYMBOL, S_FN(symbol_save, &value->symbol), \ + free(value->symbol), P_FN(symbol_string, value->symbol)) \ + X(VALUE_LITERAL, S_FN(literal_save, &value->literal), \ + literal_free(&value->literal), P_FN(literal_string, &value->literal)) \ +// X(VALUE_PAIR, ...) +// X(VALUE_LIST, ...) +// X(VALUE_PROC, ...) + +// ------ Exported Functions Implementations ------- // +// ------------------------------------------------- // + +#define CASE_SAVE(type, save_func, free_func, string_func) \ + case type: *ret = save_func; break; + +value_t value_create(enum value_type type, void *data, int *ret) +{ + value_t value = xmalloc(sizeof(struct value)); + + value->type = VALUE_TYPES; + value->references = 1; + + switch(type) { + MANAGE_VALUE_TBL(CASE_SAVE, value); + default: + err("value_create: Unknown value type given"); + *ret = -1; + break; + } + + if(!*ret) { + value->type = type; + return value; + } + + value_destroy(value); + return NULL; +} + +value_t value_copy(value_t value) +{ + value->references++; + return value; +} + +#define CASE_FREE(type, save_func, free_func, string_func) \ + case type: free_func; break; + +void value_destroy(value_t value) +{ + if(!value) return; + if(--value->references > 0) { + return; + } + + + switch(value->type) { + MANAGE_VALUE_TBL(CASE_FREE, value); + default: break; + } + + free(value); +} + +#define CASE_STRING(type, save_func, free_func, string_func) \ + case type: return string_func; + +char *value_string(value_t value, char *buf, size_t buf_sz) +{ + switch(value->type) { + MANAGE_VALUE_TBL(CASE_STRING, value); + default: return NULL; + } +} + +// ---------------- Symbol Functions --------------- // + +static int symbol_save(char **symbol, char *data) +{ + size_t len = strlen(data) + 1; + + *symbol = xmalloc(len); + + memcpy(*symbol, data, len); + return 0; +} + +static char *symbol_string(char *symbol, char *buf, size_t buf_sz) +{ + size_t len = strlen(symbol) + 1; + if(len> buf_sz) { + return NULL; + } + + memcpy(buf, symbol, len); + return buf; +} + +// --------------- Literal Functions --------------- // + +// write a value in a printf format to buf[buf_sz] +#define EX(...) (((size_t)snprintf(buf, buf_sz, __VA_ARGS__) > buf_sz) ? NULL : buf) + +#define MANAGE_LITERAL_TBL(X, literal) \ +/* X(type, how to free how to print) */ \ + X(LITERAL_STRING, free(literal->string), EX("%s", literal->string)) \ + X(LITERAL_NUM_INT, ;, EX("%ld", literal->num_int)) \ + X(LITERAL_NUM_FLOAT, ;, EX("%f", literal->num_float)) + +// try to convert the data to a literal +// returns 0 on success, > 0 on fail (data isnt a literal), +// and < 0 on error +static int literal_try_string(struct literal *literal, char *data); +static int literal_try_int (struct literal *literal, char *data); +static int literal_try_float (struct literal *literal, char *data); + +// list of the try_type functions (order matters) +typedef int (*try_func)(struct literal *, char *); +try_func literal_try_funcs[] = { + literal_try_string, + literal_try_int, + literal_try_float, +}; + +static int literal_save(struct literal *literal, char *data) +{ + int ret = 0; + + for(size_t i = 0; i < sizeof(literal_try_funcs)/sizeof(try_func); i++) { + // <= 0 means that it either failed fatally, or succeeded + if((ret = literal_try_funcs[i](literal, data)) <= 0) break; + } + + return ret; +} + +#define CASE_LITERAL_FREE(type, free_func, string_func) \ + case type: free_func; break; + +static void literal_free(struct literal *literal) +{ + switch(literal->type) { + MANAGE_LITERAL_TBL(CASE_LITERAL_FREE, literal); + default: break; + } +} + +#define CASE_LITERAL_STRING(type, free_func, string_func) \ + case type: return string_func; + +static char *literal_string(struct literal *literal, char *buf, size_t buf_sz) +{ + switch(literal->type) { + MANAGE_LITERAL_TBL(CASE_LITERAL_STRING, literal); + default: return NULL; break; + } +} + +// Literal try_type functions + +static int literal_try_string(struct literal *literal, char *data) +{ + if(data[0] != '"') return 1; + + literal->type = LITERAL_STRING; + + size_t len = strlen(data); // no +1 needed since first character is discarded + literal->string = xmalloc(len); + + memcpy(literal->string, data + 1, len); + return 0; +} + +static int literal_try_int(struct literal *literal, char *data) +{ + errno = ERANGE + 1; // set errno to not ERANGE + + char *endptr; + long num = strtol(data, &endptr, 10); + + if(*endptr != '\0') { // the whole string isn't a number + return 1; + } + + if(errno == ERANGE) { + warn("Given integer literal %s is outside the possible range", data); + } + + literal->type = LITERAL_NUM_INT; + literal->num_int = num; + + return 0; +} + +static int literal_try_float (struct literal *literal, char *data) +{ + errno = ERANGE + 1; // set errno to not ERANGE + + char *endptr; + float num = strtof(data, &endptr); + + if(*endptr != '\0') { // the whole string isn't a number + return 1; + } + + if(errno == ERANGE) { + warn("Given float literal %s is outside the possible range", data); + } + + literal->type = LITERAL_NUM_FLOAT; + literal->num_float = num; + + return 0; +} diff --git a/src/value.h b/src/value.h new file mode 100644 index 0000000..60d2c43 --- /dev/null +++ b/src/value.h @@ -0,0 +1,62 @@ +#ifndef LITERAL_H +#define LITERAL_H + +typedef struct value *value_t; + +struct literal { + enum literal_type { + LITERAL_STRING, + LITERAL_NUM_INT, + LITERAL_NUM_FLOAT + } type; + + union { + char *string; + long num_int; + float num_float; + }; +}; + +// struct pair { +// }; +// struct proc { +// }; + +struct value { + enum value_type { + VALUE_SYMBOL, + VALUE_LITERAL, + VALUE_PAIR, + VALUE_LIST, + VALUE_PROC, + VALUE_TYPES + } type; + + int references; + + union { + char *symbol; + struct literal literal; + // struct pair pair; + // struct pari list; + // struct proc_t proc; + }; +}; + +// try to save data as a literal +// returns a value_t on success and NULL on fail +// ret is set to: 0 on sucess, > 0 on fail (data isnt this type), +// and < 0 on error +value_t value_create(enum value_type type, void *data, int *ret); + +// returns a value_t on success +value_t value_copy(value_t value); + +// destroys a value +void value_destroy(value_t value); + +// copy a string version of the value the buf[buf_sz] +// returns buf on success +char *value_string(value_t value, char *buf, size_t buf_sz); + +#endif |