aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkartofen <mladenovnasko0@gmail.com>2023-08-14 21:20:39 +0300
committerkartofen <mladenovnasko0@gmail.com>2023-08-14 21:20:39 +0300
commita7bb8ace49f5725e0f92336ab5af28b4c8900aff (patch)
tree5d00d7a5d159e9702b46c23542fffc09e591c271
parentf83187a830deff27ce0cdd4c175ffe2785461685 (diff)
parser done
-rw-r--r--Makefile2
-rw-r--r--README.md18
-rw-r--r--files/test1.lisp17
-rw-r--r--src/ast.h38
-rw-r--r--src/common.c33
-rw-r--r--src/common.h10
-rw-r--r--src/eval.h16
-rw-r--r--src/lexer.c334
-rw-r--r--src/lexer.h31
-rw-r--r--src/main.c66
-rw-r--r--src/parser.c258
-rw-r--r--src/parser.h60
-rw-r--r--src/value.c244
-rw-r--r--src/value.h62
14 files changed, 878 insertions, 311 deletions
diff --git a/Makefile b/Makefile
index 0cbcaeb..cf10bc1 100644
--- a/Makefile
+++ b/Makefile
@@ -45,4 +45,4 @@ analyze: clean
make
leak: lispy
- valgrind -s --leak-check=full $(BIND)/lispy
+ valgrind -s --leak-check=full --show-leak-kinds=all $(BIND)/lispy
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b844f7d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,18 @@
+### Lispy
+
+A simple lisp/scheme interpreter
+
+### TODO
+
+* Support Quasiquote (and comma)
+* Support macros
+* Add FFI (Foreing Function Interface) (Maybe codegen whole c std lib)
+
+Remember:
+* Need special procedures (procedures that don't evaluate their arguments like lambda, quote, if/cond)
+
+### Development Notes
+
+* Figure out how to deal with lexer and repl
+ * when the tokens aren
+ *
diff --git a/files/test1.lisp b/files/test1.lisp
index 3a44599..41325dd 100644
--- a/files/test1.lisp
+++ b/files/test1.lisp
@@ -1,4 +1,17 @@
-(define 'a 1)
+(define a 1)
(+ a 1)
(+ a 0.1)
-"string test () . '"
+("string test () . '" hello blah)
+(more blah (something))
+
+(1 2 3) (list 1 2 3) (quote (1 2 3))
+
+(1)
+1
+'1
+1
+
+'(2 3 4)
+(6 7)
+('1)
+1
diff --git a/src/ast.h b/src/ast.h
deleted file mode 100644
index bd2e628..0000000
--- a/src/ast.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef AST_H
-#define AST_H
-
-#include "lexer.h"
-
-typedef struct node_t *ast_t;
-struct ast_node {
- enum {
- NODE_SEXP,
- NODE_SYMBOL,
- NODE_LITERAL,
- } type;
-
- union {
- struct sexp {
- struct ast_node **children;
- size_t nchildren;
- } sexp;
-
- char *symbol;
-
- union {
- enum {
- NODE_LITERAL_NUM,
- NODE_LITERAL_STR,
- } type;
-
- int number;
- char *string;
- } literal;
- };
-};
-
-ast_t ast_create();
-void ast_destroy(ast_t ast);
-int ast_parse_lexer(ast_t ast, lexer_t lex);
-
-#endif
diff --git a/src/common.c b/src/common.c
new file mode 100644
index 0000000..084c584
--- /dev/null
+++ b/src/common.c
@@ -0,0 +1,33 @@
+#include <string.h>
+#include <errno.h>
+#include "common.h"
+
+void *xmalloc(size_t size)
+{
+ void *ret;
+ if(!(ret = malloc(size))) {
+ die("malloc: %s", strerror(errno));
+ }
+
+ return ret;
+}
+
+void *xcalloc(size_t nmemb, size_t size)
+{
+ void *ret;
+ if(!(ret = calloc(nmemb, size))) {
+ die("calloc: %s", strerror(errno));
+ }
+
+ return ret;
+}
+
+void *xrealloc(void *ptr, size_t size)
+{
+ void *ret;
+ if(!(ret = realloc(ptr, size))) {
+ die("realloc: %s", strerror(errno));
+ }
+
+ return ret;
+}
diff --git a/src/common.h b/src/common.h
index a1daa03..f67af22 100644
--- a/src/common.h
+++ b/src/common.h
@@ -2,6 +2,7 @@
#define COMMON_H
#include <stdio.h>
+#include <stdlib.h>
#define __RED__ "\033[0;31m"
#define __GREEN__ "\033[0;32m"
@@ -17,4 +18,13 @@
// #define info(...) printf(__VA_ARGS__);
// #define err(...) printf(__VA_ARGS__);
+#define die(...) do { \
+ err(__VA_ARGS__); \
+ abort(); \
+ } while(0)
+
+void *xmalloc(size_t size);
+void *xcalloc(size_t nmemb, size_t size);
+void *xrealloc(void *ptr, size_t size);
+
#endif
diff --git a/src/eval.h b/src/eval.h
index 01ed827..f7f9835 100644
--- a/src/eval.h
+++ b/src/eval.h
@@ -1,22 +1,16 @@
#ifndef EVAL_H
#define EVAL_H
-#include "ast.h"
+#include "parser.h"
-typedef struct eval *eval_t;
-
-// RunTime Tree
-struct rtt {
-
-};
+typedef struct eval *evaluator_t;
struct eval {
- struct rtt *root;
};
// TODO: add options for the evaluation
-eval_t evaluator_create();
-void evaluator_destroy(eval_t evaluator);
-int evaluator_eval_ast(eval_t evaluator, ast_t ast)
+evaluator_t eval_create();
+void eval_destroy(evaluator_t evaluator);
+int eval_ast(evaluator_t evaluator, parser_t ast);
#endif
diff --git a/src/lexer.c b/src/lexer.c
index 71eed79..9659bb4 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -3,64 +3,51 @@
#include <ctype.h>
#include <errno.h>
-// TODO: handle escaped quotes
#include "common.h"
#include "lexer.h"
+#include "value.h"
+
+// TODO: handle escaping
// saves a token with no data
// returns the index of the saved token; < 0 on fail
-static int save_empty_token(lexer_t lexer, enum token_enum type);
+static int token_add(lexer_t lexer, enum token_enum type);
-// saves a token with data which is the current identifier (lexer->iden)
+// saves a token with the current identifier (lexer->iden)
// returns 0 on success
-static int save_current_identifier(lexer_t lexer);
+static int token_add_iden(lexer_t lexer);
// used for tokens that separate things
// if type is TOKEN_TOKENS, then no empty token will be saved
// returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden)
static int on_generic_separator(lexer_t lexer, enum token_enum type);
-static int on_quote(lexer_t lexer);
-static int on_dot(lexer_t lexer);
-
-// try to convert the identifier (lexer->iden) to a given type
-// returns > 0 on sucess, 0 on fail (iden isnt the given type),
-// and < 0 on error
-static int try_str(lexer_t lexer);
-static int try_int(lexer_t lexer);
-static int try_float(lexer_t lexer);
-static int try_symbol(lexer_t lexer);
-
-#define SEPARATOR_CALLBACK_TBL(X, lexer) \
- X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \
- X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \
- X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \
- X(EQ('.'), on_dot(lexer)) \
- X(EQ('"'), on_quote(lexer)) \
- X(FN(isspace), on_generic_separator(lexer, TOKEN_TOKENS))
-
-// X(token type, what to free, how to print on screen)
-#define TOKEN_TYPES_INFO(X, token) \
- X(TOKEN_PARENTHS_OPEN, NULL, "(") \
- X(TOKEN_PARENTHS_CLOSE, NULL, ")") \
- X(TOKEN_SPECIAL_QUOTE, NULL, "'") \
- X(TOKEN_SPECIAL_DOT, NULL, ".") \
- X(TOKEN_LITERAL_STRING, token->string, "'%s'", token->string) \
- X(TOKEN_LITERAL_NUM_INT, NULL, "'%ld'", token->num_int) \
- X(TOKEN_LITERAL_NUM_FLOAT, NULL, "'%f'", token->num_float) \
- X(TOKEN_SYMBOL, token->symbol, "'%s'", token->symbol)
-
-#define IDENTIFY_IDENTIFIER_LIST(X) \
- X(try_str) \
- X(try_int) \
- X(try_float) \
- X(try_symbol)
+static int on_double_quote(lexer_t lexer);
#define EQ(ch) ch ==
-#define FN(f) f
+
+#define SEPARATOR_CALLBACK_TBL(X, lexer) \
+/* X(test, what to execute if the test succeeds) */ \
+ X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \
+ X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \
+ X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \
+ X(EQ('"'), on_double_quote(lexer)) \
+ X(isspace, on_generic_separator(lexer, TOKEN_TOKENS))
+
+#define FN(fn, arg) "%s", fn(arg, buf, buf_sz)
+
+#define MANAGE_TOKEN_TBL(X, token) \
+/* X(type, how to free, how to print) */ \
+ X(TOKEN_PARENTHS_OPEN, ;, "(") \
+ X(TOKEN_PARENTHS_CLOSE, ;, ")") \
+ X(TOKEN_SPECIAL_QUOTE, ;, "'") \
+ X(TOKEN_VALUE, value_destroy(token->value), FN(value_string, token->value)) \
+ X(TOKEN_TOKENS, ;, "") \
+
+// ---------- Exported Functions ---------- //
// makes an if-else chain to test the character
-// agains the seperator callback table
-#define CHECK_SEPERATOR_AND_CALLBACK(test_func, callback) \
+// agains the separator callback table
+#define CHECK_SEPARATOR_AND_CALLBACK(test_func, callback) \
if(test_func(str[i])) { \
callback_ret = callback; \
if(callback_ret == 0) { \
@@ -77,7 +64,7 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)
for(size_t i = 0; i < len; i++)
{
- SEPARATOR_CALLBACK_TBL(CHECK_SEPERATOR_AND_CALLBACK, lexer) {}
+ SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {}
if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated
err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz);
@@ -93,49 +80,35 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)
lexer_t lexer_create(size_t tokens_cap)
{
- lexer_t lexer = malloc(sizeof(struct lexer));
- if(!lexer) {
- err("malloc: %s", strerror(errno));
- goto fail;
- }
+ lexer_t lexer = xmalloc(sizeof(struct lexer));
- lexer->tokens = calloc(tokens_cap, sizeof(struct token));
- if(!lexer->tokens) {
- err("malloc %s", strerror(errno));
- goto fail;
- }
+ lexer->tokens_cap = tokens_cap;
+ lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token));
for(size_t i = 0; i < tokens_cap; i++) {
- lexer->tokens[i].symbol = NULL;
+ lexer->tokens[i].type = TOKEN_TOKENS;
}
- lexer->tokens_cap = tokens_cap;
- lexer->ntokens = 0;
-
- memset(lexer->iden, 0, LEXER_IDEN_CAP);
- lexer->iden_sz = 0;
-
- lexer->inside_string = 0;
-
+ lexer_reset(lexer);
return lexer;
-fail:
- lexer_destroy(lexer);
- return NULL;
}
-#define CASE_FREE_TOKEN(type, data, ...) \
- case type: if(data != NULL) { free(data); } break;
+#define CASE_FREE(type, free_func, ...) case type: free_func; break;
void lexer_destroy(lexer_t lexer)
{
if(!lexer) return;
if(lexer->tokens) {
- for(size_t i = 0; i < lexer->ntokens; i++) {
+ for(size_t i = 0; i < lexer->ntokens; i++)
+ {
struct token *token = &lexer->tokens[i];
- switch(token->type) {
- TOKEN_TYPES_INFO(CASE_FREE_TOKEN, token)
- default: break;
+
+ switch(lexer->tokens[i].type) {
+ MANAGE_TOKEN_TBL(CASE_FREE, token);
+ default:
+ err("lexer_reset: Unknown token type given");
+ break;
}
}
free(lexer->tokens);
@@ -144,18 +117,65 @@ void lexer_destroy(lexer_t lexer)
free(lexer);
}
-// ------------------------------------------------- //
+void lexer_reset(lexer_t lexer)
+{
+ for(size_t i = 0; i < lexer->tokens_cap; i++) {
+ struct token *token = &lexer->tokens[i];
+
+ switch(token->type) {
+ MANAGE_TOKEN_TBL(CASE_FREE, token);
+ default:
+ err("lexer_reset: Unknown token type given");
+ break;
+ }
+
+ token->type = TOKEN_TOKENS;
+ token->value = NULL;
+ }
+
+ lexer->ntokens = 0;
+
+ memset(lexer->iden, 0, LEXER_IDEN_CAP);
+ lexer->iden_sz = 0;
+
+ lexer->inside_string = 0;
+}
+
+// print based on the given way to print
+#define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break;
+
+void lexer_print_tokens(lexer_t lexer)
+{
+ // for the printing (see MANAGE_TOKEN_TBL)
+ char buf[LEXER_IDEN_CAP];
+ size_t buf_sz = LEXER_IDEN_CAP;
+
+ for(size_t i = 0; i < lexer->ntokens; i++) {
+ struct token *token = &lexer->tokens[i];
+
+ switch(token->type) {
+ MANAGE_TOKEN_TBL(CASE_PRINT, token);
+ default:
+ err("lexer_print_tokens: Unknown token given");
+ return;
+ }
+ }
+}
+
+// ---------- Callback Functions ----------- //
-static int on_quote(lexer_t lexer)
+static int on_double_quote(lexer_t lexer)
{
int ret = on_generic_separator(lexer, TOKEN_TOKENS);
- if(ret <= 0) { // it either failed or worked, both not inside a string
- lexer->inside_string = 1;
+ if(ret < 0) {
return ret;
+ } else if(ret == 0) {
+ lexer->inside_string = 1;
+ return 1;
}
- if(save_current_identifier(lexer)) {
- err("save_current_identifier: failed");
+ if(token_add_iden(lexer)) {
+ err("token_add_iden: failed");
return -1;
}
@@ -163,26 +183,20 @@ static int on_quote(lexer_t lexer)
return 0;
}
-static int on_dot(lexer_t lexer)
-{
- if(lexer->iden_sz != 0) return 1;
- return on_generic_separator(lexer, TOKEN_SPECIAL_DOT);
-}
-
static int on_generic_separator(lexer_t lexer, enum token_enum type)
{
if(lexer->inside_string) {
return 1;
}
- if(save_current_identifier(lexer)) {
- err("save_current_identifier: failed");
+ if(token_add_iden(lexer)) {
+ err("token_add_iden: failed");
return -1;
}
if(type != TOKEN_TOKENS) {
- if(save_empty_token(lexer, type) < 0) {
- err("save_empty_token: failed");
+ if(token_add(lexer, type) < 0) {
+ err("token_add: failed");
return -1;
}
}
@@ -190,147 +204,41 @@ static int on_generic_separator(lexer_t lexer, enum token_enum type)
return 0;
}
-static int save_empty_token(lexer_t lexer, enum token_enum type)
+// ---------- Token Functions ----------- //
+
+static int token_add(lexer_t lexer, enum token_enum type)
{
if(lexer->ntokens >= lexer->tokens_cap) {
err("tokens_cap of %ld has been reached", lexer->tokens_cap);
return -1;
}
- lexer->tokens[lexer->ntokens++].type = type;
- return lexer->ntokens - 1;
+ lexer->tokens[lexer->ntokens].type = type;
+ return lexer->ntokens++;
}
-#define CHECK_IDEN(func) \
- if((ret = func(lexer))) { \
- if(ret < 0) { \
- err(#func ": failed"); \
- goto exit; \
- } \
- } else
-
-static int save_current_identifier(lexer_t lexer)
+static int token_add_iden(lexer_t lexer)
{
int ret = 1;
+ if(!lexer->iden_sz) return 0;
- if(lexer->iden_sz != 0) {
- IDENTIFY_IDENTIFIER_LIST(CHECK_IDEN) {}
+ int i = token_add(lexer, TOKEN_VALUE);
+ if(i < 0) {
+ err("token_add: failed");
+ goto exit;
}
- ret = 0;
+ value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret);
+ if(ret > 0) {
+ value = value_create(VALUE_SYMBOL, lexer->iden, &ret);
+ } else if(ret < 0) {
+ err("value_create: failed");
+ goto exit;
+ }
+
+ lexer->tokens[i].value = value;
exit:
memset(lexer->iden, 0, lexer->iden_sz);
lexer->iden_sz = 0;
return ret;
}
-
-
-// ------------------------------------------------- //
-
-static int try_str(lexer_t lexer)
-{
- if(!lexer->inside_string) return 0;
-
- int i = save_empty_token(lexer, TOKEN_LITERAL_STRING);
- if(i < 0) {
- err("save_empty_token: failed");
- return -1;
- }
-
- lexer->tokens[i].string = malloc(lexer->iden_sz+1);
- if(!lexer->tokens[i].string) {
- err("malloc: %s", strerror(errno));
- return -1;
- }
-
- memcpy(lexer->tokens[i].string, lexer->iden, lexer->iden_sz+1);
- return 1;
-}
-
-static int try_int(lexer_t lexer)
-{
- errno = ERANGE + 1; // set errno to not ERANGE
-
- char *endptr;
- long num = strtol(lexer->iden, &endptr, 10);
-
- if(*endptr != '\0') { // the whole string isn't a number
- return 0;
- }
-
- if(errno == ERANGE) {
- warn("Given integer literal %s is outside the possible range", lexer->iden);
- }
-
- int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_INT);
- if(i < 0) {
- err("save_empty_token: failed");
- return -1;
- }
-
- lexer->tokens[i].num_int = num;
- return 1;
-}
-
-static int try_float(lexer_t lexer)
-{
- errno = ERANGE + 1; // set errno to not ERANGE
-
- char *endptr;
- float num = strtof(lexer->iden, &endptr);
-
- if(*endptr != '\0') { // the whole string isn't a number
- return 0;
- }
-
- if(errno == ERANGE) {
- warn("Given float literal %s is outside the possible range", lexer->iden);
- }
-
- int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_FLOAT);
- if(i < 0) {
- err("save_empty_token: failed");
- return -1;
- }
-
- lexer->tokens[i].num_float = num;
- return 1;
-}
-
-static int try_symbol(lexer_t lexer)
-{
- int i = save_empty_token(lexer, TOKEN_SYMBOL);
- if(i < 0) {
- err("save_empty_token: failed");
- return -1;
- }
-
- lexer->tokens[i].symbol = malloc(lexer->iden_sz+1);
- if(!lexer->tokens[i].symbol) {
- err("malloc: %s", strerror(errno));
- return -1;
- }
-
- memcpy(lexer->tokens[i].symbol, lexer->iden, lexer->iden_sz+1);
- return 1;
-}
-
-// ------------------------------------------------- //
-
-#ifdef DEBUG
-#define CASE_PRINT(type, data, ...) case type: info("\t" __VA_ARGS__); break;
-
-void lexer_print_tokens(lexer_t lexer)
-{
- for(size_t i = 0; i < lexer->ntokens; i++) {
- struct token *token = &lexer->tokens[i];
-
- info("Token %zu: %d", i, token->type);
-
- switch(token->type) {
- TOKEN_TYPES_INFO(CASE_PRINT, token);
- default: break;
- }
- }
-}
-#endif
diff --git a/src/lexer.h b/src/lexer.h
index 942be54..fc13f24 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -1,6 +1,8 @@
#ifndef LEXER_H
#define LEXER_H
+#include "value.h"
+
#ifndef LEXER_IDEN_CAP
#define LEXER_IDEN_CAP 512
#endif
@@ -9,19 +11,12 @@ typedef struct lexer *lexer_t;
struct token {
enum token_enum {
- TOKEN_PARENTHS_OPEN, TOKEN_PARENTHS_CLOSE,
- TOKEN_SPECIAL_DOT, TOKEN_SPECIAL_QUOTE,
- TOKEN_LITERAL_NUM_INT, TOKEN_LITERAL_STRING,
- TOKEN_LITERAL_NUM_FLOAT, TOKEN_SYMBOL,
+ TOKEN_PARENTHS_OPEN, TOKEN_PARENTHS_CLOSE,
+ TOKEN_SPECIAL_QUOTE, TOKEN_VALUE,
TOKEN_TOKENS // number of token types
} type;
- union {
- char *symbol;
- char *string;
- long num_int;
- float num_float
- };
+ value_t value;
};
struct lexer {
@@ -37,19 +32,25 @@ struct lexer {
};
// allocate a lexer with a maximum number of tokens_cap tokens
-// returns a lexer on success, NULL on fail
+// returns a lexer on success and NULL on fail
lexer_t lexer_create(size_t tokens_cap);
-// destroy a lexer
+// deallocate a lexer
void lexer_destroy(lexer_t lexer);
+// reset a lexer to its default state without destroying it (faster)
+void lexer_reset(lexer_t lexer);
+
+// self explanatory
+void lexer_print_tokens(lexer_t lexer);
+
// turn the given non-null-terminated string str of lenght len
// into into tokens
// returns 0 on success
int lexer_tokenize(lexer_t lexer, char *str, size_t len);
-#ifdef DEBUG
-void lexer_print_tokens(lexer_t lexer);
-#endif
+// checks whether the lexer has finished (temp buffers like iden are empty)
+// returns 1 on finished, 0 on not finished
+int lexer_has_finished(lexer_t lexer);
#endif
diff --git a/src/main.c b/src/main.c
index 992e594..45e0041 100644
--- a/src/main.c
+++ b/src/main.c
@@ -4,15 +4,17 @@
#include "common.h"
#include "lexer.h"
-// #include "ast.h"
-// #include "eval.h"
+#include "parser.h"
+#include "eval.h"
+
+// TODO: the lexer, parser, and eval functions should return -1 on fatal, and 1 on non fatal error
#define READ_BUF_CAP 512
#define DEFAULT_TOKENS_CAP 8192 // make it a command line arg
lexer_t lexer = NULL;
-// ast_t root = NULL;
-// eval_t evaluator = NULL;
+parser_t parser = NULL;
+// evaluator_t eval = NULL;
int main(void)
{
@@ -26,13 +28,13 @@ int main(void)
goto fail;
}
- // tokenize input
FILE *fp = fopen(filename, "r");
if(!fp) {
err("fopen: %s: %s", filename, strerror(errno));
goto fail;
}
+ // tokenize input
char buf[READ_BUF_CAP]; size_t bytes = 0;
while((bytes = fread(buf, sizeof(char), READ_BUF_CAP, fp))) {
if(lexer_tokenize(lexer, buf, bytes)) {
@@ -42,36 +44,38 @@ int main(void)
if(bytes < READ_BUF_CAP) break;
}
- fclose(fp);
lexer_print_tokens(lexer);
- // --------------
-
-// ast = ast_create();
-// if(!ast) {
-// err("ast_create: failed");
-// goto fail;
-// }
-
-// if(ast_parse_lexer(ast, lexer)) {
-// err("ast_parse_lexer: failed");
-// goto fail;
-// }
-
-// evaluator = evaluator_create();
-// if(!evaluator) {
-// err("evaluator_create: failed");
-// goto fail;
-// }
-
-// if(evaluator_eval_ast(evaluator, ast)) {
-// err("evaluator_eval_ast: failed");
-// goto fail;
-// }
+
+ fclose(fp);
+
+ parser = parser_create();
+ if(!parser) {
+ err("parser_create: failed");
+ goto fail;
+ }
+
+ if(parser_parse_lexer(parser, lexer)) {
+ err("parser_parse_lexer: failed");
+ goto fail;
+ }
+
+ parser_print_ast(parser);
+
+ // evaluator = eval_create();
+ // if(!evaluator) {
+ // err("eval_create: failed");
+ // goto fail;
+ // }
+
+ // if(eval_ast(eval, ast)) {
+ // err("eval_ast: failed");
+ // goto fail;
+ // }
ret = 0;
fail:
-// evaluator_destroy(eval);
-// ast_destroy(ast);
+ // eval_destroy(eval);
+ parser_destroy(parser);
lexer_destroy(lexer);
return ret;
}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..c4e37bd
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,258 @@
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "common.h"
+#include "parser.h"
+
+// TODO: Do all the todos
+
+// adds another child of type type to the sexp sexp
+// returns the index of the added type and < 0 on fail
+static size_t sexp_add(struct sexp *sexp, enum ast_type type);
+
+// self-explanatory
+static void sexp_init(struct sexp *sexp);
+static void sexp_print(struct sexp *sexp, int indent);
+static void sexp_free(struct sexp *sexp);
+static void ast_free(struct ast *ast);
+
+static void quote_stack_push(struct quote_node **head, struct sexp *cur_sexp);
+static struct sexp *quote_stack_pop(struct quote_node **head, int peak);
+
+// returns 0 on success
+static int on_paren(parser_t parser, int paren_type); // 0 is open, 1 is close
+static int on_quote(parser_t parent);
+static int on_value(parser_t parent, value_t value);
+
+#define TOKEN_CALLBACK_TBL(X, parser, token) \
+/* X(test, execute on succes) */ \
+ X(TOKEN_PARENTHS_OPEN, on_paren(parser, 0)) \
+ X(TOKEN_PARENTHS_CLOSE, on_paren(parser, 1)) \
+ X(TOKEN_SPECIAL_QUOTE, on_quote(parser)) \
+ X(TOKEN_VALUE, on_value(parser, token->value))
+
+#define FN(fn, arg) "%s", fn(arg, buf, buf_sz)
+
+#define MANAGE_AST_TBL(X, ast) \
+ X(AST_SEXP, sexp_free(&ast->sexp), "|") \
+ X(AST_VALUE, value_destroy(ast->value), FN(value_string, ast->value)) \
+
+// ---------- Exported Functions ---------- //
+
+#define CASE_TYPE(type, callback) \
+ case type: \
+ if(callback) { \
+ err(#callback ": failed"); \
+ return -1; \
+ } break;
+
+int parser_parse_lexer(parser_t parser, lexer_t lexer)
+{
+ for(size_t i = 0; i < lexer->ntokens; i++) {
+ struct token *token = &lexer->tokens[i];
+
+ switch(token->type) {
+ TOKEN_CALLBACK_TBL(CASE_TYPE, parser, token);
+ default:
+ err("parser_parse_lexer: Unknown token type given");
+ break;
+ }
+
+ if((token->type != TOKEN_SPECIAL_QUOTE) &&
+ (parser->cur_sexp == quote_stack_pop(&parser->quote_head, 1))) {
+ if(on_paren(parser, 1));
+ quote_stack_pop(&parser->quote_head, 0);
+ }
+ }
+
+ if(&parser->root == parser->cur_sexp) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+parser_t parser_create()
+{
+ parser_t parser = xmalloc(sizeof(struct parser));
+
+#define VALUE_CREATE(str) \
+ value_create(VALUE_SYMBOL, str, &ret); \
+ if(ret) { \
+ err("value_create: failed"); \
+ return NULL; \
+ }
+
+ int ret = 0;
+ parser->begin_symbol_value = VALUE_CREATE("begin");
+ parser->quote_symbol_value = VALUE_CREATE("quote");
+
+ parser->root.nchildren = 0;
+ parser->quote_head = NULL;
+ parser_reset(parser);
+
+ return parser;
+}
+
+void parser_destroy(parser_t parser)
+{
+ if(!parser) return;
+
+ value_destroy(parser->begin_symbol_value);
+ value_destroy(parser->quote_symbol_value);
+
+ sexp_free(&parser->root);
+ free(parser);
+}
+
+void parser_reset(parser_t parser)
+{
+ struct sexp *root = &parser->root;
+
+ for(size_t i = 0; i < root->nchildren; i++) {
+ ast_free(&root->children[i]);
+ }
+
+ sexp_init(root);
+ size_t index = sexp_add(root, AST_VALUE);
+
+ root->children[index].value = value_copy(parser->begin_symbol_value);
+ parser->cur_sexp = &parser->root;
+
+ while(quote_stack_pop(&parser->quote_head, 0) != NULL);
+}
+
+
+void parser_print_ast(parser_t parser)
+{
+ sexp_print(&parser->root, 0);
+}
+
+// ---------- Callback Functions ---------- //
+
+static int on_paren(parser_t parser, int paren_type)
+{
+ if(paren_type) { // !0 closing paren
+ parser->cur_sexp = parser->cur_sexp->prev;
+ } else { // 0 opening paren
+ size_t index = sexp_add(parser->cur_sexp, AST_SEXP);
+
+ struct sexp *prev = parser->cur_sexp;
+ struct sexp *new = &prev->children[index].sexp;
+
+ sexp_init(new);
+
+ parser->cur_sexp = new;
+ parser->cur_sexp->prev = prev;
+ }
+
+ return 0;
+}
+
+static int on_quote(parser_t parser)
+{
+ // new sexp
+ on_paren(parser, 0);
+
+ // add symbol to the sexp
+ on_value(parser, parser->quote_symbol_value);
+
+ // save the current quote sexp
+ quote_stack_push(&parser->quote_head, parser->cur_sexp);
+ return 0;
+}
+
+static int on_value(parser_t parser, value_t value)
+{
+ size_t index = sexp_add(parser->cur_sexp, AST_VALUE);
+
+ value_t copied = value_copy(value);
+
+ parser->cur_sexp->children[index].value = copied;
+ return 0;
+}
+
+// ---------- Parser Functions ---------- //
+
+static size_t sexp_add(struct sexp *sexp, enum ast_type type)
+{
+ sexp->children = xrealloc(sexp->children, sizeof(struct ast) * (sexp->nchildren+1));
+
+ sexp->children[sexp->nchildren].type = type;
+ return sexp->nchildren++;
+}
+
+static void sexp_init(struct sexp *sexp)
+{
+ sexp->children = NULL;
+ sexp->nchildren = 0;
+ sexp->prev = NULL;
+}
+
+static void sexp_print(struct sexp *sexp, int indent)
+{
+ for(size_t i = 0; i < sexp->nchildren; i++) {
+ struct ast *child = &sexp->children[i];
+
+ if(child->type == AST_SEXP) {
+ sexp_print(&child->sexp, indent + 2);
+ continue;
+ }
+
+ char buf[LEXER_IDEN_CAP];
+ size_t buf_sz = LEXER_IDEN_CAP;
+
+ info("%d %s", indent, value_string(child->value, buf, buf_sz));
+ }
+}
+
+static void sexp_free(struct sexp *sexp)
+{
+ for(size_t i = 0; i < sexp->nchildren; i++){
+ ast_free(&sexp->children[i]);
+ }
+ free(sexp->children);
+}
+
+#define CASE_FREE(type, free_func, print_func) \
+ case type: free_func; return;
+
+static void ast_free(struct ast *ast)
+{
+ if(!ast) return;
+
+ switch(ast->type) {
+ MANAGE_AST_TBL(CASE_FREE, ast)
+ default:
+ err("ast_free: Unknown ast type given");
+ break;
+ }
+}
+
+static void quote_stack_push(struct quote_node **head, struct sexp *cur_sexp)
+{
+ struct quote_node *node = xmalloc(sizeof(struct quote_node));
+
+ node->prev = *head;
+ node->cur_sexp = cur_sexp;
+ *head = node;
+}
+
+static struct sexp *quote_stack_pop(struct quote_node **head, int peak)
+{
+ if(*head == NULL) {
+ return NULL;
+ }
+
+ struct sexp *sexp = (*head)->cur_sexp;
+
+ if(!peak) {
+ struct quote_node *prev = (*head)->prev;
+ free(*head);
+ *head = prev;
+ }
+
+ return sexp;
+}
+
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..8fc5d6c
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,60 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "value.h"
+#include "lexer.h"
+
+typedef struct parser *parser_t;
+
+struct ast {
+ enum ast_type {
+ AST_SEXP,
+ AST_VALUE,
+ AST_TYPES // number of types
+ } type;
+
+ union {
+ struct sexp {
+ struct ast *children;
+ size_t nchildren;
+
+ struct sexp *prev;
+ } sexp;
+
+ value_t value;
+ };
+};
+
+struct parser {
+ struct sexp root;
+ struct sexp *cur_sexp;
+
+ struct quote_node {
+ struct sexp *cur_sexp;
+ struct quote_node *prev;
+ } *quote_head;
+
+ value_t begin_symbol_value;
+ value_t quote_symbol_value;
+};
+
+// allocate a parser
+// returns a parser on success and NULL on fail
+parser_t parser_create();
+
+// deallocate a parser
+void parser_destroy(parser_t parser);
+
+// reset a parser to its default state without destroying it
+// returns 0 on success
+void parser_reset(parser_t parser);
+
+// self explanatory
+void parser_print_ast(parser_t parser);
+
+// turn the given lexer (which has already has tokens) into an ast
+// returns 0 on success, > 0 when more tokens are needed,
+// and < 0 on a fatal error
+int parser_parse_lexer(parser_t parser, lexer_t lexer);
+
+#endif
diff --git a/src/value.c b/src/value.c
new file mode 100644
index 0000000..289aa0f
--- /dev/null
+++ b/src/value.c
@@ -0,0 +1,244 @@
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "common.h"
+#include "value.h"
+
+// TODO: add the other types
+
+// save data as a symbol
+// return 0 on sucess, < 0 on error
+static int symbol_save(char **symbol, char *data);
+// same as value_string
+static char *symbol_string(char *symbol, char *buf, size_t buf_sz);
+
+// try to save data as a literal
+// returns 0 on sucess, > 0 on fail (data isnt a possible literal),
+// and < 0 on error
+static int literal_save(struct literal *literal, char *data);
+static void literal_free(struct literal *literal);
+// same as value_string
+static char *literal_string(struct literal *literal, char *buf, size_t buf_sz);
+
+// for a function that saves string data as a value
+#define S_FN(fn, arg) fn(arg, (char *)data)
+// for a function that saves a value as a string in buf[buf_sz]
+#define P_FN(fn, arg) fn(arg, buf, buf_sz)
+
+#define MANAGE_VALUE_TBL(X, value) \
+/* X(type, save function, \
+ how to free, how to save to string) */ \
+ X(VALUE_SYMBOL, S_FN(symbol_save, &value->symbol), \
+ free(value->symbol), P_FN(symbol_string, value->symbol)) \
+ X(VALUE_LITERAL, S_FN(literal_save, &value->literal), \
+ literal_free(&value->literal), P_FN(literal_string, &value->literal)) \
+// X(VALUE_PAIR, ...)
+// X(VALUE_LIST, ...)
+// X(VALUE_PROC, ...)
+
+// ------ Exported Functions Implementations ------- //
+// ------------------------------------------------- //
+
+#define CASE_SAVE(type, save_func, free_func, string_func) \
+ case type: *ret = save_func; break;
+
+value_t value_create(enum value_type type, void *data, int *ret)
+{
+ value_t value = xmalloc(sizeof(struct value));
+
+ value->type = VALUE_TYPES;
+ value->references = 1;
+
+ switch(type) {
+ MANAGE_VALUE_TBL(CASE_SAVE, value);
+ default:
+ err("value_create: Unknown value type given");
+ *ret = -1;
+ break;
+ }
+
+ if(!*ret) {
+ value->type = type;
+ return value;
+ }
+
+ value_destroy(value);
+ return NULL;
+}
+
+value_t value_copy(value_t value)
+{
+ value->references++;
+ return value;
+}
+
+#define CASE_FREE(type, save_func, free_func, string_func) \
+ case type: free_func; break;
+
+void value_destroy(value_t value)
+{
+ if(!value) return;
+ if(--value->references > 0) {
+ return;
+ }
+
+
+ switch(value->type) {
+ MANAGE_VALUE_TBL(CASE_FREE, value);
+ default: break;
+ }
+
+ free(value);
+}
+
+#define CASE_STRING(type, save_func, free_func, string_func) \
+ case type: return string_func;
+
+char *value_string(value_t value, char *buf, size_t buf_sz)
+{
+ switch(value->type) {
+ MANAGE_VALUE_TBL(CASE_STRING, value);
+ default: return NULL;
+ }
+}
+
+// ---------------- Symbol Functions --------------- //
+
+static int symbol_save(char **symbol, char *data)
+{
+ size_t len = strlen(data) + 1;
+
+ *symbol = xmalloc(len);
+
+ memcpy(*symbol, data, len);
+ return 0;
+}
+
+static char *symbol_string(char *symbol, char *buf, size_t buf_sz)
+{
+ size_t len = strlen(symbol) + 1;
+ if(len> buf_sz) {
+ return NULL;
+ }
+
+ memcpy(buf, symbol, len);
+ return buf;
+}
+
+// --------------- Literal Functions --------------- //
+
+// write a value in a printf format to buf[buf_sz]
+#define EX(...) (((size_t)snprintf(buf, buf_sz, __VA_ARGS__) > buf_sz) ? NULL : buf)
+
+#define MANAGE_LITERAL_TBL(X, literal) \
+/* X(type, how to free how to print) */ \
+ X(LITERAL_STRING, free(literal->string), EX("%s", literal->string)) \
+ X(LITERAL_NUM_INT, ;, EX("%ld", literal->num_int)) \
+ X(LITERAL_NUM_FLOAT, ;, EX("%f", literal->num_float))
+
+// try to convert the data to a literal
+// returns 0 on success, > 0 on fail (data isnt a literal),
+// and < 0 on error
+static int literal_try_string(struct literal *literal, char *data);
+static int literal_try_int (struct literal *literal, char *data);
+static int literal_try_float (struct literal *literal, char *data);
+
+// list of the try_type functions (order matters)
+typedef int (*try_func)(struct literal *, char *);
+try_func literal_try_funcs[] = {
+ literal_try_string,
+ literal_try_int,
+ literal_try_float,
+};
+
+static int literal_save(struct literal *literal, char *data)
+{
+ int ret = 0;
+
+ for(size_t i = 0; i < sizeof(literal_try_funcs)/sizeof(try_func); i++) {
+ // <= 0 means that it either failed fatally, or succeeded
+ if((ret = literal_try_funcs[i](literal, data)) <= 0) break;
+ }
+
+ return ret;
+}
+
+#define CASE_LITERAL_FREE(type, free_func, string_func) \
+ case type: free_func; break;
+
+static void literal_free(struct literal *literal)
+{
+ switch(literal->type) {
+ MANAGE_LITERAL_TBL(CASE_LITERAL_FREE, literal);
+ default: break;
+ }
+}
+
+#define CASE_LITERAL_STRING(type, free_func, string_func) \
+ case type: return string_func;
+
+static char *literal_string(struct literal *literal, char *buf, size_t buf_sz)
+{
+ switch(literal->type) {
+ MANAGE_LITERAL_TBL(CASE_LITERAL_STRING, literal);
+ default: return NULL; break;
+ }
+}
+
+// Literal try_type functions
+
+static int literal_try_string(struct literal *literal, char *data)
+{
+ if(data[0] != '"') return 1;
+
+ literal->type = LITERAL_STRING;
+
+ size_t len = strlen(data); // no +1 needed since first character is discarded
+ literal->string = xmalloc(len);
+
+ memcpy(literal->string, data + 1, len);
+ return 0;
+}
+
+static int literal_try_int(struct literal *literal, char *data)
+{
+ errno = ERANGE + 1; // set errno to not ERANGE
+
+ char *endptr;
+ long num = strtol(data, &endptr, 10);
+
+ if(*endptr != '\0') { // the whole string isn't a number
+ return 1;
+ }
+
+ if(errno == ERANGE) {
+ warn("Given integer literal %s is outside the possible range", data);
+ }
+
+ literal->type = LITERAL_NUM_INT;
+ literal->num_int = num;
+
+ return 0;
+}
+
+static int literal_try_float (struct literal *literal, char *data)
+{
+ errno = ERANGE + 1; // set errno to not ERANGE
+
+ char *endptr;
+ float num = strtof(data, &endptr);
+
+ if(*endptr != '\0') { // the whole string isn't a number
+ return 1;
+ }
+
+ if(errno == ERANGE) {
+ warn("Given float literal %s is outside the possible range", data);
+ }
+
+ literal->type = LITERAL_NUM_FLOAT;
+ literal->num_float = num;
+
+ return 0;
+}
diff --git a/src/value.h b/src/value.h
new file mode 100644
index 0000000..60d2c43
--- /dev/null
+++ b/src/value.h
@@ -0,0 +1,62 @@
+#ifndef LITERAL_H
+#define LITERAL_H
+
+typedef struct value *value_t;
+
+struct literal {
+ enum literal_type {
+ LITERAL_STRING,
+ LITERAL_NUM_INT,
+ LITERAL_NUM_FLOAT
+ } type;
+
+ union {
+ char *string;
+ long num_int;
+ float num_float;
+ };
+};
+
+// struct pair {
+// };
+// struct proc {
+// };
+
+struct value {
+ enum value_type {
+ VALUE_SYMBOL,
+ VALUE_LITERAL,
+ VALUE_PAIR,
+ VALUE_LIST,
+ VALUE_PROC,
+ VALUE_TYPES
+ } type;
+
+ int references;
+
+ union {
+ char *symbol;
+ struct literal literal;
+ // struct pair pair;
+ // struct pari list;
+ // struct proc_t proc;
+ };
+};
+
+// try to save data as a literal
+// returns a value_t on success and NULL on fail
+// ret is set to: 0 on sucess, > 0 on fail (data isnt this type),
+// and < 0 on error
+value_t value_create(enum value_type type, void *data, int *ret);
+
+// returns a value_t on success
+value_t value_copy(value_t value);
+
+// destroys a value
+void value_destroy(value_t value);
+
+// copy a string version of the value the buf[buf_sz]
+// returns buf on success
+char *value_string(value_t value, char *buf, size_t buf_sz);
+
+#endif