aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkartofen <mladenovnasko0@gmail.com>2023-06-17 23:42:31 +0300
committerkartofen <mladenovnasko0@gmail.com>2023-06-17 23:42:31 +0300
commitbcac686c1bf6a5c1dec2324269e2766babdc0fde (patch)
tree6483461015705efa8290a1ab05482a641739c1dd
lexer - done
-rw-r--r--.gitignore2
-rw-r--r--Makefile48
-rw-r--r--files/test1.lisp3
-rw-r--r--src/ast.h38
-rw-r--r--src/common.h20
-rw-r--r--src/eval.h22
-rw-r--r--src/lexer.c330
-rw-r--r--src/lexer.h55
-rw-r--r--src/main.c77
9 files changed, 595 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cbbd0b5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+bin/
+obj/ \ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0cbcaeb
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,48 @@
CC := gcc

ifeq ($(PROD),1)
CFLAGS := -std=c99 -O2 # production flags
else
CFLAGS := -std=c99 -Wall -Wextra -Wpedantic -g -DDEBUG # debug flags
endif

SRCD := src
OBJD := obj
BIND := bin
TESTD := tests

FILES = $(shell find $(SRCD)/ -type f 2> /dev/null)
CSRCS = $(filter %.c, $(FILES))
COBJS = $(CSRCS:$(SRCD)/%.c=$(OBJD)/%.o)

CDEPS = $(COBJS:%.o=%.d)
-include $(CDEPS)

# analyze and leak are recipes, not files: declare them phony so a file
# with the same name can never shadow them
.PHONY: all clean lispy analyze leak

all: lispy
lispy: $(BIND)/lispy

clean:
	rm -rf $(BIND)
	rm -rf $(OBJD)

$(OBJD)/%.o: $(SRCD)/%.c
	mkdir -p $(dir $@)
	$(CC) $(CFLAGS) -MMD -MF $(@:%.o=%.d) -c $< -o $@

$(BIND)/%: $(COBJS)
	mkdir -p $(dir $@)
	$(CC) $(CFLAGS) $^ -o $@

analyze: clean
	scan-build \
	-enable-checker alpha \
	-enable-checker core \
	-enable-checker deadcode \
	-enable-checker security \
	-enable-checker unix \
	make

leak: lispy
	valgrind -s --leak-check=full $(BIND)/lispy
diff --git a/files/test1.lisp b/files/test1.lisp
new file mode 100644
index 0000000..f0d6ffe
--- /dev/null
+++ b/files/test1.lisp
@@ -0,0 +1,3 @@
+(define 'a 1)
+(+ a 1)
+(+ a 0.1)
diff --git a/src/ast.h b/src/ast.h
new file mode 100644
index 0000000..bd2e628
--- /dev/null
+++ b/src/ast.h
@@ -0,0 +1,38 @@
+#ifndef AST_H
+#define AST_H
+
+#include "lexer.h"
+
+typedef struct node_t *ast_t;
+struct ast_node {
+ enum {
+ NODE_SEXP,
+ NODE_SYMBOL,
+ NODE_LITERAL,
+ } type;
+
+ union {
+ struct sexp {
+ struct ast_node **children;
+ size_t nchildren;
+ } sexp;
+
+ char *symbol;
+
+ union {
+ enum {
+ NODE_LITERAL_NUM,
+ NODE_LITERAL_STR,
+ } type;
+
+ int number;
+ char *string;
+ } literal;
+ };
+};
+
+ast_t ast_create();
+void ast_destroy(ast_t ast);
+int ast_parse_lexer(ast_t ast, lexer_t lex);
+
+#endif
diff --git a/src/common.h b/src/common.h
new file mode 100644
index 0000000..a1daa03
--- /dev/null
+++ b/src/common.h
@@ -0,0 +1,20 @@
#ifndef COMMON_H
#define COMMON_H

#include <stdio.h>

/* ANSI terminal color escape sequences.
 * Renamed from __RED__ etc.: identifiers containing a double underscore
 * are reserved for the implementation (C11 7.1.3). */
#define ANSI_RED    "\033[0;31m"
#define ANSI_GREEN  "\033[0;32m"
#define ANSI_YELLOW "\033[0;33m"
#define ANSI_RESET  "\033[0m"

/* Stringification helpers: STR quotes its argument literally,
 * XSTR expands macros in the argument first (used for __LINE__). */
#define STR(x) #x
#define XSTR(x) STR(x)

/* Logging macros; each appends a trailing newline.
 * err/warn also prefix the message with the call site's file and line. */
#define info(...) do { fprintf(stdout, ANSI_GREEN"[INFO]"ANSI_RESET" "__VA_ARGS__); fprintf(stdout, "\n"); } while(0)
#define err(...)  do { fprintf(stderr, ANSI_RED"[ERROR]"ANSI_RESET" "__FILE__":"XSTR(__LINE__)": "__VA_ARGS__); fprintf(stderr, "\n"); } while(0)
#define warn(...) do { fprintf(stderr, ANSI_YELLOW"[WARN]"ANSI_RESET" "__FILE__":"XSTR(__LINE__)": "__VA_ARGS__); fprintf(stderr, "\n"); } while(0)

#endif
diff --git a/src/eval.h b/src/eval.h
new file mode 100644
index 0000000..01ed827
--- /dev/null
+++ b/src/eval.h
@@ -0,0 +1,22 @@
+#ifndef EVAL_H
+#define EVAL_H
+
+#include "ast.h"
+
+typedef struct eval *eval_t;
+
+// RunTime Tree
+struct rtt {
+
+};
+
+struct eval {
+ struct rtt *root;
+};
+
+// TODO: add options for the evaluation
+eval_t evaluator_create();
+void evaluator_destroy(eval_t evaluator);
+int evaluator_eval_ast(eval_t evaluator, ast_t ast)
+
+#endif
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..1acfd6d
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,330 @@
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+// TODO: handle escaped quotes
+#include "common.h"
+#include "lexer.h"
+
+// saves a token with no data
+// returns the index of the saved token; < 0 on fail
+static int save_empty_token(lexer_t lexer, enum token_enum type);
+
+// saves a token with data which is the current identifier (lexer->iden)
+// returns 0 on success
+static int save_current_identifier(lexer_t lexer);
+
+// used for tokens that separate things, type is optional (TOKEN_TOKENS for default)
+// returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden)
+static int on_generic_separator(lexer_t lexer, enum token_enum type);
+static int on_quote(lexer_t lexer);
+static int on_dot(lexer_t lexer);
+
+// try to convert the identifier (lexer->iden) to a given type
+// returns > 0 on sucess, 0 on fail (iden isnt the given type),
+// and < 0 on error
+static int try_str(lexer_t lexer);
+static int try_int(lexer_t lexer);
+static int try_float(lexer_t lexer);
+static int try_symbol(lexer_t lexer);
+
+#define SEPARATOR_CALLBACK_TBL(X, lexer) \
+ X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \
+ X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \
+ X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \
+ X(EQ('.'), on_dot(lexer)) \
+ X(EQ('"'), on_quote(lexer)) \
+ X(FN(isspace), on_generic_separator(lexer, TOKEN_TOKENS))
+
+#define IDENTIFY_IDENTIFIER_LIST(X) \
+ X(try_str) \
+ X(try_int) \
+ X(try_float) \
+ X(try_symbol)
+
+// X(token type, what to free, how to print on screen)
+#define TOKEN_TYPES_INFO(X, token) \
+ X(TOKEN_PARENTHS_OPEN, NULL, "'('") \
+ X(TOKEN_PARENTHS_CLOSE, NULL, "')'") \
+ X(TOKEN_LITERAL_STRING, token->string, "%s", token->string) \
+ X(TOKEN_LITERAL_NUM_INT, NULL, "%ld", token->num_int) \
+ X(TOKEN_LITERAL_NUM_FLOAT, NULL, "%f", token->num_float) \
+ X(TOKEN_SYMBOL, token->symbol, "%s", token->symbol) \
+
+#define EQ(ch) ch ==
+#define FN(f) f
+
+// makes an if-else chain to test the character
+// agains the seperator callback table
+#define CHECK_SEPERATOR_AND_CALLBACK(test_func, callback) \
+ if(test_func(str[i])) { \
+ callback_ret = callback; \
+ if(callback_ret == 0) { \
+ continue; \
+ } else if(callback_ret < 0) { \
+ err(#callback ": failed"); \
+ return 1; \
+ } \
+ } else
+
+int lexer_tokenize(lexer_t lexer, char *str, size_t len)
+{
+ int callback_ret = 0;
+
+ for(size_t i = 0; i < len; i++)
+ {
+ SEPARATOR_CALLBACK_TBL(CHECK_SEPERATOR_AND_CALLBACK, lexer) {}
+
+ if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated
+ err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz);
+ return 1;
+ }
+
+ // add charater to identifier
+ lexer->iden[lexer->iden_sz++] = str[i];
+ }
+
+ return 0;
+}
+
+lexer_t lexer_create(size_t tokens_cap)
+{
+ lexer_t lexer = malloc(sizeof(struct lexer));
+ if(!lexer) {
+ err("malloc: %s", strerror(errno));
+ goto fail;
+ }
+
+ lexer->tokens = calloc(tokens_cap, sizeof(struct token));
+ if(!lexer->tokens) {
+ err("malloc %s", strerror(errno));
+ goto fail;
+ }
+
+ for(size_t i = 0; i < tokens_cap; i++) {
+ lexer->tokens[i].symbol = NULL;
+ }
+
+ lexer->tokens_cap = tokens_cap;
+ lexer->ntokens = 0;
+
+ memset(lexer->iden, 0, LEXER_IDEN_CAP);
+ lexer->iden_sz = 0;
+
+ lexer->inside_string = 0;
+
+ return lexer;
+fail:
+ lexer_destroy(lexer);
+ return NULL;
+}
+
+#define CASE_FREE_TOKEN(type, data, ...) \
+ case type: if(data != NULL) { free(data); } break;
+
+void lexer_destroy(lexer_t lexer)
+{
+ if(!lexer) return;
+
+ if(lexer->tokens) {
+ for(size_t i = 0; i < lexer->ntokens; i++) {
+ struct token *token = &lexer->tokens[i];
+ switch(token->type) {
+ TOKEN_TYPES_INFO(CASE_FREE_TOKEN, token)
+ default: break;
+ }
+ }
+ free(lexer->tokens);
+ }
+
+ free(lexer);
+}
+
+// ------------------------------------------------- //
+
+static int on_quote(lexer_t lexer)
+{
+ int ret = on_generic_separator(lexer, TOKEN_TOKENS);
+ if(ret == 0) {
+ lexer->inside_string = 1;
+ return ret;
+ } else if(ret > 0) {
+ lexer->inside_string = 0;
+ return 0;
+ }
+
+ return ret;
+}
+
+static int on_dot(lexer_t lexer)
+{
+ if(lexer->iden_sz != 0) return 1;
+ on_generic_separator(lexer, TOKEN_SPECIAL_DOT);
+}
+
+static int on_generic_separator(lexer_t lexer, enum token_enum type)
+{
+ if(lexer->inside_string) {
+ return 1;
+ }
+
+ if(save_current_identifier(lexer)) {
+ err("save_current_identifier: failed");
+ return -1;
+ }
+
+ if(type != TOKEN_TOKENS) {
+ if(save_empty_token(lexer, type) < 0) {
+ err("save_empty_token: failed");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int save_empty_token(lexer_t lexer, enum token_enum type)
+{
+ if(lexer->ntokens >= lexer->tokens_cap) {
+ err("tokens_cap of %ld has been reached", lexer->tokens_cap);
+ return -1;
+ }
+
+ lexer->tokens[lexer->ntokens++].type = type;
+ return lexer->ntokens - 1;
+}
+
+#define CHECK_IDEN(func) \
+ if((ret = func(lexer))) { \
+ if(ret < 0) { \
+ err(#func ": failed"); \
+ goto exit; \
+ } \
+ } else
+
+static int save_current_identifier(lexer_t lexer)
+{
+ int ret = 1;
+
+ if(lexer->iden_sz != 0) {
+ IDENTIFY_IDENTIFIER_LIST(CHECK_IDEN) {}
+ }
+
+ ret = 0;
+exit:
+ memset(lexer->iden, 0, lexer->iden_sz);
+ lexer->iden_sz = 0;
+ return ret;
+}
+
+
+// ------------------------------------------------- //
+
+static int try_str(lexer_t lexer)
+{
+ if(!lexer->inside_string) return 0;
+
+ int i = save_empty_token(lexer, TOKEN_LITERAL_STRING);
+ if(i < 0) {
+ err("save_empty_token: failed");
+ return -1;
+ }
+
+ lexer->tokens[i].string = malloc(lexer->iden_sz+1);
+ if(!lexer->tokens[i].string) {
+ err("malloc: %s", strerror(errno));
+ return -1;
+ }
+
+ memcpy(lexer->tokens[i].string, lexer->iden, lexer->iden_sz+1);
+ return 1;
+}
+
+static int try_int(lexer_t lexer)
+{
+ errno = ERANGE + 1; // set errno to not ERANGE
+
+ char *endptr;
+ long num = strtol(lexer->iden, &endptr, 10);
+
+ if(*endptr != '\0') { // the whole string isn't a number
+ return 0;
+ }
+
+ if(errno == ERANGE) {
+ warn("Given integer literal %s is outside the possible range", lexer->iden);
+ }
+
+ int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_INT);
+ if(i < 0) {
+ err("save_empty_token: failed");
+ return -1;
+ }
+
+ lexer->tokens[i].num_int = num;
+ return 1;
+}
+
+static int try_float(lexer_t lexer)
+{
+ errno = ERANGE + 1; // set errno to not ERANGE
+
+ char *endptr;
+ float num = strtof(lexer->iden, &endptr);
+
+ if(*endptr != '\0') { // the whole string isn't a number
+ return 0;
+ }
+
+ if(errno == ERANGE) {
+ warn("Given float literal %s is outside the possible range", lexer->iden);
+ }
+
+ int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_FLOAT);
+ if(i < 0) {
+ err("save_empty_token: failed");
+ return -1;
+ }
+
+ lexer->tokens[i].num_float = num;
+ return 1;
+}
+
+static int try_symbol(lexer_t lexer)
+{
+ int i = save_empty_token(lexer, TOKEN_SYMBOL);
+ if(i < 0) {
+ err("save_empty_token: failed");
+ return -1;
+ }
+
+ lexer->tokens[i].symbol = malloc(lexer->iden_sz+1);
+ if(!lexer->tokens[i].symbol) {
+ err("malloc: %s", strerror(errno));
+ return -1;
+ }
+
+ memcpy(lexer->tokens[i].symbol, lexer->iden, lexer->iden_sz+1);
+ return 1;
+}
+
+// ------------------------------------------------- //
+
#ifdef DEBUG
// X(type, data, format...) -> one case printing the token's payload
#define CASE_PRINT(type, data, ...) case type: info("\t" __VA_ARGS__); break;

// dump every stored token to stdout (debug builds only)
void lexer_print_tokens(lexer_t lexer)
{
    for(size_t n = 0; n < lexer->ntokens; n++) {
        struct token *token = &lexer->tokens[n];

        info("Token %zu: %d", n, token->type);

        switch(token->type) {
            TOKEN_TYPES_INFO(CASE_PRINT, token);
            default: break;
        }
    }
}
#endif
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..942be54
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,55 @@
#ifndef LEXER_H
#define LEXER_H

#include <stddef.h> /* size_t (used throughout); was missing */

#ifndef LEXER_IDEN_CAP
#define LEXER_IDEN_CAP 512
#endif

typedef struct lexer *lexer_t;

struct token {
    enum token_enum {
        TOKEN_PARENTHS_OPEN, TOKEN_PARENTHS_CLOSE,
        TOKEN_SPECIAL_DOT, TOKEN_SPECIAL_QUOTE,
        TOKEN_LITERAL_NUM_INT, TOKEN_LITERAL_STRING,
        TOKEN_LITERAL_NUM_FLOAT, TOKEN_SYMBOL,
        TOKEN_TOKENS // number of token types
    } type;

    // payload; which member is valid depends on type
    union {
        char *symbol;
        char *string;
        long num_int;
        float num_float; // BUG FIX: ';' was missing here
    };
};

struct lexer {
    struct token *tokens;
    size_t tokens_cap; // capacity of the tokens array
    size_t ntokens;    // number of tokens currently stored

    // identifier currently being accumulated
    char iden[LEXER_IDEN_CAP];
    size_t iden_sz;

    int inside_string; // nonzero while between double quotes
};

// allocate a lexer with a maximum number of tokens_cap tokens
// returns a lexer on success, NULL on fail
lexer_t lexer_create(size_t tokens_cap);

// destroy a lexer (NULL is a no-op)
void lexer_destroy(lexer_t lexer);

// turn the given non-null-terminated string str of length len into tokens
// returns 0 on success
int lexer_tokenize(lexer_t lexer, char *str, size_t len);

#ifdef DEBUG
void lexer_print_tokens(lexer_t lexer);
#endif

#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..992e594
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,77 @@
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include "common.h"
+#include "lexer.h"
+// #include "ast.h"
+// #include "eval.h"
+
+#define READ_BUF_CAP 512
+#define DEFAULT_TOKENS_CAP 8192 // make it a command line arg
+
+lexer_t lexer = NULL;
+// ast_t root = NULL;
+// eval_t evaluator = NULL;
+
+int main(void)
+{
+ int ret = 1;
+
+ char *filename = "files/test1.lisp";
+
+ lexer = lexer_create(DEFAULT_TOKENS_CAP);
+ if(!lexer) {
+ err("lexer_create: failed");
+ goto fail;
+ }
+
+ // tokenize input
+ FILE *fp = fopen(filename, "r");
+ if(!fp) {
+ err("fopen: %s: %s", filename, strerror(errno));
+ goto fail;
+ }
+
+ char buf[READ_BUF_CAP]; size_t bytes = 0;
+ while((bytes = fread(buf, sizeof(char), READ_BUF_CAP, fp))) {
+ if(lexer_tokenize(lexer, buf, bytes)) {
+ fclose(fp); goto fail;
+ }
+
+ if(bytes < READ_BUF_CAP) break;
+ }
+
+ fclose(fp);
+ lexer_print_tokens(lexer);
+ // --------------
+
+// ast = ast_create();
+// if(!ast) {
+// err("ast_create: failed");
+// goto fail;
+// }
+
+// if(ast_parse_lexer(ast, lexer)) {
+// err("ast_parse_lexer: failed");
+// goto fail;
+// }
+
+// evaluator = evaluator_create();
+// if(!evaluator) {
+// err("evaluator_create: failed");
+// goto fail;
+// }
+
+// if(evaluator_eval_ast(evaluator, ast)) {
+// err("evaluator_eval_ast: failed");
+// goto fail;
+// }
+
+ ret = 0;
+fail:
+// evaluator_destroy(eval);
+// ast_destroy(ast);
+ lexer_destroy(lexer);
+ return ret;
+}