diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.h | 38 | ||||
| -rw-r--r-- | src/common.h | 20 | ||||
| -rw-r--r-- | src/eval.h | 22 | ||||
| -rw-r--r-- | src/lexer.c | 330 | ||||
| -rw-r--r-- | src/lexer.h | 55 | ||||
| -rw-r--r-- | src/main.c | 77 | 
6 files changed, 542 insertions, 0 deletions
| diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 0000000..bd2e628 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,38 @@ +#ifndef AST_H +#define AST_H + +#include "lexer.h" + +typedef struct node_t *ast_t; +struct ast_node { +    enum { +        NODE_SEXP, +        NODE_SYMBOL, +        NODE_LITERAL, +    } type; + +    union { +        struct sexp { +            struct ast_node **children; +            size_t nchildren; +        } sexp; + +        char *symbol; + +        union { +            enum { +                NODE_LITERAL_NUM, +                NODE_LITERAL_STR, +            } type; + +            int number; +            char *string; +        } literal; +    }; +}; + +ast_t ast_create(); +void ast_destroy(ast_t ast); +int ast_parse_lexer(ast_t ast, lexer_t lex); + +#endif diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..a1daa03 --- /dev/null +++ b/src/common.h @@ -0,0 +1,20 @@ +#ifndef COMMON_H +#define COMMON_H + +#include <stdio.h> + +#define __RED__    "\033[0;31m" +#define __GREEN__  "\033[0;32m" +#define __YELLOW__ "\033[0;33m" +#define __RESET__  "\033[0m" + +#define STR(x) #x +#define XSTR(x) STR(x) + +#define info(...) do { fprintf(stdout, __GREEN__"[INFO]"__RESET__"  "__VA_ARGS__); fprintf(stdout, "\n"); } while(0) +#define err(...)  do { fprintf(stderr, __RED__"[ERROR]"__RESET__" "__FILE__":"XSTR(__LINE__)": "__VA_ARGS__); fprintf(stderr, "\n"); }while(0) +#define warn(...) do { fprintf(stderr, __YELLOW__"[WARN]"__RESET__"  "__FILE__":"XSTR(__LINE__)": "__VA_ARGS__); fprintf(stderr, "\n"); }while(0) +// #define info(...) printf(__VA_ARGS__); +// #define err(...)  printf(__VA_ARGS__); + +#endif diff --git a/src/eval.h b/src/eval.h new file mode 100644 index 0000000..01ed827 --- /dev/null +++ b/src/eval.h @@ -0,0 +1,22 @@ +#ifndef EVAL_H +#define EVAL_H + +#include "ast.h" + +typedef struct eval *eval_t; + +// RunTime Tree +struct rtt { + +}; + +struct eval { +    struct rtt *root; +}; + +// TODO: add options for the evaluation +eval_t evaluator_create(); +void evaluator_destroy(eval_t evaluator); +int evaluator_eval_ast(eval_t evaluator, ast_t ast) + +#endif diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..1acfd6d --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,330 @@ +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> + +// TODO: handle escaped quotes +#include "common.h" +#include "lexer.h" + +// saves a token with no data +// returns the index of the saved token; < 0 on fail +static int save_empty_token(lexer_t lexer, enum token_enum type); + +// saves a token with data which is the current identifier (lexer->iden) +// returns 0 on success +static int save_current_identifier(lexer_t lexer); + +// used for tokens that separate things, type is optional (TOKEN_TOKENS for default) +// returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden) +static int on_generic_separator(lexer_t lexer, enum token_enum type); +static int on_quote(lexer_t lexer); +static int on_dot(lexer_t lexer); + +// try to convert the identifier (lexer->iden) to a given type +// returns > 0 on sucess, 0 on fail (iden isnt the given type), +//         and < 0 on error +static int try_str(lexer_t lexer); +static int try_int(lexer_t lexer); +static int try_float(lexer_t lexer); +static int try_symbol(lexer_t lexer); + +#define SEPARATOR_CALLBACK_TBL(X, lexer)                                \ +    X(EQ('('),      on_generic_separator(lexer, TOKEN_PARENTHS_OPEN))   \ +    X(EQ(')'),      on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE))  \ +    X(EQ('\''),     on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE))   \ +    X(EQ('.'),      on_dot(lexer))                                      \ +    X(EQ('"'),      on_quote(lexer))                                    \ +    X(FN(isspace),  on_generic_separator(lexer, TOKEN_TOKENS)) + +#define IDENTIFY_IDENTIFIER_LIST(X)   \ +    X(try_str)                        \ +    X(try_int)                        \ +    X(try_float)                      \ +    X(try_symbol) + +// X(token type, what to free, how to print on screen) +#define TOKEN_TYPES_INFO(X, token)                                    \ +    X(TOKEN_PARENTHS_OPEN,     NULL,          "'('")                  \ +    X(TOKEN_PARENTHS_CLOSE,    NULL,          "')'")                  \ +    X(TOKEN_LITERAL_STRING,    token->string, "%s", token->string)    \ +    X(TOKEN_LITERAL_NUM_INT,   NULL,          "%ld", token->num_int)  \ +    X(TOKEN_LITERAL_NUM_FLOAT, NULL,          "%f", token->num_float) \ +    X(TOKEN_SYMBOL,            token->symbol, "%s", token->symbol)    \ + +#define EQ(ch) ch == +#define FN(f)  f + +// makes an if-else chain to test the character +// agains the seperator callback table +#define CHECK_SEPERATOR_AND_CALLBACK(test_func, callback) \ +    if(test_func(str[i])) {                              \ +        callback_ret = callback;                          \ +        if(callback_ret == 0) {                           \ +            continue;                                     \ +        } else if(callback_ret < 0) {                     \ +            err(#callback ": failed");                    \ +            return 1;                                     \ +        }                                                 \ +    } else + +int lexer_tokenize(lexer_t lexer, char *str, size_t len) +{ +    int callback_ret = 0; + +    for(size_t i = 0; i < len; i++) +    { +        SEPARATOR_CALLBACK_TBL(CHECK_SEPERATOR_AND_CALLBACK, lexer) {} + +        if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated +            err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz); +            return 1; +        } + +        // add charater to identifier +        lexer->iden[lexer->iden_sz++] = str[i]; +    } + +    return 0; +} + +lexer_t lexer_create(size_t tokens_cap) +{ +    lexer_t lexer = malloc(sizeof(struct lexer)); +    if(!lexer) { +        err("malloc: %s", strerror(errno)); +        goto fail; +    } + +    lexer->tokens = calloc(tokens_cap, sizeof(struct token)); +    if(!lexer->tokens) { +        err("malloc %s", strerror(errno)); +        goto fail; +    } + +    for(size_t i = 0; i < tokens_cap; i++) { +        lexer->tokens[i].symbol = NULL; +    } + +    lexer->tokens_cap = tokens_cap; +    lexer->ntokens = 0; + +    memset(lexer->iden, 0, LEXER_IDEN_CAP); +    lexer->iden_sz = 0; + +    lexer->inside_string = 0; + +    return lexer; +fail: +    lexer_destroy(lexer); +    return NULL; +} + +#define CASE_FREE_TOKEN(type, data, ...) \ +    case type: if(data != NULL) { free(data); } break; + +void lexer_destroy(lexer_t lexer) +{ +    if(!lexer) return; + +    if(lexer->tokens) { +        for(size_t i = 0; i < lexer->ntokens; i++) { +            struct token *token = &lexer->tokens[i]; +            switch(token->type) { +                TOKEN_TYPES_INFO(CASE_FREE_TOKEN, token) +            default: break; +            } +        } +        free(lexer->tokens); +    } + +    free(lexer); +} + +// ------------------------------------------------- // + +static int on_quote(lexer_t lexer) +{ +    int ret = on_generic_separator(lexer, TOKEN_TOKENS); +    if(ret == 0) { +        lexer->inside_string = 1; +        return ret; +    } else if(ret > 0) { +        lexer->inside_string = 0; +        return 0; +    } + +    return ret; +} + +static int on_dot(lexer_t lexer) +{ +    if(lexer->iden_sz != 0) return 1; +    on_generic_separator(lexer, TOKEN_SPECIAL_DOT); +} + +static int on_generic_separator(lexer_t lexer, enum token_enum type) +{ +    if(lexer->inside_string) { +        return 1; +    } + +    if(save_current_identifier(lexer)) { +        err("save_current_identifier: failed"); +        return -1; +    } + +    if(type != TOKEN_TOKENS) { +        if(save_empty_token(lexer, type) < 0) { +            err("save_empty_token: failed"); +            return -1; +        } +    } + +    return 0; +} + +static int save_empty_token(lexer_t lexer, enum token_enum type) +{ +    if(lexer->ntokens >= lexer->tokens_cap) { +        err("tokens_cap of %ld has been reached", lexer->tokens_cap); +        return -1; +    } + +    lexer->tokens[lexer->ntokens++].type = type; +    return lexer->ntokens - 1; +} + +#define CHECK_IDEN(func)                        \ +    if((ret = func(lexer))) {                   \ +        if(ret < 0) {                           \ +            err(#func ": failed");              \ +            goto exit;                          \ +        }                                       \ +    } else + +static int save_current_identifier(lexer_t lexer) +{ +    int ret = 1; + +    if(lexer->iden_sz != 0) { +        IDENTIFY_IDENTIFIER_LIST(CHECK_IDEN) {} +    } + +    ret = 0; +exit: +    memset(lexer->iden, 0, lexer->iden_sz); +    lexer->iden_sz = 0; +    return ret; +} + + +// ------------------------------------------------- // + +static int try_str(lexer_t lexer) +{ +    if(!lexer->inside_string) return 0; + +    int i = save_empty_token(lexer, TOKEN_LITERAL_STRING); +    if(i < 0) { +        err("save_empty_token: failed"); +        return -1; +    } + +    lexer->tokens[i].string = malloc(lexer->iden_sz+1); +    if(!lexer->tokens[i].string) { +       err("malloc: %s", strerror(errno)); +       return -1; +    } + +    memcpy(lexer->tokens[i].string, lexer->iden, lexer->iden_sz+1); +    return 1; +} + +static int try_int(lexer_t lexer) +{ +    errno = ERANGE + 1; // set errno to not ERANGE + +    char *endptr; +    long num = strtol(lexer->iden, &endptr, 10); + +    if(*endptr != '\0') { // the whole string isn't a number +        return 0; +    } + +    if(errno == ERANGE) { +        warn("Given integer literal %s is outside the possible range", lexer->iden); +    } + +    int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_INT); +    if(i < 0) { +        err("save_empty_token: failed"); +        return -1; +    } + +    lexer->tokens[i].num_int = num; +    return 1; +} + +static int try_float(lexer_t lexer) +{ +    errno = ERANGE + 1; // set errno to not ERANGE + +    char *endptr; +    float num = strtof(lexer->iden, &endptr); + +    if(*endptr != '\0') { // the whole string isn't a number +        return 0; +    } + +    if(errno == ERANGE) { +        warn("Given float literal %s is outside the possible range", lexer->iden); +    } + +    int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_FLOAT); +    if(i < 0) { +        err("save_empty_token: failed"); +        return -1; +    } + +    lexer->tokens[i].num_float = num; +    return 1; +} + +static int try_symbol(lexer_t lexer) +{ +    int i = save_empty_token(lexer, TOKEN_SYMBOL); +    if(i < 0) { +        err("save_empty_token: failed"); +        return -1; +    } + +    lexer->tokens[i].symbol = malloc(lexer->iden_sz+1); +    if(!lexer->tokens[i].symbol) { +       err("malloc: %s", strerror(errno)); +       return -1; +    } + +    memcpy(lexer->tokens[i].symbol, lexer->iden, lexer->iden_sz+1); +    return 1; +} + +// ------------------------------------------------- // + +#ifdef DEBUG +#define CASE_PRINT(type, data, ...) case type: info("\t" __VA_ARGS__); break; + +void lexer_print_tokens(lexer_t lexer) +{ +    for(size_t i = 0; i < lexer->ntokens; i++) { +        struct token *token = &lexer->tokens[i]; + +        info("Token %zu: %d", i, token->type); + +        switch(token->type) { +            TOKEN_TYPES_INFO(CASE_PRINT, token); +        default: break; +        } +    } +} +#endif diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..942be54 --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,55 @@ +#ifndef LEXER_H +#define LEXER_H + +#ifndef LEXER_IDEN_CAP +#define LEXER_IDEN_CAP 512 +#endif + +typedef struct lexer *lexer_t; + +struct token { +    enum token_enum { +        TOKEN_PARENTHS_OPEN,     TOKEN_PARENTHS_CLOSE, +        TOKEN_SPECIAL_DOT,       TOKEN_SPECIAL_QUOTE, +        TOKEN_LITERAL_NUM_INT,   TOKEN_LITERAL_STRING, +        TOKEN_LITERAL_NUM_FLOAT, TOKEN_SYMBOL, +        TOKEN_TOKENS // number of token types +    } type; + +    union { +        char *symbol; +        char *string; +        long num_int; +        float num_float +    }; +}; + +struct lexer { +    struct token *tokens; +    size_t tokens_cap; +    size_t ntokens; + +    // identifier +    char iden[LEXER_IDEN_CAP]; +    size_t iden_sz; + +    int inside_string; +}; + +// allocate a lexer with a maximum number of tokens_cap tokens +// returns a lexer on success, NULL on fail +lexer_t lexer_create(size_t tokens_cap); + +// destroy a lexer +void lexer_destroy(lexer_t lexer); + +// turn the given non-null-terminated string str of lenght len +// into into tokens +// returns 0 on success +int lexer_tokenize(lexer_t lexer, char *str, size_t len); + +#ifdef DEBUG +void lexer_print_tokens(lexer_t lexer); +#endif + +#endif diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..992e594 --- /dev/null +++ b/src/main.c @@ -0,0 +1,77 @@ +#include <stdio.h> +#include <string.h> +#include <errno.h> + +#include "common.h" +#include "lexer.h" +// #include "ast.h" +// #include "eval.h" + +#define READ_BUF_CAP 512 +#define DEFAULT_TOKENS_CAP 8192 // make it a command line arg + +lexer_t lexer = NULL; +// ast_t root = NULL; +// eval_t evaluator = NULL; + +int main(void) +{ +    int ret = 1; + +    char *filename = "files/test1.lisp"; + +    lexer = lexer_create(DEFAULT_TOKENS_CAP); +    if(!lexer) { +        err("lexer_create: failed"); +        goto fail; +    } + +    // tokenize input +    FILE *fp = fopen(filename, "r"); +    if(!fp) { +        err("fopen: %s: %s", filename, strerror(errno)); +        goto fail; +    } + +    char buf[READ_BUF_CAP]; size_t bytes = 0; +    while((bytes = fread(buf, sizeof(char), READ_BUF_CAP, fp))) { +        if(lexer_tokenize(lexer, buf, bytes)) { +            fclose(fp); goto fail; +        } + +        if(bytes < READ_BUF_CAP) break; +    } + +    fclose(fp); +    lexer_print_tokens(lexer); +    // -------------- + +//     ast = ast_create(); +//     if(!ast) { +//         err("ast_create: failed"); +//         goto fail; +//     } + +//     if(ast_parse_lexer(ast, lexer)) { +//         err("ast_parse_lexer: failed"); +//         goto fail; +//     } + +//     evaluator = evaluator_create(); +//     if(!evaluator) { +//         err("evaluator_create: failed"); +//         goto fail; +//     } + +//     if(evaluator_eval_ast(evaluator, ast)) { +//         err("evaluator_eval_ast: failed"); +//         goto fail; +//     } + +    ret = 0; +fail: +//     evaluator_destroy(eval); +//     ast_destroy(ast); +    lexer_destroy(lexer); +    return ret; +} | 
