diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.h | 38 | ||||
| -rw-r--r-- | src/common.c | 33 | ||||
| -rw-r--r-- | src/common.h | 10 | ||||
| -rw-r--r-- | src/eval.h | 16 | ||||
| -rw-r--r-- | src/lexer.c | 334 | ||||
| -rw-r--r-- | src/lexer.h | 31 | ||||
| -rw-r--r-- | src/main.c | 66 | ||||
| -rw-r--r-- | src/parser.c | 258 | ||||
| -rw-r--r-- | src/parser.h | 60 | ||||
| -rw-r--r-- | src/value.c | 244 | ||||
| -rw-r--r-- | src/value.h | 62 | 
11 files changed, 844 insertions, 308 deletions
| diff --git a/src/ast.h b/src/ast.h deleted file mode 100644 index bd2e628..0000000 --- a/src/ast.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef AST_H -#define AST_H - -#include "lexer.h" - -typedef struct node_t *ast_t; -struct ast_node { -    enum { -        NODE_SEXP, -        NODE_SYMBOL, -        NODE_LITERAL, -    } type; - -    union { -        struct sexp { -            struct ast_node **children; -            size_t nchildren; -        } sexp; - -        char *symbol; - -        union { -            enum { -                NODE_LITERAL_NUM, -                NODE_LITERAL_STR, -            } type; - -            int number; -            char *string; -        } literal; -    }; -}; - -ast_t ast_create(); -void ast_destroy(ast_t ast); -int ast_parse_lexer(ast_t ast, lexer_t lex); - -#endif diff --git a/src/common.c b/src/common.c new file mode 100644 index 0000000..084c584 --- /dev/null +++ b/src/common.c @@ -0,0 +1,33 @@ +#include <string.h> +#include <errno.h> +#include "common.h" + +void *xmalloc(size_t size) +{ +    void *ret; +    if(!(ret = malloc(size))) { +        die("malloc: %s", strerror(errno)); +    } + +    return ret; +} + +void *xcalloc(size_t nmemb, size_t size) +{ +    void *ret; +    if(!(ret = calloc(nmemb, size))) { +        die("calloc: %s", strerror(errno)); +    } + +    return ret; +} + +void *xrealloc(void *ptr, size_t size) +{ +    void *ret; +    if(!(ret = realloc(ptr, size))) { +        die("realloc: %s", strerror(errno)); +    } + +    return ret; +} diff --git a/src/common.h b/src/common.h index a1daa03..f67af22 100644 --- a/src/common.h +++ b/src/common.h @@ -2,6 +2,7 @@  #define COMMON_H  #include <stdio.h> +#include <stdlib.h>  #define __RED__    "\033[0;31m"  #define __GREEN__  "\033[0;32m" @@ -17,4 +18,13 @@  // #define info(...) printf(__VA_ARGS__);  // #define err(...)  printf(__VA_ARGS__); +#define die(...) do {     \ +        err(__VA_ARGS__); \ +        abort();          \ +    } while(0) + +void *xmalloc(size_t size); +void *xcalloc(size_t nmemb, size_t size); +void *xrealloc(void *ptr, size_t size); +  #endif @@ -1,22 +1,16 @@  #ifndef EVAL_H  #define EVAL_H -#include "ast.h" +#include "parser.h" -typedef struct eval *eval_t; - -// RunTime Tree -struct rtt { - -}; +typedef struct eval *evaluator_t;  struct eval { -    struct rtt *root;  };  // TODO: add options for the evaluation -eval_t evaluator_create(); -void evaluator_destroy(eval_t evaluator); -int evaluator_eval_ast(eval_t evaluator, ast_t ast) +evaluator_t eval_create(); +void eval_destroy(evaluator_t evaluator); +int eval_ast(evaluator_t evaluator, parser_t ast);  #endif diff --git a/src/lexer.c b/src/lexer.c index 71eed79..9659bb4 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -3,64 +3,51 @@  #include <ctype.h>  #include <errno.h> -// TODO: handle escaped quotes  #include "common.h"  #include "lexer.h" +#include "value.h" + +// TODO: handle escaping  // saves a token with no data  // returns the index of the saved token; < 0 on fail -static int save_empty_token(lexer_t lexer, enum token_enum type); +static int token_add(lexer_t lexer, enum token_enum type); -// saves a token with data which is the current identifier (lexer->iden) +// saves a token with the current identifier (lexer->iden)  // returns 0 on success -static int save_current_identifier(lexer_t lexer); +static int token_add_iden(lexer_t lexer);  // used for tokens that separate things  //   if type is TOKEN_TOKENS, then no empty token will be saved  // returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden)  static int on_generic_separator(lexer_t lexer, enum token_enum type); -static int on_quote(lexer_t lexer); -static int on_dot(lexer_t lexer); - -// try to convert the identifier (lexer->iden) to a given type -// returns > 0 on sucess, 0 on fail (iden isnt the given type), -//         and < 0 on error -static int try_str(lexer_t lexer); -static int try_int(lexer_t lexer); -static int try_float(lexer_t lexer); -static int try_symbol(lexer_t lexer); - -#define SEPARATOR_CALLBACK_TBL(X, lexer)                                \ -    X(EQ('('),      on_generic_separator(lexer, TOKEN_PARENTHS_OPEN))   \ -    X(EQ(')'),      on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE))  \ -    X(EQ('\''),     on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE))   \ -    X(EQ('.'),      on_dot(lexer))                                      \ -    X(EQ('"'),      on_quote(lexer))                                    \ -    X(FN(isspace),  on_generic_separator(lexer, TOKEN_TOKENS)) - -// X(token type, what to free, how to print on screen) -#define TOKEN_TYPES_INFO(X, token)                                      \ -    X(TOKEN_PARENTHS_OPEN,     NULL,          "(")                      \ -    X(TOKEN_PARENTHS_CLOSE,    NULL,          ")")                      \ -    X(TOKEN_SPECIAL_QUOTE,     NULL,          "'")                      \ -    X(TOKEN_SPECIAL_DOT,       NULL,          ".")                      \ -    X(TOKEN_LITERAL_STRING,    token->string, "'%s'", token->string)    \ -    X(TOKEN_LITERAL_NUM_INT,   NULL,          "'%ld'", token->num_int)  \ -    X(TOKEN_LITERAL_NUM_FLOAT, NULL,          "'%f'", token->num_float) \ -    X(TOKEN_SYMBOL,            token->symbol, "'%s'", token->symbol) - -#define IDENTIFY_IDENTIFIER_LIST(X)   \ -    X(try_str)                        \ -    X(try_int)                        \ -    X(try_float)                      \ -    X(try_symbol) +static int on_double_quote(lexer_t lexer);  #define EQ(ch) ch == -#define FN(f)  f + +#define SEPARATOR_CALLBACK_TBL(X, lexer)                            \ +/*  X(test,     what to execute if the test succeeds) */            \ +    X(EQ('('),  on_generic_separator(lexer, TOKEN_PARENTHS_OPEN))   \ +    X(EQ(')'),  on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE))  \ +    X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE))   \ +    X(EQ('"'),  on_double_quote(lexer))                             \ +    X(isspace,  on_generic_separator(lexer, TOKEN_TOKENS)) + +#define FN(fn, arg) "%s", fn(arg, buf, buf_sz) + +#define MANAGE_TOKEN_TBL(X, token)           \ +/*  X(type,  how to free,  how to print) */  \ +    X(TOKEN_PARENTHS_OPEN,   ;, "(")         \ +    X(TOKEN_PARENTHS_CLOSE,  ;, ")")         \ +    X(TOKEN_SPECIAL_QUOTE,   ;, "'")         \ +    X(TOKEN_VALUE, value_destroy(token->value), FN(value_string, token->value)) \ +    X(TOKEN_TOKENS,          ;, "")          \ + +// ---------- Exported Functions ---------- //  // makes an if-else chain to test the character -// agains the seperator callback table -#define CHECK_SEPERATOR_AND_CALLBACK(test_func, callback) \ +// agains the separator callback table +#define CHECK_SEPARATOR_AND_CALLBACK(test_func, callback) \      if(test_func(str[i])) {                               \          callback_ret = callback;                          \          if(callback_ret == 0) {                           \ @@ -77,7 +64,7 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)      for(size_t i = 0; i < len; i++)      { -        SEPARATOR_CALLBACK_TBL(CHECK_SEPERATOR_AND_CALLBACK, lexer) {} +        SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {}          if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated              err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz); @@ -93,49 +80,35 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)  lexer_t lexer_create(size_t tokens_cap)  { -    lexer_t lexer = malloc(sizeof(struct lexer)); -    if(!lexer) { -        err("malloc: %s", strerror(errno)); -        goto fail; -    } +    lexer_t lexer = xmalloc(sizeof(struct lexer)); -    lexer->tokens = calloc(tokens_cap, sizeof(struct token)); -    if(!lexer->tokens) { -        err("malloc %s", strerror(errno)); -        goto fail; -    } +    lexer->tokens_cap = tokens_cap; +    lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token));      for(size_t i = 0; i < tokens_cap; i++) { -        lexer->tokens[i].symbol = NULL; +        lexer->tokens[i].type = TOKEN_TOKENS;      } -    lexer->tokens_cap = tokens_cap; -    lexer->ntokens = 0; - -    memset(lexer->iden, 0, LEXER_IDEN_CAP); -    lexer->iden_sz = 0; - -    lexer->inside_string = 0; - +    lexer_reset(lexer);      return lexer; -fail: -    lexer_destroy(lexer); -    return NULL;  } -#define CASE_FREE_TOKEN(type, data, ...) \ -    case type: if(data != NULL) { free(data); } break; +#define CASE_FREE(type, free_func, ...) case type: free_func; break;  void lexer_destroy(lexer_t lexer)  {      if(!lexer) return;      if(lexer->tokens) { -        for(size_t i = 0; i < lexer->ntokens; i++) { +        for(size_t i = 0; i < lexer->ntokens; i++) +        {              struct token *token = &lexer->tokens[i]; -            switch(token->type) { -                TOKEN_TYPES_INFO(CASE_FREE_TOKEN, token) -            default: break; +             +            switch(lexer->tokens[i].type) { +                MANAGE_TOKEN_TBL(CASE_FREE, token); +            default: +                err("lexer_reset: Unknown token type given"); +                break;              }          }          free(lexer->tokens); @@ -144,18 +117,65 @@ void lexer_destroy(lexer_t lexer)      free(lexer);  } -// ------------------------------------------------- // +void lexer_reset(lexer_t lexer) +{ +    for(size_t i = 0; i < lexer->tokens_cap; i++) { +        struct token *token = &lexer->tokens[i]; + +        switch(token->type) { +            MANAGE_TOKEN_TBL(CASE_FREE, token); +        default: +            err("lexer_reset: Unknown token type given"); +            break; +        } + +        token->type = TOKEN_TOKENS; +        token->value = NULL; +    } + +    lexer->ntokens = 0; + +    memset(lexer->iden, 0, LEXER_IDEN_CAP); +    lexer->iden_sz = 0; + +    lexer->inside_string = 0; +} + +// print based on the given way to print +#define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break; + +void lexer_print_tokens(lexer_t lexer) +{ +    // for the printing (see MANAGE_TOKEN_TBL) +    char buf[LEXER_IDEN_CAP]; +    size_t buf_sz = LEXER_IDEN_CAP; + +    for(size_t i = 0; i < lexer->ntokens; i++) { +        struct token *token = &lexer->tokens[i]; + +        switch(token->type) { +            MANAGE_TOKEN_TBL(CASE_PRINT, token); +        default: +            err("lexer_print_tokens: Unknown token given"); +            return; +        } +    } +} + +// ---------- Callback Functions ----------- // -static int on_quote(lexer_t lexer) +static int on_double_quote(lexer_t lexer)  {      int ret = on_generic_separator(lexer, TOKEN_TOKENS); -    if(ret <= 0) { // it either failed or worked, both not inside a string -        lexer->inside_string = 1; +    if(ret < 0) {          return ret; +    } else if(ret == 0) { +        lexer->inside_string = 1; +        return 1;      } -    if(save_current_identifier(lexer)) { -        err("save_current_identifier: failed"); +    if(token_add_iden(lexer)) { +        err("token_add_iden: failed");          return -1;      } @@ -163,26 +183,20 @@ static int on_quote(lexer_t lexer)      return 0;  } -static int on_dot(lexer_t lexer) -{ -    if(lexer->iden_sz != 0) return 1; -    return on_generic_separator(lexer, TOKEN_SPECIAL_DOT); -} -  static int on_generic_separator(lexer_t lexer, enum token_enum type)  {      if(lexer->inside_string) {          return 1;      } -    if(save_current_identifier(lexer)) { -        err("save_current_identifier: failed"); +    if(token_add_iden(lexer)) { +        err("token_add_iden: failed");          return -1;      }      if(type != TOKEN_TOKENS) { -        if(save_empty_token(lexer, type) < 0) { -            err("save_empty_token: failed"); +        if(token_add(lexer, type) < 0) { +            err("token_add: failed");              return -1;          }      } @@ -190,147 +204,41 @@ static int on_generic_separator(lexer_t lexer, enum token_enum type)      return 0;  } -static int save_empty_token(lexer_t lexer, enum token_enum type) +// ---------- Token Functions ----------- // + +static int token_add(lexer_t lexer, enum token_enum type)  {      if(lexer->ntokens >= lexer->tokens_cap) {          err("tokens_cap of %ld has been reached", lexer->tokens_cap);          return -1;      } -    lexer->tokens[lexer->ntokens++].type = type; -    return lexer->ntokens - 1; +    lexer->tokens[lexer->ntokens].type = type; +    return lexer->ntokens++;  } -#define CHECK_IDEN(func)                        \ -    if((ret = func(lexer))) {                   \ -        if(ret < 0) {                           \ -            err(#func ": failed");              \ -            goto exit;                          \ -        }                                       \ -    } else - -static int save_current_identifier(lexer_t lexer) +static int token_add_iden(lexer_t lexer)  {      int ret = 1; +    if(!lexer->iden_sz) return 0; -    if(lexer->iden_sz != 0) { -        IDENTIFY_IDENTIFIER_LIST(CHECK_IDEN) {} +    int i = token_add(lexer, TOKEN_VALUE); +    if(i < 0) { +        err("token_add: failed"); +        goto exit;      } -    ret = 0; +    value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret); +    if(ret > 0) { +        value = value_create(VALUE_SYMBOL, lexer->iden, &ret); +    } else if(ret < 0) { +        err("value_create: failed"); +        goto exit; +    } +     +    lexer->tokens[i].value = value;  exit:      memset(lexer->iden, 0, lexer->iden_sz);      lexer->iden_sz = 0;      return ret;  } - - -// ------------------------------------------------- // - -static int try_str(lexer_t lexer) -{ -    if(!lexer->inside_string) return 0; - -    int i = save_empty_token(lexer, TOKEN_LITERAL_STRING); -    if(i < 0) { -        err("save_empty_token: failed"); -        return -1; -    } - -    lexer->tokens[i].string = malloc(lexer->iden_sz+1); -    if(!lexer->tokens[i].string) { -       err("malloc: %s", strerror(errno)); -       return -1; -    } - -    memcpy(lexer->tokens[i].string, lexer->iden, lexer->iden_sz+1); -    return 1; -} - -static int try_int(lexer_t lexer) -{ -    errno = ERANGE + 1; // set errno to not ERANGE - -    char *endptr; -    long num = strtol(lexer->iden, &endptr, 10); - -    if(*endptr != '\0') { // the whole string isn't a number -        return 0; -    } - -    if(errno == ERANGE) { -        warn("Given integer literal %s is outside the possible range", lexer->iden); -    } - -    int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_INT); -    if(i < 0) { -        err("save_empty_token: failed"); -        return -1; -    } - -    lexer->tokens[i].num_int = num; -    return 1; -} - -static int try_float(lexer_t lexer) -{ -    errno = ERANGE + 1; // set errno to not ERANGE - -    char *endptr; -    float num = strtof(lexer->iden, &endptr); - -    if(*endptr != '\0') { // the whole string isn't a number -        return 0; -    } - -    if(errno == ERANGE) { -        warn("Given float literal %s is outside the possible range", lexer->iden); -    } - -    int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_FLOAT); -    if(i < 0) { -        err("save_empty_token: failed"); -        return -1; -    } - -    lexer->tokens[i].num_float = num; -    return 1; -} - -static int try_symbol(lexer_t lexer) -{ -    int i = save_empty_token(lexer, TOKEN_SYMBOL); -    if(i < 0) { -        err("save_empty_token: failed"); -        return -1; -    } - -    lexer->tokens[i].symbol = malloc(lexer->iden_sz+1); -    if(!lexer->tokens[i].symbol) { -       err("malloc: %s", strerror(errno)); -       return -1; -    } - -    memcpy(lexer->tokens[i].symbol, lexer->iden, lexer->iden_sz+1); -    return 1; -} - -// ------------------------------------------------- // - -#ifdef DEBUG -#define CASE_PRINT(type, data, ...) case type: info("\t" __VA_ARGS__); break; - -void lexer_print_tokens(lexer_t lexer) -{ -    for(size_t i = 0; i < lexer->ntokens; i++) { -        struct token *token = &lexer->tokens[i]; - -        info("Token %zu: %d", i, token->type); - -        switch(token->type) { -            TOKEN_TYPES_INFO(CASE_PRINT, token); -        default: break; -        } -    } -} -#endif diff --git a/src/lexer.h b/src/lexer.h index 942be54..fc13f24 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -1,6 +1,8 @@  #ifndef LEXER_H  #define LEXER_H +#include "value.h" +  #ifndef LEXER_IDEN_CAP  #define LEXER_IDEN_CAP 512  #endif @@ -9,19 +11,12 @@ typedef struct lexer *lexer_t;  struct token {      enum token_enum { -        TOKEN_PARENTHS_OPEN,     TOKEN_PARENTHS_CLOSE, -        TOKEN_SPECIAL_DOT,       TOKEN_SPECIAL_QUOTE, -        TOKEN_LITERAL_NUM_INT,   TOKEN_LITERAL_STRING, -        TOKEN_LITERAL_NUM_FLOAT, TOKEN_SYMBOL, +        TOKEN_PARENTHS_OPEN, TOKEN_PARENTHS_CLOSE, +        TOKEN_SPECIAL_QUOTE, TOKEN_VALUE,          TOKEN_TOKENS // number of token types      } type; -    union { -        char *symbol; -        char *string; -        long num_int; -        float num_float -    }; +    value_t value;  };  struct lexer { @@ -37,19 +32,25 @@ struct lexer {  };  // allocate a lexer with a maximum number of tokens_cap tokens -// returns a lexer on success, NULL on fail +// returns a lexer on success and NULL on fail  lexer_t lexer_create(size_t tokens_cap); -// destroy a lexer +// deallocate a lexer  void lexer_destroy(lexer_t lexer); +// reset a lexer to its default state without destroying it (faster) +void lexer_reset(lexer_t lexer); + +// self explanatory +void lexer_print_tokens(lexer_t lexer); +  // turn the given non-null-terminated string str of lenght len  // into into tokens  // returns 0 on success  int lexer_tokenize(lexer_t lexer, char *str, size_t len); -#ifdef DEBUG -void lexer_print_tokens(lexer_t lexer); -#endif +// checks whether the lexer has finished (temp buffers like iden are empty) +// returns 1 on finished, 0 on not finished +int lexer_has_finished(lexer_t lexer);  #endif @@ -4,15 +4,17 @@  #include "common.h"  #include "lexer.h" -// #include "ast.h" -// #include "eval.h" +#include "parser.h" +#include "eval.h" + +// TODO: the lexer, parser, and eval functions should return -1 on fatal, and 1 on non fatal error  #define READ_BUF_CAP 512  #define DEFAULT_TOKENS_CAP 8192 // make it a command line arg  lexer_t lexer = NULL; -// ast_t root = NULL; -// eval_t evaluator = NULL; +parser_t parser = NULL; +// evaluator_t eval = NULL;  int main(void)  { @@ -26,13 +28,13 @@ int main(void)          goto fail;      } -    // tokenize input      FILE *fp = fopen(filename, "r");      if(!fp) {          err("fopen: %s: %s", filename, strerror(errno));          goto fail;      } +    // tokenize input      char buf[READ_BUF_CAP]; size_t bytes = 0;      while((bytes = fread(buf, sizeof(char), READ_BUF_CAP, fp))) {          if(lexer_tokenize(lexer, buf, bytes)) { @@ -42,36 +44,38 @@ int main(void)          if(bytes < READ_BUF_CAP) break;      } -    fclose(fp);      lexer_print_tokens(lexer); -    // -------------- - -//     ast = ast_create(); -//     if(!ast) { -//         err("ast_create: failed"); -//         goto fail; -//     } - -//     if(ast_parse_lexer(ast, lexer)) { -//         err("ast_parse_lexer: failed"); -//         goto fail; -//     } - -//     evaluator = evaluator_create(); -//     if(!evaluator) { -//         err("evaluator_create: failed"); -//         goto fail; -//     } - -//     if(evaluator_eval_ast(evaluator, ast)) { -//         err("evaluator_eval_ast: failed"); -//         goto fail; -//     } + +    fclose(fp); + +    parser = parser_create(); +    if(!parser) { +        err("parser_create: failed"); +        goto fail; +    } + +    if(parser_parse_lexer(parser, lexer)) { +        err("parser_parse_lexer: failed"); +        goto fail; +    } + +    parser_print_ast(parser); + +    // evaluator = eval_create(); +    // if(!evaluator) { +    //     err("eval_create: failed"); +    //     goto fail; +    // } + +    // if(eval_ast(eval, ast)) { +    //     err("eval_ast: failed"); +    //     goto fail; +    // }      ret = 0;  fail: -//     evaluator_destroy(eval); -//     ast_destroy(ast); +    // eval_destroy(eval); +    parser_destroy(parser);      lexer_destroy(lexer);      return ret;  } diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..c4e37bd --- /dev/null +++ b/src/parser.c @@ -0,0 +1,258 @@ +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "common.h" +#include "parser.h" + +// TODO: Do all the todos + +// adds another child of type type to the sexp sexp  +// returns the index of the added type and < 0 on fail +static size_t sexp_add(struct sexp *sexp, enum ast_type type); + +// self-explanatory +static void sexp_init(struct sexp *sexp); +static void sexp_print(struct sexp *sexp, int indent); +static void sexp_free(struct sexp *sexp); +static void ast_free(struct ast *ast); + +static void quote_stack_push(struct quote_node **head, struct sexp *cur_sexp); +static struct sexp *quote_stack_pop(struct quote_node **head, int peak); + +// returns 0 on success +static int on_paren(parser_t parser, int paren_type); // 0 is open, 1 is close +static int on_quote(parser_t parent); +static int on_value(parser_t parent, value_t value); + +#define TOKEN_CALLBACK_TBL(X, parser, token)        \ +/*  X(test,                 execute on succes) */   \ +    X(TOKEN_PARENTHS_OPEN,  on_paren(parser, 0))    \ +    X(TOKEN_PARENTHS_CLOSE, on_paren(parser, 1))    \ +    X(TOKEN_SPECIAL_QUOTE,  on_quote(parser))       \ +    X(TOKEN_VALUE,          on_value(parser, token->value)) + +#define FN(fn, arg) "%s", fn(arg, buf, buf_sz) + +#define MANAGE_AST_TBL(X, ast)                                            \ +    X(AST_SEXP,  sexp_free(&ast->sexp), "|")                              \ +    X(AST_VALUE, value_destroy(ast->value), FN(value_string, ast->value)) \ + +// ---------- Exported Functions ---------- // + +#define CASE_TYPE(type, callback)               \ +    case type:                                  \ +    if(callback) {                              \ +        err(#callback ": failed");              \ +        return -1;                              \ +    } break; + +int parser_parse_lexer(parser_t parser, lexer_t lexer) +{     +    for(size_t i = 0; i < lexer->ntokens; i++) { +        struct token *token = &lexer->tokens[i]; + +        switch(token->type) {             +            TOKEN_CALLBACK_TBL(CASE_TYPE, parser, token); +        default: +            err("parser_parse_lexer: Unknown token type given"); +            break; +        } +         +        if((token->type != TOKEN_SPECIAL_QUOTE) && +           (parser->cur_sexp == quote_stack_pop(&parser->quote_head, 1))) { +            if(on_paren(parser, 1)); +            quote_stack_pop(&parser->quote_head, 0); +        } +    } + +    if(&parser->root == parser->cur_sexp) { +        return 0; +    } else { +        return 1; +    }    +} + +parser_t parser_create() +{    +    parser_t parser = xmalloc(sizeof(struct parser)); + +#define VALUE_CREATE(str)                  \ +    value_create(VALUE_SYMBOL, str, &ret); \ +    if(ret) {                              \ +        err("value_create: failed");       \ +        return NULL;                       \ +    } + +    int ret = 0; +    parser->begin_symbol_value = VALUE_CREATE("begin"); +    parser->quote_symbol_value = VALUE_CREATE("quote"); + +    parser->root.nchildren = 0; +    parser->quote_head = NULL; +    parser_reset(parser); + +    return parser; +} + +void parser_destroy(parser_t parser) +{ +    if(!parser) return; + +    value_destroy(parser->begin_symbol_value); +    value_destroy(parser->quote_symbol_value); +     +    sexp_free(&parser->root); +    free(parser); +} + +void parser_reset(parser_t parser) +{ +    struct sexp *root = &parser->root; + +    for(size_t i = 0; i < root->nchildren; i++) { +        ast_free(&root->children[i]); +    } + +    sexp_init(root); +    size_t index = sexp_add(root, AST_VALUE); +     +    root->children[index].value = value_copy(parser->begin_symbol_value); +    parser->cur_sexp = &parser->root; + +    while(quote_stack_pop(&parser->quote_head, 0) != NULL); +} + + +void parser_print_ast(parser_t parser) +{ +    sexp_print(&parser->root, 0); +} + +// ---------- Callback Functions ---------- // + +static int on_paren(parser_t parser, int paren_type) +{     +    if(paren_type) { // !0 closing paren +        parser->cur_sexp = parser->cur_sexp->prev; +    } else { // 0 opening paren +        size_t index = sexp_add(parser->cur_sexp, AST_SEXP); + +        struct sexp *prev = parser->cur_sexp; +        struct sexp *new = &prev->children[index].sexp; +         +        sexp_init(new); +         +        parser->cur_sexp = new; +        parser->cur_sexp->prev = prev; +    } + +    return 0; +} + +static int on_quote(parser_t parser) +{ +    // new sexp +    on_paren(parser, 0); + +    // add symbol to the sexp +    on_value(parser, parser->quote_symbol_value); + +    // save the current quote sexp +    quote_stack_push(&parser->quote_head, parser->cur_sexp); +    return 0; +} + +static int on_value(parser_t parser, value_t value) +{     +    size_t index = sexp_add(parser->cur_sexp, AST_VALUE); + +    value_t copied = value_copy(value); + +    parser->cur_sexp->children[index].value = copied; +    return 0; +} + +// ---------- Parser Functions ---------- // + +static size_t sexp_add(struct sexp *sexp, enum ast_type type) +{    +    sexp->children = xrealloc(sexp->children, sizeof(struct ast) * (sexp->nchildren+1)); + +    sexp->children[sexp->nchildren].type = type; +    return sexp->nchildren++; +} + +static void sexp_init(struct sexp *sexp) +{ +    sexp->children = NULL; +    sexp->nchildren = 0; +    sexp->prev = NULL; +} + +static void sexp_print(struct sexp *sexp, int indent) +{ +    for(size_t i = 0; i < sexp->nchildren; i++) { +        struct ast *child = &sexp->children[i]; + +        if(child->type == AST_SEXP) { +            sexp_print(&child->sexp, indent + 2); +            continue; +        } + +        char buf[LEXER_IDEN_CAP]; +        size_t buf_sz = LEXER_IDEN_CAP; +         +        info("%d %s", indent, value_string(child->value, buf, buf_sz)); +    } +} + +static void sexp_free(struct sexp *sexp) +{ +    for(size_t i = 0; i < sexp->nchildren; i++){ +        ast_free(&sexp->children[i]); +    } +    free(sexp->children); +} + +#define CASE_FREE(type, free_func, print_func)  \ +    case type: free_func; return; + +static void ast_free(struct ast *ast) +{ +    if(!ast) return; + +    switch(ast->type) { +        MANAGE_AST_TBL(CASE_FREE, ast) +    default: +        err("ast_free: Unknown ast type given"); +        break; +    } +} + +static void quote_stack_push(struct quote_node **head, struct sexp *cur_sexp) +{ +    struct quote_node *node = xmalloc(sizeof(struct quote_node)); + +    node->prev = *head; +    node->cur_sexp = cur_sexp; +    *head = node; +} + +static struct sexp *quote_stack_pop(struct quote_node **head, int peak) +{ +    if(*head == NULL) { +        return NULL; +    } + +    struct sexp *sexp = (*head)->cur_sexp; +     +    if(!peak) { +        struct quote_node *prev = (*head)->prev; +        free(*head); +        *head = prev; +    } +     +    return sexp; +} + diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..8fc5d6c --- /dev/null +++ b/src/parser.h @@ -0,0 +1,60 @@ +#ifndef PARSER_H +#define PARSER_H + +#include "value.h" +#include "lexer.h" + +typedef struct parser *parser_t; + +struct ast { +    enum ast_type { +        AST_SEXP, +        AST_VALUE, +        AST_TYPES // number of types +    } type; + +    union { +        struct sexp { +            struct ast *children; +            size_t nchildren; + +            struct sexp *prev; +        } sexp; + +        value_t value; +    }; +}; + +struct parser { +    struct sexp root; +    struct sexp *cur_sexp; + +    struct quote_node { +        struct sexp *cur_sexp; +        struct quote_node *prev; +    } *quote_head; + +    value_t begin_symbol_value; +    value_t quote_symbol_value; +}; + +// allocate a parser +// returns a parser on success and NULL on fail +parser_t parser_create(); + +// deallocate a parser +void parser_destroy(parser_t parser); + +// reset a parser to its default state without destroying it +// returns 0 on success +void parser_reset(parser_t parser); + +// self explanatory +void parser_print_ast(parser_t parser); + +// turn the given lexer (which has already has tokens) into an ast +// returns 0 on success, > 0 when more tokens are needed, +//         and < 0 on a fatal error +int parser_parse_lexer(parser_t parser, lexer_t lexer); + +#endif diff --git a/src/value.c b/src/value.c new file mode 100644 index 0000000..289aa0f --- /dev/null +++ b/src/value.c @@ -0,0 +1,244 @@ +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "common.h" +#include "value.h" + +// TODO: add the other types + +// save data as a symbol +// return 0 on sucess, < 0 on error +static int symbol_save(char **symbol, char *data); +// same as value_string +static char *symbol_string(char *symbol, char *buf, size_t buf_sz); + +// try to save data as a literal +// returns 0 on sucess, > 0 on fail (data isnt a possible literal), +//         and < 0 on error +static int literal_save(struct literal *literal, char *data); +static void literal_free(struct literal *literal); +// same as value_string +static char *literal_string(struct literal *literal, char *buf, size_t buf_sz); + +// for a function that saves string data as a value +#define S_FN(fn, arg) fn(arg, (char *)data) +// for a function that saves a value as a string in buf[buf_sz] +#define P_FN(fn, arg) fn(arg, buf, buf_sz) + +#define MANAGE_VALUE_TBL(X, value)                                           \ +/*  X(type,           save function,                                         \ +      how to free,                    how to save to string) */              \ +    X(VALUE_SYMBOL,   S_FN(symbol_save, &value->symbol),                     \ +      free(value->symbol),            P_FN(symbol_string, value->symbol))    \ +    X(VALUE_LITERAL,  S_FN(literal_save, &value->literal),                   \ +      literal_free(&value->literal),  P_FN(literal_string, &value->literal)) \ +//  X(VALUE_PAIR, ...) +//  X(VALUE_LIST, ...) +//  X(VALUE_PROC, ...) + +// ------ Exported Functions Implementations ------- // +// ------------------------------------------------- // + +#define CASE_SAVE(type, save_func, free_func, string_func) \ +    case type: *ret = save_func; break; + +value_t value_create(enum value_type type, void *data, int *ret) +{ +    value_t value = xmalloc(sizeof(struct value)); + +    value->type = VALUE_TYPES; +    value->references = 1; + +    switch(type) { +        MANAGE_VALUE_TBL(CASE_SAVE, value); +    default: +        err("value_create: Unknown value type given"); +        *ret = -1; +        break; +    } + +    if(!*ret) { +        value->type = type; +        return value; +    } + +    value_destroy(value); +    return NULL; +} + +value_t value_copy(value_t value) +{ +    value->references++; +    return value; +} + +#define CASE_FREE(type, save_func, free_func, string_func) \ +    case type: free_func; break; + +void value_destroy(value_t value) +{ +    if(!value) return; +    if(--value->references > 0) { +        return; +    } + + +    switch(value->type) { +        MANAGE_VALUE_TBL(CASE_FREE, value); +    default: break; +    } + +    free(value); +} + +#define CASE_STRING(type, save_func, free_func, string_func) \ +    case type: return string_func; + +char *value_string(value_t value, char *buf, size_t buf_sz) +{ +    switch(value->type) { +        MANAGE_VALUE_TBL(CASE_STRING, value); +    default: return NULL; +    } +} + +// ---------------- Symbol Functions --------------- // + +static int symbol_save(char **symbol, char *data) +{ +    size_t len = strlen(data) + 1; + +    *symbol = xmalloc(len); + +    memcpy(*symbol, data, len); +    return 0; +} + +static char *symbol_string(char *symbol, char *buf, size_t buf_sz) +{ +    size_t len = strlen(symbol) + 1; +    if(len> buf_sz) { +        return NULL; +    } + +    memcpy(buf, symbol, len); +    return buf; +} + +// --------------- Literal Functions --------------- // + +// write a value in a printf format to buf[buf_sz] +#define EX(...) (((size_t)snprintf(buf, buf_sz, __VA_ARGS__) > buf_sz) ? NULL : buf) + +#define MANAGE_LITERAL_TBL(X, literal)                                         \ +/*  X(type,               how to free             how to print) */             \ +    X(LITERAL_STRING,     free(literal->string),  EX("%s",  literal->string))  \ +    X(LITERAL_NUM_INT,    ;,  EX("%ld", literal->num_int))                     \ +    X(LITERAL_NUM_FLOAT,  ;,  EX("%f",  literal->num_float)) + +// try to convert the data to a literal +// returns 0 on success, > 0 on fail (data isnt a literal), +//         and < 0 on error +static int literal_try_string(struct literal *literal, char *data); +static int literal_try_int   (struct literal *literal, char *data); +static int literal_try_float (struct literal *literal, char *data); + +// list of the try_type functions (order matters) +typedef int (*try_func)(struct literal *, char *); +try_func literal_try_funcs[] = { +    literal_try_string, +    literal_try_int, +    literal_try_float, +}; + +static int literal_save(struct literal *literal, char *data) +{ +    int ret = 0; + +    for(size_t i = 0; i < sizeof(literal_try_funcs)/sizeof(try_func); i++) { +        // <= 0 means that it either failed fatally, or succeeded +        if((ret = literal_try_funcs[i](literal, data)) <= 0) break; +    } + +    return ret; +} + +#define CASE_LITERAL_FREE(type, free_func, string_func) \ +    case type: free_func; break; + +static void literal_free(struct literal *literal) +{ +    switch(literal->type) { +        MANAGE_LITERAL_TBL(CASE_LITERAL_FREE, literal); +    default: break; +    } +} + +#define CASE_LITERAL_STRING(type, free_func, string_func)    \ +    case type: return string_func; + +static char *literal_string(struct literal *literal, char *buf, size_t buf_sz) +{ +    switch(literal->type) { +        MANAGE_LITERAL_TBL(CASE_LITERAL_STRING, literal); +    default: return NULL; break; +    } +} + +// Literal try_type functions + +static int literal_try_string(struct literal *literal, char *data) +{ +    if(data[0] != '"') return 1; + +    literal->type = LITERAL_STRING; + +    size_t len = strlen(data); // no +1 needed since first character is discarded +    literal->string = xmalloc(len); + +    memcpy(literal->string, data + 1, len); +    return 0; +} + +static int literal_try_int(struct literal *literal, char *data) +{ +    errno = ERANGE + 1; // set errno to not ERANGE + +    char *endptr; +    long num = strtol(data, &endptr, 10); + +    if(*endptr != '\0') { // the whole string isn't a number +        return 1; +    } + +    if(errno == ERANGE) { +        warn("Given integer literal %s is outside the possible range", data); +    } + +    literal->type = LITERAL_NUM_INT; +    literal->num_int = num; + +    return 0; +} + +static int literal_try_float (struct literal *literal, char *data) +{ +    errno = ERANGE + 1; // set errno to not ERANGE + +    char *endptr; +    float num = strtof(data, &endptr); + +    if(*endptr != '\0') { // the whole string isn't a number +        return 1; +    } + +    if(errno == ERANGE) { +        warn("Given float literal %s is outside the possible range", data); +    } + +    literal->type = LITERAL_NUM_FLOAT; +    literal->num_float = num; + +    return 0; +} diff --git a/src/value.h b/src/value.h new file mode 100644 index 0000000..60d2c43 --- /dev/null +++ b/src/value.h @@ -0,0 +1,62 @@ +#ifndef LITERAL_H +#define LITERAL_H + +typedef struct value *value_t; + +struct literal { +    enum literal_type { +        LITERAL_STRING, +        LITERAL_NUM_INT, +        LITERAL_NUM_FLOAT +    } type; + +    union { +        char *string; +        long num_int; +        float num_float; +    }; +}; + +// struct pair { +// }; +// struct proc { +// }; + +struct value { +    enum value_type { +        VALUE_SYMBOL, +        VALUE_LITERAL, +        VALUE_PAIR, +        VALUE_LIST, +        VALUE_PROC, +        VALUE_TYPES +    } type; + +    int references; + +    union { +        char *symbol; +        struct literal literal; +        // struct pair pair; +        // struct pari list; +        // struct proc_t proc; +    }; +}; + +// try to save data as a literal +// returns a value_t on success and NULL on fail +// ret is set to: 0 on sucess, > 0 on fail (data isnt this type), +//                and < 0 on error +value_t value_create(enum value_type type, void *data, int *ret); + +// returns a value_t on success +value_t value_copy(value_t value); + +// destroys a value +void value_destroy(value_t value); + +// copy a string version of the value the buf[buf_sz] +// returns buf on success +char *value_string(value_t value, char *buf, size_t buf_sz); + +#endif | 
