diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common.h | 1 | ||||
| -rw-r--r-- | src/eval.h | 10 | ||||
| -rw-r--r-- | src/lexer.c | 126 | ||||
| -rw-r--r-- | src/lexer.h | 29 | ||||
| -rw-r--r-- | src/main.c | 48 | ||||
| -rw-r--r-- | src/parser.c | 90 | ||||
| -rw-r--r-- | src/parser.h | 14 | ||||
| -rw-r--r-- | src/value.c | 2 | 
8 files changed, 167 insertions, 153 deletions
| diff --git a/src/common.h b/src/common.h index f67af22..935f79a 100644 --- a/src/common.h +++ b/src/common.h @@ -3,6 +3,7 @@  #include <stdio.h>  #include <stdlib.h> +#include <stdbool.h>  #define __RED__    "\033[0;31m"  #define __GREEN__  "\033[0;32m" @@ -3,14 +3,16 @@  #include "parser.h" -typedef struct eval *evaluator_t; +typedef struct eval *eval_t;  struct eval { +    // symtbl_t root; +    // symtbl_t *cur;  };  // TODO: add options for the evaluation -evaluator_t eval_create(); -void eval_destroy(evaluator_t evaluator); -int eval_ast(evaluator_t evaluator, parser_t ast); +eval_t eval_create(); +void eval_destroy(eval_t eval); +int eval_ast(eval_t eval, parser_t ast);  #endif diff --git a/src/lexer.c b/src/lexer.c index 9659bb4..d71fd2f 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -11,27 +11,27 @@  // saves a token with no data  // returns the index of the saved token; < 0 on fail -static int token_add(lexer_t lexer, enum token_enum type); +static int token_add(toklist_t *tokens, enum token_enum type); -// saves a token with the current identifier (lexer->iden) +// saves a token with an identifier (lexer -> iden)  // returns 0 on success -static int token_add_iden(lexer_t lexer); +static int token_add_iden(toklist_t *tokens, char *iden, size_t *iden_sz);  // used for tokens that separate things  //   if type is TOKEN_TOKENS, then no empty token will be saved  // returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden) -static int on_generic_separator(lexer_t lexer, enum token_enum type); -static int on_double_quote(lexer_t lexer); +static int on_generic_separator(lexer_t lexer, toklist_t *tokens, enum token_enum type); +static int on_double_quote(lexer_t lexer, toklist_t *tokens);  #define EQ(ch) ch == -#define SEPARATOR_CALLBACK_TBL(X, lexer)                            \ +#define SEPARATOR_CALLBACK_TBL(X, ...)                              \  /*  X(test,     what to execute if the test succeeds) */            \ -    X(EQ('('),  on_generic_separator(lexer, TOKEN_PARENTHS_OPEN))   \ -    X(EQ(')'),  on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE))  \ -    X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE))   \ -    X(EQ('"'),  on_double_quote(lexer))                             \ -    X(isspace,  on_generic_separator(lexer, TOKEN_TOKENS)) +    X(EQ('('),  on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_OPEN);  lexer->scope_depth++)  \ +    X(EQ(')'),  on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_CLOSE); lexer->scope_depth--) \ +    X(EQ('\''), on_generic_separator(__VA_ARGS__, TOKEN_SPECIAL_QUOTE)) \ +    X(EQ('"'),  on_double_quote(__VA_ARGS__))               \ +    X(isspace,  on_generic_separator(__VA_ARGS__, TOKEN_TOKENS))  #define FN(fn, arg) "%s", fn(arg, buf, buf_sz) @@ -58,13 +58,13 @@ static int on_double_quote(lexer_t lexer);          }                                                 \      } else -int lexer_tokenize(lexer_t lexer, char *str, size_t len) -{ +int lexer_tokenize(lexer_t lexer, toklist_t *toklist, char *str, size_t len) +{          int callback_ret = 0;      for(size_t i = 0; i < len; i++)      { -        SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {} +        SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer, toklist);          if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated              err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz); @@ -78,17 +78,9 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)      return 0;  } -lexer_t lexer_create(size_t tokens_cap) +lexer_t lexer_create()  {      lexer_t lexer = xmalloc(sizeof(struct lexer)); - -    lexer->tokens_cap = tokens_cap; -    lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token)); - -    for(size_t i = 0; i < tokens_cap; i++) { -        lexer->tokens[i].type = TOKEN_TOKENS; -    } -      lexer_reset(lexer);      return lexer;  } @@ -98,29 +90,22 @@ lexer_t lexer_create(size_t tokens_cap)  void lexer_destroy(lexer_t lexer)  {      if(!lexer) return; - -    if(lexer->tokens) { -        for(size_t i = 0; i < lexer->ntokens; i++) -        { -            struct token *token = &lexer->tokens[i]; -             -            switch(lexer->tokens[i].type) { -                MANAGE_TOKEN_TBL(CASE_FREE, token); -            default: -                err("lexer_reset: Unknown token type given"); -                break; -            } -        } -        free(lexer->tokens); -    } -      free(lexer);  }  void lexer_reset(lexer_t lexer)  { -    for(size_t i = 0; i < lexer->tokens_cap; i++) { -        struct token *token = &lexer->tokens[i]; +    memset(lexer->iden, 0, LEXER_IDEN_CAP); +    lexer->iden_sz = 0; + +    lexer->inside_string = false; +    lexer->scope_depth = 0; +} + +void toklist_reset(toklist_t *toklist) +{     +    for(size_t i = 0; i < toklist->ntokens; i++) { +        struct token *token = &toklist->tokens[i];          switch(token->type) {              MANAGE_TOKEN_TBL(CASE_FREE, token); @@ -132,26 +117,21 @@ void lexer_reset(lexer_t lexer)          token->type = TOKEN_TOKENS;          token->value = NULL;      } - -    lexer->ntokens = 0; - -    memset(lexer->iden, 0, LEXER_IDEN_CAP); -    lexer->iden_sz = 0; - -    lexer->inside_string = 0; +     +    toklist->ntokens = 0;  }  // print based on the given way to print  #define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break; -void lexer_print_tokens(lexer_t lexer) +void toklist_print(toklist_t *toklist)  {      // for the printing (see MANAGE_TOKEN_TBL)      char buf[LEXER_IDEN_CAP];      size_t buf_sz = LEXER_IDEN_CAP; -    for(size_t i = 0; i < lexer->ntokens; i++) { -        struct token *token = &lexer->tokens[i]; +    for(size_t i = 0; i < toklist->ntokens; i++) { +        struct token *token = &toklist->tokens[i];          switch(token->type) {              MANAGE_TOKEN_TBL(CASE_PRINT, token); @@ -162,11 +142,17 @@ void lexer_print_tokens(lexer_t lexer)      }  } +bool lexer_has_finished(lexer_t lexer) +{ +    if(!lexer->inside_string && lexer->scope_depth == 0) return true; +    return false; +} +  // ---------- Callback Functions ----------- // -static int on_double_quote(lexer_t lexer) +static int on_double_quote(lexer_t lexer, toklist_t *toklist)  { -    int ret = on_generic_separator(lexer, TOKEN_TOKENS); +    int ret = on_generic_separator(lexer, toklist, TOKEN_TOKENS);      if(ret < 0) {          return ret;      } else if(ret == 0) { @@ -174,7 +160,7 @@ static int on_double_quote(lexer_t lexer)          return 1;      } -    if(token_add_iden(lexer)) { +    if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) {          err("token_add_iden: failed");          return -1;      } @@ -183,19 +169,19 @@ static int on_double_quote(lexer_t lexer)      return 0;  } -static int on_generic_separator(lexer_t lexer, enum token_enum type) +static int on_generic_separator(lexer_t lexer, toklist_t *toklist, enum token_enum type)  {      if(lexer->inside_string) {          return 1;      } -    if(token_add_iden(lexer)) { +    if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) {          err("token_add_iden: failed");          return -1;      }      if(type != TOKEN_TOKENS) { -        if(token_add(lexer, type) < 0) { +        if(token_add(toklist, type) < 0) {              err("token_add: failed");              return -1;          } @@ -206,39 +192,39 @@ static int on_generic_separator(lexer_t lexer, enum token_enum type)  // ---------- Token Functions ----------- // -static int token_add(lexer_t lexer, enum token_enum type) +static int token_add(toklist_t *toklist, enum token_enum type)  { -    if(lexer->ntokens >= lexer->tokens_cap) { -        err("tokens_cap of %ld has been reached", lexer->tokens_cap); +    if(toklist->ntokens >= LEXER_TOK_CAP) { +        err("tokens_cap of %ld has been reached", toklist->ntokens);          return -1;      } -    lexer->tokens[lexer->ntokens].type = type; -    return lexer->ntokens++; +    toklist->tokens[toklist->ntokens].type = type; +    return toklist->ntokens++;  } -static int token_add_iden(lexer_t lexer) +static int token_add_iden(toklist_t *toklist, char *iden, size_t *iden_sz)  {      int ret = 1; -    if(!lexer->iden_sz) return 0; +    if(*iden_sz == 0) return 0; -    int i = token_add(lexer, TOKEN_VALUE); +    int i = token_add(toklist, TOKEN_VALUE);      if(i < 0) {          err("token_add: failed");          goto exit;      } -    value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret); +    value_t value = value_create(VALUE_LITERAL, iden, &ret);      if(ret > 0) { -        value = value_create(VALUE_SYMBOL, lexer->iden, &ret); +        value = value_create(VALUE_SYMBOL, iden, &ret);      } else if(ret < 0) {          err("value_create: failed");          goto exit;      } -    lexer->tokens[i].value = value; +    toklist->tokens[i].value = value;  exit: -    memset(lexer->iden, 0, lexer->iden_sz); -    lexer->iden_sz = 0; +    memset(iden, 0, *iden_sz); +    *iden_sz = 0;      return ret;  } diff --git a/src/lexer.h b/src/lexer.h index fc13f24..e928430 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -6,8 +6,12 @@  #ifndef LEXER_IDEN_CAP  #define LEXER_IDEN_CAP 512  #endif +#ifndef LEXER_TOK_CAP +#define LEXER_TOK_CAP 8192 +#endif  typedef struct lexer *lexer_t; +typedef struct toklist toklist_t;  struct token {      enum token_enum { @@ -19,38 +23,39 @@ struct token {      value_t value;  }; -struct lexer { -    struct token *tokens; -    size_t tokens_cap; +struct toklist { +    struct token tokens[LEXER_TOK_CAP];      size_t ntokens; +}; -    // identifier -    char iden[LEXER_IDEN_CAP]; +struct lexer { +    char iden[LEXER_IDEN_CAP]; // identifier      size_t iden_sz; -    int inside_string; +    bool inside_string; +    int  scope_depth;  }; -// allocate a lexer with a maximum number of tokens_cap tokens  // returns a lexer on success and NULL on fail -lexer_t lexer_create(size_t tokens_cap); +lexer_t lexer_create();  // deallocate a lexer  void lexer_destroy(lexer_t lexer); -// reset a lexer to its default state without destroying it (faster) +// reset to its default state without destroying it  void lexer_reset(lexer_t lexer); +void toklist_reset(toklist_t *toklist);  // self explanatory -void lexer_print_tokens(lexer_t lexer); +void toklist_print(toklist_t *toklist);  // turn the given non-null-terminated string str of lenght len  // into into tokens  // returns 0 on success -int lexer_tokenize(lexer_t lexer, char *str, size_t len); +int lexer_tokenize(lexer_t lexer, toklist_t *toklist, char *str, size_t len);  // checks whether the lexer has finished (temp buffers like iden are empty)  // returns 1 on finished, 0 on not finished -int lexer_has_finished(lexer_t lexer); +bool lexer_has_finished(lexer_t lexer);  #endif @@ -10,59 +10,71 @@  // TODO: the lexer, parser, and eval functions should return -1 on fatal, and 1 on non fatal error  #define READ_BUF_CAP 512 -#define DEFAULT_TOKENS_CAP 8192 // make it a command line arg  lexer_t lexer = NULL;  parser_t parser = NULL; -// evaluator_t eval = NULL; +// eval_t eval = NULL;  int main(void)  {      int ret = 1; +    toklist_t tokens = {0}; +    ast_t ast_root = {0}; +          char *filename = "files/test1.lisp"; - -    lexer = lexer_create(DEFAULT_TOKENS_CAP); -    if(!lexer) { -        err("lexer_create: failed"); -        goto fail; -    } -      FILE *fp = fopen(filename, "r");      if(!fp) {          err("fopen: %s: %s", filename, strerror(errno));          goto fail;      } -    // tokenize input +    toklist_reset(&tokens); +     +    lexer = lexer_create(); +    if(!lexer) { +        err("lexer_create: failed"); +        goto fail; +    } +          char buf[READ_BUF_CAP]; size_t bytes = 0;      while((bytes = fread(buf, sizeof(char), READ_BUF_CAP, fp))) { -        if(lexer_tokenize(lexer, buf, bytes)) { -            fclose(fp); goto fail; +        if(lexer_tokenize(lexer, &tokens, buf, bytes)) { +            toklist_reset(&tokens); +            fclose(fp); +            goto fail;          }          if(bytes < READ_BUF_CAP) break;      } -    lexer_print_tokens(lexer); +    if(!lexer_has_finished(lexer)) { +        err("tokenization is not complete"); +    }      fclose(fp); -    parser = parser_create(); +    ast_reset(&ast_root); +     +    parser = parser_create();          if(!parser) {          err("parser_create: failed");          goto fail;      } -    if(parser_parse_lexer(parser, lexer)) { +    if(parser_parse_toklist(parser, &tokens, &ast_root)) {          err("parser_parse_lexer: failed");          goto fail;      } -    parser_print_ast(parser); +    toklist_print(&tokens); +    toklist_reset(&tokens); +     +    ast_print(&ast_root); +    ast_reset(&ast_root); -    // evaluator = eval_create(); -    // if(!evaluator) { +    // eval = eval_create(); +    // if(!eval) {      //     err("eval_create: failed");      //     goto fail;      // } diff --git a/src/parser.c b/src/parser.c index 654cda4..1458018 100644 --- a/src/parser.c +++ b/src/parser.c @@ -21,15 +21,16 @@ static void quote_stack_push(struct quote_node **head, struct sexp *cur_sexp);  static struct sexp *quote_stack_pop(struct quote_node **head, int peek);  // returns 0 on success -static int on_paren(parser_t parser, int paren_type); // 0 is open, 1 is close +enum paren_type { P_OPEN, P_CLOSE }; +static int on_paren(parser_t parser, enum paren_type type); // 0 is open, 1 is close  static int on_quote(parser_t parent);  static int on_value(parser_t parent, value_t value); -#define TOKEN_CALLBACK_TBL(X, parser, token)        \ -/*  X(test,                 execute on succes) */   \ -    X(TOKEN_PARENTHS_OPEN,  on_paren(parser, 0))    \ -    X(TOKEN_PARENTHS_CLOSE, on_paren(parser, 1))    \ -    X(TOKEN_SPECIAL_QUOTE,  on_quote(parser))       \ +#define TOKEN_CALLBACK_TBL(X, parser, token)           \ +/*  X(test,                 execute on succes) */      \ +    X(TOKEN_PARENTHS_OPEN,  on_paren(parser, P_OPEN))  \ +    X(TOKEN_PARENTHS_CLOSE, on_paren(parser, P_CLOSE)) \ +    X(TOKEN_SPECIAL_QUOTE,  on_quote(parser))          \      X(TOKEN_VALUE,          on_value(parser, token->value))  #define FN(fn, arg) "%s", fn(arg, buf, buf_sz) @@ -47,30 +48,38 @@ static int on_value(parser_t parent, value_t value);          return -1;                              \      } break; -int parser_parse_lexer(parser_t parser, lexer_t lexer) -{     -    for(size_t i = 0; i < lexer->ntokens; i++) { -        struct token *token = &lexer->tokens[i]; +int parser_parse_toklist(parser_t parser, toklist_t *toklist, ast_t *ast) +{ +    if(parser->cur_sexp == NULL) { +        size_t index = sexp_add(&ast->sexp, AST_VALUE); +        ast->sexp.children[index].value = value_copy(parser->begin_symbol_value); +         +        parser->cur_sexp = &ast->sexp; +    } +     +     +    for(size_t i = 0; i < toklist->ntokens; i++) { +        struct token *token = &toklist->tokens[i]; -        switch(token->type) {             +        switch(token->type) {              TOKEN_CALLBACK_TBL(CASE_TYPE, parser, token);          default:              err("parser_parse_lexer: Unknown token type given");              break;          } -        if((token->type != TOKEN_SPECIAL_QUOTE) && -           (parser->cur_sexp == quote_stack_pop(&parser->quote_head, 1))) { -            if(on_paren(parser, 1)); -            quote_stack_pop(&parser->quote_head, 0); -        } +        if(token->type != TOKEN_SPECIAL_QUOTE)  +           while(parser->cur_sexp == quote_stack_pop(&parser->quote_head, 1)) { +               on_paren(parser, P_CLOSE); +               quote_stack_pop(&parser->quote_head, 0); +           }      } -    if(&parser->root == parser->cur_sexp) { +    if(&ast->sexp == parser->cur_sexp) {          return 0;      } else {          return 1; -    }    +    }  }  parser_t parser_create() @@ -88,7 +97,6 @@ parser_t parser_create()      parser->begin_symbol_value = VALUE_CREATE("begin");      parser->quote_symbol_value = VALUE_CREATE("quote"); -    parser->root.nchildren = 0;      parser->quote_head = NULL;      parser_reset(parser); @@ -102,40 +110,35 @@ void parser_destroy(parser_t parser)      value_destroy(parser->begin_symbol_value);      value_destroy(parser->quote_symbol_value); -    sexp_free(&parser->root); +    parser_reset(parser);      free(parser);  }  void parser_reset(parser_t parser)  { -    struct sexp *root = &parser->root; - -    for(size_t i = 0; i < root->nchildren; i++) { -        ast_free(&root->children[i]); -    } - -    sexp_init(root); -    size_t index = sexp_add(root, AST_VALUE); -     -    root->children[index].value = value_copy(parser->begin_symbol_value); -    parser->cur_sexp = &parser->root; - +    parser->cur_sexp = NULL;      while(quote_stack_pop(&parser->quote_head, 0) != NULL);  } +void ast_reset(ast_t *ast) +{ +    sexp_free(&ast->sexp); +    sexp_init(&ast->sexp); +} + -void parser_print_ast(parser_t parser) +void ast_print(ast_t *ast)  { -    sexp_print(&parser->root, 0); +    sexp_print(&ast->sexp, 0);  }  // ---------- Callback Functions ---------- // -static int on_paren(parser_t parser, int paren_type) +static int on_paren(parser_t parser, enum paren_type type)  {     -    if(paren_type) { // !0 closing paren +    if(type == P_CLOSE) {          parser->cur_sexp = parser->cur_sexp->prev; -    } else { // 0 opening paren +    } else if(type == P_OPEN) {          size_t index = sexp_add(parser->cur_sexp, AST_SEXP);          struct sexp *prev = parser->cur_sexp; @@ -153,7 +156,7 @@ static int on_paren(parser_t parser, int paren_type)  static int on_quote(parser_t parser)  {      // new sexp -    on_paren(parser, 0); +    on_paren(parser, P_OPEN);      // add symbol to the sexp      on_value(parser, parser->quote_symbol_value); @@ -192,6 +195,8 @@ static void sexp_init(struct sexp *sexp)  static void sexp_print(struct sexp *sexp, int indent)  { +    // (void)indent; +    // printf("( ");      for(size_t i = 0; i < sexp->nchildren; i++) {          struct ast *child = &sexp->children[i]; @@ -202,9 +207,11 @@ static void sexp_print(struct sexp *sexp, int indent)          char buf[LEXER_IDEN_CAP];          size_t buf_sz = LEXER_IDEN_CAP; -         -        info("%d %s", indent, value_string(child->value, buf, buf_sz)); + +        for(int i = 0; i < indent; i++) printf(" "); +        printf("%s\n", value_string(child->value, buf, buf_sz));      } +    // printf(")");  }  static void sexp_free(struct sexp *sexp) @@ -212,7 +219,8 @@ static void sexp_free(struct sexp *sexp)      for(size_t i = 0; i < sexp->nchildren; i++){          ast_free(&sexp->children[i]);      } -    free(sexp->children); +     +    if(sexp->children) free(sexp->children);  }  #define CASE_FREE(type, free_func, print_func)  \ diff --git a/src/parser.h b/src/parser.h index 8fc5d6c..9193ca1 100644 --- a/src/parser.h +++ b/src/parser.h @@ -5,6 +5,7 @@  #include "lexer.h"  typedef struct parser *parser_t; +typedef struct ast ast_t;  struct ast {      enum ast_type { @@ -26,7 +27,6 @@ struct ast {  };  struct parser { -    struct sexp root;      struct sexp *cur_sexp;      struct quote_node { @@ -45,16 +45,16 @@ parser_t parser_create();  // deallocate a parser  void parser_destroy(parser_t parser); -// reset a parser to its default state without destroying it +// reset to its default state without destroying it  // returns 0 on success  void parser_reset(parser_t parser); +void ast_reset(ast_t *ast_root);  // self explanatory -void parser_print_ast(parser_t parser); +void ast_print(ast_t *ast_root); -// turn the given lexer (which has already has tokens) into an ast -// returns 0 on success, > 0 when more tokens are needed, -//         and < 0 on a fatal error -int parser_parse_lexer(parser_t parser, lexer_t lexer); +// turn the given toklist into an ast +// returns 0 on success, and < 0 on a fatal error +int parser_parse_toklist(parser_t parser, toklist_t *tokens, ast_t *ast);  #endif diff --git a/src/value.c b/src/value.c index 289aa0f..e2b9b32 100644 --- a/src/value.c +++ b/src/value.c @@ -133,7 +133,7 @@ static char *symbol_string(char *symbol, char *buf, size_t buf_sz)  #define MANAGE_LITERAL_TBL(X, literal)                                         \  /*  X(type,               how to free             how to print) */             \ -    X(LITERAL_STRING,     free(literal->string),  EX("%s",  literal->string))  \ +    X(LITERAL_STRING,     free(literal->string),  EX("\"%s\"",  literal->string)) \      X(LITERAL_NUM_INT,    ;,  EX("%ld", literal->num_int))                     \      X(LITERAL_NUM_FLOAT,  ;,  EX("%f",  literal->num_float)) | 
