#include #include #include #include #include "common.h" #include "lexer.h" #include "value.h" // TODO: handle escaping // saves a token with no data // returns the index of the saved token; < 0 on fail static int token_add(lexer_t lexer, enum token_enum type); // saves a token with the current identifier (lexer->iden) // returns 0 on success static int token_add_iden(lexer_t lexer); // used for tokens that separate things // if type is TOKEN_TOKENS, then no empty token will be saved // returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden) static int on_generic_separator(lexer_t lexer, enum token_enum type); static int on_double_quote(lexer_t lexer); #define EQ(ch) ch == #define SEPARATOR_CALLBACK_TBL(X, lexer) \ /* X(test, what to execute if the test succeeds) */ \ X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \ X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \ X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \ X(EQ('"'), on_double_quote(lexer)) \ X(isspace, on_generic_separator(lexer, TOKEN_TOKENS)) #define FN(fn, arg) "%s", fn(arg, buf, buf_sz) #define MANAGE_TOKEN_TBL(X, token) \ /* X(type, how to free, how to print) */ \ X(TOKEN_PARENTHS_OPEN, ;, "(") \ X(TOKEN_PARENTHS_CLOSE, ;, ")") \ X(TOKEN_SPECIAL_QUOTE, ;, "'") \ X(TOKEN_VALUE, value_destroy(token->value), FN(value_string, token->value)) \ X(TOKEN_TOKENS, ;, "") \ // ---------- Exported Functions ---------- // // makes an if-else chain to test the character // agains the separator callback table #define CHECK_SEPARATOR_AND_CALLBACK(test_func, callback) \ if(test_func(str[i])) { \ callback_ret = callback; \ if(callback_ret == 0) { \ continue; \ } else if(callback_ret < 0) { \ err(#callback ": failed"); \ return 1; \ } \ } else int lexer_tokenize(lexer_t lexer, char *str, size_t len) { int callback_ret = 0; for(size_t i = 0; i < len; i++) { SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {} if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz); return 1; } // add charater to identifier lexer->iden[lexer->iden_sz++] = str[i]; } return 0; } lexer_t lexer_create(size_t tokens_cap) { lexer_t lexer = xmalloc(sizeof(struct lexer)); lexer->tokens_cap = tokens_cap; lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token)); for(size_t i = 0; i < tokens_cap; i++) { lexer->tokens[i].type = TOKEN_TOKENS; } lexer_reset(lexer); return lexer; } #define CASE_FREE(type, free_func, ...) case type: free_func; break; void lexer_destroy(lexer_t lexer) { if(!lexer) return; if(lexer->tokens) { for(size_t i = 0; i < lexer->ntokens; i++) { struct token *token = &lexer->tokens[i]; switch(lexer->tokens[i].type) { MANAGE_TOKEN_TBL(CASE_FREE, token); default: err("lexer_reset: Unknown token type given"); break; } } free(lexer->tokens); } free(lexer); } void lexer_reset(lexer_t lexer) { for(size_t i = 0; i < lexer->tokens_cap; i++) { struct token *token = &lexer->tokens[i]; switch(token->type) { MANAGE_TOKEN_TBL(CASE_FREE, token); default: err("lexer_reset: Unknown token type given"); break; } token->type = TOKEN_TOKENS; token->value = NULL; } lexer->ntokens = 0; memset(lexer->iden, 0, LEXER_IDEN_CAP); lexer->iden_sz = 0; lexer->inside_string = 0; } // print based on the given way to print #define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break; void lexer_print_tokens(lexer_t lexer) { // for the printing (see MANAGE_TOKEN_TBL) char buf[LEXER_IDEN_CAP]; size_t buf_sz = LEXER_IDEN_CAP; for(size_t i = 0; i < lexer->ntokens; i++) { struct token *token = &lexer->tokens[i]; switch(token->type) { MANAGE_TOKEN_TBL(CASE_PRINT, token); default: err("lexer_print_tokens: Unknown token given"); return; } } } // ---------- Callback Functions ----------- // static int on_double_quote(lexer_t lexer) { int ret = on_generic_separator(lexer, TOKEN_TOKENS); if(ret < 0) { return ret; } else if(ret == 0) { lexer->inside_string = 1; return 1; } if(token_add_iden(lexer)) { err("token_add_iden: failed"); return -1; } lexer->inside_string = 0; return 0; } static int on_generic_separator(lexer_t lexer, enum token_enum type) { if(lexer->inside_string) { return 1; } if(token_add_iden(lexer)) { err("token_add_iden: failed"); return -1; } if(type != TOKEN_TOKENS) { if(token_add(lexer, type) < 0) { err("token_add: failed"); return -1; } } return 0; } // ---------- Token Functions ----------- // static int token_add(lexer_t lexer, enum token_enum type) { if(lexer->ntokens >= lexer->tokens_cap) { err("tokens_cap of %ld has been reached", lexer->tokens_cap); return -1; } lexer->tokens[lexer->ntokens].type = type; return lexer->ntokens++; } static int token_add_iden(lexer_t lexer) { int ret = 1; if(!lexer->iden_sz) return 0; int i = token_add(lexer, TOKEN_VALUE); if(i < 0) { err("token_add: failed"); goto exit; } value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret); if(ret > 0) { value = value_create(VALUE_SYMBOL, lexer->iden, &ret); } else if(ret < 0) { err("value_create: failed"); goto exit; } lexer->tokens[i].value = value; exit: memset(lexer->iden, 0, lexer->iden_sz); lexer->iden_sz = 0; return ret; }