#include #include #include #include #include "common.h" #include "lexer.h" #include "value.h" // TODO: handle escaping // saves a token with no data // returns the index of the saved token; < 0 on fail static int token_add(toklist_t *tokens, enum token_enum type); // saves a token with an identifier (lexer -> iden) // returns 0 on success static int token_add_iden(toklist_t *tokens, char *iden, size_t *iden_sz); // used for tokens that separate things // if type is TOKEN_TOKENS, then no empty token will be saved // returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden) static int on_generic_separator(lexer_t lexer, toklist_t *tokens, enum token_enum type); static int on_double_quote(lexer_t lexer, toklist_t *tokens); #define EQ(ch) ch == #define SEPARATOR_CALLBACK_TBL(X, ...) \ /* X(test, what to execute if the test succeeds) */ \ X(EQ('('), on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_OPEN); lexer->scope_depth++) \ X(EQ(')'), on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_CLOSE); lexer->scope_depth--) \ X(EQ('\''), on_generic_separator(__VA_ARGS__, TOKEN_SPECIAL_QUOTE)) \ X(EQ('"'), on_double_quote(__VA_ARGS__)) \ X(isspace, on_generic_separator(__VA_ARGS__, TOKEN_TOKENS)) #define FN(fn, arg) "%s", fn(arg, buf, buf_sz) #define MANAGE_TOKEN_TBL(X, token) \ /* X(type, how to free, how to print) */ \ X(TOKEN_PARENTHS_OPEN, ;, "(") \ X(TOKEN_PARENTHS_CLOSE, ;, ")") \ X(TOKEN_SPECIAL_QUOTE, ;, "'") \ X(TOKEN_VALUE, value_destroy(token->value), FN(value_string, token->value)) \ X(TOKEN_TOKENS, ;, "") \ // ---------- Exported Functions ---------- // // makes an if-else chain to test the character // agains the separator callback table #define CHECK_SEPARATOR_AND_CALLBACK(test_func, callback) \ if(test_func(str[i])) { \ callback_ret = callback; \ if(callback_ret == 0) { \ continue; \ } else if(callback_ret < 0) { \ err(#callback ": failed"); \ return 1; \ } \ } else int lexer_tokenize(lexer_t lexer, toklist_t *toklist, char *str, size_t len) { int callback_ret = 0; for(size_t i = 0; i < len; i++) { SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer, toklist); if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz); return 1; } // add charater to identifier lexer->iden[lexer->iden_sz++] = str[i]; } return 0; } lexer_t lexer_create() { lexer_t lexer = xmalloc(sizeof(struct lexer)); lexer_reset(lexer); return lexer; } #define CASE_FREE(type, free_func, ...) case type: free_func; break; void lexer_destroy(lexer_t lexer) { if(!lexer) return; free(lexer); } void lexer_reset(lexer_t lexer) { memset(lexer->iden, 0, LEXER_IDEN_CAP); lexer->iden_sz = 0; lexer->inside_string = false; lexer->scope_depth = 0; } void toklist_reset(toklist_t *toklist) { for(size_t i = 0; i < toklist->ntokens; i++) { struct token *token = &toklist->tokens[i]; switch(token->type) { MANAGE_TOKEN_TBL(CASE_FREE, token); default: err("lexer_reset: Unknown token type given"); break; } token->type = TOKEN_TOKENS; token->value = NULL; } toklist->ntokens = 0; } // print based on the given way to print #define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break; void toklist_print(toklist_t *toklist) { // for the printing (see MANAGE_TOKEN_TBL) char buf[LEXER_IDEN_CAP]; size_t buf_sz = LEXER_IDEN_CAP; for(size_t i = 0; i < toklist->ntokens; i++) { struct token *token = &toklist->tokens[i]; switch(token->type) { MANAGE_TOKEN_TBL(CASE_PRINT, token); default: err("lexer_print_tokens: Unknown token given"); return; } } } bool lexer_has_finished(lexer_t lexer) { if(!lexer->inside_string && lexer->scope_depth == 0) return true; return false; } // ---------- Callback Functions ----------- // static int on_double_quote(lexer_t lexer, toklist_t *toklist) { int ret = on_generic_separator(lexer, toklist, TOKEN_TOKENS); if(ret < 0) { return ret; } else if(ret == 0) { lexer->inside_string = 1; return 1; } if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) { err("token_add_iden: failed"); return -1; } lexer->inside_string = 0; return 0; } static int on_generic_separator(lexer_t lexer, toklist_t *toklist, enum token_enum type) { if(lexer->inside_string) { return 1; } if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) { err("token_add_iden: failed"); return -1; } if(type != TOKEN_TOKENS) { if(token_add(toklist, type) < 0) { err("token_add: failed"); return -1; } } return 0; } // ---------- Token Functions ----------- // static int token_add(toklist_t *toklist, enum token_enum type) { if(toklist->ntokens >= LEXER_TOK_CAP) { err("tokens_cap of %ld has been reached", toklist->ntokens); return -1; } toklist->tokens[toklist->ntokens].type = type; return toklist->ntokens++; } static int token_add_iden(toklist_t *toklist, char *iden, size_t *iden_sz) { int ret = 1; if(*iden_sz == 0) return 0; int i = token_add(toklist, TOKEN_VALUE); if(i < 0) { err("token_add: failed"); goto exit; } value_t value = value_create(VALUE_LITERAL, iden, &ret); if(ret > 0) { value = value_create(VALUE_SYMBOL, iden, &ret); } else if(ret < 0) { err("value_create: failed"); goto exit; } toklist->tokens[i].value = value; exit: memset(iden, 0, *iden_sz); *iden_sz = 0; return ret; }