#include #include #include #include // TODO: handle escaped quotes #include "common.h" #include "lexer.h" // saves a token with no data // returns the index of the saved token; < 0 on fail static int save_empty_token(lexer_t lexer, enum token_enum type); // saves a token with data which is the current identifier (lexer->iden) // returns 0 on success static int save_current_identifier(lexer_t lexer); // used for tokens that separate things, type is optional (TOKEN_TOKENS for default) // returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden) static int on_generic_separator(lexer_t lexer, enum token_enum type); static int on_quote(lexer_t lexer); static int on_dot(lexer_t lexer); // try to convert the identifier (lexer->iden) to a given type // returns > 0 on sucess, 0 on fail (iden isnt the given type), // and < 0 on error static int try_str(lexer_t lexer); static int try_int(lexer_t lexer); static int try_float(lexer_t lexer); static int try_symbol(lexer_t lexer); #define SEPARATOR_CALLBACK_TBL(X, lexer) \ X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \ X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \ X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \ X(EQ('.'), on_dot(lexer)) \ X(EQ('"'), on_quote(lexer)) \ X(FN(isspace), on_generic_separator(lexer, TOKEN_TOKENS)) #define IDENTIFY_IDENTIFIER_LIST(X) \ X(try_str) \ X(try_int) \ X(try_float) \ X(try_symbol) // X(token type, what to free, how to print on screen) #define TOKEN_TYPES_INFO(X, token) \ X(TOKEN_PARENTHS_OPEN, NULL, "(") \ X(TOKEN_PARENTHS_CLOSE, NULL, ")") \ X(TOKEN_SPECIAL_QUOTE, NULL, "'") \ X(TOKEN_SPECIAL_DOT, NULL, ".") \ X(TOKEN_LITERAL_STRING, token->string, "'%s'", token->string) \ X(TOKEN_LITERAL_NUM_INT, NULL, "'%ld'", token->num_int) \ X(TOKEN_LITERAL_NUM_FLOAT, NULL, "'%f'", token->num_float) \ X(TOKEN_SYMBOL, token->symbol, "'%s'", token->symbol) #define EQ(ch) ch == #define FN(f) f // makes an if-else chain to test the character // agains the seperator callback table #define CHECK_SEPERATOR_AND_CALLBACK(test_func, callback) \ if(test_func(str[i])) { \ callback_ret = callback; \ if(callback_ret == 0) { \ continue; \ } else if(callback_ret < 0) { \ err(#callback ": failed"); \ return 1; \ } \ } else int lexer_tokenize(lexer_t lexer, char *str, size_t len) { int callback_ret = 0; for(size_t i = 0; i < len; i++) { SEPARATOR_CALLBACK_TBL(CHECK_SEPERATOR_AND_CALLBACK, lexer) {} if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz); return 1; } // add charater to identifier lexer->iden[lexer->iden_sz++] = str[i]; } return 0; } lexer_t lexer_create(size_t tokens_cap) { lexer_t lexer = malloc(sizeof(struct lexer)); if(!lexer) { err("malloc: %s", strerror(errno)); goto fail; } lexer->tokens = calloc(tokens_cap, sizeof(struct token)); if(!lexer->tokens) { err("malloc %s", strerror(errno)); goto fail; } for(size_t i = 0; i < tokens_cap; i++) { lexer->tokens[i].symbol = NULL; } lexer->tokens_cap = tokens_cap; lexer->ntokens = 0; memset(lexer->iden, 0, LEXER_IDEN_CAP); lexer->iden_sz = 0; lexer->inside_string = 0; return lexer; fail: lexer_destroy(lexer); return NULL; } #define CASE_FREE_TOKEN(type, data, ...) \ case type: if(data != NULL) { free(data); } break; void lexer_destroy(lexer_t lexer) { if(!lexer) return; if(lexer->tokens) { for(size_t i = 0; i < lexer->ntokens; i++) { struct token *token = &lexer->tokens[i]; switch(token->type) { TOKEN_TYPES_INFO(CASE_FREE_TOKEN, token) default: break; } } free(lexer->tokens); } free(lexer); } // ------------------------------------------------- // static int on_quote(lexer_t lexer) { int ret = on_generic_separator(lexer, TOKEN_TOKENS); if(ret == 0) { lexer->inside_string = 1; return ret; } else if(ret > 0) { lexer->inside_string = 0; return 0; } return ret; } static int on_dot(lexer_t lexer) { if(lexer->iden_sz != 0) return 1; return on_generic_separator(lexer, TOKEN_SPECIAL_DOT); } static int on_generic_separator(lexer_t lexer, enum token_enum type) { if(lexer->inside_string) { return 1; } if(save_current_identifier(lexer)) { err("save_current_identifier: failed"); return -1; } if(type != TOKEN_TOKENS) { if(save_empty_token(lexer, type) < 0) { err("save_empty_token: failed"); return -1; } } return 0; } static int save_empty_token(lexer_t lexer, enum token_enum type) { if(lexer->ntokens >= lexer->tokens_cap) { err("tokens_cap of %ld has been reached", lexer->tokens_cap); return -1; } lexer->tokens[lexer->ntokens++].type = type; return lexer->ntokens - 1; } #define CHECK_IDEN(func) \ if((ret = func(lexer))) { \ if(ret < 0) { \ err(#func ": failed"); \ goto exit; \ } \ } else static int save_current_identifier(lexer_t lexer) { int ret = 1; if(lexer->iden_sz != 0) { IDENTIFY_IDENTIFIER_LIST(CHECK_IDEN) {} } ret = 0; exit: memset(lexer->iden, 0, lexer->iden_sz); lexer->iden_sz = 0; return ret; } // ------------------------------------------------- // static int try_str(lexer_t lexer) { if(!lexer->inside_string) return 0; int i = save_empty_token(lexer, TOKEN_LITERAL_STRING); if(i < 0) { err("save_empty_token: failed"); return -1; } lexer->tokens[i].string = malloc(lexer->iden_sz+1); if(!lexer->tokens[i].string) { err("malloc: %s", strerror(errno)); return -1; } memcpy(lexer->tokens[i].string, lexer->iden, lexer->iden_sz+1); return 1; } static int try_int(lexer_t lexer) { errno = ERANGE + 1; // set errno to not ERANGE char *endptr; long num = strtol(lexer->iden, &endptr, 10); if(*endptr != '\0') { // the whole string isn't a number return 0; } if(errno == ERANGE) { warn("Given integer literal %s is outside the possible range", lexer->iden); } int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_INT); if(i < 0) { err("save_empty_token: failed"); return -1; } lexer->tokens[i].num_int = num; return 1; } static int try_float(lexer_t lexer) { errno = ERANGE + 1; // set errno to not ERANGE char *endptr; float num = strtof(lexer->iden, &endptr); if(*endptr != '\0') { // the whole string isn't a number return 0; } if(errno == ERANGE) { warn("Given float literal %s is outside the possible range", lexer->iden); } int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_FLOAT); if(i < 0) { err("save_empty_token: failed"); return -1; } lexer->tokens[i].num_float = num; return 1; } static int try_symbol(lexer_t lexer) { int i = save_empty_token(lexer, TOKEN_SYMBOL); if(i < 0) { err("save_empty_token: failed"); return -1; } lexer->tokens[i].symbol = malloc(lexer->iden_sz+1); if(!lexer->tokens[i].symbol) { err("malloc: %s", strerror(errno)); return -1; } memcpy(lexer->tokens[i].symbol, lexer->iden, lexer->iden_sz+1); return 1; } // ------------------------------------------------- // #ifdef DEBUG #define CASE_PRINT(type, data, ...) case type: info("\t" __VA_ARGS__); break; void lexer_print_tokens(lexer_t lexer) { for(size_t i = 0; i < lexer->ntokens; i++) { struct token *token = &lexer->tokens[i]; info("Token %zu: %d", i, token->type); switch(token->type) { TOKEN_TYPES_INFO(CASE_PRINT, token); default: break; } } } #endif