From a7bb8ace49f5725e0f92336ab5af28b4c8900aff Mon Sep 17 00:00:00 2001
From: kartofen
Date: Mon, 14 Aug 2023 21:20:39 +0300
Subject: parser done

---
 src/lexer.c | 334 ++++++++++++++++++++++--------------------------------------
 1 file changed, 121 insertions(+), 213 deletions(-)

(limited to 'src/lexer.c')

diff --git a/src/lexer.c b/src/lexer.c
index 71eed79..9659bb4 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -3,64 +3,51 @@
 #include
 #include
 
-// TODO: handle escaped quotes
 #include "common.h"
 #include "lexer.h"
+#include "value.h"
+
+// TODO: handle escaping
 
 // saves a token with no data
 // returns the index of the saved token; < 0 on fail
-static int save_empty_token(lexer_t lexer, enum token_enum type);
+static int token_add(lexer_t lexer, enum token_enum type);
 
-// saves a token with data which is the current identifier (lexer->iden)
+// saves a token with the current identifier (lexer->iden)
 // returns 0 on success
-static int save_current_identifier(lexer_t lexer);
+static int token_add_iden(lexer_t lexer);
 
 // used for tokens that separate things
 // if type is TOKEN_TOKENS, then no empty token will be saved
 // returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden)
 static int on_generic_separator(lexer_t lexer, enum token_enum type);
-static int on_quote(lexer_t lexer);
-static int on_dot(lexer_t lexer);
-
-// try to convert the identifier (lexer->iden) to a given type
-// returns > 0 on sucess, 0 on fail (iden isnt the given type),
-// and < 0 on error
-static int try_str(lexer_t lexer);
-static int try_int(lexer_t lexer);
-static int try_float(lexer_t lexer);
-static int try_symbol(lexer_t lexer);
-
-#define SEPARATOR_CALLBACK_TBL(X, lexer) \
-    X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \
-    X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \
-    X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \
-    X(EQ('.'), on_dot(lexer)) \
-    X(EQ('"'), on_quote(lexer)) \
-    X(FN(isspace), on_generic_separator(lexer, TOKEN_TOKENS))
-
-// X(token type, what to free, how to print on screen)
-#define TOKEN_TYPES_INFO(X, token) \
-    X(TOKEN_PARENTHS_OPEN, NULL, "(") \
-    X(TOKEN_PARENTHS_CLOSE, NULL, ")") \
-    X(TOKEN_SPECIAL_QUOTE, NULL, "'") \
-    X(TOKEN_SPECIAL_DOT, NULL, ".") \
-    X(TOKEN_LITERAL_STRING, token->string, "'%s'", token->string) \
-    X(TOKEN_LITERAL_NUM_INT, NULL, "'%ld'", token->num_int) \
-    X(TOKEN_LITERAL_NUM_FLOAT, NULL, "'%f'", token->num_float) \
-    X(TOKEN_SYMBOL, token->symbol, "'%s'", token->symbol)
-
-#define IDENTIFY_IDENTIFIER_LIST(X) \
-    X(try_str) \
-    X(try_int) \
-    X(try_float) \
-    X(try_symbol)
+static int on_double_quote(lexer_t lexer);
 
 #define EQ(ch) ch ==
-#define FN(f) f
+
+#define SEPARATOR_CALLBACK_TBL(X, lexer) \
+/* X(test, what to execute if the test succeeds) */ \
+    X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \
+    X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \
+    X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \
+    X(EQ('"'), on_double_quote(lexer)) \
+    X(isspace, on_generic_separator(lexer, TOKEN_TOKENS))
+
+#define FN(fn, arg) "%s", fn(arg, buf, buf_sz)
+
+#define MANAGE_TOKEN_TBL(X, token) \
+/* X(type, how to free, how to print) */ \
+    X(TOKEN_PARENTHS_OPEN, ;, "(") \
+    X(TOKEN_PARENTHS_CLOSE, ;, ")") \
+    X(TOKEN_SPECIAL_QUOTE, ;, "'") \
+    X(TOKEN_VALUE, value_destroy(token->value), FN(value_string, token->value)) \
+    X(TOKEN_TOKENS, ;, "") \
+
+// ---------- Exported Functions ---------- //
 
 // makes an if-else chain to test the character
-// agains the seperator callback table
-#define CHECK_SEPERATOR_AND_CALLBACK(test_func, callback) \
+// agains the separator callback table
+#define CHECK_SEPARATOR_AND_CALLBACK(test_func, callback) \
     if(test_func(str[i])) { \
         callback_ret = callback; \
         if(callback_ret == 0) { \
@@ -77,7 +64,7 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)
 
     for(size_t i = 0; i < len; i++)
     {
-        SEPARATOR_CALLBACK_TBL(CHECK_SEPERATOR_AND_CALLBACK, lexer) {}
+        SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {}
 
         if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated
             err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz);
@@ -93,49 +80,35 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)
 
 lexer_t lexer_create(size_t tokens_cap)
 {
-    lexer_t lexer = malloc(sizeof(struct lexer));
-    if(!lexer) {
-        err("malloc: %s", strerror(errno));
-        goto fail;
-    }
+    lexer_t lexer = xmalloc(sizeof(struct lexer));
 
-    lexer->tokens = calloc(tokens_cap, sizeof(struct token));
-    if(!lexer->tokens) {
-        err("malloc %s", strerror(errno));
-        goto fail;
-    }
+    lexer->tokens_cap = tokens_cap;
+    lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token));
 
     for(size_t i = 0; i < tokens_cap; i++) {
-        lexer->tokens[i].symbol = NULL;
+        lexer->tokens[i].type = TOKEN_TOKENS;
     }
 
-    lexer->tokens_cap = tokens_cap;
-    lexer->ntokens = 0;
-
-    memset(lexer->iden, 0, LEXER_IDEN_CAP);
-    lexer->iden_sz = 0;
-
-    lexer->inside_string = 0;
-
+    lexer_reset(lexer);
     return lexer;
-fail:
-    lexer_destroy(lexer);
-    return NULL;
 }
 
-#define CASE_FREE_TOKEN(type, data, ...) \
-    case type: if(data != NULL) { free(data); } break;
+#define CASE_FREE(type, free_func, ...) case type: free_func; break;
 
 void lexer_destroy(lexer_t lexer)
 {
     if(!lexer) return;
 
     if(lexer->tokens) {
-        for(size_t i = 0; i < lexer->ntokens; i++) {
+        for(size_t i = 0; i < lexer->ntokens; i++)
+        {
             struct token *token = &lexer->tokens[i];
-            switch(token->type) {
-                TOKEN_TYPES_INFO(CASE_FREE_TOKEN, token)
-                default: break;
+
+            switch(lexer->tokens[i].type) {
+                MANAGE_TOKEN_TBL(CASE_FREE, token);
+                default:
+                    err("lexer_reset: Unknown token type given");
+                    break;
             }
         }
         free(lexer->tokens);
@@ -144,18 +117,65 @@ void lexer_destroy(lexer_t lexer)
     free(lexer);
 }
 
-// ------------------------------------------------- //
+void lexer_reset(lexer_t lexer)
+{
+    for(size_t i = 0; i < lexer->tokens_cap; i++) {
+        struct token *token = &lexer->tokens[i];
+
+        switch(token->type) {
+            MANAGE_TOKEN_TBL(CASE_FREE, token);
+            default:
+                err("lexer_reset: Unknown token type given");
+                break;
+        }
+
+        token->type = TOKEN_TOKENS;
+        token->value = NULL;
+    }
+
+    lexer->ntokens = 0;
+
+    memset(lexer->iden, 0, LEXER_IDEN_CAP);
+    lexer->iden_sz = 0;
+
+    lexer->inside_string = 0;
+}
+
+// print based on the given way to print
+#define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break;
+
+void lexer_print_tokens(lexer_t lexer)
+{
+    // for the printing (see MANAGE_TOKEN_TBL)
+    char buf[LEXER_IDEN_CAP];
+    size_t buf_sz = LEXER_IDEN_CAP;
+
+    for(size_t i = 0; i < lexer->ntokens; i++) {
+        struct token *token = &lexer->tokens[i];
+
+        switch(token->type) {
+            MANAGE_TOKEN_TBL(CASE_PRINT, token);
+            default:
+                err("lexer_print_tokens: Unknown token given");
+                return;
+        }
+    }
+}
+
+// ---------- Callback Functions ----------- //
 
-static int on_quote(lexer_t lexer)
+static int on_double_quote(lexer_t lexer)
 {
     int ret = on_generic_separator(lexer, TOKEN_TOKENS);
-    if(ret <= 0) { // it either failed or worked, both not inside a string
-        lexer->inside_string = 1;
+    if(ret < 0) {
         return ret;
+    } else if(ret == 0) {
+        lexer->inside_string = 1;
+        return 1;
     }
 
-    if(save_current_identifier(lexer)) {
-        err("save_current_identifier: failed");
+    if(token_add_iden(lexer)) {
+        err("token_add_iden: failed");
         return -1;
     }
 
@@ -163,26 +183,20 @@
     return 0;
 }
 
-static int on_dot(lexer_t lexer)
-{
-    if(lexer->iden_sz != 0) return 1;
-    return on_generic_separator(lexer, TOKEN_SPECIAL_DOT);
-}
-
 static int on_generic_separator(lexer_t lexer, enum token_enum type)
 {
     if(lexer->inside_string) {
         return 1;
     }
 
-    if(save_current_identifier(lexer)) {
-        err("save_current_identifier: failed");
+    if(token_add_iden(lexer)) {
+        err("token_add_iden: failed");
         return -1;
    }
 
     if(type != TOKEN_TOKENS) {
-        if(save_empty_token(lexer, type) < 0) {
-            err("save_empty_token: failed");
+        if(token_add(lexer, type) < 0) {
+            err("token_add: failed");
             return -1;
         }
     }
 
@@ -190,147 +204,41 @@ static int on_generic_separator(lexer_t lexer, enum token_enum type)
     return 0;
 }
 
-static int save_empty_token(lexer_t lexer, enum token_enum type)
+// ---------- Token Functions ----------- //
+
+static int token_add(lexer_t lexer, enum token_enum type)
 {
     if(lexer->ntokens >= lexer->tokens_cap) {
         err("tokens_cap of %ld has been reached", lexer->tokens_cap);
         return -1;
     }
 
-    lexer->tokens[lexer->ntokens++].type = type;
-    return lexer->ntokens - 1;
+    lexer->tokens[lexer->ntokens].type = type;
+    return lexer->ntokens++;
 }
 
-#define CHECK_IDEN(func) \
-    if((ret = func(lexer))) { \
-        if(ret < 0) { \
-            err(#func ": failed"); \
-            goto exit; \
-        } \
-    } else
-
-static int save_current_identifier(lexer_t lexer)
+static int token_add_iden(lexer_t lexer)
 {
     int ret = 1;
+    if(!lexer->iden_sz) return 0;
 
-    if(lexer->iden_sz != 0) {
-        IDENTIFY_IDENTIFIER_LIST(CHECK_IDEN) {}
+    int i = token_add(lexer, TOKEN_VALUE);
+    if(i < 0) {
+        err("token_add: failed");
+        goto exit;
     }
 
-    ret = 0;
+    value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret);
+    if(ret > 0) {
+        value = value_create(VALUE_SYMBOL, lexer->iden, &ret);
+    } else if(ret < 0) {
+        err("value_create: failed");
+        goto exit;
+    }
+
+    lexer->tokens[i].value = value;
 exit:
     memset(lexer->iden, 0, lexer->iden_sz);
     lexer->iden_sz = 0;
     return ret;
 }
-
-
-// ------------------------------------------------- //
-
-static int try_str(lexer_t lexer)
-{
-    if(!lexer->inside_string) return 0;
-
-    int i = save_empty_token(lexer, TOKEN_LITERAL_STRING);
-    if(i < 0) {
-        err("save_empty_token: failed");
-        return -1;
-    }
-
-    lexer->tokens[i].string = malloc(lexer->iden_sz+1);
-    if(!lexer->tokens[i].string) {
-        err("malloc: %s", strerror(errno));
-        return -1;
-    }
-
-    memcpy(lexer->tokens[i].string, lexer->iden, lexer->iden_sz+1);
-    return 1;
-}
-
-static int try_int(lexer_t lexer)
-{
-    errno = ERANGE + 1; // set errno to not ERANGE
-
-    char *endptr;
-    long num = strtol(lexer->iden, &endptr, 10);
-
-    if(*endptr != '\0') { // the whole string isn't a number
-        return 0;
-    }
-
-    if(errno == ERANGE) {
-        warn("Given integer literal %s is outside the possible range", lexer->iden);
-    }
-
-    int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_INT);
-    if(i < 0) {
-        err("save_empty_token: failed");
-        return -1;
-    }
-
-    lexer->tokens[i].num_int = num;
-    return 1;
-}
-
-static int try_float(lexer_t lexer)
-{
-    errno = ERANGE + 1; // set errno to not ERANGE
-
-    char *endptr;
-    float num = strtof(lexer->iden, &endptr);
-
-    if(*endptr != '\0') { // the whole string isn't a number
-        return 0;
-    }
-
-    if(errno == ERANGE) {
-        warn("Given float literal %s is outside the possible range", lexer->iden);
-    }
-
-    int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_FLOAT);
-    if(i < 0) {
-        err("save_empty_token: failed");
-        return -1;
-    }
-
-    lexer->tokens[i].num_float = num;
-    return 1;
-}
-
-static int try_symbol(lexer_t lexer)
-{
-    int i = save_empty_token(lexer, TOKEN_SYMBOL);
-    if(i < 0) {
-        err("save_empty_token: failed");
-        return -1;
-    }
-
-    lexer->tokens[i].symbol = malloc(lexer->iden_sz+1);
-    if(!lexer->tokens[i].symbol) {
-        err("malloc: %s", strerror(errno));
-        return -1;
-    }
-
-    memcpy(lexer->tokens[i].symbol, lexer->iden, lexer->iden_sz+1);
-    return 1;
-}
-
-// ------------------------------------------------- //
-
-#ifdef DEBUG
-#define CASE_PRINT(type, data, ...) case type: info("\t" __VA_ARGS__); break;
-
-void lexer_print_tokens(lexer_t lexer)
-{
-    for(size_t i = 0; i < lexer->ntokens; i++) {
-        struct token *token = &lexer->tokens[i];
-
-        info("Token %zu: %d", i, token->type);
-
-        switch(token->type) {
-            TOKEN_TYPES_INFO(CASE_PRINT, token);
-            default: break;
-        }
-    }
-}
-#endif
-- 
cgit v1.2.3
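
The structural idea behind this commit is the X-macro table: MANAGE_TOKEN_TBL lists every token type once, together with an expression that frees its payload and an expression that prints it, and lexer_destroy, lexer_reset and lexer_print_tokens each expand the same table with a different CASE_* macro. Below is a minimal, self-contained sketch of that technique; the names in it (TOK_TBL, CASE_FREE, CASE_PRINT, tok_free, tok_print) are illustrative stand-ins, not identifiers from this repository.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical token model, loosely mirroring the patch's TOKEN_VALUE idea. */
enum tok_type { TOK_OPEN, TOK_CLOSE, TOK_WORD };

struct tok {
    enum tok_type type;
    char *word;              /* payload, only used by TOK_WORD */
};

/* One table drives every per-type switch:
 * X(type, how to free the payload, how to print the token) */
#define TOK_TBL(X, t) \
    X(TOK_OPEN,  ;,             printf("(\n")) \
    X(TOK_CLOSE, ;,             printf(")\n")) \
    X(TOK_WORD,  free(t->word), printf("'%s'\n", t->word))

#define CASE_FREE(type, free_expr, print_expr)  case type: free_expr; break;
#define CASE_PRINT(type, free_expr, print_expr) case type: print_expr; break;

static void tok_free(struct tok *t)
{
    switch(t->type) {
        TOK_TBL(CASE_FREE, t)       /* expands to one free case per row */
        default: break;
    }
}

static void tok_print(struct tok *t)
{
    switch(t->type) {
        TOK_TBL(CASE_PRINT, t)      /* expands to one print case per row */
        default: break;
    }
}

int main(void)
{
    struct tok toks[3] = {
        { .type = TOK_OPEN },
        { .type = TOK_WORD, .word = strdup("lambda") },
        { .type = TOK_CLOSE },
    };

    for(int i = 0; i < 3; i++) tok_print(&toks[i]);
    for(int i = 0; i < 3; i++) tok_free(&toks[i]);
    return 0;
}

Adding a new token type then means adding one row to the table, and every switch generated from it stays in sync.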
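
The exported surface after this commit is lexer_create, lexer_tokenize, lexer_print_tokens, lexer_reset and lexer_destroy. The driver below is a hedged usage sketch, not code from the repository: it assumes lexer.h declares these functions exactly as they are defined above, that lexer_tokenize follows the same 0-on-success convention as the rest of the file, and that a capacity of 64 tokens is an arbitrary example value.

#include <string.h>

#include "lexer.h"   /* assumed to declare the functions defined in this patch */

int main(void)
{
    char src[] = "(print \"hello\")";    /* escaping inside strings is still a TODO above */

    lexer_t lexer = lexer_create(64);     /* 64 is an arbitrary token capacity */
    if(!lexer) return 1;

    /* tokenize the buffer; assumed to return 0 on success like the other routines here */
    if(lexer_tokenize(lexer, src, strlen(src)) == 0)
        lexer_print_tokens(lexer);

    lexer_reset(lexer);                   /* clear tokens and identifier state for the next input */
    lexer_destroy(lexer);
    return 0;
}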