From c837b2bb4ea71cedb434260b11c9f356e8b34e2d Mon Sep 17 00:00:00 2001 From: kartofen Date: Tue, 2 Jan 2024 16:06:45 +0200 Subject: things --- src/lexer.c | 126 +++++++++++++++++++++++++++--------------------------------- 1 file changed, 56 insertions(+), 70 deletions(-) (limited to 'src/lexer.c') diff --git a/src/lexer.c b/src/lexer.c index 9659bb4..d71fd2f 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -11,27 +11,27 @@ // saves a token with no data // returns the index of the saved token; < 0 on fail -static int token_add(lexer_t lexer, enum token_enum type); +static int token_add(toklist_t *tokens, enum token_enum type); -// saves a token with the current identifier (lexer->iden) +// saves a token with an identifier (lexer -> iden) // returns 0 on success -static int token_add_iden(lexer_t lexer); +static int token_add_iden(toklist_t *tokens, char *iden, size_t *iden_sz); // used for tokens that separate things // if type is TOKEN_TOKENS, then no empty token will be saved // returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden) -static int on_generic_separator(lexer_t lexer, enum token_enum type); -static int on_double_quote(lexer_t lexer); +static int on_generic_separator(lexer_t lexer, toklist_t *tokens, enum token_enum type); +static int on_double_quote(lexer_t lexer, toklist_t *tokens); #define EQ(ch) ch == -#define SEPARATOR_CALLBACK_TBL(X, lexer) \ +#define SEPARATOR_CALLBACK_TBL(X, ...) \ /* X(test, what to execute if the test succeeds) */ \ - X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \ - X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \ - X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \ - X(EQ('"'), on_double_quote(lexer)) \ - X(isspace, on_generic_separator(lexer, TOKEN_TOKENS)) + X(EQ('('), on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_OPEN); lexer->scope_depth++) \ + X(EQ(')'), on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_CLOSE); lexer->scope_depth--) \ + X(EQ('\''), on_generic_separator(__VA_ARGS__, TOKEN_SPECIAL_QUOTE)) \ + X(EQ('"'), on_double_quote(__VA_ARGS__)) \ + X(isspace, on_generic_separator(__VA_ARGS__, TOKEN_TOKENS)) #define FN(fn, arg) "%s", fn(arg, buf, buf_sz) @@ -58,13 +58,13 @@ static int on_double_quote(lexer_t lexer); } \ } else -int lexer_tokenize(lexer_t lexer, char *str, size_t len) -{ +int lexer_tokenize(lexer_t lexer, toklist_t *toklist, char *str, size_t len) +{ int callback_ret = 0; for(size_t i = 0; i < len; i++) { - SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {} + SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer, toklist); if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz); @@ -78,17 +78,9 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len) return 0; } -lexer_t lexer_create(size_t tokens_cap) +lexer_t lexer_create() { lexer_t lexer = xmalloc(sizeof(struct lexer)); - - lexer->tokens_cap = tokens_cap; - lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token)); - - for(size_t i = 0; i < tokens_cap; i++) { - lexer->tokens[i].type = TOKEN_TOKENS; - } - lexer_reset(lexer); return lexer; } @@ -98,29 +90,22 @@ lexer_t lexer_create(size_t tokens_cap) void lexer_destroy(lexer_t lexer) { if(!lexer) return; - - if(lexer->tokens) { - for(size_t i = 0; i < lexer->ntokens; i++) - { - struct token *token = &lexer->tokens[i]; - - switch(lexer->tokens[i].type) { - MANAGE_TOKEN_TBL(CASE_FREE, token); - default: - err("lexer_reset: Unknown token type given"); - break; - } - } - free(lexer->tokens); - } - free(lexer); } void lexer_reset(lexer_t lexer) { - for(size_t i = 0; i < lexer->tokens_cap; i++) { - struct token *token = &lexer->tokens[i]; + memset(lexer->iden, 0, LEXER_IDEN_CAP); + lexer->iden_sz = 0; + + lexer->inside_string = false; + lexer->scope_depth = 0; +} + +void toklist_reset(toklist_t *toklist) +{ + for(size_t i = 0; i < toklist->ntokens; i++) { + struct token *token = &toklist->tokens[i]; switch(token->type) { MANAGE_TOKEN_TBL(CASE_FREE, token); @@ -132,26 +117,21 @@ void lexer_reset(lexer_t lexer) token->type = TOKEN_TOKENS; token->value = NULL; } - - lexer->ntokens = 0; - - memset(lexer->iden, 0, LEXER_IDEN_CAP); - lexer->iden_sz = 0; - - lexer->inside_string = 0; + + toklist->ntokens = 0; } // print based on the given way to print #define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break; -void lexer_print_tokens(lexer_t lexer) +void toklist_print(toklist_t *toklist) { // for the printing (see MANAGE_TOKEN_TBL) char buf[LEXER_IDEN_CAP]; size_t buf_sz = LEXER_IDEN_CAP; - for(size_t i = 0; i < lexer->ntokens; i++) { - struct token *token = &lexer->tokens[i]; + for(size_t i = 0; i < toklist->ntokens; i++) { + struct token *token = &toklist->tokens[i]; switch(token->type) { MANAGE_TOKEN_TBL(CASE_PRINT, token); @@ -162,11 +142,17 @@ void lexer_print_tokens(lexer_t lexer) } } +bool lexer_has_finished(lexer_t lexer) +{ + if(!lexer->inside_string && lexer->scope_depth == 0) return true; + return false; +} + // ---------- Callback Functions ----------- // -static int on_double_quote(lexer_t lexer) +static int on_double_quote(lexer_t lexer, toklist_t *toklist) { - int ret = on_generic_separator(lexer, TOKEN_TOKENS); + int ret = on_generic_separator(lexer, toklist, TOKEN_TOKENS); if(ret < 0) { return ret; } else if(ret == 0) { @@ -174,7 +160,7 @@ static int on_double_quote(lexer_t lexer) return 1; } - if(token_add_iden(lexer)) { + if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) { err("token_add_iden: failed"); return -1; } @@ -183,19 +169,19 @@ static int on_double_quote(lexer_t lexer) return 0; } -static int on_generic_separator(lexer_t lexer, enum token_enum type) +static int on_generic_separator(lexer_t lexer, toklist_t *toklist, enum token_enum type) { if(lexer->inside_string) { return 1; } - if(token_add_iden(lexer)) { + if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) { err("token_add_iden: failed"); return -1; } if(type != TOKEN_TOKENS) { - if(token_add(lexer, type) < 0) { + if(token_add(toklist, type) < 0) { err("token_add: failed"); return -1; } @@ -206,39 +192,39 @@ static int on_generic_separator(lexer_t lexer, enum token_enum type) // ---------- Token Functions ----------- // -static int token_add(lexer_t lexer, enum token_enum type) +static int token_add(toklist_t *toklist, enum token_enum type) { - if(lexer->ntokens >= lexer->tokens_cap) { - err("tokens_cap of %ld has been reached", lexer->tokens_cap); + if(toklist->ntokens >= LEXER_TOK_CAP) { + err("tokens_cap of %ld has been reached", toklist->ntokens); return -1; } - lexer->tokens[lexer->ntokens].type = type; - return lexer->ntokens++; + toklist->tokens[toklist->ntokens].type = type; + return toklist->ntokens++; } -static int token_add_iden(lexer_t lexer) +static int token_add_iden(toklist_t *toklist, char *iden, size_t *iden_sz) { int ret = 1; - if(!lexer->iden_sz) return 0; + if(*iden_sz == 0) return 0; - int i = token_add(lexer, TOKEN_VALUE); + int i = token_add(toklist, TOKEN_VALUE); if(i < 0) { err("token_add: failed"); goto exit; } - value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret); + value_t value = value_create(VALUE_LITERAL, iden, &ret); if(ret > 0) { - value = value_create(VALUE_SYMBOL, lexer->iden, &ret); + value = value_create(VALUE_SYMBOL, iden, &ret); } else if(ret < 0) { err("value_create: failed"); goto exit; } - lexer->tokens[i].value = value; + toklist->tokens[i].value = value; exit: - memset(lexer->iden, 0, lexer->iden_sz); - lexer->iden_sz = 0; + memset(iden, 0, *iden_sz); + *iden_sz = 0; return ret; } -- cgit v1.2.3