aboutsummaryrefslogtreecommitdiff
path: root/src/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer.c')
-rw-r--r--  src/lexer.c  126
1 file changed, 56 insertions(+), 70 deletions(-)
diff --git a/src/lexer.c b/src/lexer.c
index 9659bb4..d71fd2f 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -11,27 +11,27 @@
// saves a token with no data
// returns the index of the saved token; < 0 on fail
-static int token_add(lexer_t lexer, enum token_enum type);
+static int token_add(toklist_t *tokens, enum token_enum type);
-// saves a token with the current identifier (lexer->iden)
+// saves a token with the given identifier (e.g. lexer->iden); clears *iden_sz
// returns 0 on success
-static int token_add_iden(lexer_t lexer);
+static int token_add_iden(toklist_t *tokens, char *iden, size_t *iden_sz);
// used for tokens that separate things
// if type is TOKEN_TOKENS, then no empty token will be saved
// returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden)
-static int on_generic_separator(lexer_t lexer, enum token_enum type);
-static int on_double_quote(lexer_t lexer);
+static int on_generic_separator(lexer_t lexer, toklist_t *tokens, enum token_enum type);
+static int on_double_quote(lexer_t lexer, toklist_t *tokens);
#define EQ(ch) ch ==
-#define SEPARATOR_CALLBACK_TBL(X, lexer) \
+#define SEPARATOR_CALLBACK_TBL(X, ...) \
/* X(test, what to execute if the test succeeds) */ \
- X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \
- X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \
- X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \
- X(EQ('"'), on_double_quote(lexer)) \
- X(isspace, on_generic_separator(lexer, TOKEN_TOKENS))
+ X(EQ('('), on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_OPEN); lexer->scope_depth++) \
+ X(EQ(')'), on_generic_separator(__VA_ARGS__, TOKEN_PARENTHS_CLOSE); lexer->scope_depth--) \
+ X(EQ('\''), on_generic_separator(__VA_ARGS__, TOKEN_SPECIAL_QUOTE)) \
+ X(EQ('"'), on_double_quote(__VA_ARGS__)) \
+ X(isspace, on_generic_separator(__VA_ARGS__, TOKEN_TOKENS))
#define FN(fn, arg) "%s", fn(arg, buf, buf_sz)
@@ -58,13 +58,13 @@ static int on_double_quote(lexer_t lexer);
} \
} else
-int lexer_tokenize(lexer_t lexer, char *str, size_t len)
-{
+int lexer_tokenize(lexer_t lexer, toklist_t *toklist, char *str, size_t len)
+{
int callback_ret = 0;
for(size_t i = 0; i < len; i++)
{
- SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {}
+ SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer, toklist);
if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated
err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz);
@@ -78,17 +78,9 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)
return 0;
}
-lexer_t lexer_create(size_t tokens_cap)
+lexer_t lexer_create()
{
lexer_t lexer = xmalloc(sizeof(struct lexer));
-
- lexer->tokens_cap = tokens_cap;
- lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token));
-
- for(size_t i = 0; i < tokens_cap; i++) {
- lexer->tokens[i].type = TOKEN_TOKENS;
- }
-
lexer_reset(lexer);
return lexer;
}
@@ -98,29 +90,22 @@ lexer_t lexer_create(size_t tokens_cap)
void lexer_destroy(lexer_t lexer)
{
if(!lexer) return;
-
- if(lexer->tokens) {
- for(size_t i = 0; i < lexer->ntokens; i++)
- {
- struct token *token = &lexer->tokens[i];
-
- switch(lexer->tokens[i].type) {
- MANAGE_TOKEN_TBL(CASE_FREE, token);
- default:
- err("lexer_reset: Unknown token type given");
- break;
- }
- }
- free(lexer->tokens);
- }
-
free(lexer);
}
void lexer_reset(lexer_t lexer)
{
- for(size_t i = 0; i < lexer->tokens_cap; i++) {
- struct token *token = &lexer->tokens[i];
+ memset(lexer->iden, 0, LEXER_IDEN_CAP);
+ lexer->iden_sz = 0;
+
+ lexer->inside_string = false;
+ lexer->scope_depth = 0;
+}
+
+void toklist_reset(toklist_t *toklist)
+{
+ for(size_t i = 0; i < toklist->ntokens; i++) {
+ struct token *token = &toklist->tokens[i];
switch(token->type) {
MANAGE_TOKEN_TBL(CASE_FREE, token);
@@ -132,26 +117,21 @@ void lexer_reset(lexer_t lexer)
token->type = TOKEN_TOKENS;
token->value = NULL;
}
-
- lexer->ntokens = 0;
-
- memset(lexer->iden, 0, LEXER_IDEN_CAP);
- lexer->iden_sz = 0;
-
- lexer->inside_string = 0;
+
+ toklist->ntokens = 0;
}
// print based on the given way to print
#define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break;
-void lexer_print_tokens(lexer_t lexer)
+void toklist_print(toklist_t *toklist)
{
// for the printing (see MANAGE_TOKEN_TBL)
char buf[LEXER_IDEN_CAP];
size_t buf_sz = LEXER_IDEN_CAP;
- for(size_t i = 0; i < lexer->ntokens; i++) {
- struct token *token = &lexer->tokens[i];
+ for(size_t i = 0; i < toklist->ntokens; i++) {
+ struct token *token = &toklist->tokens[i];
switch(token->type) {
MANAGE_TOKEN_TBL(CASE_PRINT, token);
@@ -162,11 +142,17 @@ void lexer_print_tokens(lexer_t lexer)
}
}
+bool lexer_has_finished(lexer_t lexer)
+{
+ if(!lexer->inside_string && lexer->scope_depth == 0) return true;
+ return false;
+}
+
// ---------- Callback Functions ----------- //
-static int on_double_quote(lexer_t lexer)
+static int on_double_quote(lexer_t lexer, toklist_t *toklist)
{
- int ret = on_generic_separator(lexer, TOKEN_TOKENS);
+ int ret = on_generic_separator(lexer, toklist, TOKEN_TOKENS);
if(ret < 0) {
return ret;
} else if(ret == 0) {
@@ -174,7 +160,7 @@ static int on_double_quote(lexer_t lexer)
return 1;
}
- if(token_add_iden(lexer)) {
+ if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) {
err("token_add_iden: failed");
return -1;
}
@@ -183,19 +169,19 @@ static int on_double_quote(lexer_t lexer)
return 0;
}
-static int on_generic_separator(lexer_t lexer, enum token_enum type)
+static int on_generic_separator(lexer_t lexer, toklist_t *toklist, enum token_enum type)
{
if(lexer->inside_string) {
return 1;
}
- if(token_add_iden(lexer)) {
+ if(token_add_iden(toklist, lexer->iden, &lexer->iden_sz)) {
err("token_add_iden: failed");
return -1;
}
if(type != TOKEN_TOKENS) {
- if(token_add(lexer, type) < 0) {
+ if(token_add(toklist, type) < 0) {
err("token_add: failed");
return -1;
}
@@ -206,39 +192,39 @@ static int on_generic_separator(lexer_t lexer, enum token_enum type)
// ---------- Token Functions ----------- //
-static int token_add(lexer_t lexer, enum token_enum type)
+static int token_add(toklist_t *toklist, enum token_enum type)
{
- if(lexer->ntokens >= lexer->tokens_cap) {
- err("tokens_cap of %ld has been reached", lexer->tokens_cap);
+ if(toklist->ntokens >= LEXER_TOK_CAP) {
+		err("LEXER_TOK_CAP of %zu has been reached", toklist->ntokens);
return -1;
}
- lexer->tokens[lexer->ntokens].type = type;
- return lexer->ntokens++;
+ toklist->tokens[toklist->ntokens].type = type;
+ return toklist->ntokens++;
}
-static int token_add_iden(lexer_t lexer)
+static int token_add_iden(toklist_t *toklist, char *iden, size_t *iden_sz)
{
int ret = 1;
- if(!lexer->iden_sz) return 0;
+ if(*iden_sz == 0) return 0;
- int i = token_add(lexer, TOKEN_VALUE);
+ int i = token_add(toklist, TOKEN_VALUE);
if(i < 0) {
err("token_add: failed");
goto exit;
}
- value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret);
+ value_t value = value_create(VALUE_LITERAL, iden, &ret);
if(ret > 0) {
- value = value_create(VALUE_SYMBOL, lexer->iden, &ret);
+ value = value_create(VALUE_SYMBOL, iden, &ret);
} else if(ret < 0) {
err("value_create: failed");
goto exit;
}
- lexer->tokens[i].value = value;
+ toklist->tokens[i].value = value;
exit:
- memset(lexer->iden, 0, lexer->iden_sz);
- lexer->iden_sz = 0;
+ memset(iden, 0, *iden_sz);
+ *iden_sz = 0;
return ret;
}