path: root/src/lexer.c
Diffstat (limited to 'src/lexer.c')
-rw-r--r--  src/lexer.c  334
1 files changed, 121 insertions, 213 deletions
diff --git a/src/lexer.c b/src/lexer.c
index 71eed79..9659bb4 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -3,64 +3,51 @@
#include <ctype.h>
#include <errno.h>
-// TODO: handle escaped quotes
#include "common.h"
#include "lexer.h"
+#include "value.h"
+
+// TODO: handle escaping
// saves a token with no data
// returns the index of the saved token; < 0 on fail
-static int save_empty_token(lexer_t lexer, enum token_enum type);
+static int token_add(lexer_t lexer, enum token_enum type);
-// saves a token with data which is the current identifier (lexer->iden)
+// saves a token with the current identifier (lexer->iden)
// returns 0 on success
-static int save_current_identifier(lexer_t lexer);
+static int token_add_iden(lexer_t lexer);
// used for tokens that separate things
// if type is TOKEN_TOKENS, then no empty token will be saved
// returns 0 on success, < 0 on fail, and > 0 to skip the token (add it in iden)
static int on_generic_separator(lexer_t lexer, enum token_enum type);
-static int on_quote(lexer_t lexer);
-static int on_dot(lexer_t lexer);
-
-// try to convert the identifier (lexer->iden) to a given type
-// returns > 0 on sucess, 0 on fail (iden isnt the given type),
-// and < 0 on error
-static int try_str(lexer_t lexer);
-static int try_int(lexer_t lexer);
-static int try_float(lexer_t lexer);
-static int try_symbol(lexer_t lexer);
-
-#define SEPARATOR_CALLBACK_TBL(X, lexer) \
- X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \
- X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \
- X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \
- X(EQ('.'), on_dot(lexer)) \
- X(EQ('"'), on_quote(lexer)) \
- X(FN(isspace), on_generic_separator(lexer, TOKEN_TOKENS))
-
-// X(token type, what to free, how to print on screen)
-#define TOKEN_TYPES_INFO(X, token) \
- X(TOKEN_PARENTHS_OPEN, NULL, "(") \
- X(TOKEN_PARENTHS_CLOSE, NULL, ")") \
- X(TOKEN_SPECIAL_QUOTE, NULL, "'") \
- X(TOKEN_SPECIAL_DOT, NULL, ".") \
- X(TOKEN_LITERAL_STRING, token->string, "'%s'", token->string) \
- X(TOKEN_LITERAL_NUM_INT, NULL, "'%ld'", token->num_int) \
- X(TOKEN_LITERAL_NUM_FLOAT, NULL, "'%f'", token->num_float) \
- X(TOKEN_SYMBOL, token->symbol, "'%s'", token->symbol)
-
-#define IDENTIFY_IDENTIFIER_LIST(X) \
- X(try_str) \
- X(try_int) \
- X(try_float) \
- X(try_symbol)
+static int on_double_quote(lexer_t lexer);
#define EQ(ch) ch ==
-#define FN(f) f
+
+#define SEPARATOR_CALLBACK_TBL(X, lexer) \
+/* X(test, what to execute if the test succeeds) */ \
+ X(EQ('('), on_generic_separator(lexer, TOKEN_PARENTHS_OPEN)) \
+ X(EQ(')'), on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE)) \
+ X(EQ('\''), on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE)) \
+ X(EQ('"'), on_double_quote(lexer)) \
+ X(isspace, on_generic_separator(lexer, TOKEN_TOKENS))
+
+#define FN(fn, arg) "%s", fn(arg, buf, buf_sz)
+
+#define MANAGE_TOKEN_TBL(X, token) \
+/* X(type, how to free, how to print) */ \
+ X(TOKEN_PARENTHS_OPEN, ;, "(") \
+ X(TOKEN_PARENTHS_CLOSE, ;, ")") \
+ X(TOKEN_SPECIAL_QUOTE, ;, "'") \
+ X(TOKEN_VALUE, value_destroy(token->value), FN(value_string, token->value)) \
+ X(TOKEN_TOKENS, ;, "") \
+
+// ---------- Exported Functions ---------- //
// makes an if-else chain to test the character
-// agains the seperator callback table
-#define CHECK_SEPERATOR_AND_CALLBACK(test_func, callback) \
+// against the separator callback table
+#define CHECK_SEPARATOR_AND_CALLBACK(test_func, callback) \
if(test_func(str[i])) { \
callback_ret = callback; \
if(callback_ret == 0) { \
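For readers unfamiliar with the X-macro pattern: CHECK_SEPARATOR_AND_CALLBACK is instantiated once per entry of SEPARATOR_CALLBACK_TBL, building an if/else chain over str[i]. The remainder of the macro body is outside this hunk, but it presumably ends with an else so that consecutive entries chain and the trailing {} in lexer_tokenize closes the chain. A rough sketch of the expansion (illustration only, not part of the commit; branch bodies elided):

    if('(' == (str[i]))       { callback_ret = on_generic_separator(lexer, TOKEN_PARENTHS_OPEN);  /* ... */ }
    else if(')' == (str[i]))  { callback_ret = on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE); /* ... */ }
    else if('\'' == (str[i])) { callback_ret = on_generic_separator(lexer, TOKEN_SPECIAL_QUOTE);  /* ... */ }
    else if('"' == (str[i]))  { callback_ret = on_double_quote(lexer);                            /* ... */ }
    else if(isspace(str[i]))  { callback_ret = on_generic_separator(lexer, TOKEN_TOKENS);         /* ... */ }
    else {}  /* the empty block written after the macro in lexer_tokenize */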
@@ -77,7 +64,7 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)
for(size_t i = 0; i < len; i++)
{
- SEPARATOR_CALLBACK_TBL(CHECK_SEPERATOR_AND_CALLBACK, lexer) {}
+ SEPARATOR_CALLBACK_TBL(CHECK_SEPARATOR_AND_CALLBACK, lexer) {}
if(lexer->iden_sz >= LEXER_IDEN_CAP - 1) { // -1 to be null-terminated
err("LEXER_IDEN_CAP of %ld reached", lexer->iden_sz);
@@ -93,49 +80,35 @@ int lexer_tokenize(lexer_t lexer, char *str, size_t len)
lexer_t lexer_create(size_t tokens_cap)
{
- lexer_t lexer = malloc(sizeof(struct lexer));
- if(!lexer) {
- err("malloc: %s", strerror(errno));
- goto fail;
- }
+ lexer_t lexer = xmalloc(sizeof(struct lexer));
- lexer->tokens = calloc(tokens_cap, sizeof(struct token));
- if(!lexer->tokens) {
- err("malloc %s", strerror(errno));
- goto fail;
- }
+ lexer->tokens_cap = tokens_cap;
+ lexer->tokens = xcalloc(lexer->tokens_cap, sizeof(struct token));
for(size_t i = 0; i < tokens_cap; i++) {
- lexer->tokens[i].symbol = NULL;
+ lexer->tokens[i].type = TOKEN_TOKENS;
}
- lexer->tokens_cap = tokens_cap;
- lexer->ntokens = 0;
-
- memset(lexer->iden, 0, LEXER_IDEN_CAP);
- lexer->iden_sz = 0;
-
- lexer->inside_string = 0;
-
+ lexer_reset(lexer);
return lexer;
-fail:
- lexer_destroy(lexer);
- return NULL;
}
-#define CASE_FREE_TOKEN(type, data, ...) \
- case type: if(data != NULL) { free(data); } break;
+#define CASE_FREE(type, free_func, ...) case type: free_func; break;
void lexer_destroy(lexer_t lexer)
{
if(!lexer) return;
if(lexer->tokens) {
- for(size_t i = 0; i < lexer->ntokens; i++) {
+ for(size_t i = 0; i < lexer->ntokens; i++)
+ {
struct token *token = &lexer->tokens[i];
- switch(token->type) {
- TOKEN_TYPES_INFO(CASE_FREE_TOKEN, token)
- default: break;
+
+ switch(lexer->tokens[i].type) {
+ MANAGE_TOKEN_TBL(CASE_FREE, token);
+ default:
+ err("lexer_destroy: Unknown token type given");
+ break;
}
}
free(lexer->tokens);
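Only TOKEN_VALUE owns heap memory; the other entries in MANAGE_TOKEN_TBL use a bare ; as their free expression. Inside the switch above, MANAGE_TOKEN_TBL(CASE_FREE, token) therefore expands roughly to (illustration only, not part of the commit; the doubled semicolons left by the ; placeholder are harmless empty statements):

    case TOKEN_PARENTHS_OPEN:  ;; break;
    case TOKEN_PARENTHS_CLOSE: ;; break;
    case TOKEN_SPECIAL_QUOTE:  ;; break;
    case TOKEN_VALUE:          value_destroy(token->value); break;
    case TOKEN_TOKENS:         ;; break;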
@@ -144,18 +117,65 @@ void lexer_destroy(lexer_t lexer)
free(lexer);
}
-// ------------------------------------------------- //
+void lexer_reset(lexer_t lexer)
+{
+ for(size_t i = 0; i < lexer->tokens_cap; i++) {
+ struct token *token = &lexer->tokens[i];
+
+ switch(token->type) {
+ MANAGE_TOKEN_TBL(CASE_FREE, token);
+ default:
+ err("lexer_reset: Unknown token type given");
+ break;
+ }
+
+ token->type = TOKEN_TOKENS;
+ token->value = NULL;
+ }
+
+ lexer->ntokens = 0;
+
+ memset(lexer->iden, 0, LEXER_IDEN_CAP);
+ lexer->iden_sz = 0;
+
+ lexer->inside_string = 0;
+}
+
+// print a token using the format given in MANAGE_TOKEN_TBL
+#define CASE_PRINT(type, free_func, ...) case type: info("\n\t" #type "\n\t" __VA_ARGS__); break;
+
+void lexer_print_tokens(lexer_t lexer)
+{
+ // for the printing (see MANAGE_TOKEN_TBL)
+ char buf[LEXER_IDEN_CAP];
+ size_t buf_sz = LEXER_IDEN_CAP;
+
+ for(size_t i = 0; i < lexer->ntokens; i++) {
+ struct token *token = &lexer->tokens[i];
+
+ switch(token->type) {
+ MANAGE_TOKEN_TBL(CASE_PRINT, token);
+ default:
+ err("lexer_print_tokens: Unknown token given");
+ return;
+ }
+ }
+}
+
+// ---------- Callback Functions ----------- //
-static int on_quote(lexer_t lexer)
+static int on_double_quote(lexer_t lexer)
{
int ret = on_generic_separator(lexer, TOKEN_TOKENS);
- if(ret <= 0) { // it either failed or worked, both not inside a string
- lexer->inside_string = 1;
+ if(ret < 0) {
return ret;
+ } else if(ret == 0) {
+ lexer->inside_string = 1;
+ return 1;
}
- if(save_current_identifier(lexer)) {
- err("save_current_identifier: failed");
+ if(token_add_iden(lexer)) {
+ err("token_add_iden: failed");
return -1;
}
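In lexer_print_tokens above, FN splices a call to value_string into the argument list that CASE_PRINT passes to info(); buf and buf_sz are the locals declared at the top of the function. For the TOKEN_VALUE entry the expansion comes out roughly as (illustration only, not part of the commit):

    case TOKEN_VALUE:
        info("\n\t" "TOKEN_VALUE" "\n\t" "%s", value_string(token->value, buf, buf_sz));
        break;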
@@ -163,26 +183,20 @@ static int on_quote(lexer_t lexer)
return 0;
}
-static int on_dot(lexer_t lexer)
-{
- if(lexer->iden_sz != 0) return 1;
- return on_generic_separator(lexer, TOKEN_SPECIAL_DOT);
-}
-
static int on_generic_separator(lexer_t lexer, enum token_enum type)
{
if(lexer->inside_string) {
return 1;
}
- if(save_current_identifier(lexer)) {
- err("save_current_identifier: failed");
+ if(token_add_iden(lexer)) {
+ err("token_add_iden: failed");
return -1;
}
if(type != TOKEN_TOKENS) {
- if(save_empty_token(lexer, type) < 0) {
- err("save_empty_token: failed");
+ if(token_add(lexer, type) < 0) {
+ err("token_add: failed");
return -1;
}
}
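Taken together, the callbacks follow the convention documented above: 0 means the character was consumed as a separator, > 0 means it should be treated as ordinary text and appended to lexer->iden, and < 0 is an error. A rough trace for the input (foo), assuming lexer_tokenize appends non-separator characters to iden (sketch, not part of the commit):

    /* '('          -> on_generic_separator(lexer, TOKEN_PARENTHS_OPEN):
     *                 iden is empty, so token_add_iden() is a no-op and a
     *                 TOKEN_PARENTHS_OPEN token is added
     * 'f','o','o'  -> no separator matches; the characters accumulate in lexer->iden
     * ')'          -> on_generic_separator(lexer, TOKEN_PARENTHS_CLOSE):
     *                 token_add_iden() turns "foo" into a TOKEN_VALUE,
     *                 then a TOKEN_PARENTHS_CLOSE token is added */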
@@ -190,147 +204,41 @@ static int on_generic_separator(lexer_t lexer, enum token_enum type)
return 0;
}
-static int save_empty_token(lexer_t lexer, enum token_enum type)
+// ---------- Token Functions ----------- //
+
+static int token_add(lexer_t lexer, enum token_enum type)
{
if(lexer->ntokens >= lexer->tokens_cap) {
err("tokens_cap of %ld has been reached", lexer->tokens_cap);
return -1;
}
- lexer->tokens[lexer->ntokens++].type = type;
- return lexer->ntokens - 1;
+ lexer->tokens[lexer->ntokens].type = type;
+ return lexer->ntokens++;
}
-#define CHECK_IDEN(func) \
- if((ret = func(lexer))) { \
- if(ret < 0) { \
- err(#func ": failed"); \
- goto exit; \
- } \
- } else
-
-static int save_current_identifier(lexer_t lexer)
+static int token_add_iden(lexer_t lexer)
{
int ret = 1;
+ if(!lexer->iden_sz) return 0;
- if(lexer->iden_sz != 0) {
- IDENTIFY_IDENTIFIER_LIST(CHECK_IDEN) {}
+ int i = token_add(lexer, TOKEN_VALUE);
+ if(i < 0) {
+ err("token_add: failed");
+ goto exit;
}
- ret = 0;
+ value_t value = value_create(VALUE_LITERAL, lexer->iden, &ret);
+ if(ret > 0) {
+ value = value_create(VALUE_SYMBOL, lexer->iden, &ret);
+ } else if(ret < 0) {
+ err("value_create: failed");
+ goto exit;
+ }
+
+ lexer->tokens[i].value = value;
exit:
memset(lexer->iden, 0, lexer->iden_sz);
lexer->iden_sz = 0;
return ret;
}
-
-
-// ------------------------------------------------- //
-
-static int try_str(lexer_t lexer)
-{
- if(!lexer->inside_string) return 0;
-
- int i = save_empty_token(lexer, TOKEN_LITERAL_STRING);
- if(i < 0) {
- err("save_empty_token: failed");
- return -1;
- }
-
- lexer->tokens[i].string = malloc(lexer->iden_sz+1);
- if(!lexer->tokens[i].string) {
- err("malloc: %s", strerror(errno));
- return -1;
- }
-
- memcpy(lexer->tokens[i].string, lexer->iden, lexer->iden_sz+1);
- return 1;
-}
-
-static int try_int(lexer_t lexer)
-{
- errno = ERANGE + 1; // set errno to not ERANGE
-
- char *endptr;
- long num = strtol(lexer->iden, &endptr, 10);
-
- if(*endptr != '\0') { // the whole string isn't a number
- return 0;
- }
-
- if(errno == ERANGE) {
- warn("Given integer literal %s is outside the possible range", lexer->iden);
- }
-
- int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_INT);
- if(i < 0) {
- err("save_empty_token: failed");
- return -1;
- }
-
- lexer->tokens[i].num_int = num;
- return 1;
-}
-
-static int try_float(lexer_t lexer)
-{
- errno = ERANGE + 1; // set errno to not ERANGE
-
- char *endptr;
- float num = strtof(lexer->iden, &endptr);
-
- if(*endptr != '\0') { // the whole string isn't a number
- return 0;
- }
-
- if(errno == ERANGE) {
- warn("Given float literal %s is outside the possible range", lexer->iden);
- }
-
- int i = save_empty_token(lexer, TOKEN_LITERAL_NUM_FLOAT);
- if(i < 0) {
- err("save_empty_token: failed");
- return -1;
- }
-
- lexer->tokens[i].num_float = num;
- return 1;
-}
-
-static int try_symbol(lexer_t lexer)
-{
- int i = save_empty_token(lexer, TOKEN_SYMBOL);
- if(i < 0) {
- err("save_empty_token: failed");
- return -1;
- }
-
- lexer->tokens[i].symbol = malloc(lexer->iden_sz+1);
- if(!lexer->tokens[i].symbol) {
- err("malloc: %s", strerror(errno));
- return -1;
- }
-
- memcpy(lexer->tokens[i].symbol, lexer->iden, lexer->iden_sz+1);
- return 1;
-}
-
-// ------------------------------------------------- //
-
-#ifdef DEBUG
-#define CASE_PRINT(type, data, ...) case type: info("\t" __VA_ARGS__); break;
-
-void lexer_print_tokens(lexer_t lexer)
-{
- for(size_t i = 0; i < lexer->ntokens; i++) {
- struct token *token = &lexer->tokens[i];
-
- info("Token %zu: %d", i, token->type);
-
- switch(token->type) {
- TOKEN_TYPES_INFO(CASE_PRINT, token);
- default: break;
- }
- }
-}
-#endif
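For context, a minimal driver sketch using the exported functions touched by this commit (not from the repository: lex_line is a hypothetical helper, the capacity of 64 is arbitrary, and a non-zero return from lexer_tokenize is assumed to signal failure):

    #include <string.h>
    #include "lexer.h"

    static int lex_line(char *line)
    {
        lexer_t lexer = lexer_create(64);            /* xmalloc/xcalloc are assumed to abort on OOM */

        if(lexer_tokenize(lexer, line, strlen(line))) {
            lexer_destroy(lexer);
            return -1;
        }

        lexer_print_tokens(lexer);                   /* no longer compiled out under DEBUG */
        lexer_reset(lexer);                          /* the lexer could be reused for another line here */

        lexer_destroy(lexer);
        return 0;
    }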