aboutsummaryrefslogtreecommitdiff
path: root/src/lexer.c
diff options
context:
space:
mode:
author kartofen <mladenovnasko0@gmail.com> 2024-08-23 19:55:13 +0300
committer kartofen <mladenovnasko0@gmail.com> 2024-08-23 19:55:13 +0300
commit 68a62ad356603d64d537e231f06b5d9445e79abe (patch)
tree 3682d6b607fed96eafaf7e218d85a03fbc71d914 /src/lexer.c
usefull commit message
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- src/lexer.c 174
1 files changed, 174 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..407be25
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,174 @@
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common.h"
+#include "lexer.h"
+
+/* Character at the lexer's current read position in its line buffer. */
+#define CH(lexer) ((lexer)->str[(lexer)->str_idx])
+/*
+ * Separator dispatch table. Each X(test, action) row pairs a test on the
+ * current character with the action to run when the test holds. When
+ * expanded with CALLBACK the rows form an if/else chain, so they are tried
+ * top to bottom and the TABLE_ELSE row catches every other character by
+ * appending it to the accumulator.
+ * The character is converted to unsigned char before isspace() because
+ * passing a negative char value to a <ctype.h> function is undefined.
+ */
+#define TOKEN_SEPARATOR_TABLE(X, l) \
+    X(('(' == CH(l)), on_separator(l, TOKEN_LP)) \
+    X((')' == CH(l)), on_separator(l, TOKEN_RP)) \
+    X(isspace((unsigned char)CH(l)), on_separator(l, TOKEN_NONE)) \
+    X(TABLE_ELSE, acc_add_char(l, CH(l)))
+
+/* Set the type of the token the lexer is currently building. */
+#define SET_TYPE(lexer, ttype) ((lexer)->token.type = (ttype))
+/* Set one member (e.g. id) of the value of the token being built. */
+#define SET_VALUE(lexer, member, tvalue) ((lexer)->token.value.member = (tvalue))
+/*
+ * Classification table for accumulated text. Each X(test, action) row is
+ * tried in order when expanded with CALLBACK_BLIND:
+ *   - is_int() stores the parsed number itself, so only the type is set;
+ *   - is_special() stores the token type itself, so the action is empty;
+ *   - anything else becomes TOKEN_ID whose id is the accumulator buffer
+ *     itself (no copy is made).
+ */
+#define TOKEN_VALUE_TABLE(X, l) \
+    X(is_int((l)->acc, &(l)->token.value.num), \
+      SET_TYPE(l, TOKEN_INT)) \
+    X(is_special((l)->acc, &(l)->token.type), \
+      ;) \
+    X(TABLE_ELSE, \
+      SET_TYPE(l, TOKEN_ID); \
+      SET_VALUE(l, id, (l)->acc))
+
+/*
+ * Reserved spellings: X(token type, exact text). Used by is_special() to
+ * map accumulated text to a token type and, through
+ * TOKEN_VALUE_STRING_TABLE, to print those tokens back as text.
+ */
+#define TOKEN_SPECIALS_TABLE(X) \
+ X(TOKEN_DOT, ".") \
+ X(TOKEN_QUOTE, "'") \
+ X(TOKEN_UNQUOTE, ",") \
+ X(TOKEN_LAMBDA, "lambda") \
+ X(TOKEN_DEFINE, "define") \
+ X(TOKEN_QUOTE_FORM, "quote")
+
+/*
+ * Printing table: X(token type, printf-style format [, argument]).
+ * tvalue is the token's value; rows that carry data read the matching
+ * member from it. The rows of TOKEN_SPECIALS_TABLE are included as-is, so
+ * their spelling doubles as the format string.
+ * NOTE(review): nothing in this file produces TOKEN_STR; the row is only
+ * used for printing.
+ */
+#define TOKEN_VALUE_STRING_TABLE(X, tvalue) \
+ X(TOKEN_LP, "(") \
+ X(TOKEN_RP, ")") \
+ X(TOKEN_ID, "%s", tvalue.id) \
+ X(TOKEN_STR, "%s", tvalue.str) \
+ X(TOKEN_INT, "%d", tvalue.num) \
+ TOKEN_SPECIALS_TABLE(X) \
+ X(TOKEN_NONE, "(none)")
+
+/* Always-true test for the last, catch-all row of a dispatch table. */
+#define TABLE_ELSE 1
+/* Empty block that closes the trailing `else` left by the last table row. */
+#define TABLE_END {}
+
+/*
+ * Row expanders for the dispatch tables. Both end in a dangling `else`, so
+ * consecutive rows chain into if / else if / ... and the chain must be
+ * closed with TABLE_END.
+ *
+ * CALLBACK runs the action through ERR_NZ, which is defined outside this
+ * file; presumably it stores the result in `r` and executes `return r`
+ * when the result is non-zero (confirm in common.h).
+ */
+#define CALLBACK(test, callback) \
+ if(test) { \
+ ERR_NZ(callback, r, return r); \
+ } else
+/* CALLBACK_BLIND runs the action as a plain statement; no result is checked. */
+#define CALLBACK_BLIND(test, callback) \
+ if(test) { \
+ callback; \
+ } else
+
+/* Element count of a true array; gives a wrong result when passed a pointer. */
+#define LEN(arr) (sizeof(arr)/sizeof(*(arr)))
+
+/*
+ * Table of token type names, generated from TOKEN_TYPES and TO_STRING,
+ * both defined outside this file; presumably indexed by enum token_type
+ * (confirm against lexer.h).
+ */
+const char * const token_type_string[] = {
+ TOKEN_TYPES(TO_STRING)
+};
+
+/* Accumulator and separator handling; each returns 0 or a negative errno value. */
+static int on_separator(lexer_t lexer, enum token_type type);
+static int acc_add_char(lexer_t lexer, char ch);
+static int acc_empty(lexer_t lexer);
+
+/* Classifiers for accumulated text; each returns 1 on a match, 0 otherwise. */
+static int is_int(char *str, int *num);
+static int is_special(char *str, enum token_type *type);
+
+/*
+ * Allocate a lexer that reads from fp.
+ *
+ * The line counter, both buffer indices and both buffers start zeroed and
+ * the pending token starts as TOKEN_NONE. The stream is only stored, not
+ * opened or closed here.
+ *
+ * Returns the new lexer, or NULL when the allocation fails. Release it
+ * with lexer_destroy().
+ */
+lexer_t lexer_create(FILE *fp)
+{
+    lexer_t lexer = malloc(sizeof(*lexer));
+    if(!lexer) return NULL;
+
+    lexer->fp = fp;
+    lexer->line = 0;
+    lexer->str_idx = 0;
+    lexer->acc_idx = 0;
+    memset(lexer->acc, 0, sizeof(lexer->acc));
+    memset(lexer->str, 0, sizeof(lexer->str));
+
+    lexer->token.type = TOKEN_NONE;
+
+    return lexer;
+}
+
+/*
+ * Release a lexer obtained from lexer_create(). Passing NULL is allowed
+ * and does nothing. The stream the lexer reads from is not closed.
+ */
+void lexer_destroy(lexer_t lexer)
+{
+    /* free() ignores a null pointer, so no separate guard is needed */
+    free(lexer);
+}
+
+/*
+ * Produce the next token from the lexer's stream and copy it into *token.
+ *
+ * Returns 0 once a token has been produced. A failed line read returns
+ * through ERR_Z with -EIO; a failing table action returns through
+ * CALLBACK's ERR_NZ (both macros are defined outside this file).
+ *
+ * For TOKEN_ID the token's id points at the lexer's own accumulator
+ * buffer, which is wiped at the start of the next call, so the caller must
+ * use or copy it before calling again.
+ */
+int lexer_token_next(lexer_t lexer, struct token *token)
+{
+ /* acc_empty() resets acc_idx but leaves the text in place so the token
+ * handed out last time stays readable; clear that leftover text now. */
+ if(lexer->acc_idx == 0 && lexer->acc[0] != '\0') {
+ memset(lexer->acc, 0, sizeof(lexer->acc));
+ }
+
+ /* Whitespace reports TOKEN_NONE, so the loop simply keeps going over it. */
+ while(lexer->token.type == TOKEN_NONE)
+ {
+ /* Current line exhausted (or nothing read yet): fetch the next line. */
+ if(lexer->str[lexer->str_idx] == '\0') {
+ /* NOTE(review): presumably ERR_Z runs `return -EIO` when fgets returns
+ * NULL, which includes end of file; text still sitting in the
+ * accumulator at that point looks like it is dropped - confirm. */
+ ERR_Z(fgets(lexer->str, LEN(lexer->str), lexer->fp), return -EIO);
+ lexer->str_idx = 0;
+ lexer->line++;
+ }
+
+ /* Dispatch on the current character: '(' / ')' / whitespace go to
+ * on_separator(), anything else is appended to the accumulator. */
+ TOKEN_SEPARATOR_TABLE(CALLBACK, lexer) TABLE_END;
+ }
+
+ /* Hand the token out and mark the lexer's slot empty for the next call. */
+ *token = lexer->token;
+ lexer->token.type = TOKEN_NONE;
+ return 0;
+}
+
+/*
+ * Format the textual form of token into buf, writing at most buf_sz bytes
+ * as snprintf() does.
+ *
+ * Returns snprintf()'s result for token types listed in
+ * TOKEN_VALUE_STRING_TABLE, or 0 with buf left untouched for any other
+ * type.
+ */
+int token_value_string(struct token *token, size_t buf_sz, char *buf)
+{
+    int written = 0;
+
+/* One switch case per table row: format the row's text into buf. */
+#define AS_STRING(ttype, ...) \
+    case ttype: \
+        written = snprintf(buf, buf_sz, __VA_ARGS__); \
+        break;
+
+    switch(token->type) {
+    TOKEN_VALUE_STRING_TABLE(AS_STRING, token->value)
+    default:
+        break;
+    }
+
+#undef AS_STRING
+
+    return written;
+}
+
+/*
+ * Handle a separator character at the current read position.
+ *
+ * If text is waiting in the accumulator, it is turned into a token first
+ * and the separator is left unconsumed, so it is seen again on the next
+ * pass. Otherwise the separator itself becomes the pending token of the
+ * given type (TOKEN_NONE for whitespace) and is consumed.
+ *
+ * Returns acc_empty()'s result in the first case, 0 in the second.
+ */
+static int on_separator(lexer_t lexer, enum token_type type)
+{
+    const int has_pending_text = lexer->acc_idx > 0;
+
+    if(!has_pending_text) {
+        lexer->token.type = type;
+        lexer->str_idx += 1;
+        return 0;
+    }
+
+    return acc_empty(lexer);
+}
+
+/*
+ * Append ch to the accumulator and step past it in the line buffer.
+ *
+ * The last slot of the accumulator is never written, so the zero-filled
+ * buffer always stays NUL-terminated.
+ *
+ * Returns 0 on success, or -ENAMETOOLONG when the accumulator is full; in
+ * that case nothing is modified.
+ */
+static int acc_add_char(lexer_t lexer, char ch)
+{
+    if(lexer->acc_idx < LEN(lexer->acc) - 1) {
+        lexer->acc[lexer->acc_idx] = ch;
+        lexer->acc_idx += 1;
+        lexer->str_idx += 1;
+        return 0;
+    }
+
+    return -ENAMETOOLONG;
+}
+
+/*
+ * Turn the accumulated text into the lexer's pending token.
+ *
+ * The text is classified through TOKEN_VALUE_TABLE: integer first, then
+ * reserved spelling, otherwise identifier. Only acc_idx is reset; the text
+ * itself stays in the buffer because an identifier token points at it.
+ *
+ * Always returns 0.
+ */
+static int acc_empty(lexer_t lexer)
+{
+ /* Expands to an if / else if chain over the table rows. */
+ TOKEN_VALUE_TABLE(CALLBACK_BLIND, lexer) TABLE_END;
+ lexer->acc_idx = 0;
+
+ return 0;
+}
+
+/*
+ * Check whether the whole of str is a base-10 integer that fits in an int.
+ *
+ * str: NUL-terminated text to examine (not modified).
+ * num: receives the parsed value on success; left untouched otherwise.
+ *
+ * Returns 1 when str was fully consumed as a number, 0 otherwise. Text
+ * with no digits at all (such as "" or "-"), text with trailing
+ * characters, and values outside the range of int are all rejected
+ * instead of being reported as 0 or silently truncated.
+ */
+static int is_int(char *str, int *num)
+{
+    char *endptr = str;
+    long value;
+
+    /* strtol reports overflow only through errno, so clear it first */
+    errno = 0;
+    value = strtol(str, &endptr, 10);
+
+    /* nothing was converted, or something follows the number */
+    if(endptr == str || *endptr != '\0') return 0;
+    /* too large for long, or fits in long but not in int */
+    if(errno == ERANGE || value < INT_MIN || value > INT_MAX) return 0;
+
+    *num = (int)value;
+    return 1;
+}
+
+/*
+ * Check whether str is exactly one of the reserved spellings listed in
+ * TOKEN_SPECIALS_TABLE.
+ *
+ * On a match the corresponding token type is stored in *type and 1 is
+ * returned; otherwise *type is left untouched and 0 is returned.
+ */
+static int is_special(char *str, enum token_type *type)
+{
+/* One comparison per table row; the first exact match wins. */
+#define IS_SPECIAL(ttype, sstr) \
+    if(strcmp(sstr, str) == 0) { \
+        *type = ttype; \
+        return 1; \
+    }
+
+    TOKEN_SPECIALS_TABLE(IS_SPECIAL)
+
+#undef IS_SPECIAL
+
+    return 0;
+}