diff options
author | kartofen <mladenovnasko0@gmail.com> | 2024-08-23 19:55:13 +0300 |
---|---|---|
committer | kartofen <mladenovnasko0@gmail.com> | 2024-08-23 19:55:13 +0300 |
commit | 68a62ad356603d64d537e231f06b5d9445e79abe (patch) | |
tree | 3682d6b607fed96eafaf7e218d85a03fbc71d914 /src/lexer.c |
usefull commit message
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- | src/lexer.c | 174 |
1 files changed, 174 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..407be25 --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,174 @@ +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "common.h" +#include "lexer.h" + +#define CH(lexer) (lexer)->str[(lexer)->str_idx] +#define TOKEN_SEPARATOR_TABLE(X, l) \ + X(('(' == CH(l)), on_separator(l, TOKEN_LP)) \ + X((')' == CH(l)), on_separator(l, TOKEN_RP)) \ + X(isspace(CH(l)), on_separator(l, TOKEN_NONE)) \ + X(TABLE_ELSE, acc_add_char(l, CH(l))) + +#define SET_TYPE(lexer, ttype) (lexer)->token.type = (ttype) +#define SET_VALUE(lexer, member, tvalue) (lexer)->token.value.member = (tvalue) +#define TOKEN_VALUE_TABLE(X, l) \ + X(is_int(l->acc, &l->token.value.num), \ + SET_TYPE(l, TOKEN_INT)) \ + X(is_special(l->acc, &l->token.type), \ + ;) \ + X(TABLE_ELSE, \ + SET_TYPE(l, TOKEN_ID); \ + SET_VALUE(l, id, l->acc)) + +#define TOKEN_SPECIALS_TABLE(X) \ + X(TOKEN_DOT, ".") \ + X(TOKEN_QUOTE, "'") \ + X(TOKEN_UNQUOTE, ",") \ + X(TOKEN_LAMBDA, "lambda") \ + X(TOKEN_DEFINE, "define") \ + X(TOKEN_QUOTE_FORM, "quote") + +#define TOKEN_VALUE_STRING_TABLE(X, tvalue) \ + X(TOKEN_LP, "(") \ + X(TOKEN_RP, ")") \ + X(TOKEN_ID, "%s", tvalue.id) \ + X(TOKEN_STR, "%s", tvalue.str) \ + X(TOKEN_INT, "%d", tvalue.num) \ + TOKEN_SPECIALS_TABLE(X) \ + X(TOKEN_NONE, "(none)") + +#define TABLE_ELSE 1 +#define TABLE_END {} + +#define CALLBACK(test, callback) \ + if(test) { \ + ERR_NZ(callback, r, return r); \ + } else +#define CALLBACK_BLIND(test, callback) \ + if(test) { \ + callback; \ + } else + +#define LEN(arr) (sizeof(arr)/sizeof(*(arr))) + +const char * const token_type_string[] = { + TOKEN_TYPES(TO_STRING) +}; + +static int on_separator(lexer_t lexer, enum token_type type); +static int acc_add_char(lexer_t lexer, char ch); +static int acc_empty(lexer_t lexer); + +static int is_int(char *str, int *num); +static int is_special(char *str, enum token_type *type); + +lexer_t lexer_create(FILE *fp) +{ + lexer_t lexer = malloc(sizeof(*lexer)); + lexer->fp = fp; + lexer->line = 0; + lexer->str_idx = 0; + lexer->acc_idx = 0; + memset(lexer->acc, 0, sizeof(lexer->acc)); + memset(lexer->str, 0, sizeof(lexer->str)); + + lexer->token.type = TOKEN_NONE; + + return lexer; +} + +void lexer_destroy(lexer_t lexer) +{ + if(!lexer) return; + free(lexer); +} + +int lexer_token_next(lexer_t lexer, struct token *token) +{ + if(lexer->acc_idx == 0 && lexer->acc[0] != '\0') { + memset(lexer->acc, 0, sizeof(lexer->acc)); + } + + while(lexer->token.type == TOKEN_NONE) + { + if(lexer->str[lexer->str_idx] == '\0') { + ERR_Z(fgets(lexer->str, LEN(lexer->str), lexer->fp), return -EIO); + lexer->str_idx = 0; + lexer->line++; + } + + TOKEN_SEPARATOR_TABLE(CALLBACK, lexer) TABLE_END; + } + + *token = lexer->token; + lexer->token.type = TOKEN_NONE; + return 0; +} + +int token_value_string(struct token *token, size_t buf_sz, char *buf) +{ +#define AS_STRING(ttype, ...) \ + case ttype: return snprintf(buf, buf_sz, __VA_ARGS__); + + switch(token->type) { + TOKEN_VALUE_STRING_TABLE(AS_STRING, token->value); + } + + return 0; +} + +static int on_separator(lexer_t lexer, enum token_type type) +{ + if(lexer->acc_idx > 0) return acc_empty(lexer); + + lexer->token.type = type; + lexer->str_idx++; + return 0; +} + +static int acc_add_char(lexer_t lexer, char ch) +{ + if(lexer->acc_idx >= LEN(lexer->acc) - 1) { + return -ENAMETOOLONG; + } + + lexer->acc[lexer->acc_idx++] = ch; + lexer->str_idx++; + + return 0; +} + +static int acc_empty(lexer_t lexer) +{ + TOKEN_VALUE_TABLE(CALLBACK_BLIND, lexer) TABLE_END; + lexer->acc_idx = 0; + + return 0; +} + +static int is_int(char *str, int *num) +{ + char *endptr = str; + long _num = strtol(str, &endptr, 10); + + if(*endptr != '\0') return 0; + + *num = (int)_num; + return 1; +} + +static int is_special(char *str, enum token_type *type) +{ +#define IS_SPECIAL(ttype, sstr) \ + if(strcmp(sstr, str) == 0) { \ + *type = ttype; \ + return 1; \ + } else + + TOKEN_SPECIALS_TABLE(IS_SPECIAL) TABLE_END; + + return 0; +} |