#include #include #include #include "common.h" #include "lexer.h" #define CH(lexer) (lexer)->str[(lexer)->str_idx] #define TOKEN_SEPARATOR_TABLE(X, l) \ X((';' == CH(l)), lexer_clear_line(l)) \ X(('(' == CH(l)), on_separator(l, TOKEN_LP)) \ X((')' == CH(l)), on_separator(l, TOKEN_RP)) \ X(('\''== CH(l)), on_separator(l, TOKEN_QUOTE)) \ X((',' == CH(l)), on_separator(l, TOKEN_UNQUOTE)) \ X(isspace(CH(l)), on_separator(l, TOKEN_NONE)) \ X(TABLE_ELSE, acc_add_char(l, CH(l))) #define SET_TYPE(lexer, ttype) (lexer)->token.type = (ttype) #define SET_VALUE(lexer, member, tvalue) (lexer)->token.value.member = (tvalue) #define TOKEN_VALUE_TABLE(X, l) \ X(is_int(l->acc, &l->token.value.num), \ SET_TYPE(l, TOKEN_INT)) \ X(is_special(l->acc, &l->token.type), \ ;) \ X(TABLE_ELSE, \ SET_TYPE(l, TOKEN_ID); \ SET_VALUE(l, id, l->acc)) #define TOKEN_SPECIALS_TABLE(X) \ X(TOKEN_DOT, ".") \ X(TOKEN_LAMBDA, "lambda") \ X(TOKEN_DEFINE, "define") \ X(TOKEN_QUOTE_FORM, "quote") \ X(TOKEN_IF, "if") #define TOKEN_VALUE_STRING_TABLE(X, tvalue) \ X(TOKEN_LP, "(") \ X(TOKEN_RP, ")") \ X(TOKEN_QUOTE, "'") \ X(TOKEN_UNQUOTE, ",") \ X(TOKEN_ID, "%s", tvalue.id) \ X(TOKEN_STR, "%s", tvalue.str) \ X(TOKEN_INT, "%d", tvalue.num) \ TOKEN_SPECIALS_TABLE(X) \ X(TOKEN_NONE, "(none)") #define TABLE_ELSE 1 #define TABLE_END {} #define CALLBACK(test, callback) \ if(test) { \ ERR_NZ(callback, r, return r); \ } else #define CALLBACK_BLIND(test, callback) \ if(test) { \ callback; \ } else #define LEN(arr) (sizeof(arr)/sizeof(*(arr))) const char * const token_type_string[] = { TOKEN_TYPES(TO_STRING) }; static int on_separator(lexer_t lexer, enum token_type type); static int acc_add_char(lexer_t lexer, char ch); static int acc_empty(lexer_t lexer); static int is_int(char *str, int *num); static int is_special(char *str, enum token_type *type); lexer_t lexer_create(FILE *fp) { lexer_t lexer = malloc(sizeof(*lexer)); lexer->fp = fp; lexer->line = 0; lexer->str_idx = 0; lexer->acc_idx = 0; memset(lexer->acc, 0, sizeof(lexer->acc)); memset(lexer->str, 0, sizeof(lexer->str)); lexer->token.type = TOKEN_NONE; return lexer; } void lexer_destroy(lexer_t lexer) { if(!lexer) return; free(lexer); } int lexer_clear_line(lexer_t lexer) { lexer->str[lexer->str_idx] = '\0'; return 0; } int lexer_token_next(lexer_t lexer, struct token *token) { if(lexer->acc_idx == 0 && lexer->acc[0] != '\0') { memset(lexer->acc, 0, sizeof(lexer->acc)); } while(lexer->token.type == TOKEN_NONE) { if(lexer->str[lexer->str_idx] == '\0') { ERR_Z(fgets(lexer->str, LEN(lexer->str), lexer->fp), return -EIO); lexer->str_idx = 0; lexer->line++; } TOKEN_SEPARATOR_TABLE(CALLBACK, lexer) TABLE_END; } *token = lexer->token; lexer->token.type = TOKEN_NONE; return 0; } int token_value_string(struct token *token, size_t buf_sz, char *buf) { #define AS_STRING(ttype, ...) \ case ttype: return snprintf(buf, buf_sz, __VA_ARGS__); switch(token->type) { TOKEN_VALUE_STRING_TABLE(AS_STRING, token->value); } return 0; } #define STR_ALLOC_COPY(dest, str) do { \ size_t len = strlen(str) + 1; \ dest = malloc(len); \ memcpy((dest), (str), len); \ } while(0) void token_clone(struct token *dest, struct token *src) { dest->type = src->type; switch(src->type) { case TOKEN_ID: STR_ALLOC_COPY(dest->value.id, src->value.id); return; case TOKEN_STR: STR_ALLOC_COPY(dest->value.str, src->value.str); return; case TOKEN_INT: dest->value.num = src->value.num; return; default: return; } } void token_dealloc(struct token *token) { switch(token->type) { case TOKEN_ID: free(token->value.id); return; case TOKEN_STR: free(token->value.str); return; default: return; } } static int on_separator(lexer_t lexer, enum token_type type) { if(lexer->acc_idx > 0) return acc_empty(lexer); lexer->token.type = type; lexer->str_idx++; return 0; } static int acc_add_char(lexer_t lexer, char ch) { if(lexer->acc_idx >= LEN(lexer->acc) - 1) { return -ENAMETOOLONG; } lexer->acc[lexer->acc_idx++] = ch; lexer->str_idx++; return 0; } static int acc_empty(lexer_t lexer) { TOKEN_VALUE_TABLE(CALLBACK_BLIND, lexer) TABLE_END; lexer->acc_idx = 0; return 0; } static int is_int(char *str, int *num) { char *endptr = str; long _num = strtol(str, &endptr, 10); if(*endptr != '\0') return 0; *num = (int)_num; return 1; } static int is_special(char *str, enum token_type *type) { #define IS_SPECIAL(ttype, sstr) \ if(strcmp(sstr, str) == 0) { \ *type = ttype; \ return 1; \ } else TOKEN_SPECIALS_TABLE(IS_SPECIAL) TABLE_END; return 0; }