#include #include #include #include extern const struct string_token { char *s; int t; } strings[]; extern const size_t nstrings; extern const uint8_t char_to_bit[]; struct level { uint64_t bit_mask; uint64_t *token_masks; }; #define MAPPED_CHARS 32 static struct level start_level = {0}; static struct level *bit_to_ptr[MAPPED_CHARS] = {0}; static size_t num_levels; #define CHAR_TO_PTR(c) (bit_to_ptr[char_to_bit[c]]) #define popcount(x) (__builtin_popcount(x)) int dict_compile(void) { // max number of levels for(size_t i = 0; i < nstrings; i++) if(strlen(strings[i].s) > num_levels) num_levels = strlen(strings[i].s); // allocated levels for(size_t i = 0; i < MAPPED_CHARS; i++) { bit_to_ptr[i] = calloc(num_levels, sizeof(*bit_to_ptr[i])); if(!bit_to_ptr[i]) return 1; } // BUG: everything is repeated for the start_level // populate bit_masks for(size_t i = 0; i < nstrings; i++) { struct level *l = &start_level; for(size_t j = 0; j < strlen(strings[i].s)+1; j++) { uint8_t bit = char_to_bit[strings[i].s[j]]; l->bit_mask |= 1 << bit; l = &bit_to_ptr[bit][j]; } } // allocate token_masks // NOTE: start_level alloc'd many times, so realloc is used for(size_t i = 0; i < MAPPED_CHARS; i++) { struct level *l = &start_level; for(size_t j = 0; j < num_levels + 1; j++) { l->token_masks = realloc(l->token_masks, popcount(l->bit_mask) * sizeof(*l->token_masks)); memset(l->token_masks, 0, popcount(l->bit_mask) * sizeof(*l->token_masks)); l = &bit_to_ptr[i][j]; } } // populate token_masks for(size_t i = 0; i < nstrings; i++) { struct level *l = &start_level; for(size_t j = 0; j < strlen(strings[i].s)+1; j++) { uint8_t bit = char_to_bit[strings[i].s[j]]; uint8_t idx = popcount(l->bit_mask & ((1 << bit) - 1)); l->token_masks[idx] |= 1 << strings[i].t; l = &bit_to_ptr[bit][j]; } } return 0; } void dict_print(void) { for(size_t i = 0; i < 256; i++) for(size_t j = 0; j < num_levels; j++) if(CHAR_TO_PTR(i)[j].bit_mask) { printf("%c, %zu, %32lb ", (char)i, j, CHAR_TO_PTR(i)[j].bit_mask); printf("{ "); for(size_t k = 0; k < popcount(CHAR_TO_PTR(i)[j].bit_mask); k++) printf("%lb ", CHAR_TO_PTR(i)[j].token_masks[k]); printf(" }\n"); } printf(" %32lb ", start_level.bit_mask); printf("{ "); for(size_t k = 0; k < popcount(start_level.bit_mask); k++) printf("%lb ", start_level.token_masks[k]); printf(" }\n"); printf(" %32s\n", "zyxwvutsrqponmlkjihgfedcbaE "); } void dict_free(void) { free(start_level.token_masks); for(size_t i = 0; i < MAPPED_CHARS; i++) { for(size_t j = 0; j < num_levels; j++) { if(bit_to_ptr[i][j].token_masks) free(bit_to_ptr[i][j].token_masks); } free(bit_to_ptr[i]); } } int dict_check(char *string) { uint64_t token_mask = ~(uint64_t)0; for(size_t i = 0; i < strlen(string) + 1; i++) { struct level *l = (i == 0) ? &start_level : &bit_to_ptr[char_to_bit[string[i-1]]][i-1]; uint8_t bit = char_to_bit[string[i]]; if((l->bit_mask & (1 << bit)) == 0) return -1; uint64_t idx = popcount(l->bit_mask & ((1 << bit) - 1)); token_mask &= l->token_masks[idx]; } if(token_mask) return __builtin_ctz(token_mask); else return -1; } #ifdef _DICT_STANDALONE #define TOKENS(X) \ X(TOKEN_NONE) \ X(TOKEN_TEST) \ X(TOKEN_RETARDED) \ X(TOKEN_WOW) \ X(TOKEN_TITS) \ X(TOKEN_RPAREN) \ X(TOKEN_LPAREN) #define TOKEN_ENUM(a) a, #define TOKEN_STRING(a) #a, enum token { TOKENS(TOKEN_ENUM) }; const char * const token_to_string[] = { TOKENS(TOKEN_STRING) }; const struct string_token strings[] = { {"test", TOKEN_TEST}, {"retarded", TOKEN_RETARDED}, {"wow", TOKEN_WOW}, {"tits", TOKEN_TITS}, }; const size_t nstrings = 4; const uint8_t char_to_bit[256] = { ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7, ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13, ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19, ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25, ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 }; int main(void) { dict_compile(); int t; if((t = dict_check("tits")) >= 0) printf("%s\n", token_to_string[t]); if((t = dict_check("retarded")) >= 0) printf("%s\n", token_to_string[t]); if((t = dict_check("test2")) >= 0) printf("%s\n", token_to_string[t]); if((t = dict_check("tes")) >= 0) printf("%s\n", token_to_string[t]); dict_free(); } #endif