aboutsummaryrefslogtreecommitdiff
path: root/dict.c
diff options
context:
space:
mode:
Diffstat (limited to 'dict.c')
-rw-r--r--dict.c188
1 files changed, 0 insertions, 188 deletions
diff --git a/dict.c b/dict.c
deleted file mode 100644
index 45ddf38..0000000
--- a/dict.c
+++ /dev/null
@@ -1,188 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-
-extern const struct string_token {
- char *s;
- int t;
-} strings[];
-extern const size_t nstrings;
-
-extern const uint8_t char_to_bit[];
-
-struct level {
- uint64_t bit_mask;
- uint64_t *token_masks;
-};
-
-#define MAPPED_CHARS 32
-static struct level start_level = {0};
-static struct level *bit_to_ptr[MAPPED_CHARS] = {0};
-static size_t num_levels;
-
-#define CHAR_TO_PTR(c) (bit_to_ptr[char_to_bit[c]])
-#define popcount(x) (__builtin_popcount(x))
-
-int dict_compile(void)
-{
- // max number of levels
- for(size_t i = 0; i < nstrings; i++)
- if(strlen(strings[i].s) > num_levels) num_levels = strlen(strings[i].s);
-
- // allocated levels
- for(size_t i = 0; i < MAPPED_CHARS; i++) {
- bit_to_ptr[i] = calloc(num_levels, sizeof(*bit_to_ptr[i]));
- if(!bit_to_ptr[i]) return 1;
- }
-
- // BUG: everything is repeated for the start_level
-
- // populate bit_masks
- for(size_t i = 0; i < nstrings; i++) {
- struct level *l = &start_level;
- for(size_t j = 0; j < strlen(strings[i].s)+1; j++) {
- uint8_t bit = char_to_bit[strings[i].s[j]];
-
- l->bit_mask |= 1 << bit;
- l = &bit_to_ptr[bit][j];
- }
- }
-
- // allocate token_masks
- // NOTE: start_level alloc'd many times, so realloc is used
- for(size_t i = 0; i < MAPPED_CHARS; i++) {
- struct level *l = &start_level;
- for(size_t j = 0; j < num_levels + 1; j++) {
- l->token_masks = realloc(l->token_masks, popcount(l->bit_mask)
- * sizeof(*l->token_masks));
- memset(l->token_masks, 0, popcount(l->bit_mask)
- * sizeof(*l->token_masks));
-
- l = &bit_to_ptr[i][j];
- }
- }
-
- // populate token_masks
- for(size_t i = 0; i < nstrings; i++) {
- struct level *l = &start_level;
- for(size_t j = 0; j < strlen(strings[i].s)+1; j++) {
- uint8_t bit = char_to_bit[strings[i].s[j]];
- uint8_t idx = popcount(l->bit_mask & ((1 << bit) - 1));
-
- l->token_masks[idx] |= 1 << strings[i].t;
- l = &bit_to_ptr[bit][j];
- }
- }
-
- return 0;
-}
-
-void dict_print(void)
-{
- for(size_t i = 0; i < 256; i++)
- for(size_t j = 0; j < num_levels; j++)
- if(CHAR_TO_PTR(i)[j].bit_mask) {
- printf("%c, %zu, %32lb ", (char)i, j, CHAR_TO_PTR(i)[j].bit_mask);
-
- printf("{ ");
- for(size_t k = 0; k < popcount(CHAR_TO_PTR(i)[j].bit_mask); k++)
- printf("%lb ", CHAR_TO_PTR(i)[j].token_masks[k]);
- printf(" }\n");
- }
-
-
- printf(" %32lb ", start_level.bit_mask);
- printf("{ ");
- for(size_t k = 0; k < popcount(start_level.bit_mask); k++)
- printf("%lb ", start_level.token_masks[k]);
- printf(" }\n");
-
- printf(" %32s\n", "zyxwvutsrqponmlkjihgfedcbaE ");
-}
-
-void dict_free(void)
-{
- free(start_level.token_masks);
- for(size_t i = 0; i < MAPPED_CHARS; i++) {
- for(size_t j = 0; j < num_levels; j++) {
- if(bit_to_ptr[i][j].token_masks)
- free(bit_to_ptr[i][j].token_masks);
- }
- free(bit_to_ptr[i]);
- }
-}
-
-int dict_check(char *string)
-{
- uint64_t token_mask = ~(uint64_t)0;
-
- for(size_t i = 0; i < strlen(string) + 1; i++) {
- struct level *l = (i == 0)
- ? &start_level
- : &bit_to_ptr[char_to_bit[string[i-1]]][i-1];
-
- uint8_t bit = char_to_bit[string[i]];
-
- if((l->bit_mask & (1 << bit)) == 0) return -1;
-
- uint64_t idx = popcount(l->bit_mask & ((1 << bit) - 1));
- token_mask &= l->token_masks[idx];
- }
-
- if(token_mask) return __builtin_ctz(token_mask);
- else return -1;
-}
-
-#ifdef _DICT_STANDALONE
-
-#define TOKENS(X) \
- X(TOKEN_NONE) \
- X(TOKEN_TEST) \
- X(TOKEN_RETARDED) \
- X(TOKEN_WOW) \
- X(TOKEN_TITS) \
- X(TOKEN_RPAREN) \
- X(TOKEN_LPAREN)
-
-#define TOKEN_ENUM(a) a,
-#define TOKEN_STRING(a) #a,
-
-enum token {
- TOKENS(TOKEN_ENUM)
-};
-
-const char * const token_to_string[] = {
- TOKENS(TOKEN_STRING)
-};
-const struct string_token strings[] = {
- {"test", TOKEN_TEST},
- {"retarded", TOKEN_RETARDED},
- {"wow", TOKEN_WOW},
- {"tits", TOKEN_TITS},
-};
-
-const size_t nstrings = 4;
-
-const uint8_t char_to_bit[256] = {
- ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7,
- ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13,
- ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19,
- ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25,
- ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1
-};
-
-int main(void)
-{
- dict_compile();
-
- int t;
- if((t = dict_check("tits")) >= 0) printf("%s\n", token_to_string[t]);
- if((t = dict_check("retarded")) >= 0) printf("%s\n", token_to_string[t]);
- if((t = dict_check("test2")) >= 0) printf("%s\n", token_to_string[t]);
- if((t = dict_check("tes")) >= 0) printf("%s\n", token_to_string[t]);
-
- dict_free();
-}
-
-#endif