diff options
| author | kartofen <mladenovnasko0@gmail.com> | 2023-03-18 14:40:47 +0200 | 
|---|---|---|
| committer | kartofen <mladenovnasko0@gmail.com> | 2023-03-18 14:40:47 +0200 | 
| commit | 574c9119cd170318e31fb966be93c01dfb388bb1 (patch) | |
| tree | a8ea9dcf86e77bf742f0bdd40e1ae08963fb9454 /encode.c | |
| parent | 047cf6811cb7c5b5f46f7357eeffb8e41a0c3b45 (diff) | |
encoding done
Diffstat (limited to 'encode.c')
| -rw-r--r-- | encode.c | 260 | 
1 files changed, 260 insertions, 0 deletions
diff --git a/encode.c b/encode.c new file mode 100644 index 0000000..9139c06 --- /dev/null +++ b/encode.c @@ -0,0 +1,260 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <assert.h> + +typedef struct node { +    bool is_char; +    char ch; +    size_t references; + +    // to help when saving tree +    struct node *left; +    struct node *right; + +    // to help encode +    int direction; // 0 - left; 1 - right +    struct node *parent; +} node_t; + +void node_char_init(node_t *node, size_t ref, char ch); +void node_link_init(node_t *node, node_t* left, node_t* right); + +// void liner_insert(void base[.size * .nmemb + 1], +//                   size_t nmemb, size_t size, void *el, +//                   int (*compar)(const void [.size], const void [.size])); +void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)); +int compare_nodes(const void *p1, const void *p2); + +void free_encoding_table(); +void wb_flush(size_t nmemb); + +void gen_tree(FILE *fp); +size_t write_tree_shape(node_t *root); +size_t write_tree_data(node_t *root); + +size_t write_encoded(node_t *root); +void encode(FILE *fp); + + +size_t POOL_CAP = 512; +node_t *pool; +size_t pool_sz; + +size_t WRITE_BUF_CAP = 512; +char *write_buf; + +node_t *encoding_table[256] = {0}; + +void encode(FILE *fp) +{ +    // write tree // +    wb_flush(1); // flush amount of bits (written in gen_tree()) +    wb_flush(write_tree_shape(&pool[pool_sz-1])/8 + 1); +    wb_flush(write_tree_data(&pool[pool_sz-1])); + +    // encode // +    char ch; +    while((ch = fgetc(fp)) != EOF) { +        if(encoding_table[ch] == NULL) { +            fprintf(stderr, "Character %c not found encoding table\n", ch); +            exit(1); +        } +        write_encoded(encoding_table[ch]); +    } +    wb_flush(write_encoded(NULL)/8+1); +} + +int main(int argc, char **argv) +{ +    if(argc != 2) { +        fprintf(stderr, "ERROR: Not enough command line arguments\n"); +        exit(1); +    } + +    write_buf = malloc(sizeof(char) * WRITE_BUF_CAP); +    pool = malloc(sizeof(node_t) * POOL_CAP); + +    FILE *fp = fopen(argv[1], "rb"); +    if(!fp) { +        fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]); +        exit(1); +    } + +    gen_tree(fp); +    encode(fp); + +    free_encoding_table(); +    free(pool); +    free(write_buf); + +    fclose(fp); +    return 0; +} + +void gen_tree(FILE *fp) +{ +    node_t *nodes[256]; +    size_t nodes_sz = 0; + +    size_t char_counts[256] = {0}; +    char mem[1024] = {0}; size_t sz; + +    // load char counts +    while((sz = fread(mem, sizeof(char), 1024, fp)) != 0) +    { +        for(size_t i = 0; i < sz; i++) { +            char_counts[(size_t)mem[i]]++; +        } +    } +    rewind(fp); + +    // make the tree +    for(int i = 0; i < 256; i++) { +        if(char_counts[i] <= 0) continue; + +        nodes[nodes_sz] = malloc(sizeof(node_t)); +        node_char_init(nodes[nodes_sz], char_counts[i], (char)i); +        encoding_table[i] = nodes[nodes_sz]; +        nodes_sz++; +    } + +    qsort(nodes, nodes_sz, sizeof(node_t *), compare_nodes); + +    // save the amount of bits +    write_buf[0] = nodes_sz * 2 - 1; + +    // pair last 2 elements and do linear sort +    for(pool_sz = 0; nodes_sz != 1; nodes_sz--, pool_sz++) +    { +        if(pool_sz >= POOL_CAP) { +            fprintf(stderr, "ERROR: POOL_CAP reached, please re-run the program with a bigger POOL_CAP\n"); +            exit(1); +        } + +        node_link_init(&pool[pool_sz], nodes[nodes_sz-1], nodes[nodes_sz-2]); +        liner_insert(nodes, nodes_sz-2, sizeof(node_t *), &pool[pool_sz], compare_nodes); +    } +} + +size_t write_encoded(node_t *root) +{ +    static size_t bits = 0; +    if(root == NULL || root->parent == NULL) return bits; + +    write_buf[bits/8] |= root->direction << (bits % 8); +    bits++; + +    if(bits/8 == WRITE_BUF_CAP) { +        wb_flush(WRITE_BUF_CAP); +        bits = 0; +    } + +    write_encoded(root->parent); +    return bits; +} + +size_t write_tree_shape(node_t *root) +{ +    static size_t bits = 0; +    if(root == NULL) return bits; + +    write_buf[bits/8] |= root->is_char << (8 - (bits%8)); +    bits++; + +    if(bits/8 == WRITE_BUF_CAP) { +        wb_flush(WRITE_BUF_CAP); +        bits = 0; +    } + +    write_tree_shape(root->left); +    write_tree_shape(root->right); + +    return bits; +} + +size_t write_tree_data(node_t *root) +{ +    static size_t n = 0; +    if(root == NULL) return n; + +    if(root->is_char) { +        if(n == WRITE_BUF_CAP) { +            wb_flush(WRITE_BUF_CAP); +            n = 0; +        } +        write_buf[n++] = root->ch; +    } + +    write_tree_data(root->left); +    write_tree_data(root->right); + +    return n; +} + +void wb_flush(size_t nmemb) +{ +    fwrite(write_buf, sizeof(char), nmemb, stdout); +    memset(write_buf, 0, sizeof(char) * WRITE_BUF_CAP); +} + +void free_encoding_table() +{ +    for(int i = 0; i < 256; i++) { +        if(encoding_table[i] != NULL) { +            free(encoding_table[i]); +        } +    } +} + +int compare_nodes(const void *p1, const void *p2) +{ +    return (*(node_t **)p2)->references - (*(node_t **)p1)->references; +} + +void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)) +{ +    for(size_t i = 0; i < nmemb; i++) +    { +        void *cur = base + (i * size); +        if(compar(cur, &el) < 0) continue; +        for(size_t j = nmemb-1; j >= i; j--) +        { +            if(j == (size_t)-1) break; //integer underflow + +            void *src = base + (j * size); +            void *dest = src + size; +            memcpy(dest, src, size); +        } + +        memcpy(cur, &el, size); +        return; +    } + +    memcpy(base + (nmemb * size), &el, size); +} + +void node_char_init(node_t *node, size_t ref, char ch) +{ +    node->is_char = true; +    node->ch = ch; +    node->references = ref; +    node->parent = NULL; +    node->left = NULL; +    node->right = NULL; +} + +void node_link_init(node_t *node, node_t* left, node_t* right) +{ +    node->is_char = false; +    node->references = left->references + right->references; +    node->parent = NULL; +    node->left = left; +    node->right = right; + +    left->parent = node; +    right->parent = node; +    left->direction = 0; +    right->direction = 1; +}  | 
