diff options
author | kartofen <mladenovnasko0@gmail.com> | 2023-03-18 14:40:47 +0200 |
---|---|---|
committer | kartofen <mladenovnasko0@gmail.com> | 2023-03-18 14:40:47 +0200 |
commit | 574c9119cd170318e31fb966be93c01dfb388bb1 (patch) | |
tree | a8ea9dcf86e77bf742f0bdd40e1ae08963fb9454 | |
parent | 047cf6811cb7c5b5f46f7357eeffb8e41a0c3b45 (diff) |
encoding done
-rwxr-xr-x | build.sh | 3 | ||||
-rwxr-xr-x | encode | bin | 0 -> 22728 bytes | |||
-rw-r--r-- | encode.c | 260 | ||||
-rw-r--r-- | main.c | 173 |
4 files changed, 262 insertions, 174 deletions
@@ -2,4 +2,5 @@ set -xe -gcc -o main main.c -Wall -Wextra -pedantic -g +gcc -o encode encode.c -Wall -Wextra -pedantic -g +# gcc -o decode decode.c -Wall -Wextra -pedantic -g Binary files differdiff --git a/encode.c b/encode.c new file mode 100644 index 0000000..9139c06 --- /dev/null +++ b/encode.c @@ -0,0 +1,260 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <assert.h> + +typedef struct node { + bool is_char; + char ch; + size_t references; + + // to help when saving tree + struct node *left; + struct node *right; + + // to help encode + int direction; // 0 - left; 1 - right + struct node *parent; +} node_t; + +void node_char_init(node_t *node, size_t ref, char ch); +void node_link_init(node_t *node, node_t* left, node_t* right); + +// void liner_insert(void base[.size * .nmemb + 1], +// size_t nmemb, size_t size, void *el, +// int (*compar)(const void [.size], const void [.size])); +void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)); +int compare_nodes(const void *p1, const void *p2); + +void free_encoding_table(); +void wb_flush(size_t nmemb); + +void gen_tree(FILE *fp); +size_t write_tree_shape(node_t *root); +size_t write_tree_data(node_t *root); + +size_t write_encoded(node_t *root); +void encode(FILE *fp); + + +size_t POOL_CAP = 512; +node_t *pool; +size_t pool_sz; + +size_t WRITE_BUF_CAP = 512; +char *write_buf; + +node_t *encoding_table[256] = {0}; + +void encode(FILE *fp) +{ + // write tree // + wb_flush(1); // flush amount of bits (written in gen_tree()) + wb_flush(write_tree_shape(&pool[pool_sz-1])/8 + 1); + wb_flush(write_tree_data(&pool[pool_sz-1])); + + // encode // + char ch; + while((ch = fgetc(fp)) != EOF) { + if(encoding_table[ch] == NULL) { + fprintf(stderr, "Character %c not found encoding table\n", ch); + exit(1); + } + write_encoded(encoding_table[ch]); + } + wb_flush(write_encoded(NULL)/8+1); +} + +int main(int argc, char **argv) +{ + if(argc != 2) { + fprintf(stderr, "ERROR: Not enough command line arguments\n"); + exit(1); + } + + write_buf = malloc(sizeof(char) * WRITE_BUF_CAP); + pool = malloc(sizeof(node_t) * POOL_CAP); + + FILE *fp = fopen(argv[1], "rb"); + if(!fp) { + fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]); + exit(1); + } + + gen_tree(fp); + encode(fp); + + free_encoding_table(); + free(pool); + free(write_buf); + + fclose(fp); + return 0; +} + +void gen_tree(FILE *fp) +{ + node_t *nodes[256]; + size_t nodes_sz = 0; + + size_t char_counts[256] = {0}; + char mem[1024] = {0}; size_t sz; + + // load char counts + while((sz = fread(mem, sizeof(char), 1024, fp)) != 0) + { + for(size_t i = 0; i < sz; i++) { + char_counts[(size_t)mem[i]]++; + } + } + rewind(fp); + + // make the tree + for(int i = 0; i < 256; i++) { + if(char_counts[i] <= 0) continue; + + nodes[nodes_sz] = malloc(sizeof(node_t)); + node_char_init(nodes[nodes_sz], char_counts[i], (char)i); + encoding_table[i] = nodes[nodes_sz]; + nodes_sz++; + } + + qsort(nodes, nodes_sz, sizeof(node_t *), compare_nodes); + + // save the amount of bits + write_buf[0] = nodes_sz * 2 - 1; + + // pair last 2 elements and do linear sort + for(pool_sz = 0; nodes_sz != 1; nodes_sz--, pool_sz++) + { + if(pool_sz >= POOL_CAP) { + fprintf(stderr, "ERROR: POOL_CAP reached, please re-run the program with a bigger POOL_CAP\n"); + exit(1); + } + + node_link_init(&pool[pool_sz], nodes[nodes_sz-1], nodes[nodes_sz-2]); + liner_insert(nodes, nodes_sz-2, sizeof(node_t *), &pool[pool_sz], compare_nodes); + } +} + +size_t write_encoded(node_t *root) +{ + static size_t bits = 0; + if(root == NULL || root->parent == NULL) return bits; + + write_buf[bits/8] |= root->direction << (bits % 8); + bits++; + + if(bits/8 == WRITE_BUF_CAP) { + wb_flush(WRITE_BUF_CAP); + bits = 0; + } + + write_encoded(root->parent); + return bits; +} + +size_t write_tree_shape(node_t *root) +{ + static size_t bits = 0; + if(root == NULL) return bits; + + write_buf[bits/8] |= root->is_char << (8 - (bits%8)); + bits++; + + if(bits/8 == WRITE_BUF_CAP) { + wb_flush(WRITE_BUF_CAP); + bits = 0; + } + + write_tree_shape(root->left); + write_tree_shape(root->right); + + return bits; +} + +size_t write_tree_data(node_t *root) +{ + static size_t n = 0; + if(root == NULL) return n; + + if(root->is_char) { + if(n == WRITE_BUF_CAP) { + wb_flush(WRITE_BUF_CAP); + n = 0; + } + write_buf[n++] = root->ch; + } + + write_tree_data(root->left); + write_tree_data(root->right); + + return n; +} + +void wb_flush(size_t nmemb) +{ + fwrite(write_buf, sizeof(char), nmemb, stdout); + memset(write_buf, 0, sizeof(char) * WRITE_BUF_CAP); +} + +void free_encoding_table() +{ + for(int i = 0; i < 256; i++) { + if(encoding_table[i] != NULL) { + free(encoding_table[i]); + } + } +} + +int compare_nodes(const void *p1, const void *p2) +{ + return (*(node_t **)p2)->references - (*(node_t **)p1)->references; +} + +void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)) +{ + for(size_t i = 0; i < nmemb; i++) + { + void *cur = base + (i * size); + if(compar(cur, &el) < 0) continue; + for(size_t j = nmemb-1; j >= i; j--) + { + if(j == (size_t)-1) break; //integer underflow + + void *src = base + (j * size); + void *dest = src + size; + memcpy(dest, src, size); + } + + memcpy(cur, &el, size); + return; + } + + memcpy(base + (nmemb * size), &el, size); +} + +void node_char_init(node_t *node, size_t ref, char ch) +{ + node->is_char = true; + node->ch = ch; + node->references = ref; + node->parent = NULL; + node->left = NULL; + node->right = NULL; +} + +void node_link_init(node_t *node, node_t* left, node_t* right) +{ + node->is_char = false; + node->references = left->references + right->references; + node->parent = NULL; + node->left = left; + node->right = right; + + left->parent = node; + right->parent = node; + left->direction = 0; + right->direction = 1; +} @@ -1,173 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdbool.h> - -#define POOL_CAP 512 - -typedef struct node { - bool is_char; - char ch; - - size_t references; - int direction; // 0 - left; 1 - right - - struct node *parent; -} node_t; - -// void liner_insert(void base[.size * .nmemb + 1], -// size_t nmemb, size_t size, void *el, -// int (*compar)(const void [.size], const void [.size])); -void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)); - -int compare_nodes(const void *p1, const void *p2); - -void node_char_init(node_t *node, size_t ref, char ch); -void node_link_init(node_t *node, node_t* left, node_t* right); -void node_print(node_t *node, int indent); - -node_t pool[POOL_CAP]; -size_t pool_sz; - -node_t *encoding_table[256]; - -int main(int argc, char **argv) -{ - if(argc != 2) { - fprintf(stderr, "ERROR: Not enough command line arguments\n"); - exit(1); - } - - //-------------------------------------------------// - - pool_sz = 0; - - for(int i = 0; i < 256; i++) - encoding_table[i] = NULL; - - //-------------------------------------------------// - // Load the file data - int times_seen[256] = {0}; - - FILE *fp = fopen(argv[1], "rb"); - if(!fp) { - fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]); - exit(1); - } - - char mem[1024]; - size_t sz; - while((sz = fread(mem, sizeof(char), 1024, fp)) != 0) - { - for(size_t i = 0; i < sz; i++) { - times_seen[(size_t)mem[i]]++; - } - memset(mem, '\0', sizeof(char) * 1024); - } - rewind(fp); - - //-------------------------------------------------// - // Load everything into nodes array and sort - - node_t *nodes[256]; - size_t nodes_sz = 0; - - for(int i = 0; i < 256; i++) { - if(times_seen[i] <= 0) { - continue; - } - - nodes[nodes_sz] = malloc(sizeof(node_t)); - node_char_init(nodes[nodes_sz], times_seen[i], (char)i); - encoding_table[i] = nodes[nodes_sz]; - nodes_sz++; - } - - qsort(nodes, nodes_sz, sizeof(node_t *), compare_nodes); - - //-------------------------------------------------// - // Continuously sort and build the tree - - while(nodes_sz != 1) - { - nodes_sz--; - node_link_init(&pool[pool_sz], nodes[nodes_sz], nodes[nodes_sz-1]); - - liner_insert(nodes, nodes_sz-1, sizeof(node_t *), &pool[pool_sz++], compare_nodes); - - for(size_t i = 0; i < nodes_sz; i++) - node_print(nodes[i], 0); - } - - - //-------------------------------------------------// - - for(int i = 0; i < 256; i++) - if(encoding_table[i] != NULL) free(encoding_table[i]); - fclose(fp); - return 0; -} - -void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)) -{ - for(size_t i = 0; i < nmemb; i++) - { - void *cur = base + (i * size); - if(compar(cur, &el) < 0) continue; - - for(size_t j = nmemb; j >= i; j--) { - void *src = base + (j * size); - void *dest = src + size; - memcpy(dest, src, size); - } - - memcpy(cur, &el, size); - break; - } -} - -int compare_nodes(const void *p1, const void *p2) -{ - // printf("ff\t%d %d\n", (*(node_t **)p2)->references, (*(node_t **)p1)->references); - return (*(node_t **)p2)->references - (*(node_t **)p1)->references; -} - -void node_char_init(node_t *node, size_t ref, char ch) -{ - node->is_char = true; - node->ch = ch; - node->references = ref; - node->parent = NULL; -} - -void node_link_init(node_t *node, node_t* left, node_t* right) -{ - node->is_char = false; - node->references = left->references + right->references; - node->parent = NULL; - - left->parent = node; - right->parent = node; - left->direction = 0; - right->direction = 1; -} - -void node_print(node_t *node, int indent) -{ - for(int i = 0; i < indent; i++) printf(" "); - printf("ref: %ld\n", node->references); - - - if(node->is_char) { - for(int i = 0; i < indent; i++) printf(" "); - printf("char: %c\n", node->ch); - } else { - for(int i = 0; i < indent; i++) printf(" "); - printf("link thing\n"); - } - - if(node->parent != NULL) { - for(int i = 0; i < indent; i++) printf(" "); - printf("dir: %d\n", node->direction); node_print(node->parent, indent + 4); - } -} |