#include #include #include #include #include typedef struct node { bool is_char; char ch; size_t references; // to help when saving tree struct node *left; struct node *right; // to help encode int direction; // 0 - left; 1 - right struct node *parent; } node_t; void node_char_init(node_t *node, size_t ref, char ch); void node_link_init(node_t *node, node_t* left, node_t* right); // void liner_insert(void base[.size * .nmemb + 1], // size_t nmemb, size_t size, void *el, // int (*compar)(const void [.size], const void [.size])); void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)); int compare_nodes(const void *p1, const void *p2); void free_encoding_table(); void wb_flush(size_t nmemb); void gen_tree(FILE *fp); size_t write_tree_shape(node_t *root); size_t write_tree_data(node_t *root); size_t write_encoded(node_t *root); void encode(FILE *fp); size_t POOL_CAP = 512; node_t *pool; size_t pool_sz; size_t WRITE_BUF_CAP = 512; char *write_buf; node_t *encoding_table[256] = {0}; void encode(FILE *fp) { // write tree // wb_flush(1); // flush amount of bits (written in gen_tree()) wb_flush(write_tree_shape(&pool[pool_sz-1])/8 + 1); wb_flush(write_tree_data(&pool[pool_sz-1])); // encode // char ch; while((ch = fgetc(fp)) != EOF) { if(encoding_table[ch] == NULL) { fprintf(stderr, "Character %c not found encoding table\n", ch); exit(1); } write_encoded(encoding_table[ch]); } wb_flush(write_encoded(NULL)/8+1); } int main(int argc, char **argv) { if(argc != 2) { fprintf(stderr, "ERROR: Not enough command line arguments\n"); exit(1); } write_buf = malloc(sizeof(char) * WRITE_BUF_CAP); pool = malloc(sizeof(node_t) * POOL_CAP); FILE *fp = fopen(argv[1], "rb"); if(!fp) { fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]); exit(1); } gen_tree(fp); encode(fp); free_encoding_table(); free(pool); free(write_buf); fclose(fp); return 0; } void gen_tree(FILE *fp) { node_t *nodes[256]; size_t nodes_sz = 0; size_t char_counts[256] = {0}; char mem[1024] = {0}; size_t sz; // load char counts while((sz = fread(mem, sizeof(char), 1024, fp)) != 0) { for(size_t i = 0; i < sz; i++) { char_counts[(size_t)mem[i]]++; } } rewind(fp); // make the tree for(int i = 0; i < 256; i++) { if(char_counts[i] <= 0) continue; nodes[nodes_sz] = malloc(sizeof(node_t)); node_char_init(nodes[nodes_sz], char_counts[i], (char)i); encoding_table[i] = nodes[nodes_sz]; nodes_sz++; } qsort(nodes, nodes_sz, sizeof(node_t *), compare_nodes); // save the amount of bits write_buf[0] = nodes_sz * 2 - 1; // pair last 2 elements and do linear sort for(pool_sz = 0; nodes_sz != 1; nodes_sz--, pool_sz++) { if(pool_sz >= POOL_CAP) { fprintf(stderr, "ERROR: POOL_CAP reached, please re-run the program with a bigger POOL_CAP\n"); exit(1); } node_link_init(&pool[pool_sz], nodes[nodes_sz-1], nodes[nodes_sz-2]); liner_insert(nodes, nodes_sz-2, sizeof(node_t *), &pool[pool_sz], compare_nodes); } } size_t write_encoded(node_t *root) { static size_t bits = 0; if(root == NULL || root->parent == NULL) return bits; write_buf[bits/8] |= root->direction << (bits % 8); bits++; if(bits/8 == WRITE_BUF_CAP) { wb_flush(WRITE_BUF_CAP); bits = 0; } write_encoded(root->parent); return bits; } size_t write_tree_shape(node_t *root) { static size_t bits = 0; if(root == NULL) return bits; write_buf[bits/8] |= root->is_char << (8 - (bits%8)); bits++; if(bits/8 == WRITE_BUF_CAP) { wb_flush(WRITE_BUF_CAP); bits = 0; } write_tree_shape(root->left); write_tree_shape(root->right); return bits; } size_t write_tree_data(node_t *root) { static size_t n = 0; if(root == NULL) return n; if(root->is_char) { if(n == WRITE_BUF_CAP) { wb_flush(WRITE_BUF_CAP); n = 0; } write_buf[n++] = root->ch; } write_tree_data(root->left); write_tree_data(root->right); return n; } void wb_flush(size_t nmemb) { fwrite(write_buf, sizeof(char), nmemb, stdout); memset(write_buf, 0, sizeof(char) * WRITE_BUF_CAP); } void free_encoding_table() { for(int i = 0; i < 256; i++) { if(encoding_table[i] != NULL) { free(encoding_table[i]); } } } int compare_nodes(const void *p1, const void *p2) { return (*(node_t **)p2)->references - (*(node_t **)p1)->references; } void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)) { for(size_t i = 0; i < nmemb; i++) { void *cur = base + (i * size); if(compar(cur, &el) < 0) continue; for(size_t j = nmemb-1; j >= i; j--) { if(j == (size_t)-1) break; //integer underflow void *src = base + (j * size); void *dest = src + size; memcpy(dest, src, size); } memcpy(cur, &el, size); return; } memcpy(base + (nmemb * size), &el, size); } void node_char_init(node_t *node, size_t ref, char ch) { node->is_char = true; node->ch = ch; node->references = ref; node->parent = NULL; node->left = NULL; node->right = NULL; } void node_link_init(node_t *node, node_t* left, node_t* right) { node->is_char = false; node->references = left->references + right->references; node->parent = NULL; node->left = left; node->right = right; left->parent = node; right->parent = node; left->direction = 0; right->direction = 1; }