#include #include #include #include typedef struct node { bool is_char; char ch; size_t references; // to help when saving tree struct node *left; struct node *right; // to help encode int direction; // 0 - left; 1 - right struct node *parent; } node_t; node_t *node_char_create(char ch, size_t ref); node_t *node_link_create(node_t* left, node_t* right); void node_free(node_t *root); void node_print(node_t *root, int indent); // void liner_insert(void base[.size * .nmemb + 1], // size_t nmemb, size_t size, void *el, // int (*compar)(const void [.size], const void [.size])); void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)); int compare_nodes(const void *p1, const void *p2); void wb_flush(size_t nmemb); void gen_tree(); size_t write_tree_data(node_t *root); size_t write_tree_shape(node_t *root); size_t write_encoded(node_t *root); void encode(); size_t MMAP_SIZE_CAP = 1024; // 1 kibibyte FILE *in; FILE *out; size_t WRITE_BUF_CAP = 512; char *write_buf; node_t *root; node_t *encoding_table[256] = {0}; void encode() { // write tree // wb_flush(1); // flush amount of leafs in tree (written in gen_tree()) wb_flush(write_tree_data(root)); wb_flush(write_tree_shape(root)/8 + 1); // write amount of characters fseek(in, 0, SEEK_END); size_t text_len = ftell(in); rewind(in); fwrite(&text_len, sizeof(size_t), 1, out); // encode // char ch; while(fread(&ch, sizeof(char), 1, in) > 0) { // fprintf(stderr, "%c\n", ch); write_encoded(encoding_table[ch]); } // fprintf(stderr, "%d\n", write_encoded(NULL)/8); wb_flush((write_encoded(NULL) + 7)/8); // write amout of extra bits write_buf[0] = write_encoded(NULL) % 8; wb_flush(1); } int main(void) { write_buf = malloc(sizeof(char) * WRITE_BUF_CAP); in = stdin; out = stdout; // in = fopen(argv[1], "rb"); // if(!in) { // fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]); // exit(1); // } // out = fopen(argv[1], "wb"); // if(!out) { // fprintf(stderr, "ERROR Could not open file %s\n", argv[1]); // exit(1); // } gen_tree(); encode(); // node_print(root, 0); node_free(root); free(write_buf); // fclose(in); // fclose(out); return 0; } void gen_tree() { node_t *nodes[256]; size_t nodes_sz = 0; size_t char_refs[256] = {0}; char mem[1024] = {0}; size_t sz; // load char counts while((sz = fread(mem, sizeof(char), 1024, in)) != 0) { for(size_t i = 0; i < sz; i++) { char_refs[(size_t)mem[i]]++; } } rewind(in); // make the tree for(int i = 0; i < 256; i++) { if(char_refs[i] <= 0) continue; nodes[nodes_sz] = node_char_create((char)i, char_refs[i]); encoding_table[i] = nodes[nodes_sz]; nodes_sz++; } qsort(nodes, nodes_sz, sizeof(node_t *), compare_nodes); // save the amount of bits write_buf[0] = (char)nodes_sz; // pair last 2 elements and do linear sort for(; nodes_sz != 1; nodes_sz--) { root = node_link_create(nodes[nodes_sz-1], nodes[nodes_sz-2]); liner_insert(nodes, nodes_sz-2, sizeof(node_t *), root, compare_nodes); } } size_t write_encoded(node_t *root) { static size_t bits = 0; if(root == NULL || root->parent == NULL) return bits; write_buf[bits/8] |= root->direction << (7 - (bits % 8)); bits++; if(bits/8 == WRITE_BUF_CAP) { wb_flush(WRITE_BUF_CAP); bits = 0; } write_encoded(root->parent); return bits; } size_t write_tree_shape(node_t *root) { static size_t bits = 0; if(root == NULL) return bits; write_buf[bits/8] |= root->is_char << (bits % 8); bits++; if(bits/8 == WRITE_BUF_CAP) { wb_flush(WRITE_BUF_CAP); bits = 0; } write_tree_shape(root->left); write_tree_shape(root->right); return bits; } size_t write_tree_data(node_t *root) { static size_t n = 0; if(root == NULL) return n; if(root->is_char) { if(n == WRITE_BUF_CAP) { wb_flush(WRITE_BUF_CAP); n = 0; } write_buf[n++] = root->ch; } write_tree_data(root->left); write_tree_data(root->right); return n; } void wb_flush(size_t nmemb) { fwrite(write_buf, sizeof(char), nmemb, out); memset(write_buf, 0, sizeof(char) * WRITE_BUF_CAP); } int compare_nodes(const void *p1, const void *p2) { return (*(node_t **)p2)->references - (*(node_t **)p1)->references; } void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *)) { for(size_t i = 0; i < nmemb; i++) { void *cur = base + (i * size); if(compar(cur, &el) < 0) continue; for(size_t j = nmemb-1; j >= i; j--) { if(j == (size_t)-1) break; //integer underflow void *src = base + (j * size); void *dest = src + size; memcpy(dest, src, size); } memcpy(cur, &el, size); return; } memcpy(base + (nmemb * size), &el, size); } node_t *node_char_create(char ch, size_t ref) { node_t *node = malloc(sizeof(node_t)); node->is_char = true; node->ch = ch; node->references = ref; node->parent = NULL; node->left = NULL; node->right = NULL; return node; } node_t *node_link_create(node_t* left, node_t* right) { node_t *node = malloc(sizeof(node_t)); node->is_char = false; node->ch = 0; node->references = left->references + right->references; node->parent = NULL; node->left = left; node->right = right; left->parent = node; right->parent = node; left->direction = 0; right->direction = 1; return node; } void node_free(node_t *root) { if(root == NULL) return; node_free(root->left); node_free(root->right); free(root); } void node_print(node_t *root, int indent) { if(root == NULL) return; for(int i = 0; i < indent; i++) fprintf(stderr, " "); fprintf(stderr, "%d '%c'\n", root->is_char, (root->ch == 0) ? '-' : root->ch); node_print(root->left, indent + 2); node_print(root->right, indent + 2); }