summaryrefslogtreecommitdiff
path: root/encode.c
diff options
context:
space:
mode:
Diffstat (limited to 'encode.c')
-rw-r--r--encode.c260
1 files changed, 260 insertions, 0 deletions
diff --git a/encode.c b/encode.c
new file mode 100644
index 0000000..9139c06
--- /dev/null
+++ b/encode.c
@@ -0,0 +1,260 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+typedef struct node {
+ bool is_char;
+ char ch;
+ size_t references;
+
+ // to help when saving tree
+ struct node *left;
+ struct node *right;
+
+ // to help encode
+ int direction; // 0 - left; 1 - right
+ struct node *parent;
+} node_t;
+
+void node_char_init(node_t *node, size_t ref, char ch);
+void node_link_init(node_t *node, node_t* left, node_t* right);
+
+// void liner_insert(void base[.size * .nmemb + 1],
+// size_t nmemb, size_t size, void *el,
+// int (*compar)(const void [.size], const void [.size]));
+void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *));
+int compare_nodes(const void *p1, const void *p2);
+
+void free_encoding_table();
+void wb_flush(size_t nmemb);
+
+void gen_tree(FILE *fp);
+size_t write_tree_shape(node_t *root);
+size_t write_tree_data(node_t *root);
+
+size_t write_encoded(node_t *root);
+void encode(FILE *fp);
+
+
+size_t POOL_CAP = 512;
+node_t *pool;
+size_t pool_sz;
+
+size_t WRITE_BUF_CAP = 512;
+char *write_buf;
+
+node_t *encoding_table[256] = {0};
+
+void encode(FILE *fp)
+{
+ // write tree //
+ wb_flush(1); // flush amount of bits (written in gen_tree())
+ wb_flush(write_tree_shape(&pool[pool_sz-1])/8 + 1);
+ wb_flush(write_tree_data(&pool[pool_sz-1]));
+
+ // encode //
+ char ch;
+ while((ch = fgetc(fp)) != EOF) {
+ if(encoding_table[ch] == NULL) {
+ fprintf(stderr, "Character %c not found encoding table\n", ch);
+ exit(1);
+ }
+ write_encoded(encoding_table[ch]);
+ }
+ wb_flush(write_encoded(NULL)/8+1);
+}
+
+int main(int argc, char **argv)
+{
+ if(argc != 2) {
+ fprintf(stderr, "ERROR: Not enough command line arguments\n");
+ exit(1);
+ }
+
+ write_buf = malloc(sizeof(char) * WRITE_BUF_CAP);
+ pool = malloc(sizeof(node_t) * POOL_CAP);
+
+ FILE *fp = fopen(argv[1], "rb");
+ if(!fp) {
+ fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]);
+ exit(1);
+ }
+
+ gen_tree(fp);
+ encode(fp);
+
+ free_encoding_table();
+ free(pool);
+ free(write_buf);
+
+ fclose(fp);
+ return 0;
+}
+
+void gen_tree(FILE *fp)
+{
+ node_t *nodes[256];
+ size_t nodes_sz = 0;
+
+ size_t char_counts[256] = {0};
+ char mem[1024] = {0}; size_t sz;
+
+ // load char counts
+ while((sz = fread(mem, sizeof(char), 1024, fp)) != 0)
+ {
+ for(size_t i = 0; i < sz; i++) {
+ char_counts[(size_t)mem[i]]++;
+ }
+ }
+ rewind(fp);
+
+ // make the tree
+ for(int i = 0; i < 256; i++) {
+ if(char_counts[i] <= 0) continue;
+
+ nodes[nodes_sz] = malloc(sizeof(node_t));
+ node_char_init(nodes[nodes_sz], char_counts[i], (char)i);
+ encoding_table[i] = nodes[nodes_sz];
+ nodes_sz++;
+ }
+
+ qsort(nodes, nodes_sz, sizeof(node_t *), compare_nodes);
+
+ // save the amount of bits
+ write_buf[0] = nodes_sz * 2 - 1;
+
+ // pair last 2 elements and do linear sort
+ for(pool_sz = 0; nodes_sz != 1; nodes_sz--, pool_sz++)
+ {
+ if(pool_sz >= POOL_CAP) {
+ fprintf(stderr, "ERROR: POOL_CAP reached, please re-run the program with a bigger POOL_CAP\n");
+ exit(1);
+ }
+
+ node_link_init(&pool[pool_sz], nodes[nodes_sz-1], nodes[nodes_sz-2]);
+ liner_insert(nodes, nodes_sz-2, sizeof(node_t *), &pool[pool_sz], compare_nodes);
+ }
+}
+
+size_t write_encoded(node_t *root)
+{
+ static size_t bits = 0;
+ if(root == NULL || root->parent == NULL) return bits;
+
+ write_buf[bits/8] |= root->direction << (bits % 8);
+ bits++;
+
+ if(bits/8 == WRITE_BUF_CAP) {
+ wb_flush(WRITE_BUF_CAP);
+ bits = 0;
+ }
+
+ write_encoded(root->parent);
+ return bits;
+}
+
+size_t write_tree_shape(node_t *root)
+{
+ static size_t bits = 0;
+ if(root == NULL) return bits;
+
+ write_buf[bits/8] |= root->is_char << (8 - (bits%8));
+ bits++;
+
+ if(bits/8 == WRITE_BUF_CAP) {
+ wb_flush(WRITE_BUF_CAP);
+ bits = 0;
+ }
+
+ write_tree_shape(root->left);
+ write_tree_shape(root->right);
+
+ return bits;
+}
+
+size_t write_tree_data(node_t *root)
+{
+ static size_t n = 0;
+ if(root == NULL) return n;
+
+ if(root->is_char) {
+ if(n == WRITE_BUF_CAP) {
+ wb_flush(WRITE_BUF_CAP);
+ n = 0;
+ }
+ write_buf[n++] = root->ch;
+ }
+
+ write_tree_data(root->left);
+ write_tree_data(root->right);
+
+ return n;
+}
+
+void wb_flush(size_t nmemb)
+{
+ fwrite(write_buf, sizeof(char), nmemb, stdout);
+ memset(write_buf, 0, sizeof(char) * WRITE_BUF_CAP);
+}
+
+void free_encoding_table()
+{
+ for(int i = 0; i < 256; i++) {
+ if(encoding_table[i] != NULL) {
+ free(encoding_table[i]);
+ }
+ }
+}
+
+int compare_nodes(const void *p1, const void *p2)
+{
+ return (*(node_t **)p2)->references - (*(node_t **)p1)->references;
+}
+
+void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *))
+{
+ for(size_t i = 0; i < nmemb; i++)
+ {
+ void *cur = base + (i * size);
+ if(compar(cur, &el) < 0) continue;
+ for(size_t j = nmemb-1; j >= i; j--)
+ {
+ if(j == (size_t)-1) break; //integer underflow
+
+ void *src = base + (j * size);
+ void *dest = src + size;
+ memcpy(dest, src, size);
+ }
+
+ memcpy(cur, &el, size);
+ return;
+ }
+
+ memcpy(base + (nmemb * size), &el, size);
+}
+
+void node_char_init(node_t *node, size_t ref, char ch)
+{
+ node->is_char = true;
+ node->ch = ch;
+ node->references = ref;
+ node->parent = NULL;
+ node->left = NULL;
+ node->right = NULL;
+}
+
+void node_link_init(node_t *node, node_t* left, node_t* right)
+{
+ node->is_char = false;
+ node->references = left->references + right->references;
+ node->parent = NULL;
+ node->left = left;
+ node->right = right;
+
+ left->parent = node;
+ right->parent = node;
+ left->direction = 0;
+ right->direction = 1;
+}