summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkartofen <mladenovnasko0@gmail.com>2023-03-18 14:40:47 +0200
committerkartofen <mladenovnasko0@gmail.com>2023-03-18 14:40:47 +0200
commit574c9119cd170318e31fb966be93c01dfb388bb1 (patch)
treea8ea9dcf86e77bf742f0bdd40e1ae08963fb9454
parent047cf6811cb7c5b5f46f7357eeffb8e41a0c3b45 (diff)
encoding done
-rwxr-xr-xbuild.sh3
-rwxr-xr-xencodebin0 -> 22728 bytes
-rw-r--r--encode.c260
-rw-r--r--main.c173
4 files changed, 262 insertions, 174 deletions
diff --git a/build.sh b/build.sh
index 06058c0..680c10a 100755
--- a/build.sh
+++ b/build.sh
@@ -2,4 +2,5 @@
set -xe
-gcc -o main main.c -Wall -Wextra -pedantic -g
+gcc -o encode encode.c -Wall -Wextra -pedantic -g
+# gcc -o decode decode.c -Wall -Wextra -pedantic -g
diff --git a/encode b/encode
new file mode 100755
index 0000000..06ab292
--- /dev/null
+++ b/encode
Binary files differ
diff --git a/encode.c b/encode.c
new file mode 100644
index 0000000..9139c06
--- /dev/null
+++ b/encode.c
@@ -0,0 +1,260 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+typedef struct node {
+ bool is_char;
+ char ch;
+ size_t references;
+
+ // to help when saving tree
+ struct node *left;
+ struct node *right;
+
+ // to help encode
+ int direction; // 0 - left; 1 - right
+ struct node *parent;
+} node_t;
+
+void node_char_init(node_t *node, size_t ref, char ch);
+void node_link_init(node_t *node, node_t* left, node_t* right);
+
+// void liner_insert(void base[.size * .nmemb + 1],
+// size_t nmemb, size_t size, void *el,
+// int (*compar)(const void [.size], const void [.size]));
+void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *));
+int compare_nodes(const void *p1, const void *p2);
+
+void free_encoding_table();
+void wb_flush(size_t nmemb);
+
+void gen_tree(FILE *fp);
+size_t write_tree_shape(node_t *root);
+size_t write_tree_data(node_t *root);
+
+size_t write_encoded(node_t *root);
+void encode(FILE *fp);
+
+
+size_t POOL_CAP = 512;
+node_t *pool;
+size_t pool_sz;
+
+size_t WRITE_BUF_CAP = 512;
+char *write_buf;
+
+node_t *encoding_table[256] = {0};
+
+void encode(FILE *fp)
+{
+ // write tree //
+ wb_flush(1); // flush amount of bits (written in gen_tree())
+ wb_flush(write_tree_shape(&pool[pool_sz-1])/8 + 1);
+ wb_flush(write_tree_data(&pool[pool_sz-1]));
+
+ // encode //
+ char ch;
+ while((ch = fgetc(fp)) != EOF) {
+ if(encoding_table[ch] == NULL) {
+ fprintf(stderr, "Character %c not found encoding table\n", ch);
+ exit(1);
+ }
+ write_encoded(encoding_table[ch]);
+ }
+ wb_flush(write_encoded(NULL)/8+1);
+}
+
+int main(int argc, char **argv)
+{
+ if(argc != 2) {
+ fprintf(stderr, "ERROR: Not enough command line arguments\n");
+ exit(1);
+ }
+
+ write_buf = malloc(sizeof(char) * WRITE_BUF_CAP);
+ pool = malloc(sizeof(node_t) * POOL_CAP);
+
+ FILE *fp = fopen(argv[1], "rb");
+ if(!fp) {
+ fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]);
+ exit(1);
+ }
+
+ gen_tree(fp);
+ encode(fp);
+
+ free_encoding_table();
+ free(pool);
+ free(write_buf);
+
+ fclose(fp);
+ return 0;
+}
+
+void gen_tree(FILE *fp)
+{
+ node_t *nodes[256];
+ size_t nodes_sz = 0;
+
+ size_t char_counts[256] = {0};
+ char mem[1024] = {0}; size_t sz;
+
+ // load char counts
+ while((sz = fread(mem, sizeof(char), 1024, fp)) != 0)
+ {
+ for(size_t i = 0; i < sz; i++) {
+ char_counts[(size_t)mem[i]]++;
+ }
+ }
+ rewind(fp);
+
+ // make the tree
+ for(int i = 0; i < 256; i++) {
+ if(char_counts[i] <= 0) continue;
+
+ nodes[nodes_sz] = malloc(sizeof(node_t));
+ node_char_init(nodes[nodes_sz], char_counts[i], (char)i);
+ encoding_table[i] = nodes[nodes_sz];
+ nodes_sz++;
+ }
+
+ qsort(nodes, nodes_sz, sizeof(node_t *), compare_nodes);
+
+ // save the amount of bits
+ write_buf[0] = nodes_sz * 2 - 1;
+
+ // pair last 2 elements and do linear sort
+ for(pool_sz = 0; nodes_sz != 1; nodes_sz--, pool_sz++)
+ {
+ if(pool_sz >= POOL_CAP) {
+ fprintf(stderr, "ERROR: POOL_CAP reached, please re-run the program with a bigger POOL_CAP\n");
+ exit(1);
+ }
+
+ node_link_init(&pool[pool_sz], nodes[nodes_sz-1], nodes[nodes_sz-2]);
+ liner_insert(nodes, nodes_sz-2, sizeof(node_t *), &pool[pool_sz], compare_nodes);
+ }
+}
+
+size_t write_encoded(node_t *root)
+{
+ static size_t bits = 0;
+ if(root == NULL || root->parent == NULL) return bits;
+
+ write_buf[bits/8] |= root->direction << (bits % 8);
+ bits++;
+
+ if(bits/8 == WRITE_BUF_CAP) {
+ wb_flush(WRITE_BUF_CAP);
+ bits = 0;
+ }
+
+ write_encoded(root->parent);
+ return bits;
+}
+
+size_t write_tree_shape(node_t *root)
+{
+ static size_t bits = 0;
+ if(root == NULL) return bits;
+
+ write_buf[bits/8] |= root->is_char << (8 - (bits%8));
+ bits++;
+
+ if(bits/8 == WRITE_BUF_CAP) {
+ wb_flush(WRITE_BUF_CAP);
+ bits = 0;
+ }
+
+ write_tree_shape(root->left);
+ write_tree_shape(root->right);
+
+ return bits;
+}
+
+size_t write_tree_data(node_t *root)
+{
+ static size_t n = 0;
+ if(root == NULL) return n;
+
+ if(root->is_char) {
+ if(n == WRITE_BUF_CAP) {
+ wb_flush(WRITE_BUF_CAP);
+ n = 0;
+ }
+ write_buf[n++] = root->ch;
+ }
+
+ write_tree_data(root->left);
+ write_tree_data(root->right);
+
+ return n;
+}
+
+void wb_flush(size_t nmemb)
+{
+ fwrite(write_buf, sizeof(char), nmemb, stdout);
+ memset(write_buf, 0, sizeof(char) * WRITE_BUF_CAP);
+}
+
+void free_encoding_table()
+{
+ for(int i = 0; i < 256; i++) {
+ if(encoding_table[i] != NULL) {
+ free(encoding_table[i]);
+ }
+ }
+}
+
+int compare_nodes(const void *p1, const void *p2)
+{
+ return (*(node_t **)p2)->references - (*(node_t **)p1)->references;
+}
+
+void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *))
+{
+ for(size_t i = 0; i < nmemb; i++)
+ {
+ void *cur = base + (i * size);
+ if(compar(cur, &el) < 0) continue;
+ for(size_t j = nmemb-1; j >= i; j--)
+ {
+ if(j == (size_t)-1) break; //integer underflow
+
+ void *src = base + (j * size);
+ void *dest = src + size;
+ memcpy(dest, src, size);
+ }
+
+ memcpy(cur, &el, size);
+ return;
+ }
+
+ memcpy(base + (nmemb * size), &el, size);
+}
+
+void node_char_init(node_t *node, size_t ref, char ch)
+{
+ node->is_char = true;
+ node->ch = ch;
+ node->references = ref;
+ node->parent = NULL;
+ node->left = NULL;
+ node->right = NULL;
+}
+
+void node_link_init(node_t *node, node_t* left, node_t* right)
+{
+ node->is_char = false;
+ node->references = left->references + right->references;
+ node->parent = NULL;
+ node->left = left;
+ node->right = right;
+
+ left->parent = node;
+ right->parent = node;
+ left->direction = 0;
+ right->direction = 1;
+}
diff --git a/main.c b/main.c
deleted file mode 100644
index 6ab79d4..0000000
--- a/main.c
+++ /dev/null
@@ -1,173 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdbool.h>
-
-#define POOL_CAP 512
-
-typedef struct node {
- bool is_char;
- char ch;
-
- size_t references;
- int direction; // 0 - left; 1 - right
-
- struct node *parent;
-} node_t;
-
-// void liner_insert(void base[.size * .nmemb + 1],
-// size_t nmemb, size_t size, void *el,
-// int (*compar)(const void [.size], const void [.size]));
-void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *));
-
-int compare_nodes(const void *p1, const void *p2);
-
-void node_char_init(node_t *node, size_t ref, char ch);
-void node_link_init(node_t *node, node_t* left, node_t* right);
-void node_print(node_t *node, int indent);
-
-node_t pool[POOL_CAP];
-size_t pool_sz;
-
-node_t *encoding_table[256];
-
-int main(int argc, char **argv)
-{
- if(argc != 2) {
- fprintf(stderr, "ERROR: Not enough command line arguments\n");
- exit(1);
- }
-
- //-------------------------------------------------//
-
- pool_sz = 0;
-
- for(int i = 0; i < 256; i++)
- encoding_table[i] = NULL;
-
- //-------------------------------------------------//
- // Load the file data
- int times_seen[256] = {0};
-
- FILE *fp = fopen(argv[1], "rb");
- if(!fp) {
- fprintf(stderr, "ERROR: Could not open file %s\n", argv[1]);
- exit(1);
- }
-
- char mem[1024];
- size_t sz;
- while((sz = fread(mem, sizeof(char), 1024, fp)) != 0)
- {
- for(size_t i = 0; i < sz; i++) {
- times_seen[(size_t)mem[i]]++;
- }
- memset(mem, '\0', sizeof(char) * 1024);
- }
- rewind(fp);
-
- //-------------------------------------------------//
- // Load everything into nodes array and sort
-
- node_t *nodes[256];
- size_t nodes_sz = 0;
-
- for(int i = 0; i < 256; i++) {
- if(times_seen[i] <= 0) {
- continue;
- }
-
- nodes[nodes_sz] = malloc(sizeof(node_t));
- node_char_init(nodes[nodes_sz], times_seen[i], (char)i);
- encoding_table[i] = nodes[nodes_sz];
- nodes_sz++;
- }
-
- qsort(nodes, nodes_sz, sizeof(node_t *), compare_nodes);
-
- //-------------------------------------------------//
- // Continuously sort and build the tree
-
- while(nodes_sz != 1)
- {
- nodes_sz--;
- node_link_init(&pool[pool_sz], nodes[nodes_sz], nodes[nodes_sz-1]);
-
- liner_insert(nodes, nodes_sz-1, sizeof(node_t *), &pool[pool_sz++], compare_nodes);
-
- for(size_t i = 0; i < nodes_sz; i++)
- node_print(nodes[i], 0);
- }
-
-
- //-------------------------------------------------//
-
- for(int i = 0; i < 256; i++)
- if(encoding_table[i] != NULL) free(encoding_table[i]);
- fclose(fp);
- return 0;
-}
-
-void liner_insert(void *base, size_t nmemb, size_t size, void *el, int (*compar)(const void *, const void *))
-{
- for(size_t i = 0; i < nmemb; i++)
- {
- void *cur = base + (i * size);
- if(compar(cur, &el) < 0) continue;
-
- for(size_t j = nmemb; j >= i; j--) {
- void *src = base + (j * size);
- void *dest = src + size;
- memcpy(dest, src, size);
- }
-
- memcpy(cur, &el, size);
- break;
- }
-}
-
-int compare_nodes(const void *p1, const void *p2)
-{
- // printf("ff\t%d %d\n", (*(node_t **)p2)->references, (*(node_t **)p1)->references);
- return (*(node_t **)p2)->references - (*(node_t **)p1)->references;
-}
-
-void node_char_init(node_t *node, size_t ref, char ch)
-{
- node->is_char = true;
- node->ch = ch;
- node->references = ref;
- node->parent = NULL;
-}
-
-void node_link_init(node_t *node, node_t* left, node_t* right)
-{
- node->is_char = false;
- node->references = left->references + right->references;
- node->parent = NULL;
-
- left->parent = node;
- right->parent = node;
- left->direction = 0;
- right->direction = 1;
-}
-
-void node_print(node_t *node, int indent)
-{
- for(int i = 0; i < indent; i++) printf(" ");
- printf("ref: %ld\n", node->references);
-
-
- if(node->is_char) {
- for(int i = 0; i < indent; i++) printf(" ");
- printf("char: %c\n", node->ch);
- } else {
- for(int i = 0; i < indent; i++) printf(" ");
- printf("link thing\n");
- }
-
- if(node->parent != NULL) {
- for(int i = 0; i < indent; i++) printf(" ");
- printf("dir: %d\n", node->direction); node_print(node->parent, indent + 4);
- }
-}