|
@@ -4,7 +4,24 @@ use bit_vec::BitVec;
|
|
|
use std::io::Read;
|
|
use std::io::Read;
|
|
|
use std::io::Write;
|
|
use std::io::Write;
|
|
|
|
|
|
|
|
-pub fn store_tree_and_text<F: Write>(tree: CanonicalHufftree, writer: &mut F, text: &String) -> Result<(), String> {
|
|
|
|
|
|
|
+/* So what is our file format going to look like?
|
|
|
|
|
+ * This is a toy project, but its usage should be real. I should be able to use
|
|
|
|
|
+ * this to compress actual text files (hence the decision of using Rust chars)
|
|
|
|
|
+ * with Unicode.
|
|
|
|
|
+ *
|
|
|
|
|
+ * Here is the idea:
|
|
|
|
|
+ *
|
|
|
|
|
+ * 4 bytes -> Length of the rest of the file in bits (tree + delimiter + compressed data).
|
|
|
|
|
+ * n * 8 bytes -> CanonicalHufftree stored (8 bytes per entry: (4) code_length, then (4) UTF-8 bytes of the char, zero-padded)
|
|
|
|
|
+ * 4 bytes -> All ones (0xFFFFFFFF), marking the end of the stored tree.
|
|
|
|
|
+ * m bytes -> Compressed data, read only up to the bit specified earlier.
|
|
|
|
|
+ *
|
|
|
|
|
+ */
|
|
|
|
|
+pub fn store_tree_and_text<F: Write>(
|
|
|
|
|
+ tree: CanonicalHufftree,
|
|
|
|
|
+ writer: &mut F,
|
|
|
|
|
+ text: &String,
|
|
|
|
|
+) -> Result<(), String> {
|
|
|
let mut buff = BitVec::new();
|
|
let mut buff = BitVec::new();
|
|
|
let mut character_buff: [u8; 4] = [0; 4];
|
|
let mut character_buff: [u8; 4] = [0; 4];
|
|
|
let mut bit_length: u32 = 0;
|
|
let mut bit_length: u32 = 0;
|
|
@@ -15,9 +32,6 @@ pub fn store_tree_and_text<F: Write>(tree: CanonicalHufftree, writer: &mut F, te
|
|
|
buff.append(&mut BitVec::from_bytes(&code_length));
|
|
buff.append(&mut BitVec::from_bytes(&code_length));
|
|
|
bit_length += 32;
|
|
bit_length += 32;
|
|
|
|
|
|
|
|
- buff.append(&mut BitVec::from_elem(8, false));
|
|
|
|
|
- bit_length += 8;
|
|
|
|
|
-
|
|
|
|
|
character.encode_utf8(&mut character_buff);
|
|
character.encode_utf8(&mut character_buff);
|
|
|
buff.append(&mut BitVec::from_bytes(&character_buff));
|
|
buff.append(&mut BitVec::from_bytes(&character_buff));
|
|
|
bit_length += 32;
|
|
bit_length += 32;
|
|
@@ -25,9 +39,12 @@ pub fn store_tree_and_text<F: Write>(tree: CanonicalHufftree, writer: &mut F, te
|
|
|
character_buff.fill(0);
|
|
character_buff.fill(0);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ buff.append(&mut BitVec::from_elem(32, true));
|
|
|
|
|
+ bit_length += 32;
|
|
|
|
|
+
|
|
|
let encoded_text = tree.encode_text(&text);
|
|
let encoded_text = tree.encode_text(&text);
|
|
|
|
|
|
|
|
- let text_bits:u32 = encoded_text.len().try_into().unwrap();
|
|
|
|
|
|
|
+ let text_bits: u32 = encoded_text.len().try_into().unwrap();
|
|
|
|
|
|
|
|
println!("Bit length: {}, Text bits: {}.", bit_length, text_bits);
|
|
println!("Bit length: {}, Text bits: {}.", bit_length, text_bits);
|
|
|
bit_length += text_bits;
|
|
bit_length += text_bits;
|
|
@@ -45,8 +62,8 @@ pub fn store_tree_and_text<F: Write>(tree: CanonicalHufftree, writer: &mut F, te
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
#[cfg(test)]
|
|
|
mod test {
|
|
mod test {
|
|
|
- use std::collections::HashMap;
|
|
|
|
|
use crate::hufftree::base::Hufftree;
|
|
use crate::hufftree::base::Hufftree;
|
|
|
|
|
+ use std::collections::HashMap;
|
|
|
|
|
|
|
|
use super::*;
|
|
use super::*;
|
|
|
|
|
|
|
@@ -60,12 +77,26 @@ mod test {
|
|
|
let huff = Hufftree::new(chars_and_freq);
|
|
let huff = Hufftree::new(chars_and_freq);
|
|
|
let canonical = CanonicalHufftree::from_tree(huff);
|
|
let canonical = CanonicalHufftree::from_tree(huff);
|
|
|
|
|
|
|
|
- let input_text = String::from("aaabacacaaaabbbbbbbccccccccccccaacc");
|
|
|
|
|
|
|
+ let input_text = String::from("aaabbc");
|
|
|
|
|
|
|
|
let mut virtual_buffer = Vec::new();
|
|
let mut virtual_buffer = Vec::new();
|
|
|
store_tree_and_text(canonical, &mut virtual_buffer, &input_text).unwrap();
|
|
store_tree_and_text(canonical, &mut virtual_buffer, &input_text).unwrap();
|
|
|
|
|
|
|
|
println!("Buffer:{:?}", virtual_buffer);
|
|
println!("Buffer:{:?}", virtual_buffer);
|
|
|
-
|
|
|
|
|
|
|
+ assert_eq!(&virtual_buffer[0..4],
|
|
|
|
|
+ &[0,0,0,233]); // Length of tree + encoded text.
|
|
|
|
|
+ // 0,0,0,1, // Code length of 'a'
|
|
|
|
|
+ // 97,0,0,0, // 'a'
|
|
|
|
|
+ // 0,0,0,2, // Code length of 'b'
|
|
|
|
|
+ // 98,0,0,0, // 'b'
|
|
|
|
|
+ // 0,0,0,2, // Code length of 'c'
|
|
|
|
|
+ // 99,0,0,0, // 'c'
|
|
|
|
|
+ // 255,255,255,255, // Delimiter
|
|
|
|
|
+ // 21, 128, // Encoded text.
|
|
|
|
|
+ // ]
|
|
|
|
|
+ let size = virtual_buffer.len();
|
|
|
|
|
+ assert_eq!(&virtual_buffer[(size - 4)..size],
|
|
|
|
|
+ &[255,255,21,128]);
|
|
|
|
|
+ // )
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|