|
|
@@ -1,5 +1,5 @@
|
|
|
-use crate::hufftree::canonical::convert_no_to_bit_vec;
|
|
|
use crate::hufftree::canonical::CanonicalHufftree;
|
|
|
+use bimap::BiMap;
|
|
|
use bit_vec::BitVec;
|
|
|
use std::io::Read;
|
|
|
use std::io::Write;
|
|
|
@@ -26,6 +26,8 @@ pub fn store_tree_and_text<F: Write>(
|
|
|
let mut character_buff: [u8; 4] = [0; 4];
|
|
|
let mut bit_length: u32 = 0;
|
|
|
|
|
|
+ // TODO: Fix bimap not being deterministic
|
|
|
+ // This needs to output characters in order of frequency.
|
|
|
for (character, code) in tree.get_character_codes() {
|
|
|
let code_length: u32 = code.len().try_into().unwrap();
|
|
|
let code_length = code_length.to_be_bytes();
|
|
|
@@ -46,11 +48,11 @@ pub fn store_tree_and_text<F: Write>(
|
|
|
|
|
|
let text_bits: u32 = encoded_text.len().try_into().unwrap();
|
|
|
|
|
|
- println!("Bit length: {}, Text bits: {}.", bit_length, text_bits);
|
|
|
+ // println!("Bit length: {}, Text bits: {}.", bit_length, text_bits);
|
|
|
bit_length += text_bits;
|
|
|
|
|
|
let buff = buff.to_bytes();
|
|
|
- println!("Buffer when in bytes:{:?}", buff);
|
|
|
+ // println!("Buffer when in bytes:{:?}", buff);
|
|
|
// let buff_len: u32 = TryInto::<u32>::try_into(buff.len()).unwrap() * 8;
|
|
|
|
|
|
let encoded_text = encoded_text.to_bytes();
|
|
|
@@ -60,6 +62,91 @@ pub fn store_tree_and_text<F: Write>(
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+pub fn read_tree_and_text<F: Read>(reader: &mut F) -> String {
|
|
|
+ let mut length_of_file_in_bits: [u8; 4] = [0; 4];
|
|
|
+
|
|
|
+ reader.read_exact(&mut length_of_file_in_bits).unwrap();
|
|
|
+
|
|
|
+ let mut length_of_file_in_bits: u32 = four_b_to_u32(&length_of_file_in_bits);
|
|
|
+
|
|
|
+ let mut working_bimap: BiMap<char, BitVec> = BiMap::new();
|
|
|
+
|
|
|
+ let mut char_and_code: [u8; 8] = [0; 8];
|
|
|
+ reader
|
|
|
+ .read_exact(&mut char_and_code)
|
|
|
+ .expect("Could not read further.");
|
|
|
+
|
|
|
+ let mut c: [u8; 4] = [0; 4];
|
|
|
+ while char_and_code[0..4] != [255, 255, 255, 255] {
|
|
|
+ println!("Char and code (start):\n{:?}\n", char_and_code);
|
|
|
+ c.clone_from_slice(&char_and_code[4..8]);
|
|
|
+
|
|
|
+ println!("Character: {:?}", c);
|
|
|
+ let c: String = String::from_utf8(Vec::from(c)).expect("Corrupted data 🪳");
|
|
|
+ // There should only be one character per 4 bytes.
|
|
|
+ let c = c.chars().next().expect("Corrupted data 🪳");
|
|
|
+ let code = BitVec::from_bytes(&char_and_code[0..4]);
|
|
|
+ println!("Character: {:?}", c);
|
|
|
+
|
|
|
+ working_bimap.insert(c, code);
|
|
|
+
|
|
|
+ length_of_file_in_bits -= 64;
|
|
|
+
|
|
|
+ // For small encodings
|
|
|
+ if length_of_file_in_bits < 64 {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ reader
|
|
|
+ .read_exact(&mut char_and_code)
|
|
|
+ .expect("Could not read further.");
|
|
|
+ println!("Char and code:\n{:?}\n", char_and_code);
|
|
|
+ }
|
|
|
+ println!("Char and code:\n{:?}\n", char_and_code);
|
|
|
+ length_of_file_in_bits -= 32;
|
|
|
+
|
|
|
+ println!("Length of file remaining: {}", length_of_file_in_bits);
|
|
|
+
|
|
|
+ if length_of_file_in_bits < 32 {
|
|
|
+ let mut rest_of_binary = Vec::new();
|
|
|
+ reader.read_to_end(&mut rest_of_binary).expect("Could not read data to end.");
|
|
|
+ println!("Rest of binary: {:?}", rest_of_binary);
|
|
|
+ let rest_of_binary = &rest_of_binary[4..];
|
|
|
+ println!("Rest of binary: {:?}", rest_of_binary);
|
|
|
+
|
|
|
+ let mut bits = BitVec::from_bytes(rest_of_binary);
|
|
|
+ bits.split_off(length_of_file_in_bits as usize);
|
|
|
+ println!("Bit vec: {:?}", bits);
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut encoded_text = BitVec::from_bytes(&char_and_code[4..8]);
|
|
|
+ let mut rest_of_encoded_text = Vec::new();
|
|
|
+ reader
|
|
|
+ .read_to_end(&mut rest_of_encoded_text)
|
|
|
+ .expect("Could not read till EOF.");
|
|
|
+
|
|
|
+ let mut rest_of_encoded_text = BitVec::from_bytes(&rest_of_encoded_text);
|
|
|
+ rest_of_encoded_text.split_off(length_of_file_in_bits as usize);
|
|
|
+ encoded_text.append(&mut rest_of_encoded_text);
|
|
|
+
|
|
|
+ let can_tree = CanonicalHufftree::from_bimap(working_bimap);
|
|
|
+ can_tree.decode_text(encoded_text).unwrap()
|
|
|
+}
|
|
|
+
|
|
|
/// Interprets four bytes as a big-endian `u32`: `b[0]` is the most significant
/// byte and `b[3]` the least significant.
fn four_b_to_u32(b: &[u8; 4]) -> u32 {
    u32::from_be_bytes(*b)
}
|
|
|
+
|
|
|
#[cfg(test)]
|
|
|
mod test {
|
|
|
use crate::hufftree::base::Hufftree;
|
|
|
@@ -83,20 +170,45 @@ mod test {
|
|
|
store_tree_and_text(canonical, &mut virtual_buffer, &input_text).unwrap();
|
|
|
|
|
|
println!("Buffer:{:?}", virtual_buffer);
|
|
|
- assert_eq!(&virtual_buffer[0..4],
|
|
|
- &[0,0,0,233]); // Length of tree + encoded text.
|
|
|
- // 0,0,0,1, // Code length of 'a'
|
|
|
- // 97,0,0,0, // 'a'
|
|
|
- // 0,0,0,2, // Code length of 'b'
|
|
|
- // 98,0,0,0, // 'b'
|
|
|
- // 0,0,0,2, // Code length of 'c'
|
|
|
- // 99,0,0,0, // 'c'
|
|
|
- // 255,255,255,255, // Delimiter
|
|
|
- // 21, 128, // Encoded text.
|
|
|
- // ]
|
|
|
+ assert_eq!(&virtual_buffer[0..4], &[0, 0, 0, 233]); // Length of tree + encoded text.
|
|
|
+ // 0,0,0,1, // Code length of 'a'
|
|
|
+ // 97,0,0,0, // 'a'
|
|
|
+ // 0,0,0,2, // Code length of 'b'
|
|
|
+ // 98,0,0,0, // 'b'
|
|
|
+ // 0,0,0,2, // Code length of 'c'
|
|
|
+ // 99,0,0,0, // 'c'
|
|
|
+ // 255,255,255,255, // Delimiter
|
|
|
+ // 21, 128, // Encoded text.
|
|
|
+ // ]
|
|
|
let size = virtual_buffer.len();
|
|
|
- assert_eq!(&virtual_buffer[(size - 4)..size],
|
|
|
- &[255,255,21,128]);
|
|
|
+ assert_eq!(&virtual_buffer[(size - 4)..size], &[255, 255, 21, 128]);
|
|
|
// )
|
|
|
}
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn convert_array_to_u32() {
|
|
|
+ let two_hundred_fifty_seven: [u8; 4] = [0, 0, 1, 1];
|
|
|
+ let as_num = four_b_to_u32(&two_hundred_fifty_seven);
|
|
|
+ assert_eq!(as_num, 257u32);
|
|
|
+ }
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn stores_and_unpack_works() {
|
|
|
+ let mut chars_and_freq: HashMap<char, i32> = HashMap::new();
|
|
|
+ chars_and_freq.insert('a', 25);
|
|
|
+ chars_and_freq.insert('b', 14);
|
|
|
+ chars_and_freq.insert('c', 5);
|
|
|
+
|
|
|
+ let huff = Hufftree::new(chars_and_freq);
|
|
|
+ let canonical = CanonicalHufftree::from_tree(huff);
|
|
|
+
|
|
|
+ let input_text = String::from("aaabbc");
|
|
|
+
|
|
|
+ let mut virtual_buffer = Vec::new();
|
|
|
+ store_tree_and_text(canonical, &mut virtual_buffer, &input_text).unwrap();
|
|
|
+
|
|
|
+ println!("Virtual buffer: {:?}", virtual_buffer);
|
|
|
+
|
|
|
+ let decoded_text = read_tree_and_text(&mut &virtual_buffer[0..virtual_buffer.len()]);
|
|
|
+ }
|
|
|
}
|