瀏覽代碼

Ugh, not feeling so well boss.

AvariceLHubris 1 年之前
父節點
當前提交
58cde4e504
共有 2 個文件被更改,包括 134 次插入16 次删除
  1. 6 0
      src/hufftree/canonical.rs
  2. 128 16
      src/storage.rs

+ 6 - 0
src/hufftree/canonical.rs

@@ -96,6 +96,12 @@ impl CanonicalHufftree {
         }
     }
 
+    pub fn from_bimap(bi: BiMap<char, BitVec>) -> Self { // Builds a tree directly from an existing character/code map.
+        CanonicalHufftree {
+            characters_and_codes: bi // Stored as given; nothing here validates the codes.
+        }
+    }
+
     // TODO: Optimise this (the vector copying is probably extremely inefficient)
     pub fn encode_text(&self, text: &String) -> BitVec {
         let mut converted_text = BitVec::new();

+ 128 - 16
src/storage.rs

@@ -1,5 +1,5 @@
-use crate::hufftree::canonical::convert_no_to_bit_vec;
 use crate::hufftree::canonical::CanonicalHufftree;
+use bimap::BiMap;
 use bit_vec::BitVec;
 use std::io::Read;
 use std::io::Write;
@@ -26,6 +26,8 @@ pub fn store_tree_and_text<F: Write>(
     let mut character_buff: [u8; 4] = [0; 4];
     let mut bit_length: u32 = 0;
 
+    // TODO: Fix bimap not being deterministic
+    // This needs to output characters in order of frequency.
     for (character, code) in tree.get_character_codes() {
         let code_length: u32 = code.len().try_into().unwrap();
         let code_length = code_length.to_be_bytes();
@@ -46,11 +48,11 @@ pub fn store_tree_and_text<F: Write>(
 
     let text_bits: u32 = encoded_text.len().try_into().unwrap();
 
-    println!("Bit length: {}, Text bits: {}.", bit_length, text_bits);
+    // println!("Bit length: {}, Text bits: {}.", bit_length, text_bits);
     bit_length += text_bits;
 
     let buff = buff.to_bytes();
-    println!("Buffer when in bytes:{:?}", buff);
+    // println!("Buffer when in bytes:{:?}", buff);
     // let buff_len: u32 = TryInto::<u32>::try_into(buff.len()).unwrap() * 8;
 
     let encoded_text = encoded_text.to_bytes();
@@ -60,6 +62,91 @@ pub fn store_tree_and_text<F: Write>(
     Ok(())
 }
 
+pub fn read_tree_and_text<F: Read>(reader: &mut F) -> String { // Reads a 4-byte length header, a table of 8-byte records and the remaining bytes, then returns the decoded text. Panics on any read or decode failure.
+    let mut length_of_file_in_bits: [u8; 4] = [0; 4]; // Buffer for the 4-byte header.
+
+    reader.read_exact(&mut length_of_file_in_bits).unwrap(); // Panics if the header cannot be read.
+
+    let mut length_of_file_in_bits: u32 = four_b_to_u32(&length_of_file_in_bits); // Header as a big-endian u32; used below as a running count of bits still expected.
+
+    let mut working_bimap: BiMap<char, BitVec> = BiMap::new(); // Character/code pairs collected from the records.
+
+    let mut char_and_code: [u8; 8] = [0; 8]; // One 8-byte record; bytes 0..4 and 4..8 are handled separately below.
+    reader
+        .read_exact(&mut char_and_code)
+        .expect("Could not read further."); // First record is read unconditionally.
+
+    let mut c: [u8; 4] = [0; 4]; // Scratch copy of the character bytes.
+    while char_and_code[0..4] != [255, 255, 255, 255] { // A record whose first four bytes are all 255 ends the table.
+        println!("Char and code (start):\n{:?}\n", char_and_code);
+        c.clone_from_slice(&char_and_code[4..8]); // Bytes 4..8 hold the character.
+
+        println!("Character: {:?}", c);
+        let c: String = String::from_utf8(Vec::from(c)).expect("Corrupted data 🪳"); // All four bytes must be valid UTF-8, otherwise this panics.
+        // There should only be one character per 4 bytes; only the first decoded char is kept.
+        let c = c.chars().next().expect("Corrupted data 🪳");
+        let code = BitVec::from_bytes(&char_and_code[0..4]); // NOTE(review): bytes 0..4 are used as the code itself, but the test comments in this file label them as a code length - confirm which is intended.
+        println!("Character: {:?}", c);
+
+        working_bimap.insert(c, code);
+
+        length_of_file_in_bits -= 64; // One record is 8 bytes = 64 bits. NOTE(review): unsigned subtraction; underflows if the header is smaller than the bits consumed.
+
+        // For small encodings: stop without reading another record once fewer than 64 bits remain.
+        if length_of_file_in_bits < 64 {
+            break; // char_and_code still holds the record just processed.
+        }
+        reader
+            .read_exact(&mut char_and_code)
+            .expect("Could not read further."); // Next record, checked by the loop condition.
+        println!("Char and code:\n{:?}\n", char_and_code);
+    }
+    println!("Char and code:\n{:?}\n", char_and_code);
+    length_of_file_in_bits -= 32; // Presumably accounts for the 4-byte delimiter - TODO confirm. Same underflow caveat as above.
+
+    println!("Length of file remaining: {}", length_of_file_in_bits);
+
+    if length_of_file_in_bits < 32 {
+        let mut rest_of_binary = Vec::new();
+        reader.read_to_end(&mut rest_of_binary).expect("Could not read data to end."); // Consumes everything left in reader.
+        println!("Rest of binary: {:?}", rest_of_binary);
+        let rest_of_binary = &rest_of_binary[4..]; // Skips the first four remaining bytes; panics if fewer than four are left.
+        println!("Rest of binary: {:?}", rest_of_binary);
+
+        let mut bits = BitVec::from_bytes(rest_of_binary);
+        bits.split_off(length_of_file_in_bits as usize); // Return value is discarded.
+        println!("Bit vec: {:?}", bits); // NOTE(review): bits is only printed here and never reaches the decoder.
+    }
+
+    let mut encoded_text = BitVec::from_bytes(&char_and_code[4..8]); // Bytes 4..8 of the last record read. NOTE(review): after the early break above this is a character record, not a delimiter record - confirm.
+    let mut rest_of_encoded_text = Vec::new();
+    reader
+        .read_to_end(&mut rest_of_encoded_text)
+        .expect("Could not read till EOF."); // NOTE(review): reads nothing if the branch above already drained reader.
+
+    let mut rest_of_encoded_text = BitVec::from_bytes(&rest_of_encoded_text);
+    rest_of_encoded_text.split_off(length_of_file_in_bits as usize); // Return value is discarded. NOTE(review): presumably panics if the index exceeds the bit length - verify against the bit_vec docs.
+    encoded_text.append(&mut rest_of_encoded_text);
+
+    let can_tree = CanonicalHufftree::from_bimap(working_bimap); // Tree built straight from the collected pairs.
+    can_tree.decode_text(encoded_text).unwrap() // Panics if decode_text does not yield a value.
+}
+
+fn four_b_to_u32(b: &[u8; 4]) -> u32 { // Interprets the four bytes as a big-endian u32: b[0] is the most significant byte.
+    let mut result: u32 = 0;
+
+    for (i, bt) in b.iter().enumerate() {
+        let bt32 = *bt as u32; // Widening cast, lossless.
+        result += bt32; // The low 8 bits of result are zero here, so this places the byte in them.
+
+        if i != 3 {
+            result = result << 8; // Make room for the next byte; skipped after the last one.
+        }
+    }
+
+    result // NOTE(review): same value as u32::from_be_bytes(*b).
+}
+
 #[cfg(test)]
 mod test {
     use crate::hufftree::base::Hufftree;
@@ -83,20 +170,45 @@ mod test {
         store_tree_and_text(canonical, &mut virtual_buffer, &input_text).unwrap();
 
         println!("Buffer:{:?}", virtual_buffer);
-        assert_eq!(&virtual_buffer[0..4],
-            &[0,0,0,233]); // Length of tree + encoded text.
-                           // 0,0,0,1, //  Code length of 'a'
-                           // 97,0,0,0, // 'a'
-                           // 0,0,0,2, //  Code length of 'b'
-                           // 98,0,0,0, // 'b'
-                           // 0,0,0,2, //  Code length of 'c'
-                           // 99,0,0,0, // 'c'
-                           // 255,255,255,255, // Delimiter
-                           // 21, 128, // Encoded text.
-                           // ]
+        assert_eq!(&virtual_buffer[0..4], &[0, 0, 0, 233]); // Length of tree + encoded text.
+                                                            // 0,0,0,1, //  Code length of 'a'
+                                                            // 97,0,0,0, // 'a'
+                                                            // 0,0,0,2, //  Code length of 'b'
+                                                            // 98,0,0,0, // 'b'
+                                                            // 0,0,0,2, //  Code length of 'c'
+                                                            // 99,0,0,0, // 'c'
+                                                            // 255,255,255,255, // Delimiter
+                                                            // 21, 128, // Encoded text.
+                                                            // ]
         let size = virtual_buffer.len();
-        assert_eq!(&virtual_buffer[(size - 4)..size],
-            &[255,255,21,128]); 
+        assert_eq!(&virtual_buffer[(size - 4)..size], &[255, 255, 21, 128]);
         // )
     }
+
+    #[test]
+    fn convert_array_to_u32() { // Checks that four_b_to_u32 reads the bytes most-significant first.
+        let two_hundred_fifty_seven: [u8; 4] = [0, 0, 1, 1]; // 1 * 256 + 1
+        let as_num = four_b_to_u32(&two_hundred_fifty_seven);
+        assert_eq!(as_num, 257u32);
+    }
+
+    #[test]
+    fn stores_and_unpack_works() { // Stores a small tree and text into a byte buffer, then reads it back.
+        let mut chars_and_freq: HashMap<char, i32> = HashMap::new(); // Character frequencies used to build the tree.
+        chars_and_freq.insert('a', 25);
+        chars_and_freq.insert('b', 14);
+        chars_and_freq.insert('c', 5);
+
+        let huff = Hufftree::new(chars_and_freq);
+        let canonical = CanonicalHufftree::from_tree(huff);
+
+        let input_text = String::from("aaabbc");
+
+        let mut virtual_buffer = Vec::new(); // In-memory stand-in for the output file.
+        store_tree_and_text(canonical, &mut virtual_buffer, &input_text).unwrap();
+
+        println!("Virtual buffer: {:?}", virtual_buffer);
+
+        let decoded_text = read_tree_and_text(&mut &virtual_buffer[0..virtual_buffer.len()]); // NOTE(review): decoded_text is never compared with input_text, so this only checks that the round trip does not panic.
+    }
 }