Bladeren bron

Works for messages that are of length <32 bits.

Time to make it work for every other scenario.
AvariceLHubris 1 jaar geleden
bovenliggende
commit
e1f2a32d9f
2 gewijzigde bestanden met toevoegingen van 70 en 12 verwijderingen
  1. 61 5
      src/hufftree/canonical.rs
  2. 9 7
      src/storage.rs

+ 61 - 5
src/hufftree/canonical.rs

@@ -5,6 +5,7 @@ use bit_vec::BitVec;
 #[derive(Debug)]
 pub struct CanonicalHufftree {
     characters_and_codes: BiMap<char, BitVec>,
+    storage_char_codes: Vec<(char, u32)>,
 }
 
 #[derive(Debug)]
@@ -16,7 +17,8 @@ struct CharTempCode {
 
 impl Ord for CharTempCode {
     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.code_length.cmp(&other.code_length)
+        self.code_length
+            .cmp(&other.code_length)
             .then(other.code.cmp(&self.code))
     }
 }
@@ -40,6 +42,7 @@ impl CanonicalHufftree {
         let characters = base_tree.get_characters();
         let mut character_and_codes = Vec::new();
         let mut output_characters_and_codes: BiMap<char, BitVec> = BiMap::new();
+        let mut storage_char_codes = Vec::new();
 
         for character in characters {
             let code = base_tree.get_character_code(*character).unwrap();
@@ -62,6 +65,10 @@ impl CanonicalHufftree {
         let mut working_code: u32 = 0b0;
         while character_and_codes.len() > 0 {
             let temp_char = character_and_codes.pop().unwrap();
+
+            // This will result in the vector being ordered with the most frequent
+            // character first, ordered by code length.
+            storage_char_codes.push((temp_char.character, temp_char.code_length as u32));
             if first {
                 let mut code = BitVec::new();
                 code.grow(temp_char.code_length, false);
@@ -93,12 +100,57 @@ impl CanonicalHufftree {
 
         CanonicalHufftree {
             characters_and_codes: output_characters_and_codes,
+            storage_char_codes,
         }
     }
 
-    pub fn from_bimap(bi: BiMap<char, u32>) -> Self {
+    pub fn from_vec(storage: Vec<(char, u32)>) -> Self {
+        // Assume that vec is ordered first by code length, then by
+        // character frequency, as this is the way the file should have
+        // been encoded. It is reversed here so that `pop()` below
+        // yields the entries in their original (stored) order.
+        let mut temp_storage: Vec<(char, u32)> = storage.into_iter().rev().collect();
+
+        let mut bi = BiMap::new();
+        let mut first = true;
+        let mut prev_length = 0;
+        let mut working_code: u32 = 0b0;
+        while temp_storage.len() > 0 {
+            let (temp_char, code_length) = temp_storage.pop().unwrap();
+
+            // The first entry is assigned the all-zero code of its length;
+            // every later entry is derived from the running `working_code`.
+            if first {
+                let mut code = BitVec::new();
+                code.grow(code_length as usize, false);
+                prev_length = code_length;
+
+                bi.insert(temp_char, code);
+                first = false;
+                continue;
+            }
+
+            if code_length > prev_length {
+                working_code += 1;
+                working_code = working_code << (code_length - prev_length);
+
+                bi.insert(temp_char, convert_no_to_bit_vec(working_code));
+            } else {
+                assert_eq!(
+                    code_length, prev_length,
+                    "Something went really wrong if we got here."
+                );
+                working_code += 1;
+                bi.insert(temp_char, convert_no_to_bit_vec(working_code));
+            }
+
+            prev_length = code_length;
+        }
+
         CanonicalHufftree {
-            characters_and_codes: bi
+            characters_and_codes: bi,
+            // Will be empty: the loop above pops every entry.
+            storage_char_codes: temp_storage,
         }
     }
 
@@ -140,8 +192,12 @@ impl CanonicalHufftree {
         }
     }
 
-    pub fn get_character_codes(&self) -> BiMap<char, BitVec> {
-        self.characters_and_codes.clone()
+    // pub fn get_character_codes(&self) -> BiMap<char, BitVec> {
+    //     self.characters_and_codes.clone()
+    // }
+
+    pub fn get_character_codes_for_storage(&self) -> Vec<(char, u32)> {
+        self.storage_char_codes.clone()
     }
 }
 

+ 9 - 7
src/storage.rs

@@ -28,8 +28,7 @@ pub fn store_tree_and_text<F: Write>(
 
     // TODO: Fix bimap not being deterministic
     // This needs to output characters in order of frequency.
-    for (character, code) in tree.get_character_codes() {
-        let code_length: u32 = code.len().try_into().unwrap();
+    for (character, code_length) in tree.get_character_codes_for_storage() {
         let code_length = code_length.to_be_bytes();
         buff.append(&mut BitVec::from_bytes(&code_length));
         bit_length += 32;
@@ -69,7 +68,7 @@ pub fn read_tree_and_text<F: Read>(reader: &mut F) -> String {
 
     let mut length_of_file_in_bits: u32 = four_b_to_u32(&length_of_file_in_bits);
 
-    let mut working_bimap: BiMap<char, u32> = BiMap::new();
+    let mut working_vec: Vec<(char, u32)> = Vec::new();
 
     let mut char_and_code: [u8; 8] = [0; 8];
     reader
@@ -90,7 +89,7 @@ pub fn read_tree_and_text<F: Read>(reader: &mut F) -> String {
         code.clone_from_slice(&char_and_code[0..4]);
         // println!("Character: {:?}", c);
 
-        working_bimap.insert(c, four_b_to_u32(&code));
+        working_vec.push((c, four_b_to_u32(&code)));
 
         length_of_file_in_bits -= 64;
 
@@ -121,7 +120,7 @@ pub fn read_tree_and_text<F: Read>(reader: &mut F) -> String {
         bits.split_off(length_of_file_in_bits as usize);
         println!("Bit vec: {:?}", bits);
 
-        let can_tree = CanonicalHufftree::from_bimap(working_bimap);
+        let can_tree = CanonicalHufftree::from_vec(working_vec);
         return can_tree.decode_text(bits).unwrap();
     }
 
@@ -135,7 +134,7 @@ pub fn read_tree_and_text<F: Read>(reader: &mut F) -> String {
     rest_of_encoded_text.split_off(length_of_file_in_bits as usize);
     encoded_text.append(&mut rest_of_encoded_text);
 
-    let can_tree = CanonicalHufftree::from_bimap(working_bimap);
+    let can_tree = CanonicalHufftree::from_vec(working_vec);
     can_tree.decode_text(encoded_text).unwrap()
 }
 
@@ -209,7 +208,7 @@ mod test {
         let huff = Hufftree::new(chars_and_freq);
         let canonical = CanonicalHufftree::from_tree(huff);
 
-        let input_text = String::from("aaabbc");
+        let input_text = String::from("aacacacacabbbbbbbaaac");
 
         let mut virtual_buffer = Vec::new();
         store_tree_and_text(canonical, &mut virtual_buffer, &input_text).unwrap();
@@ -217,5 +216,8 @@ mod test {
         println!("Virtual buffer: {:?}", virtual_buffer);
 
         let decoded_text = read_tree_and_text(&mut &virtual_buffer[0..virtual_buffer.len()]);
+
+        assert_eq!(decoded_text, input_text);
+        println!("Decoded text: {}\nInput text:{}", decoded_text, input_text);
     }
 }