AvariceLHubris пре 1 година
родитељ
комит
3bffb05864
5 измењених фајлова са 95 додато и 20 уклоњено
  1. 1 0
      Cargo.toml
  2. 11 0
      src/cli.rs
  3. 58 11
      src/hufftree/canonical.rs
  4. 23 8
      src/main.rs
  5. 2 1
      src/storage.rs

+ 1 - 0
Cargo.toml

@@ -1,6 +1,7 @@
 [package]
 name = "huffman"
 version = "0.1.0"
+authors = ["Michalis Marcoux"]
 edition = "2021"
 
 [dependencies]

+ 11 - 0
src/cli.rs

@@ -1,6 +1,7 @@
 use std::ffi::OsString;
 
 use clap::Parser;
+use clap::ValueEnum;
 
 #[derive(Parser, Debug)]
 #[command(version, about, long_about = None)]
@@ -10,4 +11,14 @@ pub struct Args {
 
     #[arg(short, long)]
     pub output_file: OsString,
+
+    #[arg(short, long, value_enum)]
+    pub mode: Mode,
+}
+
+#[derive(Clone, Copy, Debug, ValueEnum, Default)]
+pub enum Mode { // Operation selected through the `mode` field of `Args`; `main` matches on it.
+    Extract, // main.rs reads the input as bytes, decodes it with `storage::read_tree_and_text`, and writes the text out.
+    #[default] // NOTE(review): the `mode` arg declares no default value, so this default looks unused by the CLI parser - confirm.
+    Compress, // main.rs reads the input as a string, builds the canonical Huffman tree, and stores tree + text.
 }

+ 58 - 11
src/hufftree/canonical.rs

@@ -55,7 +55,6 @@ impl CanonicalHufftree {
         }
 
         character_and_codes.sort();
-        // println!("Ordered characters: {:?}", character_and_codes);
         let mut character_and_codes: Vec<CharTempCode> =
             character_and_codes.into_iter().rev().collect();
         // println!("Ordered characters: {:?}", character_and_codes);
@@ -68,10 +67,12 @@ impl CanonicalHufftree {
 
             // This will result in the vector being ordered with the most frequent
             // character first, ordered by code length.
-            storage_char_codes.push((temp_char.character, temp_char.code_length as u32));
+            storage_char_codes.push((temp_char.character.clone(), temp_char.code_length as u32));
+
+            let mut code = BitVec::new();
+            code.grow(temp_char.code_length, false);
+
             if first {
-                let mut code = BitVec::new();
-                code.grow(temp_char.code_length, false);
                 prev_length = temp_char.code_length;
 
                 output_characters_and_codes.insert(temp_char.character, code);
@@ -83,16 +84,31 @@ impl CanonicalHufftree {
                 working_code += 1;
                 working_code = working_code << (temp_char.code_length - prev_length);
 
+
+                let code = convert_no_to_bit_vec_known_length(working_code, &mut code);
                 output_characters_and_codes
-                    .insert(temp_char.character, convert_no_to_bit_vec(working_code));
+                    .insert_no_overwrite(
+                        temp_char.character,
+                        code
+                    )
+                    .expect("There was already a character with that code.");
+
+                assert!(output_characters_and_codes.contains_left(&temp_char.character));
             } else {
                 assert_eq!(
                     temp_char.code_length, prev_length,
                     "Something went really wrong if we got here."
                 );
                 working_code += 1;
+
+                let code = convert_no_to_bit_vec_known_length(working_code, &mut code);
+
                 output_characters_and_codes
-                    .insert(temp_char.character, convert_no_to_bit_vec(working_code));
+                    .insert_no_overwrite(
+                        temp_char.character,
+                        code
+                    )
+                    .expect("There was already a character with that code.");
             }
 
             prev_length = temp_char.code_length;
@@ -117,12 +133,13 @@ impl CanonicalHufftree {
         let mut working_code: u32 = 0b0;
         while temp_storage.len() > 0 {
             let (temp_char, code_length) = temp_storage.pop().unwrap();
-
             // This will result in the vector being ordered with the most frequent
             // character first, ordered by code length.
+            //
+            let mut code = BitVec::new();
+            code.grow(code_length as usize, false);
+
             if first {
-                let mut code = BitVec::new();
-                code.grow(code_length as usize, false);
                 prev_length = code_length;
 
                 bi.insert(temp_char, code);
@@ -134,14 +151,22 @@ impl CanonicalHufftree {
                 working_code += 1;
                 working_code = working_code << (code_length - prev_length);
 
-                bi.insert(temp_char, convert_no_to_bit_vec(working_code));
+                let code = convert_no_to_bit_vec_known_length(working_code, &mut code);
+                bi.insert_no_overwrite(
+                    temp_char,
+                     code,
+                ).expect("There was already a character with that code.");
             } else {
                 assert_eq!(
                     code_length, prev_length,
                     "Something went really wrong if we got here."
                 );
                 working_code += 1;
-                bi.insert(temp_char, convert_no_to_bit_vec(working_code));
+                let code = convert_no_to_bit_vec_known_length(working_code, &mut code);
+                bi.insert_no_overwrite(
+                    temp_char,
+                     code,
+                ).expect("There was already a character with that code.");
             }
 
             prev_length = code_length;
@@ -185,6 +210,11 @@ impl CanonicalHufftree {
             }
         }
 
+        println!("Decoded text: {}", decoded_text);
+        println!("Buff: {:?}", buffer);
+
+        println!("\nSelf:\n{:?}\n\n", self.characters_and_codes);
+
         if !buffer.is_empty() {
             Err("Text was not decoded properly (trailing bits).")
         } else {
@@ -215,6 +245,23 @@ pub fn convert_no_to_bit_vec(mut numb: u32) -> BitVec {
     output_vec
 }
 
+/// Writes the binary digits of `numb` into `bits` and returns them with the
+/// most significant digit first.
+///
+/// `bits` acts as a pre-sized scratch buffer: the callers in this file grow it
+/// to the code length, filled with `false`, before calling. Digits are stored
+/// starting at index 0 with the least significant bit, so positions that
+/// `numb` never reaches keep the value the caller put there. After the call
+/// `bits` itself still holds the least-significant-first layout; only the
+/// returned vector is reversed.
+///
+/// NOTE(review): presumably `BitVec::set` panics when `numb` needs more digits
+/// than `bits` has slots - confirm against the BitVec crate in use.
+pub fn convert_no_to_bit_vec_known_length(mut numb: u32, bits: &mut BitVec) -> BitVec {
+    // Peel off one binary digit per iteration, lowest digit first.
+    let mut position = 0;
+    while numb != 0 {
+        bits.set(position, (numb & 1) == 1);
+        numb >>= 1;
+        position += 1;
+    }
+
+    // Reverse so the leading (padding) positions come first, followed by the
+    // digits of `numb` from most to least significant.
+    bits.iter().rev().collect()
+}
+
 #[cfg(test)]
 mod canonical_tests {
     use std::collections::HashMap;

+ 23 - 8
src/main.rs

@@ -1,3 +1,5 @@
+use std::io::Write;
+
 use clap::Parser;
 use huffman::{cli, hufftree, storage};
 
@@ -6,21 +8,34 @@ fn main() -> Result<(), std::io::Error> {
 
     let inputf = args.input_file;
     let outputf = args.output_file;
+    let mode = args.mode;
 
     let working_directory = std::path::Path::new(".");
     let inputf = working_directory.join(inputf);
 
-    let inputf = std::fs::read_to_string(inputf)?;
-    let char_f = huffman::hufftree::base::get_char_frequencies(&inputf);
-
-    println!("Creating Huffman Tree...");
-    let base_tree = huffman::hufftree::base::Hufftree::new(char_f);
-    let canonical_tree = hufftree::canonical::CanonicalHufftree::from_tree(base_tree);
-
     let outputf = working_directory.join(outputf);
     let mut outputf = std::fs::File::create(outputf)?;
 
-    storage::store_tree_and_text(canonical_tree, &mut outputf, &inputf).expect("Could not store the tree and text.");
+    match mode {
+        cli::Mode::Extract => {
+            let inputf = std::fs::read(inputf)?;
+            let decoded_text = huffman::storage::read_tree_and_text(&mut &inputf[..]);
+
+            outputf.write_all(decoded_text.as_bytes()).unwrap();
+        }
+
+        cli::Mode::Compress => {
+            let inputf = std::fs::read_to_string(inputf)?;
+            let char_f = huffman::hufftree::base::get_char_frequencies(&inputf);
+
+            println!("Creating Huffman Tree...");
+            let base_tree = huffman::hufftree::base::Hufftree::new(char_f);
+            let canonical_tree = hufftree::canonical::CanonicalHufftree::from_tree(base_tree);
+
+            storage::store_tree_and_text(canonical_tree, &mut outputf, &inputf)
+                .expect("Could not store the tree and text.");
+        }
+    }
 
     Ok(())
 }

+ 2 - 1
src/storage.rs

@@ -133,7 +133,8 @@ pub fn read_tree_and_text<F: Read>(reader: &mut F) -> String {
     // println!("Bit vec: {:?}", rest_of_encoded_text);
     encoded_text.append(&mut rest_of_encoded_text);
     encoded_text.split_off(length_of_file_in_bits as usize);
-    // println!("Encoded text: {:?}", encoded_text);
+    println!("Encoded text: {:?}\n", encoded_text);
+    println!("Working vec: {:?}", working_vec);
 
     let can_tree = CanonicalHufftree::from_vec(working_vec);
     can_tree.decode_text(encoded_text).unwrap()