use crate::hufftree::base::Hufftree; use bimap::BiMap; use bit_vec::BitVec; #[derive(Debug)] pub struct CanonicalHufftree { characters_and_codes: BiMap, } #[derive(PartialEq, PartialOrd, Eq, Debug)] struct CharTempCode { character: char, code: BitVec, code_length: usize, } impl Ord for CharTempCode { fn cmp(&self, other: &Self) -> std::cmp::Ordering { return self.code_length.cmp(&other.code_length); } } impl CanonicalHufftree { pub fn from_tree(base_tree: Hufftree) -> Self { let characters = base_tree.get_characters(); let mut character_and_codes = Vec::new(); let mut output_characters_and_codes: BiMap = BiMap::new(); for character in characters { let code = base_tree.get_character_code(*character).unwrap(); let length = code.len(); character_and_codes.push(CharTempCode { character: *character, code, code_length: length, }); } character_and_codes.sort(); let mut character_and_codes: Vec = character_and_codes.into_iter().rev().collect(); let mut first = true; let mut prev_length = 0; let mut working_code: u32 = 0b0; while character_and_codes.len() > 0 { let temp_char = character_and_codes.pop().unwrap(); if first { let mut code = BitVec::new(); code.grow(temp_char.code_length, false); prev_length = temp_char.code_length; output_characters_and_codes.insert(temp_char.character, code); first = false; continue; } if temp_char.code_length > prev_length { working_code += 1; working_code = working_code << (temp_char.code_length - prev_length); output_characters_and_codes .insert(temp_char.character, convert_no_to_bit_vec(working_code)); } else { assert_eq!( temp_char.code_length, prev_length, "Something went really wrong if we got here." ); working_code += 1; output_characters_and_codes .insert(temp_char.character, convert_no_to_bit_vec(working_code)); } prev_length = temp_char.code_length; } CanonicalHufftree { characters_and_codes: output_characters_and_codes, } } // TODO: Optimise this (the vector copying is probably extremely inefficient) pub fn encode_text(&self, text: &String) -> BitVec { let mut converted_text = BitVec::new(); for character in text.chars() { let temp_code = self.characters_and_codes.get_by_left(&character).unwrap(); let mut temp_code = temp_code.clone(); converted_text.append(&mut temp_code); } converted_text } pub fn decode_text(&self, text: BitVec) -> Result { let mut decoded_text = String::new(); // So that popping bits removes them from the "start" of the text. let mut text: BitVec = text.iter().rev().collect(); let mut buffer = BitVec::new(); while text.len() > 0 { buffer.push(text.pop().unwrap()); match self.characters_and_codes.get_by_right(&buffer) { Some(character) => { decoded_text.push(*character); buffer.truncate(0); } None => continue, } } if !buffer.is_empty() { Err("Text was not decoded properly (trailing bits).") } else { Ok(decoded_text) } } pub fn get_character_codes(&self) -> BiMap { self.characters_and_codes.clone() } } pub fn convert_no_to_bit_vec(mut numb: u32) -> BitVec { let mut output_vec = BitVec::new(); while numb > 0 { if numb % 2 == 0 { output_vec.push(false); } else { output_vec.push(true); } numb = numb / 2; } let output_vec = output_vec.iter().rev().collect(); output_vec } #[cfg(test)] mod canonical_tests { use std::collections::HashMap; use super::*; #[test] fn correct_conversion_of_number() { let numb = 0b111001; let result = convert_no_to_bit_vec(numb); assert!(result.eq_vec(&[true, true, true, false, false, true])); } #[test] fn create_correct_canonical_codes() { let mut chars_and_freq: HashMap = HashMap::new(); chars_and_freq.insert('a', 25); chars_and_freq.insert('b', 14); chars_and_freq.insert('c', 5); let huff = Hufftree::new(chars_and_freq); let a_code = huff.get_character_code('a').unwrap(); let b_code = huff.get_character_code('b').unwrap(); let c_code = huff.get_character_code('c').unwrap(); assert_eq!(a_code.to_string(), "1"); assert_eq!(b_code.to_string(), "01"); assert_eq!(c_code.to_string(), "00"); let canonical = CanonicalHufftree::from_tree(huff); assert_eq!( canonical .characters_and_codes .get_by_left(&'a') .unwrap() .to_string(), "0" ); assert_eq!( canonical .characters_and_codes .get_by_left(&'b') .unwrap() .to_string(), "10" ); assert_eq!( canonical .characters_and_codes .get_by_left(&'c') .unwrap() .to_string(), "11" ); } #[test] fn encode_and_decode_text() { let mut chars_and_freq: HashMap = HashMap::new(); chars_and_freq.insert('a', 25); chars_and_freq.insert('b', 14); chars_and_freq.insert('c', 5); let huff = Hufftree::new(chars_and_freq); let canonical = CanonicalHufftree::from_tree(huff); let input_text = String::from("aaabacacaaaabbbbbbbccccccccccccaacc"); let encoded_text = canonical.encode_text(&input_text); let decoded_text = canonical.decode_text(encoded_text).unwrap(); assert_eq!(input_text, decoded_text); } }