|
|
@@ -0,0 +1,214 @@
|
|
|
+use crate::hufftree::base::Hufftree;
|
|
|
+use bimap::BiMap;
|
|
|
+use bit_vec::BitVec;
|
|
|
+
|
|
|
+#[derive(Debug)]
|
|
|
+pub struct CanonicalHufftree {
|
|
|
+ base_tree: Hufftree,
|
|
|
+ characters_and_codes: BiMap<char, BitVec>,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(PartialEq, PartialOrd, Eq, Debug)]
|
|
|
+struct CharTempCode {
|
|
|
+ character: char,
|
|
|
+ code: BitVec,
|
|
|
+ code_length: usize,
|
|
|
+}
|
|
|
+
|
|
|
+impl Ord for CharTempCode {
|
|
|
+ fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
|
|
+ return self.code_length.cmp(&other.code_length);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl CanonicalHufftree {
|
|
|
+ pub fn from_tree(base_tree: Hufftree) -> Self {
|
|
|
+ let characters = base_tree.get_characters();
|
|
|
+ let mut character_and_codes = Vec::new();
|
|
|
+ let mut output_characters_and_codes: BiMap<char, BitVec> = BiMap::new();
|
|
|
+
|
|
|
+ for character in characters {
|
|
|
+ let code = base_tree.get_character_code(*character).unwrap();
|
|
|
+ let length = code.len();
|
|
|
+ character_and_codes.push(CharTempCode {
|
|
|
+ character: *character,
|
|
|
+ code,
|
|
|
+ code_length: length,
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ character_and_codes.sort();
|
|
|
+ let mut character_and_codes: Vec<CharTempCode> =
|
|
|
+ character_and_codes.into_iter().rev().collect();
|
|
|
+
|
|
|
+ let mut first = true;
|
|
|
+ let mut prev_length = 0;
|
|
|
+ let mut working_code: u32 = 0b0;
|
|
|
+ while character_and_codes.len() > 0 {
|
|
|
+ let temp_char = character_and_codes.pop().unwrap();
|
|
|
+ if first {
|
|
|
+ let mut code = BitVec::new();
|
|
|
+ code.grow(temp_char.code_length, false);
|
|
|
+ prev_length = temp_char.code_length;
|
|
|
+
|
|
|
+ output_characters_and_codes.insert(temp_char.character, code);
|
|
|
+ first = false;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ if temp_char.code_length > prev_length {
|
|
|
+ working_code += 1;
|
|
|
+ working_code = working_code << (temp_char.code_length - prev_length);
|
|
|
+
|
|
|
+ output_characters_and_codes
|
|
|
+ .insert(temp_char.character, convert_no_to_bit_vec(working_code));
|
|
|
+ } else {
|
|
|
+ assert_eq!(
|
|
|
+ temp_char.code_length, prev_length,
|
|
|
+ "Something went really wrong if we got here."
|
|
|
+ );
|
|
|
+ working_code += 1;
|
|
|
+ output_characters_and_codes
|
|
|
+ .insert(temp_char.character, convert_no_to_bit_vec(working_code));
|
|
|
+ }
|
|
|
+
|
|
|
+ prev_length = temp_char.code_length;
|
|
|
+ }
|
|
|
+
|
|
|
+ CanonicalHufftree {
|
|
|
+ base_tree,
|
|
|
+ characters_and_codes: output_characters_and_codes,
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // TODO: Optimise this (the vector copying is probably extremely inefficient)
|
|
|
+ pub fn encode_text(&self, text: &String) -> BitVec {
|
|
|
+ let mut converted_text = BitVec::new();
|
|
|
+
|
|
|
+ for character in text.chars() {
|
|
|
+ let temp_code = self.characters_and_codes.get_by_left(&character).unwrap();
|
|
|
+ let mut temp_code = temp_code.clone();
|
|
|
+ converted_text.append(&mut temp_code);
|
|
|
+ }
|
|
|
+
|
|
|
+ converted_text
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn decode_text(&self, text: BitVec) -> Result<String, &str> {
|
|
|
+ let mut decoded_text = String::new();
|
|
|
+
|
|
|
+ // So that popping bits removes them from the "start" of the text.
|
|
|
+ let mut text: BitVec = text.iter().rev().collect();
|
|
|
+ println!("Reversed encoded text: {:?}", text);
|
|
|
+ let mut buffer = BitVec::new();
|
|
|
+
|
|
|
+ while text.len() > 0 {
|
|
|
+ buffer.push(text.pop().unwrap());
|
|
|
+ match self.characters_and_codes.get_by_right(&buffer) {
|
|
|
+ Some(character) => {
|
|
|
+ decoded_text.push(*character);
|
|
|
+ buffer.truncate(0);
|
|
|
+ }
|
|
|
+ None => continue,
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if !buffer.is_empty() {
|
|
|
+ Err("Text was not decoded properly (trailing bits).")
|
|
|
+ } else {
|
|
|
+ Ok(decoded_text)
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+fn convert_no_to_bit_vec(mut numb: u32) -> BitVec {
|
|
|
+ let mut output_vec = BitVec::new();
|
|
|
+ while numb > 0 {
|
|
|
+ if numb % 2 == 0 {
|
|
|
+ output_vec.push(false);
|
|
|
+ } else {
|
|
|
+ output_vec.push(true);
|
|
|
+ }
|
|
|
+ numb = numb / 2;
|
|
|
+ }
|
|
|
+ let output_vec = output_vec.iter().rev().collect();
|
|
|
+ output_vec
|
|
|
+}
|
|
|
+
|
|
|
+#[cfg(test)]
|
|
|
+mod canonical_tests {
|
|
|
+ use std::collections::HashMap;
|
|
|
+
|
|
|
+ use super::*;
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn correct_conversion_of_number() {
|
|
|
+ let numb = 0b111001;
|
|
|
+ let result = convert_no_to_bit_vec(numb);
|
|
|
+
|
|
|
+ assert!(result.eq_vec(&[true, true, true, false, false, true]));
|
|
|
+ }
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn create_correct_canonical_codes() {
|
|
|
+ let mut chars_and_freq: HashMap<char, i32> = HashMap::new();
|
|
|
+ chars_and_freq.insert('a', 25);
|
|
|
+ chars_and_freq.insert('b', 14);
|
|
|
+ chars_and_freq.insert('c', 5);
|
|
|
+
|
|
|
+ let huff = Hufftree::new(chars_and_freq);
|
|
|
+
|
|
|
+ let a_code = huff.get_character_code('a').unwrap();
|
|
|
+ let b_code = huff.get_character_code('b').unwrap();
|
|
|
+ let c_code = huff.get_character_code('c').unwrap();
|
|
|
+ assert_eq!(a_code.to_string(), "1");
|
|
|
+ assert_eq!(b_code.to_string(), "01");
|
|
|
+ assert_eq!(c_code.to_string(), "00");
|
|
|
+
|
|
|
+ let canonical = CanonicalHufftree::from_tree(huff);
|
|
|
+ assert_eq!(
|
|
|
+ canonical
|
|
|
+ .characters_and_codes
|
|
|
+ .get_by_left(&'a')
|
|
|
+ .unwrap()
|
|
|
+ .to_string(),
|
|
|
+ "0"
|
|
|
+ );
|
|
|
+ assert_eq!(
|
|
|
+ canonical
|
|
|
+ .characters_and_codes
|
|
|
+ .get_by_left(&'b')
|
|
|
+ .unwrap()
|
|
|
+ .to_string(),
|
|
|
+ "10"
|
|
|
+ );
|
|
|
+
|
|
|
+ assert_eq!(
|
|
|
+ canonical
|
|
|
+ .characters_and_codes
|
|
|
+ .get_by_left(&'c')
|
|
|
+ .unwrap()
|
|
|
+ .to_string(),
|
|
|
+ "11"
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn encode_and_decode_text() {
|
|
|
+ let mut chars_and_freq: HashMap<char, i32> = HashMap::new();
|
|
|
+ chars_and_freq.insert('a', 25);
|
|
|
+ chars_and_freq.insert('b', 14);
|
|
|
+ chars_and_freq.insert('c', 5);
|
|
|
+
|
|
|
+ let huff = Hufftree::new(chars_and_freq);
|
|
|
+ let canonical = CanonicalHufftree::from_tree(huff);
|
|
|
+
|
|
|
+ let input_text = String::from("aaabacacaaaabbbbbbbccccccccccccaacc");
|
|
|
+ let encoded_text = canonical.encode_text(&input_text);
|
|
|
+ println!("Encoded text:{:?}", encoded_text);
|
|
|
+
|
|
|
+ let decoded_text = canonical.decode_text(encoded_text).unwrap();
|
|
|
+
|
|
|
+ assert_eq!(input_text, decoded_text);
|
|
|
+ }
|
|
|
+}
|