| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214 |
- use crate::hufftree::base::Hufftree;
- use bimap::BiMap;
- use bit_vec::BitVec;
- #[derive(Debug)]
- pub struct CanonicalHufftree {
- characters_and_codes: BiMap<char, BitVec>,
- }
- #[derive(PartialEq, PartialOrd, Eq, Debug)]
- struct CharTempCode {
- character: char,
- code: BitVec,
- code_length: usize,
- }
- impl Ord for CharTempCode {
- fn cmp(&self, other: &Self) -> std::cmp::Ordering {
- return self.code_length.cmp(&other.code_length);
- }
- }
- impl CanonicalHufftree {
- pub fn from_tree(base_tree: Hufftree) -> Self {
- let characters = base_tree.get_characters();
- let mut character_and_codes = Vec::new();
- let mut output_characters_and_codes: BiMap<char, BitVec> = BiMap::new();
- for character in characters {
- let code = base_tree.get_character_code(*character).unwrap();
- let length = code.len();
- character_and_codes.push(CharTempCode {
- character: *character,
- code,
- code_length: length,
- });
- }
- character_and_codes.sort();
- let mut character_and_codes: Vec<CharTempCode> =
- character_and_codes.into_iter().rev().collect();
- let mut first = true;
- let mut prev_length = 0;
- let mut working_code: u32 = 0b0;
- while character_and_codes.len() > 0 {
- let temp_char = character_and_codes.pop().unwrap();
- if first {
- let mut code = BitVec::new();
- code.grow(temp_char.code_length, false);
- prev_length = temp_char.code_length;
- output_characters_and_codes.insert(temp_char.character, code);
- first = false;
- continue;
- }
- if temp_char.code_length > prev_length {
- working_code += 1;
- working_code = working_code << (temp_char.code_length - prev_length);
- output_characters_and_codes
- .insert(temp_char.character, convert_no_to_bit_vec(working_code));
- } else {
- assert_eq!(
- temp_char.code_length, prev_length,
- "Something went really wrong if we got here."
- );
- working_code += 1;
- output_characters_and_codes
- .insert(temp_char.character, convert_no_to_bit_vec(working_code));
- }
- prev_length = temp_char.code_length;
- }
- CanonicalHufftree {
- characters_and_codes: output_characters_and_codes,
- }
- }
- // TODO: Optimise this (the vector copying is probably extremely inefficient)
- pub fn encode_text(&self, text: &String) -> BitVec {
- let mut converted_text = BitVec::new();
- for character in text.chars() {
- let temp_code = self.characters_and_codes.get_by_left(&character).unwrap();
- let mut temp_code = temp_code.clone();
- converted_text.append(&mut temp_code);
- }
- converted_text
- }
- pub fn decode_text(&self, text: BitVec) -> Result<String, &str> {
- let mut decoded_text = String::new();
- // So that popping bits removes them from the "start" of the text.
- let mut text: BitVec = text.iter().rev().collect();
- let mut buffer = BitVec::new();
- while text.len() > 0 {
- buffer.push(text.pop().unwrap());
- match self.characters_and_codes.get_by_right(&buffer) {
- Some(character) => {
- decoded_text.push(*character);
- buffer.truncate(0);
- }
- None => continue,
- }
- }
- if !buffer.is_empty() {
- Err("Text was not decoded properly (trailing bits).")
- } else {
- Ok(decoded_text)
- }
- }
- pub fn get_character_codes(&self) -> BiMap<char, BitVec> {
- self.characters_and_codes.clone()
- }
- }
- pub fn convert_no_to_bit_vec(mut numb: u32) -> BitVec {
- let mut output_vec = BitVec::new();
- while numb > 0 {
- if numb % 2 == 0 {
- output_vec.push(false);
- } else {
- output_vec.push(true);
- }
- numb = numb / 2;
- }
- let output_vec = output_vec.iter().rev().collect();
- output_vec
- }
- #[cfg(test)]
- mod canonical_tests {
- use std::collections::HashMap;
- use super::*;
- #[test]
- fn correct_conversion_of_number() {
- let numb = 0b111001;
- let result = convert_no_to_bit_vec(numb);
- assert!(result.eq_vec(&[true, true, true, false, false, true]));
- }
- #[test]
- fn create_correct_canonical_codes() {
- let mut chars_and_freq: HashMap<char, i32> = HashMap::new();
- chars_and_freq.insert('a', 25);
- chars_and_freq.insert('b', 14);
- chars_and_freq.insert('c', 5);
- let huff = Hufftree::new(chars_and_freq);
- let a_code = huff.get_character_code('a').unwrap();
- let b_code = huff.get_character_code('b').unwrap();
- let c_code = huff.get_character_code('c').unwrap();
- assert_eq!(a_code.to_string(), "1");
- assert_eq!(b_code.to_string(), "01");
- assert_eq!(c_code.to_string(), "00");
- let canonical = CanonicalHufftree::from_tree(huff);
- assert_eq!(
- canonical
- .characters_and_codes
- .get_by_left(&'a')
- .unwrap()
- .to_string(),
- "0"
- );
- assert_eq!(
- canonical
- .characters_and_codes
- .get_by_left(&'b')
- .unwrap()
- .to_string(),
- "10"
- );
- assert_eq!(
- canonical
- .characters_and_codes
- .get_by_left(&'c')
- .unwrap()
- .to_string(),
- "11"
- );
- }
- #[test]
- fn encode_and_decode_text() {
- let mut chars_and_freq: HashMap<char, i32> = HashMap::new();
- chars_and_freq.insert('a', 25);
- chars_and_freq.insert('b', 14);
- chars_and_freq.insert('c', 5);
- let huff = Hufftree::new(chars_and_freq);
- let canonical = CanonicalHufftree::from_tree(huff);
- let input_text = String::from("aaabacacaaaabbbbbbbccccccccccccaacc");
- let encoded_text = canonical.encode_text(&input_text);
- let decoded_text = canonical.decode_text(encoded_text).unwrap();
- assert_eq!(input_text, decoded_text);
- }
- }
|