canonical.rs 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. use crate::hufftree::base::Hufftree;
  2. use bimap::BiMap;
  3. use bit_vec::BitVec;
  4. #[derive(Debug)]
  5. pub struct CanonicalHufftree {
  6. characters_and_codes: BiMap<char, BitVec>,
  7. }
  8. #[derive(PartialEq, PartialOrd, Eq, Debug)]
  9. struct CharTempCode {
  10. character: char,
  11. code: BitVec,
  12. code_length: usize,
  13. }
  14. impl Ord for CharTempCode {
  15. fn cmp(&self, other: &Self) -> std::cmp::Ordering {
  16. return self.code_length.cmp(&other.code_length);
  17. }
  18. }
  19. impl CanonicalHufftree {
  20. pub fn from_tree(base_tree: Hufftree) -> Self {
  21. let characters = base_tree.get_characters();
  22. let mut character_and_codes = Vec::new();
  23. let mut output_characters_and_codes: BiMap<char, BitVec> = BiMap::new();
  24. for character in characters {
  25. let code = base_tree.get_character_code(*character).unwrap();
  26. let length = code.len();
  27. character_and_codes.push(CharTempCode {
  28. character: *character,
  29. code,
  30. code_length: length,
  31. });
  32. }
  33. character_and_codes.sort();
  34. let mut character_and_codes: Vec<CharTempCode> =
  35. character_and_codes.into_iter().rev().collect();
  36. let mut first = true;
  37. let mut prev_length = 0;
  38. let mut working_code: u32 = 0b0;
  39. while character_and_codes.len() > 0 {
  40. let temp_char = character_and_codes.pop().unwrap();
  41. if first {
  42. let mut code = BitVec::new();
  43. code.grow(temp_char.code_length, false);
  44. prev_length = temp_char.code_length;
  45. output_characters_and_codes.insert(temp_char.character, code);
  46. first = false;
  47. continue;
  48. }
  49. if temp_char.code_length > prev_length {
  50. working_code += 1;
  51. working_code = working_code << (temp_char.code_length - prev_length);
  52. output_characters_and_codes
  53. .insert(temp_char.character, convert_no_to_bit_vec(working_code));
  54. } else {
  55. assert_eq!(
  56. temp_char.code_length, prev_length,
  57. "Something went really wrong if we got here."
  58. );
  59. working_code += 1;
  60. output_characters_and_codes
  61. .insert(temp_char.character, convert_no_to_bit_vec(working_code));
  62. }
  63. prev_length = temp_char.code_length;
  64. }
  65. CanonicalHufftree {
  66. characters_and_codes: output_characters_and_codes,
  67. }
  68. }
  69. // TODO: Optimise this (the vector copying is probably extremely inefficient)
  70. pub fn encode_text(&self, text: &String) -> BitVec {
  71. let mut converted_text = BitVec::new();
  72. for character in text.chars() {
  73. let temp_code = self.characters_and_codes.get_by_left(&character).unwrap();
  74. let mut temp_code = temp_code.clone();
  75. converted_text.append(&mut temp_code);
  76. }
  77. converted_text
  78. }
  79. pub fn decode_text(&self, text: BitVec) -> Result<String, &str> {
  80. let mut decoded_text = String::new();
  81. // So that popping bits removes them from the "start" of the text.
  82. let mut text: BitVec = text.iter().rev().collect();
  83. let mut buffer = BitVec::new();
  84. while text.len() > 0 {
  85. buffer.push(text.pop().unwrap());
  86. match self.characters_and_codes.get_by_right(&buffer) {
  87. Some(character) => {
  88. decoded_text.push(*character);
  89. buffer.truncate(0);
  90. }
  91. None => continue,
  92. }
  93. }
  94. if !buffer.is_empty() {
  95. Err("Text was not decoded properly (trailing bits).")
  96. } else {
  97. Ok(decoded_text)
  98. }
  99. }
  100. pub fn get_character_codes(&self) -> BiMap<char, BitVec> {
  101. self.characters_and_codes.clone()
  102. }
  103. }
  104. pub fn convert_no_to_bit_vec(mut numb: u32) -> BitVec {
  105. let mut output_vec = BitVec::new();
  106. while numb > 0 {
  107. if numb % 2 == 0 {
  108. output_vec.push(false);
  109. } else {
  110. output_vec.push(true);
  111. }
  112. numb = numb / 2;
  113. }
  114. let output_vec = output_vec.iter().rev().collect();
  115. output_vec
  116. }
  117. #[cfg(test)]
  118. mod canonical_tests {
  119. use std::collections::HashMap;
  120. use super::*;
  121. #[test]
  122. fn correct_conversion_of_number() {
  123. let numb = 0b111001;
  124. let result = convert_no_to_bit_vec(numb);
  125. assert!(result.eq_vec(&[true, true, true, false, false, true]));
  126. }
  127. #[test]
  128. fn create_correct_canonical_codes() {
  129. let mut chars_and_freq: HashMap<char, i32> = HashMap::new();
  130. chars_and_freq.insert('a', 25);
  131. chars_and_freq.insert('b', 14);
  132. chars_and_freq.insert('c', 5);
  133. let huff = Hufftree::new(chars_and_freq);
  134. let a_code = huff.get_character_code('a').unwrap();
  135. let b_code = huff.get_character_code('b').unwrap();
  136. let c_code = huff.get_character_code('c').unwrap();
  137. assert_eq!(a_code.to_string(), "1");
  138. assert_eq!(b_code.to_string(), "01");
  139. assert_eq!(c_code.to_string(), "00");
  140. let canonical = CanonicalHufftree::from_tree(huff);
  141. assert_eq!(
  142. canonical
  143. .characters_and_codes
  144. .get_by_left(&'a')
  145. .unwrap()
  146. .to_string(),
  147. "0"
  148. );
  149. assert_eq!(
  150. canonical
  151. .characters_and_codes
  152. .get_by_left(&'b')
  153. .unwrap()
  154. .to_string(),
  155. "10"
  156. );
  157. assert_eq!(
  158. canonical
  159. .characters_and_codes
  160. .get_by_left(&'c')
  161. .unwrap()
  162. .to_string(),
  163. "11"
  164. );
  165. }
  166. #[test]
  167. fn encode_and_decode_text() {
  168. let mut chars_and_freq: HashMap<char, i32> = HashMap::new();
  169. chars_and_freq.insert('a', 25);
  170. chars_and_freq.insert('b', 14);
  171. chars_and_freq.insert('c', 5);
  172. let huff = Hufftree::new(chars_and_freq);
  173. let canonical = CanonicalHufftree::from_tree(huff);
  174. let input_text = String::from("aaabacacaaaabbbbbbbccccccccccccaacc");
  175. let encoded_text = canonical.encode_text(&input_text);
  176. let decoded_text = canonical.decode_text(encoded_text).unwrap();
  177. assert_eq!(input_text, decoded_text);
  178. }
  179. }