|
|
@@ -5,6 +5,7 @@ use bit_vec::BitVec;
|
|
|
#[derive(Debug)]
|
|
|
pub struct CanonicalHufftree {
|
|
|
characters_and_codes: BiMap<char, BitVec>,
|
|
|
+ storage_char_codes: Vec<(char, u32)>,
|
|
|
}
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
@@ -16,7 +17,8 @@ struct CharTempCode {
|
|
|
|
|
|
impl Ord for CharTempCode {
|
|
|
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
|
|
- self.code_length.cmp(&other.code_length)
|
|
|
+ self.code_length
|
|
|
+ .cmp(&other.code_length)
|
|
|
.then(other.code.cmp(&self.code))
|
|
|
}
|
|
|
}
|
|
|
@@ -40,6 +42,7 @@ impl CanonicalHufftree {
|
|
|
let characters = base_tree.get_characters();
|
|
|
let mut character_and_codes = Vec::new();
|
|
|
let mut output_characters_and_codes: BiMap<char, BitVec> = BiMap::new();
|
|
|
+ let mut storage_char_codes = Vec::new();
|
|
|
|
|
|
for character in characters {
|
|
|
let code = base_tree.get_character_code(*character).unwrap();
|
|
|
@@ -62,6 +65,10 @@ impl CanonicalHufftree {
|
|
|
let mut working_code: u32 = 0b0;
|
|
|
while character_and_codes.len() > 0 {
|
|
|
let temp_char = character_and_codes.pop().unwrap();
|
|
|
+
|
|
|
+ // This will result in the vector being ordered with the most frequent
|
|
|
+ // character first, ordered by code length.
|
|
|
+ storage_char_codes.push((temp_char.character, temp_char.code_length as u32));
|
|
|
if first {
|
|
|
let mut code = BitVec::new();
|
|
|
code.grow(temp_char.code_length, false);
|
|
|
@@ -93,12 +100,57 @@ impl CanonicalHufftree {
|
|
|
|
|
|
CanonicalHufftree {
|
|
|
characters_and_codes: output_characters_and_codes,
|
|
|
+ storage_char_codes,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- pub fn from_bimap(bi: BiMap<char, u32>) -> Self {
|
|
|
+ pub fn from_vec(storage: Vec<(char, u32)>) -> Self {
|
|
|
+ // Assume that vec is ordered, first by code length,
|
|
|
+ // then by character frequency, as this is the way
|
|
|
+ // the file should have been encoded.
|
|
|
+ // This means that it must first be reversed.
|
|
|
+ let mut temp_storage: Vec<(char, u32)> = storage.into_iter().rev().collect();
|
|
|
+
|
|
|
+ let mut bi = BiMap::new();
|
|
|
+ let mut first = true;
|
|
|
+ let mut prev_length = 0;
|
|
|
+ let mut working_code: u32 = 0b0;
|
|
|
+ while temp_storage.len() > 0 {
|
|
|
+ let (temp_char, code_length) = temp_storage.pop().unwrap();
|
|
|
+
|
|
|
+ // The first entry popped gets an all-zero code of its own length;
|
|
|
+ // every later code is derived by incrementing the previous one.
|
|
|
+ if first {
|
|
|
+ let mut code = BitVec::new();
|
|
|
+ code.grow(code_length as usize, false);
|
|
|
+ prev_length = code_length;
|
|
|
+
|
|
|
+ bi.insert(temp_char, code);
|
|
|
+ first = false;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ if code_length > prev_length {
|
|
|
+ working_code += 1;
|
|
|
+ working_code = working_code << (code_length - prev_length);
|
|
|
+
|
|
|
+ bi.insert(temp_char, convert_no_to_bit_vec(working_code));
|
|
|
+ } else {
|
|
|
+ assert_eq!(
|
|
|
+ code_length, prev_length,
|
|
|
+ "Something went really wrong if we got here."
|
|
|
+ );
|
|
|
+ working_code += 1;
|
|
|
+ bi.insert(temp_char, convert_no_to_bit_vec(working_code));
|
|
|
+ }
|
|
|
+
|
|
|
+ prev_length = code_length;
|
|
|
+ }
|
|
|
+
|
|
|
CanonicalHufftree {
|
|
|
- characters_and_codes: bi
|
|
|
+ characters_and_codes: bi,
|
|
|
+ // Will be empty.
|
|
|
+ storage_char_codes: temp_storage,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -140,8 +192,12 @@ impl CanonicalHufftree {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- pub fn get_character_codes(&self) -> BiMap<char, BitVec> {
|
|
|
- self.characters_and_codes.clone()
|
|
|
+ // pub fn get_character_codes(&self) -> BiMap<char, BitVec> {
|
|
|
+ // self.characters_and_codes.clone()
|
|
|
+ // }
|
|
|
+
|
|
|
+ pub fn get_character_codes_for_storage(&self) -> Vec<(char, u32)> {
|
|
|
+ self.storage_char_codes.clone()
|
|
|
}
|
|
|
}
|
|
|
|