瀏覽代碼

Came up with basic version of file storage.

Not tested!
AvariceLHubris 1 年之前
父節點
當前提交
8812049de2
共有 1 個文件被更改,包括 39 次插入8 次删除
  1. 39 8
      src/storage.rs

+ 39 - 8
src/storage.rs

@@ -4,7 +4,24 @@ use bit_vec::BitVec;
 use std::io::Read;
 use std::io::Write;
 
-pub fn store_tree_and_text<F: Write>(tree: CanonicalHufftree, writer: &mut F, text: &String) -> Result<(), String> {
+/* So what does our file format look like?
+ * This is a toy project, but it should be usable for real: I should be able
+ * to compress actual Unicode text files with it (hence the decision to use
+ * Rust chars, each stored as UTF-8 zero-padded to 4 bytes).
+ *
+ * Here is the layout:
+ *
+ * 4 bytes     -> Length of the rest of the file, in bits.
+ * n * 8 bytes -> CanonicalHufftree: per symbol, 4-byte code length, then 4-byte char.
+ * 4 bytes     -> All ones (0xFF x 4), the delimiter between tree and data.
+ * m bytes     -> Compressed data; only read up to the bit count given above.
+ *
+ */
+pub fn store_tree_and_text<F: Write>(
+    tree: CanonicalHufftree,
+    writer: &mut F,
+    text: &String,
+) -> Result<(), String> {
     let mut buff = BitVec::new();
     let mut character_buff: [u8; 4] = [0; 4];
     let mut bit_length: u32 = 0;
@@ -15,9 +32,6 @@ pub fn store_tree_and_text<F: Write>(tree: CanonicalHufftree, writer: &mut F, te
         buff.append(&mut BitVec::from_bytes(&code_length));
         bit_length += 32;
 
-        buff.append(&mut BitVec::from_elem(8, false));
-        bit_length += 8;
-
         character.encode_utf8(&mut character_buff);
         buff.append(&mut BitVec::from_bytes(&character_buff));
         bit_length += 32;
@@ -25,9 +39,12 @@ pub fn store_tree_and_text<F: Write>(tree: CanonicalHufftree, writer: &mut F, te
         character_buff.fill(0);
     }
 
+    buff.append(&mut BitVec::from_elem(32, true));
+    bit_length += 32;
+
     let encoded_text = tree.encode_text(&text);
 
-    let text_bits:u32 = encoded_text.len().try_into().unwrap();
+    let text_bits: u32 = encoded_text.len().try_into().unwrap();
 
     println!("Bit length: {}, Text bits: {}.", bit_length, text_bits);
     bit_length += text_bits;
@@ -45,8 +62,8 @@ pub fn store_tree_and_text<F: Write>(tree: CanonicalHufftree, writer: &mut F, te
 
 #[cfg(test)]
 mod test {
-    use std::collections::HashMap;
     use crate::hufftree::base::Hufftree;
+    use std::collections::HashMap;
 
     use super::*;
 
@@ -60,12 +77,26 @@ mod test {
         let huff = Hufftree::new(chars_and_freq);
         let canonical = CanonicalHufftree::from_tree(huff);
 
-        let input_text = String::from("aaabacacaaaabbbbbbbccccccccccccaacc");
+        let input_text = String::from("aaabbc");
 
         let mut virtual_buffer = Vec::new();
         store_tree_and_text(canonical, &mut virtual_buffer, &input_text).unwrap();
 
         println!("Buffer:{:?}", virtual_buffer);
-
+        assert_eq!(&virtual_buffer[0..4],
+            &[0,0,0,233]); // Length of tree + encoded text.
+                           // 0,0,0,1,          // Code length of 'a'
+                           // 97,0,0,0,         // 'a'
+                           // 0,0,0,2,          // Code length of 'b'
+                           // 98,0,0,0,         // 'b'
+                           // 0,0,0,2,          // Code length of 'c'
+                           // 99,0,0,0,         // 'c'
+                           // 255,255,255,255,  // Delimiter
+                           // 21,128            // Encoded text.
+                           // (The bytes listed above follow the 4-byte length header.)
+        let size = virtual_buffer.len();
+        assert_eq!(&virtual_buffer[(size - 4)..size],
+            &[255,255,21,128]); 
+        // )
     }
 }