main.rs 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. use std::io::{self, Read, Write};
  2. use std::path::Path;
  3. use std::path::PathBuf;
  4. use anyhow::{Context, anyhow};
  5. use clap::Parser;
  6. use huffman::{cli, hufftree, storage};
  7. fn main() -> Result<(), anyhow::Error> {
  8. let args = cli::Args::parse();
  9. let inputf = args.input_file;
  10. let outputf = args.output_file;
  11. let mode = args.mode;
  12. let is_stdin = inputf == Path::new("-");
  13. // Read all input into memory upfront so we know its size and can inspect content.
  14. let input_bytes: Vec<u8> = if is_stdin {
  15. let mut buf = Vec::new();
  16. io::stdin()
  17. .read_to_end(&mut buf)
  18. .context("Could not read from stdin.")?;
  19. buf
  20. } else {
  21. if !inputf.exists() {
  22. return Err(anyhow!("Input file did not exist."));
  23. }
  24. std::fs::read(&inputf).context("Could not read input file.")?
  25. };
  26. let in_size = input_bytes.len() as u64;
  27. let mode = match mode {
  28. Some(m) => m,
  29. None => {
  30. if is_stdin {
  31. // No filename to inspect — infer from content validity.
  32. if std::str::from_utf8(&input_bytes).is_ok() {
  33. cli::Mode::C
  34. } else {
  35. cli::Mode::X
  36. }
  37. } else {
  38. determine_mode(&inputf, outputf.as_ref())
  39. }
  40. }
  41. };
  42. // None means write to stdout.
  43. let output_path: Option<PathBuf> = if is_stdin && outputf.is_none() {
  44. None
  45. } else {
  46. Some(match outputf {
  47. Some(p) => p,
  48. None => match mode {
  49. cli::Mode::X => {
  50. if let Some(ext) = inputf.extension()
  51. && ext.eq("z")
  52. {
  53. inputf.with_extension("")
  54. } else {
  55. inputf.with_extension("unhuffed")
  56. }
  57. }
  58. cli::Mode::C => match inputf.extension() {
  59. Some(ext) => {
  60. let ext = ext
  61. .to_str()
  62. .ok_or(anyhow!("Input file path was not valid unicode."))?;
  63. inputf.with_extension(ext.to_string() + ".z")
  64. }
  65. None => inputf.with_extension("z"),
  66. },
  67. },
  68. })
  69. };
  70. // When the output is stdout, status messages go to stderr to avoid corrupting binary output.
  71. macro_rules! status {
  72. ($($arg:tt)*) => {
  73. if output_path.is_none() {
  74. eprintln!($($arg)*);
  75. } else {
  76. println!($($arg)*);
  77. }
  78. };
  79. }
  80. let mut writer: Box<dyn Write> = match output_path {
  81. Some(ref p) => {
  82. Box::new(std::fs::File::create(p).context("Could not create output file.")?)
  83. }
  84. None => Box::new(io::stdout()),
  85. };
  86. status!("Read: {} bytes.", in_size);
  87. match mode {
  88. cli::Mode::X => {
  89. status!("Decoding text...");
  90. let decoded_text = huffman::storage::read_tree_and_text(&mut &input_bytes[..])?;
  91. status!("Decoded!");
  92. writer
  93. .write_all(decoded_text.as_bytes())
  94. .context("Could not write decoded text to output.")?;
  95. let out_size = decoded_text.len() as u64;
  96. status!("Stored: {} bytes.", out_size);
  97. let (compressed, original) = (in_size, out_size);
  98. status!("Compression Ratio: {:.2}.", compressed as f64 / original as f64);
  99. }
  100. cli::Mode::C => {
  101. let input_text =
  102. String::from_utf8(input_bytes).context("Input is not valid UTF-8.")?;
  103. status!("Encoding text...");
  104. let char_f = huffman::hufftree::base::get_char_frequencies(&input_text);
  105. let base_tree = huffman::hufftree::base::Hufftree::new(char_f);
  106. let canonical_tree = hufftree::canonical::CanonicalHufftree::from_tree(base_tree);
  107. // Buffer encoded output so we can report its size before writing.
  108. let mut out_buf: Vec<u8> = Vec::new();
  109. storage::store_tree_and_text(canonical_tree, &mut out_buf, &input_text)
  110. .expect("Could not store the tree and text.");
  111. let out_size = out_buf.len() as u64;
  112. writer
  113. .write_all(&out_buf)
  114. .context("Could not write encoded data to output.")?;
  115. status!("Encoded!");
  116. status!("Stored: {} bytes.", out_size);
  117. let (compressed, original) = (out_size, in_size);
  118. status!("Compression Ratio: {:.2}.", compressed as f64 / original as f64);
  119. }
  120. }
  121. Ok(())
  122. }
  123. fn determine_mode(inputf: &Path, _outputf: Option<&PathBuf>) -> cli::Mode {
  124. // If '.z' at end of inputf -> Decompress.
  125. if let Some(extension) = inputf.extension()
  126. && extension.eq("z")
  127. {
  128. cli::Mode::X
  129. } else {
  130. // Otherwise compress
  131. cli::Mode::C
  132. }
  133. }