From 518310b00f25d4cea09d378553d1882989055a32 Mon Sep 17 00:00:00 2001 From: nick Date: Thu, 14 Mar 2024 09:53:12 -0400 Subject: [PATCH] tweaked CLI options, fixed -p behaviour --- src/args.rs | 72 ++++++++++++++----------- src/directory.rs | 135 +++++++++++++++++++++++++---------------------- src/main.rs | 12 ++--- 3 files changed, 115 insertions(+), 104 deletions(-) diff --git a/src/args.rs b/src/args.rs index 063c29c..fa1955b 100644 --- a/src/args.rs +++ b/src/args.rs @@ -12,12 +12,21 @@ use std::iter::once_with; #[derive(Parser, Debug, Clone)] pub struct Args { #[arg( - short, long, default_value_t = false, + short, long, help = "keep going if an error occurs", - long_help = "keep going if an error occurs (ex. unreadable subdirectories in a readable directory)" + long_help = "keep going if an error occurs, silencing them in the process", + default_value_t = false, )] persistant: bool, + #[arg( + short, long, + help = "suppress error messages", + long_help = "suppress error messages, but still quit if an error occurs", + default_value_t = false, + )] + quiet: bool, + #[arg( short, long, help = "minimize output", @@ -79,7 +88,7 @@ pub struct Args { #[arg( short='x', long = "exclude", help = "include in search, but exclude from printing", - long_help = "include in search, but exclude from printing. accepts glob syntax", + long_help = "include in search, but exclude from printing. accepts glob syntax. separate rules by comma", default_values_t = once_with(|| Pattern::new(".*").unwrap()), value_parser = parse_glob, value_delimiter = ',', @@ -90,6 +99,7 @@ pub struct Args { #[arg( short='X', long, help = "exclude from search and printing", + long_help = "exclude from search and printin. accepts glob syntax. 
separate rules by comma", value_parser = parse_glob, value_delimiter = ',', action = ArgAction::Append, @@ -114,25 +124,27 @@ pub struct Args { path: Vec, } impl Args { - /// utility method to chuck default values on the end + /// utility method to chuck default values on the end. /// it feels like I should be able to do this with /// clever `clap` macros but I don't know how - pub fn post_process(mut self) -> Self { - if self.base_two { - self.unit = Unit::Kibi; - } else if self.si { - self.unit = Unit::Kilo; - } - - if self.show_hidden { - self.exclude_print = Vec::new(); - } - - if self.path.is_empty() { - self.path = vec![ ".".to_owned() ]; - } + pub fn parse_and_process() -> Self { + let mut this = Self::parse(); - self + if this.base_two { + this.unit = Unit::Kibi; + } else if this.si { + this.unit = Unit::Kilo; + } + + if this.show_hidden { + this.exclude_print = Vec::new(); + } + + if this.path.is_empty() { + this.path = vec![ ".".to_owned() ]; + } + + this } pub fn should_exclude(&self, path: &Path, file: &Metadata) -> bool { @@ -149,16 +161,9 @@ impl Args { any_pattern_matches_any_component(&self.exclude_search, path) } - + pub fn should_print(&self, path: &Path) -> bool { ! any_pattern_matches_any_component(&self.exclude_print, path) - // TODO: this exists because when a file matches an exclude pattern - // is it still returned, just with no size or children, so in order - // to not accidentally print things that we said we were excluding, - // we also have to check that it's not excluded by search. - // `self.exclude_print.extend(&self.exclude_search)` is wasteful, - // but until I find a better way this is what it's gotta be` - && ! 
any_pattern_matches_any_component(&self.exclude_search, path) } pub const fn persistant(&self) -> bool { @@ -168,15 +173,15 @@ impl Args { pub const fn minimal(&self) -> bool { self.minimal } - + pub const fn tree(&self) -> bool { self.tree } - + pub const fn total(&self) -> bool { self.total } - + pub const fn unit(&self) -> Unit { self.unit } @@ -184,6 +189,10 @@ impl Args { pub fn iter(&self) -> Iter<'_, String> { self.path.iter() } + + pub fn quiet(&self) -> bool { + self.quiet + } } fn validate_path(s: &str) -> Result { @@ -210,7 +219,8 @@ fn any_pattern_matches_any_component(patterns: &[Pattern], path: &Path) -> bool let Some(s) = cmp.to_str() else { // this is a code smell // I don't believe it, but I can't think - // of anything worthwhile to do + // of anything worthwhile to do when + // you can't get a usable &str continue }; if pat.matches(s) { diff --git a/src/directory.rs b/src/directory.rs index bee7a52..b4d3e69 100644 --- a/src/directory.rs +++ b/src/directory.rs @@ -30,12 +30,11 @@ impl Directory { let path = path.as_ref(); // NOTE: I go back and forth on canonicalize()ing all the time. // I feel like it changes every commit. The performance loss seems - // to be negligible, even when I do crazy things like `hb -p /`, which - // is the most I can currently do. 
- let name = match (path.canonicalize(), args.persistant()) { - (Ok(path), _) => path, - (Err(_), true) => return Ok(None), - (Err(e), false) => return Err(e.into()), + // to be negligible, even when I do crazy things like `hb -p /` + let name = match path.canonicalize() { + Ok(path) => path, + Err(_) if args.persistant() => return Ok(None), + Err(e) => return Err(e.into()), } .file_name() .map_or_else(|| OsString::from("/"), ToOwned::to_owned) @@ -43,24 +42,17 @@ impl Directory { // symlink_metadata() is the same as metadata() but it doesn't // traverse symlinks, so that we can exclude them if necessary - let meta = match (path.symlink_metadata(), args.persistant()) { - (Ok(md), _) => md, - (Err(_), true) => return Ok(None), - (Err(e), false) => return Err(e.into()), + let meta = match path.symlink_metadata() { + Ok(md) => md, + Err(_) if args.persistant() => return Ok(None), + Err(e) => return Err(e.into()), }; if args.should_exclude(path, &meta) { - // Ok(None) is only meant to arise from an error - // while persistant. When that happens, a no-op - // entry is substituted, which is precisely - // we want to happen when we hit an excluded file. - - // NOTE: return Ok(None) is *not* equivalent, because - // that's only produced when an error occurs but - // the program is running in persistant mode. I used - // to return that, but that causes incredibly - // bizarre and wrong behaviour. - return Ok(Some( Self { name, size: 0, children: Vec::new() } )) + // finding a file to exclude is behaviourally + // equivalent to hitting an error in persistant + // mode: just continue + return Ok(None) } let dir = match read_dir(path) { @@ -85,41 +77,47 @@ impl Directory { // this is a compicated iterator pattern. I'll do my best to explain. // 1. the end result is that we `reduce()` the iterator to a single // (u64, Vec) tuple to return. this is done by... - let (size, children) = - // 2. taking the iterator over the directory and parallelising it... 
- dir.par_bridge() - // 3, this is the recursive step: try to create new Directory - // objects from each item in the iterator - .map(|entry| Self::new(entry?.path(), args)) - // 4. the fold (this is try_fold because we're iterating over Result.). - // each fold adds a directory as a child and increases the total size - .try_fold( - || (0, Vec::new()), - |(mut size, mut children), dir| -> Result<(u64, Vec)> { - let Some(dir) = Result::from(dir)? - else { - // some intermediate operation failed, but we - // are persistant, so just skip - return Result::Ok((0, Vec::new())) - }; - size += dir.size; + let (size, children) = match + // 2. taking the iterator over the directory and parallelising it... + dir.par_bridge() + // 3, this is the recursive step: try to create new Directory + // objects from each item in the iterator + .map(|entry| Self::new(entry?.path(), args)) + // 4. the fold (this is try_fold because we're iterating over Result.). + // each fold adds a directory as a child and increases the total size + .try_fold( + || (0, Vec::new()), + |(mut size, mut children), dir| -> Result<(u64, Vec)> { + let dir = match (dir, args.persistant()) { + (Ok(Some(d)), _) => d, + (Ok(None), _) | (Err(_), true) => return Result::Ok((size, children)), + (Err(e), false) => return Err(e), + }; + size += dir.size; + if args.should_print(dir.path()) { + // since size was increased, this just prevents + // the directory from appearing in printing children.push(dir); - // have to specify anyhow::Result::Ok otherwise it complains - // that it can't infer the E in Result - Result::Ok((size, children)) } - ) - // 5. the final step is to reduce, which is as simple as concatenating - // every vector and summing up their sizes. 
- .try_reduce( - || (0, Vec::new()), - |(asize, mut avec), (bsize, bvec)| { - avec.extend(bvec); - Result::Ok((asize + bsize, avec)) - } - )?; - // ^ note the Try, because of course any of these operations could - // fail + // have to specify anyhow::Result::Ok otherwise it complains + // that it can't infer the E in Result + Result::Ok((size, children)) + } + ) + // 5. the final step is to reduce, which is as simple as concatenating + // every vector and summing up their sizes. + .try_reduce( + || (0, Vec::new()), + |(asize, mut avec), (bsize, bvec)| { + avec.extend(bvec); + Result::Ok((asize + bsize, avec)) + } + ) { + // remember that this is a match statement? + Ok(tuple) => tuple, + Err(_) if args.persistant() => return Ok(None), + Err(e) => return Err(e), + }; // final notes: // 1. I am unsure if it is better to do a bunch of partial sums @@ -142,11 +140,11 @@ impl Directory { )) } - pub fn tree(self, args: &Args) -> String { + pub fn tree(self, unit: Unit) -> String { // since self.size is definitionally the greatest value, the tab length // is just the length of self.len, plus two for a tab width - let tab_size = self.size.to_string().len() + 2; - self.vectorise(args) + let tab_size = unit.convert(self.size).len() + 2; + self.vectorise(unit) .iter() .map(|e| e.stringify_tabbed(tab_size)) .reduce(|s1, s2| s1 + "\n" + &s2) @@ -155,26 +153,32 @@ impl Directory { /// TODO: make not recursive, take &self if possible, /// and maybe write directly to stdout to not use so much mem - fn vectorise(mut self, args: &Args) -> Vec { + fn vectorise(self, unit: Unit) -> Vec { let mut result = Vec::new(); result.push(TreeEntry::new( - self.name.display().to_string(), self.size, args.unit() + self.name.display().to_string(), self.size, unit )); let mut new_entry_part = TreePart::First; let mut continue_part = TreePart::Wait; - - self.children.retain(|dir| args.should_print(dir.path())); + let len = self.children.len(); + // this is the display algorithm. 
it's built on the variables + // `new_entry_part` and `continue_part`. for most times, when + // we introduce a new item (which happens every iteration of + // the loop), it is a `first` tree part and we can pad with the + // `wait` part. the last element of each one should however + // be introduced with a `last` part, and padding should be done with + // `blank` for (idx, child) in self.children.into_iter().enumerate() { if idx+1 == len { new_entry_part = TreePart::Last; continue_part = TreePart::Blank; } - let subtree = child.vectorise(args); + let subtree = child.vectorise(unit); for mut item in subtree { if item.parts.is_empty() { @@ -203,14 +207,13 @@ impl TreeEntry { parts: Vec::new(), path, size, unit } } - + fn stringify_tabbed(&self, tab_size: usize) -> String { let mut result = format!("{: ExitCode { - let args = Args::parse().post_process(); - // dbg!(&args); + let args = Args::parse_and_process(); let mut total = 0; for path in args.iter() { @@ -23,10 +20,7 @@ fn main() -> ExitCode { // so we don't need a match guard Ok(None) => continue, Err(e) => { - if args.persistant() { - continue; - } - if !args.minimal() { + if !args.minimal() && !args.quiet() { eprintln!("hb: {e}"); } return ExitCode::FAILURE; @@ -41,7 +35,7 @@ fn main() -> ExitCode { } if args.tree() { - println!("{}", dir_structure.tree(&args)); + println!("{}", dir_structure.tree(args.unit())); } else { println!( "{}: {}",