diff --git a/src/args.rs b/src/args.rs
new file mode 100644
index 0000000..c8023b7
--- /dev/null
+++ b/src/args.rs
@@ -0,0 +1,230 @@
+use clap::{Parser, ArgAction};
+use glob::Pattern;
+
+use crate::unit::Unit;
+
+use std::fs::Metadata;
+use std::path::{Component, Path};
+use std::slice::Iter;
+use std::iter::once_with;
+
+// Command-line options, parsed through clap's derive API.
+// NOTE(review): kept as a plain `//` comment on purpose; clap's derive turns
+// `///` doc comments on the struct into the program's "about" text.
+#[allow(clippy::struct_excessive_bools)]
+#[derive(Parser, Debug, Clone)]
+pub struct Args {
+    #[arg(
+        short, long, default_value_t = false,
+        help = "keep going if an error occurs",
+        long_help = "keep going if an error occurs (ex. unreadable subdirectories in a readable directory)"
+    )]
+    persistant: bool,
+
+    #[arg(
+        short, long,
+        help = "minimize output",
+        long_help = "print nothing but the total size for all directories, without a newline. Also supresses all error messages",
+        conflicts_with = "total",
+        default_value_t = false,
+    )]
+    minimal: bool,
+
+    #[arg(
+        short='T', long,
+        help = "display in tree",
+        default_value_t = false,
+        conflicts_with = "minimal",
+    )]
+    tree: bool,
+
+    #[arg(
+        short, long,
+        help = "display the total size",
+        conflicts_with = "minimal",
+        default_value_t = false,
+    )]
+    total: bool,
+
+    #[arg(
+        short='2', long,
+        help = "alias for --unit 1024",
+        default_value_t = false,
+        conflicts_with_all = ["si","unit"],
+    )]
+    base_two: bool,
+
+    #[arg(
+        short='k', long,
+        help = "alias for --unit 1000",
+        default_value_t = false,
+        conflicts_with_all = ["base_two","unit"],
+    )]
+    si: bool,
+
+    #[arg(
+        short, long,
+        help = "unit to print in",
+        long_help = "printing unit (case insensitive): b = bytes, kb = kilobytes, ki = kibibytes, gb = gigabytes, gi = gibibytes, tb = terabytes, ti = tibibytes",
+        value_parser = Unit::parse,
+        default_value_t = Unit::Byte,
+        conflicts_with_all = ["base_two","si"],
+    )]
+    unit: Unit,
+
+    #[arg(
+        short='s', long,
+        help = "follow symlinks",
+        default_value_t = false,
+    )]
+    follow_links: bool,
+
+    #[arg(
+        short='x', long = "exclude",
+        help = "include in search, but exclude from printing",
+        long_help = "include in search, but exclude from printing. accepts glob syntax",
+        default_values_t = once_with(|| Pattern::new(".*").unwrap()),
+        value_parser = parse_glob,
+        value_delimiter = ',',
+        action = ArgAction::Append,
+    )]
+    exclude_print: Vec<Pattern>,
+
+    #[arg(
+        short='X', long,
+        help = "exclude from search and printing",
+        default_values_t = once_with(|| Pattern::new("").unwrap()),
+        value_parser = parse_glob,
+        value_delimiter = ',',
+        action = ArgAction::Append,
+    )]
+    exclude_search: Vec<Pattern>,
+
+    #[arg(
+        short='H', long,
+        help = "disable implicit hiding of results",
+        long_help = "don't implicitly hide dotfiles and dot directories",
+        conflicts_with = "exclude_print",
+        default_value_t = false,
+    )]
+    show_hidden: bool,
+
+    #[arg(
+        value_parser = validate_path,
+        help = "items to summate",
+        action = ArgAction::Append,
+        num_args = 1..
+    )]
+    path: Vec<String>,
+}
+impl Args {
+    /// Resolves interdependent options; meant to be called once, right after parsing.
+    ///
+    /// - `base_two` selects [`Unit::Kibi`]; otherwise `si` selects [`Unit::Kilo`].
+    /// - `show_hidden` empties `exclude_print`.
+    /// - an empty `path` list becomes `["."]`.
+    pub fn post_process(mut self) -> Self {
+        if self.base_two {
+            self.unit = Unit::Kibi;
+        } else if self.si {
+            self.unit = Unit::Kilo;
+        }
+
+        if self.show_hidden {
+            self.exclude_print = Vec::new();
+        }
+
+        if self.path.is_empty() {
+            self.path = vec![ ".".to_owned() ];
+        }
+
+        self
+    }
+
+    /// Returns `true` if `path` must be left out of the search.
+    ///
+    /// That is the case when `file` is a symlink and `follow_links` is off, or when
+    /// any component of `path` matches one of the `exclude_search` patterns.
+    pub fn should_exclude(&self, path: &Path, file: &Metadata) -> bool {
+        if !self.follow_links && file.is_symlink() {
+            return true
+        }
+
+        any_pattern_matches_any_component(&self.exclude_search, path)
+    }
+
+    /// Returns `true` if `path` may be printed, i.e. none of its components matches
+    /// an `exclude_print` or an `exclude_search` pattern.
+    pub fn should_print(&self, path: &Path) -> bool {
+        ! any_pattern_matches_any_component(&self.exclude_print, path)
+        // TODO: this exists because when a file matches an exclude pattern
+        // it is still returned, just with no size or children, so in order
+        // to not accidentally print things that we said we were excluding,
+        // we also have to check that it's not excluded by search.
+        // `self.exclude_print.extend(&self.exclude_search)` is wasteful,
+        // but until I find a better way this is what it's gotta be
+        && ! any_pattern_matches_any_component(&self.exclude_search, path)
+    }
+
+    /// Whether to keep going after an error.
+    pub const fn persistant(&self) -> bool {
+        self.persistant
+    }
+
+    /// Whether output is reduced to the bare total.
+    pub const fn minimal(&self) -> bool {
+        self.minimal
+    }
+
+    /// Whether results are displayed as a tree.
+    pub const fn tree(&self) -> bool {
+        self.tree
+    }
+
+    /// Whether the total size is displayed.
+    pub const fn total(&self) -> bool {
+        self.total
+    }
+
+    /// The unit sizes are printed in.
+    pub const fn unit(&self) -> Unit {
+        self.unit
+    }
+
+    /// Iterates over the paths to summate.
+    pub fn iter(&self) -> Iter<'_, String> {
+        self.path.iter()
+    }
+}
+
+/// Value parser for `path`: accepts `s` if its metadata can be read,
+/// otherwise returns the I/O error rendered as a string.
+fn validate_path(s: &str) -> Result<String, String> {
+    // try to access its metadata, since that is what is used
+    // to get its length
+    std::fs::metadata(s)
+        .map(|_| s.to_string())
+        .map_err(|e| e.to_string())
+}
+
+/// Value parser for the exclusion options: compiles `s` into a glob [`Pattern`].
+fn parse_glob(s: &str) -> Result<Pattern, String> {
+    Pattern::new(s).map_err(|_| format!("invalid glob: {s}"))
+}
+
+/// Returns `true` if any of `patterns` matches any component of `path`.
+///
+/// Only `Component::Normal` components that are valid UTF-8 are tested.
+fn any_pattern_matches_any_component(patterns: &[Pattern], path: &Path) -> bool {
+    for pat in patterns {
+        for cmp in path.components() {
+            let Component::Normal(cmp) = cmp else { continue };
+            let Some(s) = cmp.to_str() else { continue };
+            if pat.matches(s) {
+                return true
+            }
+        }
+    }
+
+    false
+}
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index 60182e8..261ccaa 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,107 +1,32 @@
 #![feature(io_error_more, fs_try_exists)]
 
-use clap::{Parser, ArgAction};
-
+mod args;
 mod directory;
 mod unit;
+
+use args::Args;
 use directory::Directory;
-use unit::Unit;
+
+use clap::Parser;
 
 use std::process::ExitCode;
 
-#[allow(clippy::struct_excessive_bools)]
-#[derive(Parser, Debug, Clone)]
-pub struct Args {
-    #[arg(
-        short, long, default_value_t = false,
-        help = "keep going if an error occurs",
-        long_help = "keep going if an error occurs (ex. unreadable subdirectories in a readable directory)"
-    )]
-    persistant: bool,
-
-    #[arg(
-        short, long,
-        help = "minimize output",
-        long_help = "print nothing but the total size for all directories, without a newline. Also supresses all error messages",
-        conflicts_with = "total",
-        default_value_t = false,
-    )]
-    minimal: bool,
-
-    #[arg(
-        short='T', long,
-        help = "display in tree",
-        default_value_t = false,
-        conflicts_with = "minimal",
-    )]
-    tree: bool,
-
-    #[arg(
-        short, long,
-        help = "display the total size",
-        conflicts_with = "minimal",
-        default_value_t = false,
-    )]
-    total: bool,
-
-    #[arg(
-        short='2', long,
-        help = "alias for --unit 1024",
-        default_value_t = false,
-        conflicts_with_all = ["si","unit"],
-    )]
-    base_two: bool,
-
-    #[arg(
-        short='0', long,
-        help = "alias for --unit 1000",
-        default_value_t = false,
-        conflicts_with_all = ["base_two","unit"],
-    )]
-    si: bool,
-
-    #[arg(
-        short, long,
-        help = "unit to print in",
-        long_help = "printing unit (case insensitive): b = bytes, kb = kilobytes, ki = kibibytes, gb = gigabytes, gi = gibibytes, tb = terabytes, ti = tibibytes",
-        value_parser = Unit::parse,
-        default_value_t = Unit::Byte,
-        conflicts_with_all = ["base_two","si"],
-    )]
-    unit: Unit,
-
-    #[arg(
-        value_parser = validate_path,
-        help = "items to summate",
-        action = ArgAction::Append,
-        num_args = 1..
-    )]
-    path: Vec<String>,
-}
-
-fn validate_path(s: &str) -> Result<String, String> {
-    // try to access it's metadata, since that is what is used
-    // to get its length
-    std::fs::metadata(s)
-        .map(|_| s.to_string())
-        .map_err(|e| e.to_string())
-}
-
 fn main() -> ExitCode {
-    let mut args = Args::parse();
+    let args = Args::parse().post_process();
+    // dbg!(&args);
 
-    if args.base_two {
-        args.unit = Unit::Kibi;
-    } else if args.si {
-        args.unit = Unit::Kilo;
-    }
-
     let mut total = 0;
-    for path in args.path {
-        let dir_structure = match Directory::new(path) {
-            Ok(ds) => ds,
+    for path in args.iter() {
+        let dir_structure = match Directory::new(path, &args) {
+            Ok(Some(ds)) => ds,
+            // this only ever returns None when persistant,
+            // so we don't need a match guard
+            Ok(None) => continue,
             Err(e) => {
-                if !args.minimal {
+                if args.persistant() {
+                    continue;
+                }
+                if !args.minimal() {
                     eprintln!("hb: {e}");
                 }
                 return ExitCode::FAILURE;
@@ -110,28 +35,28 @@ fn main() -> ExitCode {
 
         total += dir_structure.size();
 
-        if args.minimal {
+        if args.minimal() {
             // skip printing (this is a matter of indentation)
             continue;
         }
 
-        if args.tree {
-            println!("{}", dir_structure.display(args.unit));
+        if args.tree() {
+            println!("{}", dir_structure.tree(&args));
         } else {
             println!(
                 "{}: {}",
                 dir_structure.path().display(),
-                dir_structure.scale(args.unit),
+                args.unit().convert(dir_structure.size())
             );
         }
     }
 
-    let total = args.unit.convert(total);
+    let total = args.unit().convert(total);
 
-    if args.total {
+    if args.total() {
         println!("total: {total}");
     }
-    else if args.minimal {
+    else if args.minimal() {
         print!("{total}");
     }
 
diff --git a/src/unit.rs b/src/unit.rs
index a1393f8..6ee6c61 100644
--- a/src/unit.rs
+++ b/src/unit.rs
@@ -15,12 +15,14 @@ pub enum Unit {
 
     Tera,
     Tibi,
+
+    Blocks,
 }
 impl Unit {
     pub fn parse(s: &str) -> Result<Self, String> {
         let s = s.to_lowercase();
         match s.as_str() {
-            "b" => Ok(Self::Byte),
+            "b" | "bytes" => Ok(Self::Byte),
             "k" | "kb" => Ok(Self::Kilo),
             "ki" => Ok(Self::Kibi),
             "m" | "mb" => Ok(Self::Mega),
@@ -29,11 +31,19 @@ impl Unit {
             "gi" => Ok(Self::Gibi),
             "t" | "tb" => Ok(Self::Tera),
             "ti" => Ok(Self::Tibi),
+            "blk" | "blks"
+            | "blck" |"blcks"
+            | "block" | "blocks" => Ok(Self::Blocks),
             _ => Err(s),
         }
     }
 
     pub fn convert(self, n: u64) -> String {
+        let n = if self == Self::Blocks {
+            n.next_multiple_of(self.integer_value())
+        } else {
+            n
+        };
         format!("{}{}", n/self.integer_value(), self.units_pretty())
     }
 
@@ -48,6 +58,7 @@ impl Unit {
             Self::Gibi => " Gi",
             Self::Tera => " T",
             Self::Tibi => " Ti",
+            Self::Blocks => " blocks"
         }
     }
 
@@ -62,13 +73,14 @@ impl Unit {
             Self::Gibi => 1_073_741_824,
             Self::Tera => 1_000_000_000_000,
             Self::Tibi => 1_099_511_627_776,
+            Self::Blocks => 512,
         }
     }
 }
 impl Display for Unit {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let s = match self {
-            Self::Byte => "b",
+            Self::Byte => "bytes",
             Self::Kilo => "K",
             Self::Kibi => "Ki",
             Self::Mega => "M",
@@ -77,6 +89,7 @@ impl Display for Unit {
             Self::Gibi => "Gi",
             Self::Tera => "T",
             Self::Tibi => "Ti",
+            Self::Blocks => "blk"
         };
 
         f.write_str(s)