diff --git a/Cargo.lock b/Cargo.lock index 22c4ba5..e2afafc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,26 @@ version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +[[package]] +name = "big-int" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31375ce97b1316b3a92644c2cbc93fa9dcfba06e4aec9a440bce23397af82fd6" +dependencies = [ + "big-int-proc", + "thiserror", +] + +[[package]] +name = "big-int-proc" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73cfa06eb56d71f2bb1874b101a50c3ba29fcf3ff7dd8de274e473929459863b" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "clap" version = "4.5.4" @@ -145,6 +165,7 @@ name = "hb" version = "0.1.0" dependencies = [ "anyhow", + "big-int", "clap", "glob", "rayon", @@ -217,6 +238,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.12" diff --git a/Cargo.toml b/Cargo.toml index ae95d75..4ba1b35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] anyhow = "1.0.81" +big-int = "7.0.0" clap = { version = "4.4.3", features = ["derive"] } glob = "0.3.1" rayon = "1.7.0" diff --git a/src/args.rs b/src/args.rs index 1801879..ba9d1c4 100644 --- a/src/args.rs +++ b/src/args.rs @@ -2,6 +2,7 @@ use clap::{Parser, ArgAction}; use glob::Pattern; use rayon::ThreadPoolBuilder; +use crate::fs_size::FsSize; use crate::unit::Unit; use std::fs::{symlink_metadata, Metadata}; @@ -70,6 +71,14 @@ pub struct Args { )] si: bool, + #[arg( + short, long, + help = "use heap-allocated integers", + long_help = "use heap-allocated integers that won't overflow. Reduces performance", + default_value_t = false, + )] + big_int: bool, + #[arg( short, long, help = "unit to print in", @@ -159,11 +168,11 @@ impl Args { self } - pub fn len(&self, meta: &Metadata) -> u64 { + pub fn len(&self, meta: &Metadata) -> N { match self.unit { Unit::Blocks => meta.blocks(), _ => meta.len(), - } + }.into() } pub fn should_exclude(&self, path: &Path, file: &Metadata) -> bool { @@ -212,6 +221,10 @@ impl Args { pub fn paths(&self) -> Iter<'_, String> { self.paths.iter() } + + pub fn big_int(&self) -> bool { + self.big_int + } } fn validate_path(s: &str) -> Result { diff --git a/src/directory.rs b/src/directory.rs index bd3d9c3..0878082 100644 --- a/src/directory.rs +++ b/src/directory.rs @@ -5,20 +5,24 @@ use std::io::ErrorKind; use anyhow::{Context, Result}; use crate::args::Args; +use crate::fs_size::FsSize; use crate::unit::Unit; use rayon::prelude::*; #[derive(Debug, Clone)] -pub struct Directory { +pub struct Directory { name: PathBuf, - size: u64, - children: Vec, + size: N, + children: Vec>, } -impl Directory { +impl Directory +where + N: FsSize +{ #[inline] - pub const fn size(&self) -> u64 { - self.size + pub const fn size(&self) -> &N { + &self.size } pub fn new< P: AsRef >(path: P, args: &Args) -> Result> { @@ -74,13 +78,13 @@ impl Directory { } }; - let fold_op = |(mut size, mut children), dir| -> Result<(u64, Vec)> { + let fold_op = |(mut size, mut children), dir| -> Result<(N, Vec)> { let dir: Self = match (dir, args.persistant()) { (Ok(Some(d)), _) => d, (Ok(None), _) | (Err(_), true) => return Ok((size, children)), (Err(e), false) => return Err(e), }; - size += dir.size; + size += dir.size.clone(); if args.tree() && args.should_print(&dir.name) { // since size was increased, this just prevents // the directory from appearing in printing @@ -89,10 +93,11 @@ impl Directory { Ok((size, children)) }; - let reduce_op = |(asize, mut avec): (u64, Vec), (bsize, bvec)| { + let reduce_op = |(mut asize, mut avec): (N, Vec), (bsize, bvec)| { if args.tree() { avec.extend(bvec); } + asize += bsize; - Ok((asize + bsize, avec)) + Ok((asize, avec)) }; // this is a compicated iterator pattern. I'll do my best to explain. @@ -106,10 +111,10 @@ impl Directory { .map(|entry| Self::new(entry?.path(), args)) // 4. the fold (this is try_fold because we're iterating over Result.). // each fold adds a directory as a child and increases the total size - .try_fold(|| (0, Vec::new()), fold_op) + .try_fold(|| (N::zero(), Vec::new()), fold_op) // 5. the final step is to reduce, which is as simple as concatenating // every vector and summing up their sizes. - .try_reduce(|| (0, Vec::new()), reduce_op) { + .try_reduce(|| (N::zero(), Vec::new()), reduce_op) { // remember that this is a match statement? Ok(tuple) => tuple, Err(_) if args.persistant() => return Ok(None), @@ -140,7 +145,7 @@ impl Directory { pub fn tree(self, unit: Unit) -> String { // since self.size is definitionally the greatest value, the tab length // is just the length of self.len, plus two for a tab width - let tab_size = unit.convert_with_units(self.size).len() + 2; + let tab_size = unit.convert_with_units(&self.size).len() + 2; self.vectorise(unit) .iter() .map(|e| e.stringify_tabbed(tab_size)) @@ -149,7 +154,7 @@ impl Directory { } /// TODO: maybe write directly to stdout to not use so much mem - fn vectorise(&self, unit: Unit) -> Vec { + fn vectorise(self, unit: Unit) -> Vec> { let mut result = Vec::new(); result.push(TreeEntry::new( @@ -168,7 +173,7 @@ impl Directory { // `wait` part. the last element of each one should however // be introduced with a `last` part, and padding should with // `blank` - for (idx, child) in self.children.iter().enumerate() { + for (idx, child) in self.children.into_iter().enumerate() { if idx+1 == len { new_entry_part = TreePart::Last; continue_part = TreePart::Blank; @@ -191,21 +196,27 @@ impl Directory { } #[derive(Debug)] -struct TreeEntry { +struct TreeEntry { parts: Vec, path: String, - size: u64, + size: N, unit: Unit } -impl TreeEntry { - fn new(path: String, size: u64, unit: Unit) -> Self { +impl TreeEntry +where + N: FsSize +{ + fn new(path: String, size: N, unit: Unit) -> Self { Self { parts: Vec::new(), path, size, unit } } fn stringify_tabbed(&self, tab_size: usize) -> String { - let mut result = format!("{: + // used in the fold and reduce steps + + AddAssign + // must be printable + + Display + // rayon + + Send + Sync + + Clone +{ + fn zero() -> Self; + + fn divide(self, rhs: u64) -> Self; +} + +impl FsSize for Tight<10> { + fn zero() -> Self { + >::zero() + } + + fn divide(self, rhs: u64) -> Self { + self/Self::from(rhs) + } +} + +impl FsSize for u64 { + fn zero() -> Self { + 0 + } + + fn divide(self, rhs: u64) -> Self { + self/rhs + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 3ce53e7..036855b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,20 +3,35 @@ mod args; mod directory; mod unit; +mod fs_size; +use big_int::tight::Tight; use clap::Parser; use args::Args; use directory::Directory; +use fs_size::FsSize; use std::process::ExitCode; fn main() -> ExitCode { let args = Args::parse().post_process(); - let mut total = 0; + if args.big_int() { + parse_paths::>(args) + } else { + parse_paths::(args) + } +} + +fn parse_paths(args: Args) -> ExitCode +where + N: FsSize +{ + let mut total = N::zero(); + for path in args.paths() { - let dir_structure = match Directory::new(path, &args) { + let dir_structure = match Directory::::new(path, &args) { Ok(Some(ds)) => ds, // this only ever returns None when persistant, // so we don't need a match guard @@ -29,7 +44,7 @@ fn main() -> ExitCode { } }; - total += dir_structure.size(); + total += dir_structure.size().clone(); if args.minimal() { // skip printing (this is a matter of indentation) @@ -48,11 +63,11 @@ fn main() -> ExitCode { } if args.total() { - let total = args.unit().convert_with_units(total); + let total = args.unit().convert_with_units(&total); println!("total: {total}"); } else if args.minimal() { - let total = args.unit().convert(total); + let total = args.unit().convert(&total); print!("{total}"); } diff --git a/src/unit.rs b/src/unit.rs index 7a839dc..ee6ecf6 100644 --- a/src/unit.rs +++ b/src/unit.rs @@ -1,5 +1,7 @@ use std::fmt::Display; +use crate::fs_size::FsSize; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Unit { Byte, @@ -38,12 +40,13 @@ impl Unit { } } - pub fn convert(self, n: u64) -> String { - format!("{}", n/self.integer_value()) + pub fn convert(self, n: &impl FsSize) -> String { + format!("{}", n.clone().divide(self.integer_value())) } - - pub fn convert_with_units(self, n: u64) -> String { + + pub fn convert_with_units(self, n: &impl FsSize) -> String { self.convert(n) + self.units_pretty() + // todo!() } /// Units to print for each different unit.