Added support for big integers with -b

main
nick 2024-07-22 00:30:45 -04:00
parent 948d3763b5
commit 0cfa1f1572
7 changed files with 157 additions and 31 deletions

41
Cargo.lock generated
View File

@ -57,6 +57,26 @@ version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
[[package]]
name = "big-int"
version = "7.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31375ce97b1316b3a92644c2cbc93fa9dcfba06e4aec9a440bce23397af82fd6"
dependencies = [
"big-int-proc",
"thiserror",
]
[[package]]
name = "big-int-proc"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73cfa06eb56d71f2bb1874b101a50c3ba29fcf3ff7dd8de274e473929459863b"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "clap"
version = "4.5.4"
@ -145,6 +165,7 @@ name = "hb"
version = "0.1.0"
dependencies = [
"anyhow",
"big-int",
"clap",
"glob",
"rayon",
@ -217,6 +238,26 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"

View File

@ -7,6 +7,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0.81"
big-int = "7.0.0"
clap = { version = "4.4.3", features = ["derive"] }
glob = "0.3.1"
rayon = "1.7.0"

View File

@ -2,6 +2,7 @@ use clap::{Parser, ArgAction};
use glob::Pattern;
use rayon::ThreadPoolBuilder;
use crate::fs_size::FsSize;
use crate::unit::Unit;
use std::fs::{symlink_metadata, Metadata};
@ -70,6 +71,14 @@ pub struct Args {
)]
si: bool,
#[arg(
short, long,
help = "use heap-allocated integers",
long_help = "use heap-allocated integers that won't overflow. Reduces performance",
default_value_t = false,
)]
big_int: bool,
#[arg(
short, long,
help = "unit to print in",
@ -159,11 +168,11 @@ impl Args {
self
}
pub fn len(&self, meta: &Metadata) -> u64 {
pub fn len<N: FsSize>(&self, meta: &Metadata) -> N {
match self.unit {
Unit::Blocks => meta.blocks(),
_ => meta.len(),
}
}.into()
}
pub fn should_exclude(&self, path: &Path, file: &Metadata) -> bool {
@ -212,6 +221,10 @@ impl Args {
pub fn paths(&self) -> Iter<'_, String> {
self.paths.iter()
}
/// Whether the `-b`/`--big-int` flag was given, i.e. whether sizes should
/// be tracked with heap-allocated integers instead of plain `u64`s.
pub fn big_int(&self) -> bool {
self.big_int
}
}
fn validate_path(s: &str) -> Result<String, String> {

View File

@ -5,20 +5,24 @@ use std::io::ErrorKind;
use anyhow::{Context, Result};
use crate::args::Args;
use crate::fs_size::FsSize;
use crate::unit::Unit;
use rayon::prelude::*;
#[derive(Debug, Clone)]
pub struct Directory {
pub struct Directory<N> {
name: PathBuf,
size: u64,
children: Vec<Directory>,
size: N,
children: Vec<Directory<N>>,
}
impl Directory {
impl<N> Directory<N>
where
N: FsSize
{
#[inline]
pub const fn size(&self) -> u64 {
self.size
pub const fn size(&self) -> &N {
&self.size
}
pub fn new< P: AsRef<Path> >(path: P, args: &Args) -> Result<Option<Self>> {
@ -74,13 +78,13 @@ impl Directory {
}
};
let fold_op = |(mut size, mut children), dir| -> Result<(u64, Vec<Self>)> {
let fold_op = |(mut size, mut children), dir| -> Result<(N, Vec<Self>)> {
let dir: Self = match (dir, args.persistant()) {
(Ok(Some(d)), _) => d,
(Ok(None), _) | (Err(_), true) => return Ok((size, children)),
(Err(e), false) => return Err(e),
};
size += dir.size;
size += dir.size.clone();
if args.tree() && args.should_print(&dir.name) {
// since size was increased, this just prevents
// the directory from appearing in printing
@ -89,10 +93,11 @@ impl Directory {
Ok((size, children))
};
let reduce_op = |(asize, mut avec): (u64, Vec<Self>), (bsize, bvec)| {
let reduce_op = |(mut asize, mut avec): (N, Vec<Self>), (bsize, bvec)| {
if args.tree() { avec.extend(bvec); }
asize += bsize;
Ok((asize + bsize, avec))
Ok((asize, avec))
};
// this is a complicated iterator pattern. I'll do my best to explain.
@ -106,10 +111,10 @@ impl Directory {
.map(|entry| Self::new(entry?.path(), args))
// 4. the fold (this is try_fold because we're iterating over Result.).
// each fold adds a directory as a child and increases the total size
.try_fold(|| (0, Vec::new()), fold_op)
.try_fold(|| (N::zero(), Vec::new()), fold_op)
// 5. the final step is to reduce, which is as simple as concatenating
// every vector and summing up their sizes.
.try_reduce(|| (0, Vec::new()), reduce_op) {
.try_reduce(|| (N::zero(), Vec::new()), reduce_op) {
// remember that this is a match statement?
Ok(tuple) => tuple,
Err(_) if args.persistant() => return Ok(None),
@ -140,7 +145,7 @@ impl Directory {
pub fn tree(self, unit: Unit) -> String {
// since self.size is definitionally the greatest value, the tab length
// is just the length of self.len, plus two for a tab width
let tab_size = unit.convert_with_units(self.size).len() + 2;
let tab_size = unit.convert_with_units(&self.size).len() + 2;
self.vectorise(unit)
.iter()
.map(|e| e.stringify_tabbed(tab_size))
@ -149,7 +154,7 @@ impl Directory {
}
/// TODO: maybe write directly to stdout to not use so much mem
fn vectorise(&self, unit: Unit) -> Vec<TreeEntry> {
fn vectorise(self, unit: Unit) -> Vec<TreeEntry<N>> {
let mut result = Vec::new();
result.push(TreeEntry::new(
@ -168,7 +173,7 @@ impl Directory {
// `wait` part. the last element of each one should however
// be introduced with a `last` part, and padded with
// `blank`
for (idx, child) in self.children.iter().enumerate() {
for (idx, child) in self.children.into_iter().enumerate() {
if idx+1 == len {
new_entry_part = TreePart::Last;
continue_part = TreePart::Blank;
@ -191,21 +196,27 @@ impl Directory {
}
#[derive(Debug)]
struct TreeEntry {
struct TreeEntry<N> {
parts: Vec<TreePart>,
path: String,
size: u64,
size: N,
unit: Unit
}
impl TreeEntry {
fn new(path: String, size: u64, unit: Unit) -> Self {
impl<N> TreeEntry<N>
where
N: FsSize
{
fn new(path: String, size: N, unit: Unit) -> Self {
Self {
parts: Vec::new(), path, size, unit
}
}
fn stringify_tabbed(&self, tab_size: usize) -> String {
let mut result = format!("{:<tab_size$}", self.unit.convert_with_units(self.size));
let mut result = format!(
"{:<tab_size$}",
self.unit.convert_with_units(&self.size)
);
for part in self.parts.iter().rev() {
result += part.display();

42
src/fs_size.rs Normal file
View File

@ -0,0 +1,42 @@
use std::fmt::Display;
use std::ops::AddAssign;
use big_int::{prelude::*, BigInt};
/// A numeric type that can hold and accumulate file-system sizes.
///
/// Implemented for `u64` and for the heap-allocated `Tight<10>` big integer,
/// so the directory walk can be generic over the size representation.
pub trait FsSize
where
    Self: From<u64> // must be convertible from the u64s returned by
                    // Metadata::len() and Metadata::blocks()
        + AddAssign // totals are accumulated in the fold and reduce steps
        + Display // sizes must be printable
        + Clone
        + Send // rayon moves values between worker threads
        + Sync,
{
    /// Returns the value an accumulated total starts from.
    fn zero() -> Self;

    /// Divides this size by `rhs`, consuming `self` and returning the quotient.
    fn divide(self, rhs: u64) -> Self;
}
/// Big-integer sizes, used when the program is run with `-b`/`--big-int`.
// NOTE(review): the const parameter `10` is presumably the numeric base of
// the big integer — confirm against the big-int crate docs.
impl FsSize for Tight<10> {
    fn zero() -> Self {
        // Fully qualified so the call resolves to `BigInt`'s `zero`
        // rather than to this trait's method of the same name.
        <Tight<10> as BigInt<10>>::zero()
    }

    fn divide(self, rhs: u64) -> Self {
        // Lift the u64 divisor into a big integer before dividing.
        let divisor = Self::from(rhs);
        self / divisor
    }
}
/// Plain machine-integer sizes: the representation used unless the big
/// integer mode is requested.
impl FsSize for u64 {
    /// An empty total is simply the integer zero.
    fn zero() -> Self {
        0_u64
    }

    /// Integer division: any remainder is discarded.
    fn divide(self, rhs: u64) -> Self {
        std::ops::Div::div(self, rhs)
    }
}

View File

@ -3,20 +3,35 @@
mod args;
mod directory;
mod unit;
mod fs_size;
use big_int::tight::Tight;
use clap::Parser;
use args::Args;
use directory::Directory;
use fs_size::FsSize;
use std::process::ExitCode;
fn main() -> ExitCode {
let args = Args::parse().post_process();
let mut total = 0;
if args.big_int() {
parse_paths::<Tight<10>>(args)
} else {
parse_paths::<u64>(args)
}
}
fn parse_paths<N>(args: Args) -> ExitCode
where
N: FsSize
{
let mut total = N::zero();
for path in args.paths() {
let dir_structure = match Directory::new(path, &args) {
let dir_structure = match Directory::<N>::new(path, &args) {
Ok(Some(ds)) => ds,
// this only ever returns None when persistant,
// so we don't need a match guard
@ -29,7 +44,7 @@ fn main() -> ExitCode {
}
};
total += dir_structure.size();
total += dir_structure.size().clone();
if args.minimal() {
// skip printing (this is a matter of indentation)
@ -48,11 +63,11 @@ fn main() -> ExitCode {
}
if args.total() {
let total = args.unit().convert_with_units(total);
let total = args.unit().convert_with_units(&total);
println!("total: {total}");
}
else if args.minimal() {
let total = args.unit().convert(total);
let total = args.unit().convert(&total);
print!("{total}");
}

View File

@ -1,5 +1,7 @@
use std::fmt::Display;
use crate::fs_size::FsSize;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Unit {
Byte,
@ -38,12 +40,13 @@ impl Unit {
}
}
pub fn convert(self, n: u64) -> String {
format!("{}", n/self.integer_value())
pub fn convert(self, n: &impl FsSize) -> String {
format!("{}", n.clone().divide(self.integer_value()))
}
pub fn convert_with_units(self, n: u64) -> String {
pub fn convert_with_units(self, n: &impl FsSize) -> String {
self.convert(n) + self.units_pretty()
// todo!()
}
/// Units to print for each different unit.