Compare commits

...

3 Commits

Author SHA1 Message Date
Nicholas Hope 3ae188be26 lints and optimisations 2024-03-11 12:59:48 -04:00
Nicholas Hope a65e78b915 minor refactoring 2024-03-11 12:59:34 -04:00
Nicholas Hope 60f0e550fa new arguments 2024-03-11 12:59:06 -04:00
3 changed files with 129 additions and 43 deletions

View File

@ -8,3 +8,20 @@ edition = "2021"
[dependencies]
clap = { version = "4.4.3", features = ["derive"] }
rayon = "1.7.0"
# lints from https://github.com/0atman/noboilerplate/blob/main/scripts/37-functional-rust.md
# these are deliberately extremely restrictive, to avoid as many of the
# easily-refactored footguns as possible. unlike the lints at that link, this set
# does not forbid unsafe code
[lints.clippy]
enum_glob_use = "deny"
pedantic = "deny"
nursery = "deny"
unwrap_used = "deny"
[profile.release]
opt-level = 'z' # size optimisation
lto = true # link time optimisation
codegen-units = 1 # fewer -> more optimisation
panic = 'abort' # abort on panic
strip = 'symbols' # strip symbols

View File

@ -1,7 +1,10 @@
use std::fs::read_dir;
use std::hint::unreachable_unchecked;
use std::path::{Path, PathBuf};
use std::io::{Result, ErrorKind};
use crate::unit::Unit;
#[derive(Debug, Clone)]
pub struct Directory {
name: PathBuf,
@ -9,12 +12,17 @@ pub struct Directory {
children: Vec<Directory>,
}
impl Directory {
#[inline(always)]
#[inline]
pub const fn size(&self) -> u64 {
self.size
}
#[inline(always)]
#[inline]
pub fn scale(&self, unit: Unit) -> String {
unit.convert(self.size)
}
#[inline]
pub fn path(&self) -> &Path {
self.name.as_ref()
}
@ -24,9 +32,15 @@ impl Directory {
P: AsRef<Path>
{
let path = path.as_ref();
let name = path.canonicalize()?.file_name().unwrap().into();
let name = path.canonicalize()?
.file_name()
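// NOTE(review): file_name() returns None when the path ends in "..", and
// also when the path has no final component at all (e.g. the root "/").
// canonicalize() rules out the ".." case but NOT the root case, so this
// branch can be reached and unreachable_unchecked() would then be UB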
.unwrap_or_else(|| unsafe { unreachable_unchecked() })
.into();
let dir = match read_dir(&path) {
let dir = match read_dir(path) {
Ok(dir) => dir,
Err(io_error) => match io_error.kind() {
ErrorKind::NotADirectory => return Ok(Self {
@ -37,11 +51,11 @@ impl Directory {
_ => return Err(io_error),
}
};
let mut size = 0;
let mut children = Vec::new();
for entry in dir {
let child = Directory::new(entry?.path())?;
let child = Self::new(entry?.path())?;
size += child.size;
children.push(child);
}
@ -53,19 +67,28 @@ impl Directory {
})
}
pub fn display(self) -> String {
pub fn display(self, unit: Unit) -> String {
// since self.size is definitionally the greatest value, the tab length
// is just the length of self.len, plus two for a tab width
let tab_size = self.size.to_string().len() + 2;
self.vectorise().iter().map(|e| e.to_string(tab_size) + "\n").collect()
let mut result = self.vectorise(unit).iter().map(|e| e.stringify_tabbed(tab_size) + "\n").collect::<String>();
if ! result.is_empty() {
let final_newline_char_range = result.len()-2 .. result.len();
result.drain(final_newline_char_range);
}
result
}
/// TODO: make not recursive, take &self if possible,
/// and maybe write directly to stdout to not use so much mem
fn vectorise(self) -> Vec<TreeEntry> {
fn vectorise(self, unit: Unit) -> Vec<TreeEntry> {
let mut result = Vec::new();
result.push(TreeEntry(Vec::new(), self.name.display().to_string(), self.size));
result.push(TreeEntry::new(
Vec::new(), self.name.display().to_string(), self.size, unit
));
let mut new_entry_part = TreePart::First;
let mut continue_part = TreePart::Wait;
@ -78,13 +101,13 @@ impl Directory {
continue_part = TreePart::Blank;
}
let subtree = child.vectorise();
let subtree = child.vectorise(unit);
for mut item in subtree {
if item.0.len() == 0 {
item.0.push(new_entry_part);
if item.parts.is_empty() {
item.parts.push(new_entry_part);
} else {
item.0.push(continue_part);
item.parts.push(continue_part);
}
result.push(item);
}
@ -95,18 +118,29 @@ impl Directory {
}
#[derive(Debug)]
struct TreeEntry(Vec<TreePart>, String, u64);
struct TreeEntry {
parts: Vec<TreePart>,
path: String,
size: u64,
unit: Unit
}
impl TreeEntry {
fn to_string(&self, tab_size: usize) -> String {
let mut result = format!("{:<tab_size$}", self.2);
fn new(parts: Vec<TreePart>, path: String, size: u64, unit: Unit) -> Self {
Self {
parts, path, size, unit
}
}
fn stringify_tabbed(&self, tab_size: usize) -> String {
let mut result = format!("{:<tab_size$}", self.unit.convert(self.size));
for part in self.0.iter().rev() {
result += &part.display().to_owned();
for part in self.parts.iter().rev() {
result += part.display();
}
// TODO: don't add the space to empty entries (it is currently always added)
result += " ";
result += &self.1;
result += &self.path;
result
}
}

View File

@ -1,13 +1,15 @@
#![feature(io_error_more, fs_try_exists)]
use clap::Parser;
use clap::ArgAction;
use clap::{Parser, ArgAction};
mod directory;
mod unit;
use directory::Directory;
use unit::Unit;
use std::process::ExitCode;
#[allow(clippy::struct_excessive_bools)]
#[derive(Parser, Debug, Clone)]
pub struct Args {
#[arg(
@ -16,43 +18,61 @@ pub struct Args {
long_help = "keep going if an error occurs (ex. unreadable subdirectories in a readable directory)"
)]
persistant: bool,
#[arg(
short, long,
help = "minimize output",
long_help = "like -t, but does not print \"total: \" before the summary or the newline after. It also surpresses all error messages",
conflicts_with = "total_only",
long_help = "print nothing but the total size for all directories, without a newline. Also supresses all error messages",
conflicts_with = "total",
default_value_t = false,
)]
minimal: bool,
#[arg(
short='T', long,
help = "display in tree",
default_value_t = false,
conflicts_with = "minimal",
)]
tree: bool,
#[arg(
short, long,
help = "only display the total size",
help = "display the total size",
conflicts_with = "minimal",
default_value_t = false,
)]
total_only: bool,
total: bool,
#[arg(
short='2', long,
help = "print sizes in powers of 1024",
help = "alias for --unit 1024",
default_value_t = false,
conflicts_with = "si"
conflicts_with_all = ["si","unit"],
)]
base_two: bool,
#[arg(
short='0', long,
help = "print sizes in powers of 1000",
help = "alias for --unit 1000",
default_value_t = false,
conflicts_with = "base_two"
conflicts_with_all = ["base_two","unit"],
)]
si: bool,
#[arg(
short, long,
help = "unit to print in",
long_help = "printing unit (case insensitive): b = bytes, kb = kilobytes, ki = kibibytes, gb = gigabytes, gi = gibibytes, tb = terabytes, ti = tibibytes",
value_parser = Unit::parse,
default_value_t = Unit::Byte,
conflicts_with_all = ["base_two","si"],
)]
unit: Unit,
#[arg(
value_parser = validate_path,
help = "directories to summate",
help = "items to summate",
action = ArgAction::Append,
num_args = 1..
)]
@ -68,8 +88,14 @@ fn validate_path(s: &str) -> Result<String, String> {
}
fn main() -> ExitCode {
let args = Args::parse();
let mut args = Args::parse();
if args.base_two {
args.unit = Unit::Kibi;
} else if args.si {
args.unit = Unit::Kilo;
}
let mut total = 0;
for path in args.path {
let dir_structure = match Directory::new(path) {
@ -81,24 +107,33 @@ fn main() -> ExitCode {
return ExitCode::FAILURE;
}
};
total += dir_structure.size();
if !args.minimal {
if args.total_only {
println!("{}: {}", dir_structure.path().to_str().unwrap(), dir_structure.size());
} else {
print!("{}", dir_structure.display());
}
if args.minimal {
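// minimal mode prints nothing per item; `continue` is used here rather than
// nesting the printing code below in an `else`, to keep indentation shallow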
continue;
}
if args.tree {
println!("{}", dir_structure.display(args.unit));
} else {
println!(
"{}: {}",
dir_structure.path().display(),
dir_structure.scale(args.unit),
);
}
}
if args.total_only {
let total = args.unit.convert(total);
if args.total {
println!("total: {total}");
}
else if args.minimal {
print!("{total}");
}
ExitCode::SUCCESS
}