diff --git a/src/directory.rs b/src/directory.rs index 6bfdfdc..fefcb63 100644 --- a/src/directory.rs +++ b/src/directory.rs @@ -1,8 +1,10 @@ +use std::ffi::OsString; use std::fs::read_dir; -use std::hint::unreachable_unchecked; use std::path::{Path, PathBuf}; -use std::io::{Result, ErrorKind}; +use std::io::ErrorKind; +use anyhow::{Context, Result}; +use crate::args::Args; use crate::unit::Unit; use rayon::prelude::*; @@ -19,38 +21,55 @@ impl Directory { self.size } - #[inline] - pub fn scale(&self, unit: Unit) -> String { - unit.convert(self.size) - } - #[inline] pub fn path(&self) -> &Path { self.name.as_ref() } - pub fn new

(path: P) -> Result - where - P: AsRef - { + pub fn new< P: AsRef >(path: P, args: &Args) -> Result> { let path = path.as_ref(); - let name = path.canonicalize()? - .file_name() - // file_name() returns None if and only if the path ends in "..". - // due to the call to canonicalize(), this can never be the case, - // so this can be safely unwrapped - .unwrap_or_else(|| unsafe { unreachable_unchecked() }) + let name = path.file_name() + .map_or_else(|| OsString::from("/"), ToOwned::to_owned) .into(); + // symlink_metadata() is the same as metadata() but it doesn't + // traverse symlinks, so that we can exclude them if necessary + let meta = match (path.symlink_metadata(), args.persistant()) { + (Ok(md), _) => md, + (Err(_), true) => return Ok(None), + (Err(e), false) => return Err(e.into()), + }; + + if args.should_exclude(path, &meta) { + // Ok(None) is only meant to arise from an error + // while persistant. When that happens, a no-op + // entry is substituted, which is precisely + // we want to happen when we hit an excluded file. + + // NOTE: return Ok(None) is *not* equivalent, because + // that's only produced when an error occurs but + // the program is running in persistant mode. I used + // to return that, but that causes incredibly + // bizarre and wrong behaviour. + return Ok(Some( Self { name, size: 0, children: Vec::new() } )) + } + let dir = match read_dir(path) { Ok(dir) => dir, Err(io_error) => match io_error.kind() { - ErrorKind::NotADirectory => return Ok(Self { - name, - size: path.metadata()?.len(), - children: Vec::new() - }), - _ => return Err(io_error), + ErrorKind::NotADirectory => { + return Ok(Some( + Self { + name, + size: meta.len(), + children: Vec::new() + } + )) + }, + other => return Result::context( + Err(io_error), + format!("{}: {}", path.display(), other) + ), } }; @@ -62,16 +81,21 @@ impl Directory { dir.par_bridge() // 3, this is the recursive step: try to create new Directory // objects from each item in the iterator - .map(|entry| Self::new(entry?.path())) + .map(|entry| Self::new(entry?.path(), args)) // 4. the fold (this is try_fold because we're iterating over Result.). // each fold adds a directory as a child and increases the total size .try_fold( || (0, Vec::new()), - |(mut size, mut children), dir| { - let dir = dir?; + |(mut size, mut children), dir| -> Result<(u64, Vec)> { + let Some(dir) = Result::from(dir)? + else { + // some intermediate operation failed, but we + // are persistant, so just skip + return Result::Ok((0, Vec::new())) + }; size += dir.size; children.push(dir); - // have to specify std::io::Result::Ok otherwise it complains + // have to specify anyhow::Result::Ok otherwise it complains // that it can't infer the E in Result Result::Ok((size, children)) } @@ -82,56 +106,57 @@ impl Directory { || (0, Vec::new()), |(asize, mut avec), (bsize, bvec)| { avec.extend(bvec); - Ok((asize + bsize, avec)) + Result::Ok((asize + bsize, avec)) } )?; + // ^ note the Try, because of course any of these operations could + // fail // final notes: // 1. I am unsure if it is better to do a bunch of partial sums - // during the fold() and reduce() steps, or if it is best to - // have them only do data collection and sum the lengths - // later. intuitively we would want to do everything in - // parallel but I have no data to support this + // during the fold() and reduce() steps, or if it is best to + // have them only do data collection and sum the lengths + // later. intuitively we would want to do everything in + // parallel but I have no data to support this. // 2. this is a super complicated iterator pattern, If anyone - // knows how to simplify it I'm all ears, but this being - // parallel is the main advantage it has over du so I don't - // want to abandon that, even though a serial for loop is - // *incredibly* clearer + // knows how to simplify it I'm all ears, but being + // parallel is the main advantage it has over du so I don't + // want to abandon that, even though a serial for loop is + // *incredibly* clearer. - Ok(Self { - name, - size, - children, - }) + Ok(Some( + Self { + name, + size, + children, + } + )) } - pub fn display(self, unit: Unit) -> String { + pub fn tree(self, args: &Args) -> String { // since self.size is definitionally the greatest value, the tab length // is just the length of self.len, plus two for a tab width let tab_size = self.size.to_string().len() + 2; - let mut result = self.vectorise(unit).iter().map(|e| e.stringify_tabbed(tab_size) + "\n").collect::(); - - if ! result.is_empty() { - // unless there was no output, remove the final "\n" - let final_newline_char_range = result.len()-"\n".len() .. result.len(); - result.drain(final_newline_char_range); - } - - result + self.vectorise(args) + .iter() + .map(|e| e.stringify_tabbed(tab_size)) + .reduce(|s1, s2| s1 + "\n" + &s2) + .unwrap_or_default() } /// TODO: make not recursive, take &self if possible, /// and maybe write directly to stdout to not use so much mem - fn vectorise(self, unit: Unit) -> Vec { + fn vectorise(mut self, args: &Args) -> Vec { let mut result = Vec::new(); result.push(TreeEntry::new( - Vec::new(), self.name.display().to_string(), self.size, unit + self.name.display().to_string(), self.size, args.unit() )); let mut new_entry_part = TreePart::First; let mut continue_part = TreePart::Wait; - + + self.children.retain(|dir| args.should_print(dir.path())); let len = self.children.len(); for (idx, child) in self.children.into_iter().enumerate() { @@ -140,7 +165,7 @@ impl Directory { continue_part = TreePart::Blank; } - let subtree = child.vectorise(unit); + let subtree = child.vectorise(args); for mut item in subtree { if item.parts.is_empty() { @@ -164,9 +189,9 @@ struct TreeEntry { unit: Unit } impl TreeEntry { - fn new(parts: Vec, path: String, size: u64, unit: Unit) -> Self { + fn new(path: String, size: u64, unit: Unit) -> Self { Self { - parts, path, size, unit + parts: Vec::new(), path, size, unit } }