// hb/src/directory.rs

use std::ffi::OsString;
use std::fs::read_dir;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};

use anyhow::{Context, Result};
use rayon::prelude::*;

use crate::args::Args;

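/// a node in the scanned tree: the directory's name, its total size
/// in bytes (including everything below it), and the children that
/// survived the print filter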
#[derive(Debug, Clone)]
pub struct Directory {
    name: PathBuf,
    size: u64,
    children: Vec<Directory>,
}

impl Directory {
    #[inline]
    pub const fn size(&self) -> u64 {
        self.size
    }

    #[inline]
    pub fn path(&self) -> &Path {
        self.name.as_ref()
    }

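    /// recursively builds the tree rooted at `path`, sizing children in
    /// parallel. returns `Ok(None)` when the entry is excluded, or when
    /// an IO error is swallowed in persistent mode.
    ///
    /// a minimal usage sketch (ignored by doctests; how `Args` is built
    /// depends on crate::args, so `Args::default()` is an assumption):
    ///
    /// ```ignore
    /// let args = Args::default(); // hypothetical constructor
    /// if let Some(dir) = Directory::new(".", &args)? {
    ///     println!("{}: {} bytes", dir.path().display(), dir.size());
    /// }
    /// ```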
    pub fn new<P: AsRef<Path>>(path: P, args: &Args) -> Result<Option<Self>> {
        let path = path.as_ref();
        // NOTE: I go back and forth on canonicalize()ing all the time.
        // I feel like it changes every commit. The performance loss seems
        // to be negligible, even when I do crazy things like `hb -p /`
        let name = match path.canonicalize() {
            Ok(path) => path,
            Err(_) if args.persistant() => return Ok(None),
            Err(e) => return Err(e.into()),
        }
        // file_name() is None for a filesystem root, hence the fallback
        .file_name()
        .map_or_else(|| OsString::from("/"), ToOwned::to_owned)
        .into();
        // symlink_metadata() is the same as metadata(), except that it
        // doesn't follow symlinks, so we can exclude them if necessary
        let meta = match path.symlink_metadata() {
            Ok(md) => md,
            Err(_) if args.persistant() => return Ok(None),
            Err(e) => return Err(e.into()),
        };
        if args.should_exclude(path, &meta) {
            // finding a file to exclude is behaviourally
            // equivalent to hitting an error in persistent
            // mode: just continue
            return Ok(None);
        }
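        // read_dir() fails with NotADirectory when `path` is a plain
        // file; that case becomes a leaf node sized from its metadata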
        let dir = match read_dir(path) {
            Ok(dir) => dir,
            Err(io_error) => match io_error.kind() {
                ErrorKind::NotADirectory => {
                    return Ok(Some(
                        Self {
                            name,
                            size: meta.len(),
                            children: Vec::new(),
                        }
                    ))
                },
                other => return Err(io_error)
                    .context(format!("{}: {}", path.display(), other)),
            },
        };
        // this is a complicated iterator pattern. I'll do my best to explain.
        // 1. the end result is that we `reduce()` the iterator to a single
        //    (u64, Vec<Directory>) tuple to return. this is done by...
        let (size, children) = match
            // 2. taking the iterator over the directory and parallelising it...
            dir.par_bridge()
            // 3. this is the recursive step: try to create new Directory
            //    objects from each item in the iterator
            .map(|entry| Self::new(entry?.path(), args))
            // 4. the fold (try_fold, since we're iterating over Results).
            //    each step adds a directory as a child and increases the
            //    running total
            .try_fold(
                || (0, Vec::new()),
                |(mut size, mut children), dir| -> Result<(u64, Vec<Self>)> {
                    let dir = match (dir, args.persistant()) {
                        (Ok(Some(d)), _) => d,
                        (Ok(None), _) | (Err(_), true) => return Result::Ok((size, children)),
                        (Err(e), false) => return Err(e),
                    };
                    size += dir.size;
                    // the size was already added above, so when
                    // should_print() is false the directory is merely
                    // hidden from the printed tree; it still counts
                    if args.should_print(dir.path()) {
                        children.push(dir);
                    }
                    // have to specify anyhow::Result::Ok, otherwise it
                    // complains that it can't infer the E in Result<T, E>
                    Result::Ok((size, children))
                }
            )
            // 5. the final step is to reduce, which is as simple as
            //    concatenating every vector and summing up the sizes
            .try_reduce(
                || (0, Vec::new()),
                |(asize, mut avec), (bsize, bvec)| {
                    avec.extend(bvec);
                    Result::Ok((asize + bsize, avec))
                }
            ) {
            // remember that this is a match statement?
            Ok(tuple) => tuple,
            Err(_) if args.persistant() => return Ok(None),
            Err(e) => return Err(e),
        };
        // final notes:
        // 1. I am unsure if it is better to do a bunch of partial sums
        //    during the fold() and reduce() steps, or if it is best to
        //    have them only do data collection and sum the lengths
        //    later. intuitively we would want to do everything in
        //    parallel, but I have no data to support this.
        // 2. this is a super complicated iterator pattern. if anyone
        //    knows how to simplify it I'm all ears, but being
        //    parallel is the main advantage it has over du, so I don't
        //    want to abandon that, even though a serial for loop is
        //    *incredibly* clearer.
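        // for reference, a serial version of the fold/reduce would look
        // roughly like this (a sketch only: it skips the persistent-mode
        // error handling that the closures above perform):
        //
        //     let (mut size, mut children) = (0, Vec::new());
        //     for entry in read_dir(path)? {
        //         if let Some(dir) = Self::new(entry?.path(), args)? {
        //             size += dir.size;
        //             if args.should_print(dir.path()) {
        //                 children.push(dir);
        //             }
        //         }
        //     }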
        Ok(Some(
            Self {
                name,
                size,
                children,
            }
        ))
    }

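    /// renders the tree as one string, one line per printed directory,
    /// with sizes left-aligned in a column sized to fit the root.
    ///
    /// illustrative shape only (names and sizes depend on the scan and
    /// on `Unit::convert`):
    ///
    /// ```text
    /// 12K   root
    /// 8K   ├── child_a
    /// 4K   └── child_b
    /// ```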
    pub fn tree(self, unit: Unit) -> String {
        // since self.size is definitionally the greatest value, the tab
        // width is just the length of its converted string, plus two for
        // padding
        let tab_size = unit.convert(self.size).len() + 2;
        self.vectorise(unit)
            .iter()
            .map(|e| e.stringify_tabbed(tab_size))
            .reduce(|s1, s2| s1 + "\n" + &s2)
            .unwrap_or_default()
    }

    /// TODO: make not recursive, take &self if possible,
    /// and maybe write directly to stdout to not use so much mem
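    ///
    /// flattens the tree depth-first into printable entries. each
    /// entry's prefix parts are pushed child-first on the way back up
    /// the recursion, so they end up stored leaf-to-root and are
    /// reversed again in stringify_tabbed()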
    fn vectorise(self, unit: Unit) -> Vec<TreeEntry> {
        let mut result = Vec::new();
        result.push(TreeEntry::new(
            self.name.display().to_string(), self.size, unit
        ));
        let mut new_entry_part = TreePart::First;
        let mut continue_part = TreePart::Wait;
        let len = self.children.len();
        // this is the display algorithm. it's built on the variables
        // `new_entry_part` and `continue_part`. most of the time, a new
        // item (introduced on every iteration of the loop) gets the
        // `First` tree part and is padded with the `Wait` part. the
        // last child, however, should be introduced with the `Last`
        // part and padded with `Blank`
        for (idx, child) in self.children.into_iter().enumerate() {
            if idx + 1 == len {
                new_entry_part = TreePart::Last;
                continue_part = TreePart::Blank;
            }
            let subtree = child.vectorise(unit);
            for mut item in subtree {
                // the subtree's own root has no parts yet, so it gets
                // the branch part; deeper entries get the padding part
                if item.parts.is_empty() {
                    item.parts.push(new_entry_part);
                } else {
                    item.parts.push(continue_part);
                }
                result.push(item);
            }
        }
        result
    }
}
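
/// one printable line of the tree: the prefix parts (stored
/// leaf-to-root), the path text, and the size to render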
#[derive(Debug)]
struct TreeEntry {
    parts: Vec<TreePart>,
    path: String,
    size: u64,
    unit: Unit,
}

impl TreeEntry {
    fn new(path: String, size: u64, unit: Unit) -> Self {
        Self {
            parts: Vec::new(), path, size, unit
        }
    }

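    /// formats one line: the converted size padded to `tab_size`, then
    /// the tree prefix (reversed; see vectorise()), then the path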
    fn stringify_tabbed(&self, tab_size: usize) -> String {
        let mut result = format!("{:<tab_size$}", self.unit.convert(self.size));
        for part in self.parts.iter().rev() {
            result += part.display();
        }
        result += " ";
        result += &self.path;
        result
    }
}
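
/// the box-drawing fragments that make up a tree prefix; every
/// fragment is three columns wide so the indentation stays aligned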
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum TreePart {
    /// `├──`
    First,
    /// `│  `
    Wait,
    /// `└──`
    Last,
    /// `   ` (blank)
    Blank,
}

impl TreePart {
    /// convert to the box-drawing fragment; Wait and Blank carry
    /// trailing spaces so every part is the same width
    pub const fn display(&self) -> &str {
        match self {
            Self::First => "├──",
            Self::Wait => "│  ",
            Self::Last => "└──",
            Self::Blank => "   ",
        }
    }
}