hb/src/directory.rs

use std::fs::read_dir;
use std::hint::unreachable_unchecked;
use std::path::{Path, PathBuf};
use std::io::{Result, ErrorKind};

use crate::unit::Unit;

use rayon::prelude::*;

/// A node in a scanned filesystem tree: either a directory with its scanned
/// entries as children, or a non-directory leaf with no children.
#[derive(Debug, Clone)]
pub struct Directory {
    /// Name shown for this node; set by `Directory::new` from the
    /// canonicalized path.
    name: PathBuf,
    /// Size of this node: for a leaf, the length reported by its metadata;
    /// for a directory, the sum of the sizes of all of its children.
    size: u64,
    /// Nodes for every entry found inside this directory. Always empty for
    /// a non-directory leaf.
    children: Vec<Directory>,
}
impl Directory {
    /// Returns the stored size of this node (for a directory, the sum of
    /// the sizes of everything scanned beneath it).
    #[inline]
    pub const fn size(&self) -> u64 {
        self.size
    }

    /// Returns this node's size formatted by `unit`, i.e. the result of
    /// `unit.convert` applied to the stored size.
    #[inline]
    pub fn scale(&self, unit: Unit) -> String {
        unit.convert(self.size)
    }

    /// Borrows the stored name of this node as a [`Path`].
    ///
    /// Note that this is the name recorded at construction time, not
    /// necessarily the full path that was scanned.
    #[inline]
    pub fn path(&self) -> &Path {
        self.name.as_path()
    }

    pub fn new<P>(path: P) -> Result<Self>
    where
        P: AsRef<Path>
    {
        let path = path.as_ref();
        let name = path.canonicalize()?
            .file_name()
            // file_name() returns None if and only if the path ends in "..".
            // due to the call to canonicalize(), this can never be the case,
            // so this can be safely unwrapped
            .unwrap_or_else(|| unsafe { unreachable_unchecked() })
            .into();

        let dir = match read_dir(path) {
            Ok(dir) => dir,
            Err(io_error) => match io_error.kind() {
                ErrorKind::NotADirectory => return Ok(Self {
                    name,
                    size: path.metadata()?.len(),
                    children: Vec::new()
                }),
                _ => return Err(io_error),
            }
        };

        // this is a compicated iterator pattern. I'll do my best to explain.
        // 1. the end result is that we `reduce()` the iterator to a single
        //    (u64, Vec<Directory>) tuple to return. this is done by...
        let (size, children) =
            // 2. taking the iterator over the directory and parallelising it...
            dir.par_bridge()
            // 3, this is the recursive step: try to create new Directory
            //    objects from each item in the iterator
            .map(|entry| Self::new(entry?.path()))
            // 4. the fold (this is try_fold because we're iterating over Result.).
            //    each fold adds a directory as a child and increases the total size
            .try_fold(
                || (0, Vec::new()),
                |(mut size, mut children), dir| {
                    let dir = dir?;
                    size += dir.size;
                    children.push(dir);
                    // have to specify std::io::Result::Ok otherwise it complains
                    // that it can't infer the E in Result<T, E>
                    Result::Ok((size, children))
                }
            )
            // 5. the final step is to reduce, which is as simple as concatenating
            //    every vector and summing up their sizes.
            .try_reduce(
                || (0, Vec::new()),
                |(asize, mut avec), (bsize, bvec)| {
                    avec.extend(bvec);
                    Ok((asize + bsize, avec))
                }
            )?;

        // final notes:
        // 1. I am unsure if it is better to do a bunch of partial sums
        // during the fold() and reduce() steps, or if it is best to
        // have them only do data collection and sum the lengths
        // later. intuitively we would want to do everything in
        // parallel but I have no data to support this
        // 2. this is a super complicated iterator pattern, If anyone
        // knows how to simplify it I'm all ears, but this being
        // parallel is the main advantage it has over du so I don't
        // want to abandon that, even though a serial for loop is
        // *incredibly* clearer

        Ok(Self {
            name,
            size,
            children,
        })
    }

    /// Renders the whole tree as text: one line per node, each line holding
    /// the size column followed by the tree drawing and the node's name.
    /// Lines are separated by `"\n"` with no trailing newline.
    pub fn display(self, unit: Unit) -> String {
        // the root holds the largest size in the tree, so the width of the
        // size column is the number of decimal digits of the root's size,
        // plus two for a gap.
        // NOTE(review): the column actually shows `unit.convert(..)` output,
        // whose width may differ from the raw digit count — confirm that
        // this width is always sufficient.
        let tab_size = self.size.to_string().len() + 2;

        let lines: Vec<String> = self
            .vectorise(unit)
            .iter()
            .map(|entry| entry.stringify_tabbed(tab_size))
            .collect();

        // joining puts a newline between lines but none after the last one
        lines.join("\n")
    }

    /// TODO: make not recursive, take &self if possible,
    /// and maybe write directly to stdout to not use so much mem
    fn vectorise(self, unit: Unit) -> Vec<TreeEntry> {
        let mut result = Vec::new();

        result.push(TreeEntry::new(
            Vec::new(), self.name.display().to_string(), self.size, unit
        ));

        let mut new_entry_part = TreePart::First;
        let mut continue_part = TreePart::Wait;

        let len = self.children.len();

        for (idx, child) in self.children.into_iter().enumerate() {
            if idx+1 == len {
                new_entry_part = TreePart::Last;
                continue_part = TreePart::Blank;
            }

            let subtree = child.vectorise(unit);

            for mut item in subtree {
                if item.parts.is_empty() {
                    item.parts.push(new_entry_part);
                } else {
                    item.parts.push(continue_part);
                }
                result.push(item);
            }
        }

        result
    }
}

/// One line of the rendered tree: a node's name and size together with the
/// tree-drawing parts that precede the name.
#[derive(Debug)]
struct TreeEntry {
    /// Tree-drawing parts for this line, stored innermost first; they are
    /// rendered in reverse order.
    parts: Vec<TreePart>,
    /// Text shown for the node after the tree drawing.
    path: String,
    /// Size of the node, converted with `unit` when the line is rendered.
    size: u64,
    /// Unit used to format `size`.
    unit: Unit
}
impl TreeEntry {
    /// Bundles the given fields into an entry, unchanged.
    fn new(parts: Vec<TreePart>, path: String, size: u64, unit: Unit) -> Self {
        Self { parts, path, size, unit }
    }

    /// Renders this entry as one line: the converted size left-aligned and
    /// padded to `tab_size` columns, then the tree parts from outermost to
    /// innermost, then a single space and the path.
    fn stringify_tabbed(&self, tab_size: usize) -> String {
        let size_column = format!("{:<tab_size$}", self.unit.convert(self.size));

        // parts are stored innermost first, so walk them backwards
        let branches: String = self
            .parts
            .iter()
            .rev()
            .map(|part| part.display())
            .collect();

        // the separating space is added for every entry, including those
        // without any tree parts.
        // NOTE(review): an earlier comment here said the space should not be
        // added to empty entries — confirm which behaviour is intended.
        format!("{size_column}{branches} {}", self.path)
    }
}

/// One three-column cell of the tree drawing placed in front of an entry.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum TreePart {
    /// Branch to an entry that has further siblings after it.
    First,
    /// Vertical line passing by, below an entry that has further siblings.
    Wait,
    /// Branch to the final entry of its parent.
    Last,
    /// Empty space below the final entry of its parent.
    Blank
}
impl TreePart {
    /// Returns the box-drawing text for this part; every variant is three
    /// characters wide so that columns line up.
    pub const fn display(&self) -> &str {
        match *self {
            TreePart::Blank => "   ",
            TreePart::Last  => "└──",
            TreePart::Wait  => "│  ",
            TreePart::First => "├──",
        }
    }
}