added parallelism
parent
5bc0c302f3
commit
fd292a53be
|
@ -5,6 +5,8 @@ use std::io::{Result, ErrorKind};
|
||||||
|
|
||||||
use crate::unit::Unit;
|
use crate::unit::Unit;
|
||||||
|
|
||||||
|
use rayon::prelude::*;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Directory {
|
pub struct Directory {
|
||||||
name: PathBuf,
|
name: PathBuf,
|
||||||
|
@ -52,15 +54,51 @@ impl Directory {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut size = 0;
|
// this is a complicated iterator pattern. I'll do my best to explain.
|
||||||
let mut children = Vec::new();
|
// 1. the end result is that we `reduce()` the iterator to a single
|
||||||
for entry in dir {
|
// (u64, Vec<Directory>) tuple to return. this is done by...
|
||||||
let child = Self::new(entry?.path())?;
|
let (size, children) =
|
||||||
size += child.size;
|
// 2. taking the iterator over the directory and parallelising it...
|
||||||
children.push(child);
|
dir.par_bridge()
|
||||||
}
|
// 3. this is the recursive step: try to create new Directory
|
||||||
|
// objects from each item in the iterator
|
||||||
|
.map(|entry| Self::new(entry?.path()))
|
||||||
|
// 4. the fold (this is try_fold because we're iterating over Results).
|
||||||
|
// each fold adds a directory as a child and increases the total size
|
||||||
|
.try_fold(
|
||||||
|
|| (0, Vec::new()),
|
||||||
|
|(mut size, mut children), dir| {
|
||||||
|
let dir = dir?;
|
||||||
|
size += dir.size;
|
||||||
|
children.push(dir);
|
||||||
|
// have to specify std::io::Result::Ok otherwise it complains
|
||||||
|
// that it can't infer the E in Result<T, E>
|
||||||
|
Result::Ok((size, children))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
// 5. the final step is to reduce, which is as simple as concatenating
|
||||||
|
// every vector and summing up their sizes.
|
||||||
|
.try_reduce(
|
||||||
|
|| (0, Vec::new()),
|
||||||
|
|(asize, mut avec), (bsize, bvec)| {
|
||||||
|
avec.extend(bvec);
|
||||||
|
Ok((asize + bsize, avec))
|
||||||
|
}
|
||||||
|
)?;
|
||||||
|
|
||||||
Ok(Self{
|
// final notes:
|
||||||
|
// 1. I am unsure if it is better to do a bunch of partial sums
|
||||||
|
// during the fold() and reduce() steps, or if it is best to
|
||||||
|
// have them only do data collection and sum the lengths
|
||||||
|
// later. intuitively we would want to do everything in
|
||||||
|
// parallel but I have no data to support this
|
||||||
|
// 2. this is a super complicated iterator pattern. If anyone
|
||||||
|
// knows how to simplify it I'm all ears, but this being
|
||||||
|
// parallel is the main advantage it has over du so I don't
|
||||||
|
// want to abandon that, even though a serial for loop is
|
||||||
|
// *vastly* clearer
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
name,
|
name,
|
||||||
size,
|
size,
|
||||||
children,
|
children,
|
||||||
|
@ -74,7 +112,8 @@ impl Directory {
|
||||||
let mut result = self.vectorise(unit).iter().map(|e| e.stringify_tabbed(tab_size) + "\n").collect::<String>();
|
let mut result = self.vectorise(unit).iter().map(|e| e.stringify_tabbed(tab_size) + "\n").collect::<String>();
|
||||||
|
|
||||||
if ! result.is_empty() {
|
if ! result.is_empty() {
|
||||||
let final_newline_char_range = result.len()-2 .. result.len();
|
// unless there was no output, remove the final "\n"
|
||||||
|
let final_newline_char_range = result.len()-"\n".len() .. result.len();
|
||||||
result.drain(final_newline_char_range);
|
result.drain(final_newline_char_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue