refactor: move iterating over files to Path extension trait
parent
5275356353
commit
f7235fb342
|
@ -1,3 +1,4 @@
|
|||
use crate::server::path::PathExt;
|
||||
use chrono::{Local, Utc};
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
|
@ -14,52 +15,6 @@ extern "C" {
|
|||
fn getegid() -> u32;
|
||||
}
|
||||
|
||||
/// Collect every file found below `src_dir`, descending into all child directories.
///
/// Entries named `cache` are skipped entirely; a directory with that name is not descended
/// into. The returned paths are relative to `src_dir`.
///
/// # Errors
///
/// Returns the first I/O error hit while reading a directory, one of its entries, or an
/// entry's file type.
fn files(src_dir: PathBuf) -> io::Result<HashSet<PathBuf>> {
    let mut found: HashSet<PathBuf> = HashSet::new();
    // Directories that still have to be read; seeded with the root itself.
    let mut pending = vec![src_dir.clone()];

    while let Some(current) = pending.pop() {
        for item in current.read_dir()? {
            let item = item?;

            if item.file_name() == "cache" {
                continue;
            }

            let full_path = item.path();

            if item.file_type()?.is_dir() {
                pending.push(full_path);
                continue;
            }

            // Every visited path was produced by reading a directory below `src_dir`, so the
            // prefix is always present.
            let relative = full_path.strip_prefix(&src_dir).unwrap().to_path_buf();
            found.insert(relative);
        }
    }

    Ok(found)
}
|
||||
|
||||
/// Check whether a file has been modified since the given timestamp.
|
||||
///
|
||||
/// Note that this function will *only* return true if it can determine with certainty that the
|
||||
/// file has not been modified. If any errors occur while obtaining the required metadata (e.g. if
|
||||
/// the file system does not support this metadata), this function will return false.
|
||||
fn not_modified_since<T: AsRef<Path>>(time: chrono::DateTime<Utc>, path: T) -> bool {
|
||||
let path = path.as_ref();
|
||||
|
||||
if let Ok(metadata) = path.metadata() {
|
||||
let last_modified = metadata.modified();
|
||||
|
||||
if let Ok(last_modified) = last_modified {
|
||||
let t: chrono::DateTime<Utc> = last_modified.into();
|
||||
let t = t.with_timezone(&Local);
|
||||
|
||||
return t < time;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub enum BackupType {
|
||||
Full,
|
||||
|
@ -162,11 +117,7 @@ impl Backup {
|
|||
|
||||
/// Returns a pointer to this backup's previous backup by cloning the Arc pointer.
|
||||
pub fn previous(&self) -> Option<Arc<Self>> {
|
||||
if let Some(previous) = &self.previous {
|
||||
Some(Arc::clone(&previous))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
self.previous.as_ref().map(Arc::clone)
|
||||
}
|
||||
|
||||
/// Calculate the full state of the backup by applying all its ancestors' delta's in order,
|
||||
|
@ -193,7 +144,7 @@ impl Backup {
|
|||
None
|
||||
} else if let Some(previous) = &self.previous {
|
||||
if n == 1 {
|
||||
Some(Arc::clone(&previous))
|
||||
Some(Arc::clone(previous))
|
||||
} else {
|
||||
previous.ancestor(n - 1)
|
||||
}
|
||||
|
@ -232,26 +183,27 @@ impl Backup {
|
|||
let enc = GzEncoder::new(tar_gz, Compression::default());
|
||||
let mut ar = tar::Builder::new(enc);
|
||||
|
||||
let mut added: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
|
||||
let mut delta = BackupDelta::new();
|
||||
|
||||
for (dir_in_tar, src_dir) in dirs {
|
||||
let files = files(src_dir.clone())?;
|
||||
let mut added_files: HashSet<PathBuf> = HashSet::new();
|
||||
|
||||
for path in &files {
|
||||
ar.append_path_with_name(src_dir.join(path), dir_in_tar.join(path))?;
|
||||
for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
|
||||
let path = entry?.path();
|
||||
let stripped = path.strip_prefix(&src_dir).unwrap();
|
||||
|
||||
ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
|
||||
added_files.insert(stripped.to_path_buf());
|
||||
}
|
||||
|
||||
added.insert(dir_in_tar, files);
|
||||
delta.added.insert(dir_in_tar, added_files);
|
||||
}
|
||||
|
||||
Ok(Backup {
|
||||
previous: None,
|
||||
type_: BackupType::Full,
|
||||
start_time,
|
||||
delta: BackupDelta {
|
||||
added,
|
||||
removed: HashMap::new(),
|
||||
},
|
||||
delta,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -274,17 +226,19 @@ impl Backup {
|
|||
let mut delta = BackupDelta::new();
|
||||
|
||||
for (dir_in_tar, src_dir) in dirs {
|
||||
let files = files(src_dir.clone())?;
|
||||
let added_files = files
|
||||
.iter()
|
||||
// This explicit negation is because we wish to also include files for which we
|
||||
// couldn't determine the last modified time
|
||||
.filter(|p| !not_modified_since(previous.start_time, src_dir.join(p)))
|
||||
.cloned()
|
||||
.collect::<HashSet<PathBuf>>();
|
||||
let mut all_files: HashSet<PathBuf> = HashSet::new();
|
||||
let mut added_files: HashSet<PathBuf> = HashSet::new();
|
||||
|
||||
for path in added_files.iter() {
|
||||
ar.append_path_with_name(src_dir.join(path), dir_in_tar.join(path))?;
|
||||
for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
|
||||
let path = entry?.path();
|
||||
let stripped = path.strip_prefix(&src_dir).unwrap();
|
||||
|
||||
if !path.not_modified_since(previous.start_time) {
|
||||
ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
|
||||
added_files.insert(stripped.to_path_buf());
|
||||
}
|
||||
|
||||
all_files.insert(stripped.to_path_buf());
|
||||
}
|
||||
|
||||
delta.added.insert(dir_in_tar.clone(), added_files);
|
||||
|
@ -292,7 +246,7 @@ impl Backup {
|
|||
if let Some(previous_files) = previous_state.get(&dir_in_tar) {
|
||||
delta.removed.insert(
|
||||
dir_in_tar,
|
||||
previous_files.difference(&files).cloned().collect(),
|
||||
previous_files.difference(&all_files).cloned().collect(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,19 +1,147 @@
|
|||
use chrono::Utc;
|
||||
use chrono::{Local, Utc};
|
||||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
use std::ffi::OsString;
|
||||
use std::fs::DirEntry;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{fs, io};
|
||||
|
||||
/// Iterator over the entries of a directory and of every directory found below it.
///
/// Create one with [`ReadDirRecursive::start`], then optionally narrow what is yielded with
/// [`ReadDirRecursive::ignored`] and [`ReadDirRecursive::files`].
///
/// The directory currently being read is consumed completely before the next one is opened;
/// pending directories are taken from a stack, most recently discovered first.
pub struct ReadDirRecursive {
    /// File names that are never yielded; directories with such a name are not descended into.
    ignored: HashSet<OsString>,
    /// The directory currently being consumed.
    read_dir: fs::ReadDir,
    /// Directories that were discovered but have not been read yet.
    dir_stack: Vec<PathBuf>,
    /// When set, only regular files are yielded.
    files_only: bool,
}

impl ReadDirRecursive {
    /// Start the iterator for a new directory.
    ///
    /// # Errors
    ///
    /// Returns the I/O error produced when `path` cannot be opened for reading.
    pub fn start<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        let read_dir = path.as_ref().read_dir()?;

        Ok(ReadDirRecursive {
            ignored: HashSet::new(),
            read_dir,
            dir_stack: Vec::new(),
            files_only: false,
        })
    }

    /// Exclude every entry whose file name equals `s`.
    ///
    /// An excluded entry is not yielded, and if it is a directory nothing below it is visited
    /// either. May be called repeatedly to exclude several names.
    pub fn ignored<S: Into<OsString>>(mut self, s: S) -> Self {
        self.ignored.insert(s.into());

        self
    }

    /// Restrict the iterator to regular files; directories are still traversed but not yielded.
    pub fn files(mut self) -> Self {
        self.files_only = true;

        self
    }

    /// Tries to populate the `read_dir` field with a new `ReadDir` instance to consume.
    ///
    /// Returns `Ok(true)` when a pending directory was opened, `Ok(false)` when no directories
    /// are left, and the I/O error when the popped directory could not be opened.
    fn next_read_dir(&mut self) -> io::Result<bool> {
        match self.dir_stack.pop() {
            Some(path) => {
                self.read_dir = path.read_dir()?;

                Ok(true)
            }
            None => Ok(false),
        }
    }

    /// Convenience method to add a new directory to the stack.
    ///
    /// Entries that are errors, that carry an ignored name, or that are not directories are left
    /// alone.
    fn push_entry(&mut self, entry: &io::Result<DirEntry>) {
        if let Ok(entry) = entry {
            // An ignored directory must not be descended into, otherwise its contents would
            // still be yielded under their own (non-ignored) file names.
            if self.ignored.contains(&entry.file_name()) {
                return;
            }

            // `DirEntry::file_type` does not follow symlinks, so a symlink pointing back up the
            // tree cannot cause endless recursion.
            let is_dir = entry.file_type().map(|t| t.is_dir()).unwrap_or(false);

            if is_dir {
                self.dir_stack.push(entry.path());
            }
        }
    }

    /// Determine whether an entry should be returned by the iterator.
    ///
    /// Errors are always returned so the caller gets to see them.
    fn should_return(&self, entry: &io::Result<DirEntry>) -> bool {
        let entry = match entry {
            Ok(entry) => entry,
            Err(_) => return true,
        };

        if self.ignored.contains(&entry.file_name()) {
            return false;
        }

        if self.files_only {
            // If we can't determine whether it's a file, we don't return it.
            return entry.file_type().map(|t| t.is_file()).unwrap_or(false);
        }

        true
    }
}

impl Iterator for ReadDirRecursive {
    type Item = io::Result<DirEntry>;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // First, we try to consume the current directory's items
            while let Some(entry) = self.read_dir.next() {
                self.push_entry(&entry);

                if self.should_return(&entry) {
                    return Some(entry);
                }
            }

            // If we get an error while setting up a new directory, we return this, otherwise we
            // keep trying to consume the directories
            match self.next_read_dir() {
                Ok(true) => (),
                // There's no more directories to traverse, so the iterator is done
                Ok(false) => return None,
                Err(e) => return Some(Err(e)),
            }
        }
    }
}
|
||||
|
||||
pub trait PathExt {
|
||||
/// Confirm whether the file has not been modified since the given timestamp.
|
||||
///
|
||||
/// This function will only return true if it can determine with certainty that the file hasn't
|
||||
/// been modified.
|
||||
///
|
||||
/// # Args
|
||||
///
|
||||
/// * `timestamp` - Timestamp to compare modified time with
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// True if the file has not been modified for sure, false otherwise.
|
||||
fn not_modified_since(&self, timestamp: chrono::DateTime<Utc>) -> bool;
|
||||
|
||||
/// An extension of the `read_dir` command that runs through the entire underlying directory
|
||||
/// structure using breadth-first search
|
||||
fn read_dir_recursive(&self) -> io::Result<ReadDirRecursive>;
|
||||
}
|
||||
|
||||
impl PathExt for Path {
|
||||
fn not_modified_since(&self, timestamp: chrono::DateTime<Utc>) -> bool {
|
||||
if let Ok(metadata) = self.metadata() {
|
||||
if let Ok(last_modified) = metadata.modified() {
|
||||
let t: chrono::DateTime<Utc> = last_modified.into();
|
||||
let t = t.with_timezone(&Local);
|
||||
|
||||
return t < timestamp;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn read_dir_recursive(&self) -> io::Result<ReadDirRecursive> {
|
||||
ReadDirRecursive::start(self)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue