refactor: move iterating over files to Path extension trait

parent 5275356353
commit f7235fb342
@@ -1,3 +1,4 @@
+use crate::server::path::PathExt;
 use chrono::{Local, Utc};
 use flate2::write::GzEncoder;
 use flate2::Compression;
@@ -14,52 +15,6 @@ extern "C" {
     fn getegid() -> u32;
 }
 
-/// List all files in `src_dir` and all child directories.
-fn files(src_dir: PathBuf) -> io::Result<HashSet<PathBuf>> {
-    let mut dirs = vec![src_dir.clone()];
-    let mut files: HashSet<PathBuf> = HashSet::new();
-
-    while let Some(dir) = dirs.pop() {
-        for res in dir.read_dir()? {
-            let entry = res?;
-
-            if entry.file_name() == "cache" {
-                continue;
-            }
-
-            if entry.file_type()?.is_dir() {
-                dirs.push(entry.path());
-            } else {
-                files.insert(entry.path().strip_prefix(&src_dir).unwrap().to_path_buf());
-            }
-        }
-    }
-
-    Ok(files)
-}
-
-/// Check whether a file has been modified since the given timestamp.
-///
-/// Note that this function will *only* return true if it can determine with certainty that the
-/// file has not been modified. If any errors occur while obtaining the required metadata (e.g. if
-/// the file system does not support this metadata), this function will return false.
-fn not_modified_since<T: AsRef<Path>>(time: chrono::DateTime<Utc>, path: T) -> bool {
-    let path = path.as_ref();
-
-    if let Ok(metadata) = path.metadata() {
-        let last_modified = metadata.modified();
-
-        if let Ok(last_modified) = last_modified {
-            let t: chrono::DateTime<Utc> = last_modified.into();
-            let t = t.with_timezone(&Local);
-
-            return t < time;
-        }
-    }
-
-    false
-}
-
 #[derive(Debug, PartialEq, Serialize, Deserialize)]
 pub enum BackupType {
     Full,
@@ -162,11 +117,7 @@ impl Backup {
 
     /// Returns a pointer to this backup's previous backup by cloning the Arc pointer.
     pub fn previous(&self) -> Option<Arc<Self>> {
-        if let Some(previous) = &self.previous {
-            Some(Arc::clone(&previous))
-        } else {
-            None
-        }
+        self.previous.as_ref().map(Arc::clone)
     }
 
     /// Calculate the full state of the backup by applying all its ancestors' delta's in order,
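The rewritten `previous()` collapses the `if let`/`else` ladder into a single combinator. A standalone sketch of why the two forms are interchangeable (the `Node` type below is hypothetical, standing in for `Backup`):

```rust
use std::sync::Arc;

struct Node {
    previous: Option<Arc<Node>>,
}

impl Node {
    // `as_ref()` turns `&Option<Arc<Node>>` into `Option<&Arc<Node>>`, and
    // `map(Arc::clone)` bumps the reference count, leaving `self` untouched.
    fn previous_concise(&self) -> Option<Arc<Node>> {
        self.previous.as_ref().map(Arc::clone)
    }

    // The original, more verbose spelling with identical behaviour.
    fn previous_verbose(&self) -> Option<Arc<Node>> {
        if let Some(previous) = &self.previous {
            Some(Arc::clone(previous))
        } else {
            None
        }
    }
}

fn main() {
    let root = Arc::new(Node { previous: None });
    let child = Node {
        previous: Some(Arc::clone(&root)),
    };

    assert!(child.previous_concise().is_some());
    assert!(child.previous_verbose().is_some());
    assert!(root.previous_concise().is_none());
}
```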
@@ -193,7 +144,7 @@ impl Backup {
             None
         } else if let Some(previous) = &self.previous {
             if n == 1 {
-                Some(Arc::clone(&previous))
+                Some(Arc::clone(previous))
             } else {
                 previous.ancestor(n - 1)
             }
@@ -232,26 +183,27 @@ impl Backup {
         let enc = GzEncoder::new(tar_gz, Compression::default());
         let mut ar = tar::Builder::new(enc);
 
-        let mut added: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
+        let mut delta = BackupDelta::new();
 
         for (dir_in_tar, src_dir) in dirs {
-            let files = files(src_dir.clone())?;
+            let mut added_files: HashSet<PathBuf> = HashSet::new();
 
-            for path in &files {
-                ar.append_path_with_name(src_dir.join(path), dir_in_tar.join(path))?;
+            for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
+                let path = entry?.path();
+                let stripped = path.strip_prefix(&src_dir).unwrap();
+
+                ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
+                added_files.insert(stripped.to_path_buf());
             }
 
-            added.insert(dir_in_tar, files);
+            delta.added.insert(dir_in_tar, added_files);
         }
 
         Ok(Backup {
             previous: None,
             type_: BackupType::Full,
             start_time,
-            delta: BackupDelta {
-                added,
-                removed: HashMap::new(),
-            },
+            delta,
         })
     }
 
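Both the old and new loops archive each file under a name relative to its source directory; the new code makes that explicit with `strip_prefix`. A small sketch of what `strip_prefix` yields (paths invented for illustration):

```rust
use std::path::Path;

fn main() {
    let src_dir = Path::new("/srv/data/world");
    let path = Path::new("/srv/data/world/region/r.0.0.mca");

    // Every entry produced by walking `src_dir` is necessarily prefixed by
    // it, which is why the backup code can safely `unwrap()` here.
    let stripped = path.strip_prefix(src_dir).unwrap();
    assert_eq!(stripped, Path::new("region/r.0.0.mca"));
}
```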
@@ -274,17 +226,19 @@ impl Backup {
         let mut delta = BackupDelta::new();
 
         for (dir_in_tar, src_dir) in dirs {
-            let files = files(src_dir.clone())?;
-            let added_files = files
-                .iter()
-                // This explicit negation is because we wish to also include files for which we
-                // couldn't determine the last modified time
-                .filter(|p| !not_modified_since(previous.start_time, src_dir.join(p)))
-                .cloned()
-                .collect::<HashSet<PathBuf>>();
-
-            for path in added_files.iter() {
-                ar.append_path_with_name(src_dir.join(path), dir_in_tar.join(path))?;
+            let mut all_files: HashSet<PathBuf> = HashSet::new();
+            let mut added_files: HashSet<PathBuf> = HashSet::new();
+
+            for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
+                let path = entry?.path();
+                let stripped = path.strip_prefix(&src_dir).unwrap();
+
+                if !path.not_modified_since(previous.start_time) {
+                    ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
+                    added_files.insert(stripped.to_path_buf());
+                }
+
+                all_files.insert(stripped.to_path_buf());
             }
 
             delta.added.insert(dir_in_tar.clone(), added_files);
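The `!path.not_modified_since(...)` test keeps the old filter's conservative semantics: a file is skipped only when its metadata proves it predates the previous backup, so a file whose modified time can't be read is still archived. A minimal sketch of that decision table, using a stand-in function rather than the trait method itself:

```rust
use chrono::{DateTime, Utc};
use std::io;
use std::time::SystemTime;

// Stand-in for the trait method: `mtime` plays the role of
// `metadata.modified()`. Only a readable mtime strictly older than the
// cutoff counts as "certainly unmodified"; any error means assume changed.
fn not_modified_since(mtime: io::Result<SystemTime>, cutoff: DateTime<Utc>) -> bool {
    match mtime {
        Ok(t) => DateTime::<Utc>::from(t) < cutoff,
        Err(_) => false,
    }
}

fn main() {
    let cutoff = Utc::now();

    // Provably older than the cutoff: the incremental backup may skip it.
    assert!(not_modified_since(Ok(SystemTime::UNIX_EPOCH), cutoff));

    // No usable mtime: `!not_modified_since(...)` holds, so it gets archived.
    let no_mtime = Err(io::Error::new(io::ErrorKind::Other, "mtime unavailable"));
    assert!(!not_modified_since(no_mtime, cutoff));
}
```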
@@ -292,7 +246,7 @@ impl Backup {
             if let Some(previous_files) = previous_state.get(&dir_in_tar) {
                 delta.removed.insert(
                     dir_in_tar,
-                    previous_files.difference(&files).cloned().collect(),
+                    previous_files.difference(&all_files).cloned().collect(),
                 );
             }
         }
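With `all_files` now recording everything seen during the walk (not just what was re-archived), the removed set falls out of a plain set difference against the previous backup's state. For reference, `difference` yields the elements of the first set absent from the second:

```rust
use std::collections::HashSet;
use std::path::PathBuf;

fn main() {
    let previous_files: HashSet<PathBuf> =
        ["a.txt", "b.txt", "c.txt"].into_iter().map(PathBuf::from).collect();
    let all_files: HashSet<PathBuf> =
        ["a.txt", "c.txt", "d.txt"].into_iter().map(PathBuf::from).collect();

    // Present in the previous backup but gone now: deleted in between.
    let removed: HashSet<PathBuf> =
        previous_files.difference(&all_files).cloned().collect();

    let expected: HashSet<PathBuf> = ["b.txt"].into_iter().map(PathBuf::from).collect();
    assert_eq!(removed, expected);
}
```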
@@ -1,19 +1,147 @@
-use chrono::Utc;
+use chrono::{Local, Utc};
 use std::collections::HashSet;
-use std::path::PathBuf;
+use std::ffi::OsString;
+use std::fs::DirEntry;
+use std::path::{Path, PathBuf};
 use std::{fs, io};
 
-struct ReadDirRecursive {
-    ignored_dirs: HashSet<PathBuf>,
-    read_dir: Option<fs::ReadDir>,
-    stack: Vec<fs::ReadDir>,
+pub struct ReadDirRecursive {
+    ignored: HashSet<OsString>,
+    read_dir: fs::ReadDir,
+    dir_stack: Vec<PathBuf>,
+    files_only: bool,
 }
 
 impl ReadDirRecursive {
-    // pub fn new()
+    /// Start the iterator for a new directory
+    pub fn start<P: AsRef<Path>>(path: P) -> io::Result<Self> {
+        let path = path.as_ref();
+        let read_dir = path.read_dir()?;
+
+        Ok(ReadDirRecursive {
+            ignored: HashSet::new(),
+            read_dir,
+            dir_stack: Vec::new(),
+            files_only: false,
+        })
+    }
+
+    pub fn ignored<S: Into<OsString>>(mut self, s: S) -> Self {
+        self.ignored.insert(s.into());
+
+        self
+    }
+
+    pub fn files(mut self) -> Self {
+        self.files_only = true;
+
+        self
+    }
+
+    /// Tries to populate the `read_dir` field with a new `ReadDir` instance to consume.
+    fn next_read_dir(&mut self) -> io::Result<bool> {
+        if let Some(path) = self.dir_stack.pop() {
+            self.read_dir = path.read_dir()?;
+
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+
+    /// Convenience method to add a new directory to the stack.
+    fn push_entry(&mut self, entry: &io::Result<DirEntry>) {
+        if let Ok(entry) = entry {
+            if entry.path().is_dir() {
+                self.dir_stack.push(entry.path());
+            }
+        }
+    }
+
+    /// Determine whether an entry should be returned by the iterator.
+    fn should_return(&self, entry: &io::Result<DirEntry>) -> bool {
+        if let Ok(entry) = entry {
+            let mut res = !self.ignored.contains(&entry.file_name());
+
+            // Please just let me combine these already
+            if self.files_only {
+                if let Ok(file_type) = entry.file_type() {
+                    res = res && file_type.is_file();
+                }
+                // We couldn't determine if it's a file, so we don't return it
+                else {
+                    res = false;
+                }
+            }
+
+            res
+        } else {
+            true
+        }
+    }
 }
 
-trait PathExt {
-    fn modified_since(timestamp: chrono::DateTime<Utc>) -> bool;
-    fn read_dir_recusive() -> ReadDirRecursive;
+impl Iterator for ReadDirRecursive {
+    type Item = io::Result<DirEntry>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            // First, we try to consume the current directory's items
+            while let Some(entry) = self.read_dir.next() {
+                self.push_entry(&entry);
+
+                if self.should_return(&entry) {
+                    return Some(entry);
+                }
+            }
+
+            // If we get an error while setting up a new directory, we return this, otherwise we
+            // keep trying to consume the directories
+            match self.next_read_dir() {
+                Ok(true) => (),
+                // There's no more directories to traverse, so the iterator is done
+                Ok(false) => return None,
+                Err(e) => return Some(Err(e)),
+            }
+        }
+    }
+}
+
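Since `ignored` and `files` each take `self` by value and hand it back, configuration chains directly off `start` before iteration begins. A usage sketch, assuming `ReadDirRecursive` is in scope and a `data` directory exists:

```rust
use std::io;

fn main() -> io::Result<()> {
    // Walk `data` recursively, skipping entries named "cache" and yielding
    // only plain files, just as the backup code consumes the iterator.
    for entry in ReadDirRecursive::start("data")?.ignored("cache").files() {
        println!("{}", entry?.path().display());
    }

    Ok(())
}
```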
+pub trait PathExt {
+    /// Confirm whether the file has not been modified since the given timestamp.
+    ///
+    /// This function will only return true if it can determine with certainty that the file hasn't
+    /// been modified.
+    ///
+    /// # Args
+    ///
+    /// * `timestamp` - Timestamp to compare modified time with
+    ///
+    /// # Returns
+    ///
+    /// True if the file has not been modified for sure, false otherwise.
+    fn not_modified_since(&self, timestamp: chrono::DateTime<Utc>) -> bool;
+
+    /// An extension of the `read_dir` command that runs through the entire underlying directory
+    /// structure using breadth-first search
+    fn read_dir_recursive(&self) -> io::Result<ReadDirRecursive>;
+}
+
+impl PathExt for Path {
+    fn not_modified_since(&self, timestamp: chrono::DateTime<Utc>) -> bool {
+        if let Ok(metadata) = self.metadata() {
+            if let Ok(last_modified) = metadata.modified() {
+                let t: chrono::DateTime<Utc> = last_modified.into();
+                let t = t.with_timezone(&Local);
+
+                return t < timestamp;
+            }
+        }
+
+        false
+    }
+
+    fn read_dir_recursive(&self) -> io::Result<ReadDirRecursive> {
+        ReadDirRecursive::start(self)
+    }
 }
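Taken together, the trait gives call sites method syntax for everything the deleted free functions did. A rough equivalent of the removed `files()` helper written against the new API (the module path comes from the import at the top of this commit):

```rust
use crate::server::path::PathExt;
use std::collections::HashSet;
use std::io;
use std::path::{Path, PathBuf};

/// Collect all non-ignored files under `src_dir`, relative to `src_dir`;
/// approximately what the removed `files()` free function returned.
fn files(src_dir: &Path) -> io::Result<HashSet<PathBuf>> {
    let mut files = HashSet::new();

    for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
        let path = entry?.path();
        // Entries always live under `src_dir`, so this cannot fail.
        files.insert(path.strip_prefix(src_dir).unwrap().to_path_buf());
    }

    Ok(files)
}
```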