From f7235fb34243767711d42767c912055a47ce7452 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sat, 17 Jun 2023 12:08:46 +0200 Subject: [PATCH] refactor: move iterating over files to Path extension trait --- src/server/backups.rs | 98 ++++++++-------------------- src/server/path.rs | 148 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 164 insertions(+), 82 deletions(-) diff --git a/src/server/backups.rs b/src/server/backups.rs index e225aec..cdb0409 100644 --- a/src/server/backups.rs +++ b/src/server/backups.rs @@ -1,3 +1,4 @@ +use crate::server::path::PathExt; use chrono::{Local, Utc}; use flate2::write::GzEncoder; use flate2::Compression; @@ -14,52 +15,6 @@ extern "C" { fn getegid() -> u32; } -/// List all files in `src_dir` and all child directories. -fn files(src_dir: PathBuf) -> io::Result> { - let mut dirs = vec![src_dir.clone()]; - let mut files: HashSet = HashSet::new(); - - while let Some(dir) = dirs.pop() { - for res in dir.read_dir()? { - let entry = res?; - - if entry.file_name() == "cache" { - continue; - } - - if entry.file_type()?.is_dir() { - dirs.push(entry.path()); - } else { - files.insert(entry.path().strip_prefix(&src_dir).unwrap().to_path_buf()); - } - } - } - - Ok(files) -} - -/// Check whether a file has been modified since the given timestamp. -/// -/// Note that this function will *only* return true if it can determine with certainty that the -/// file has not been modified. If any errors occur while obtaining the required metadata (e.g. if -/// the file system does not support this metadata), this function will return false. -fn not_modified_since>(time: chrono::DateTime, path: T) -> bool { - let path = path.as_ref(); - - if let Ok(metadata) = path.metadata() { - let last_modified = metadata.modified(); - - if let Ok(last_modified) = last_modified { - let t: chrono::DateTime = last_modified.into(); - let t = t.with_timezone(&Local); - - return t < time; - } - } - - false -} - #[derive(Debug, PartialEq, Serialize, Deserialize)] pub enum BackupType { Full, @@ -162,11 +117,7 @@ impl Backup { /// Returns a pointer to this backup's previous backup by cloning the Arc pointer. pub fn previous(&self) -> Option> { - if let Some(previous) = &self.previous { - Some(Arc::clone(&previous)) - } else { - None - } + self.previous.as_ref().map(Arc::clone) } /// Calculate the full state of the backup by applying all its ancestors' delta's in order, @@ -193,7 +144,7 @@ impl Backup { None } else if let Some(previous) = &self.previous { if n == 1 { - Some(Arc::clone(&previous)) + Some(Arc::clone(previous)) } else { previous.ancestor(n - 1) } @@ -232,26 +183,27 @@ impl Backup { let enc = GzEncoder::new(tar_gz, Compression::default()); let mut ar = tar::Builder::new(enc); - let mut added: HashMap> = HashMap::new(); + let mut delta = BackupDelta::new(); for (dir_in_tar, src_dir) in dirs { - let files = files(src_dir.clone())?; + let mut added_files: HashSet = HashSet::new(); - for path in &files { - ar.append_path_with_name(src_dir.join(path), dir_in_tar.join(path))?; + for entry in src_dir.read_dir_recursive()?.ignored("cache").files() { + let path = entry?.path(); + let stripped = path.strip_prefix(&src_dir).unwrap(); + + ar.append_path_with_name(&path, dir_in_tar.join(stripped))?; + added_files.insert(stripped.to_path_buf()); } - added.insert(dir_in_tar, files); + delta.added.insert(dir_in_tar, added_files); } Ok(Backup { previous: None, type_: BackupType::Full, start_time, - delta: BackupDelta { - added, - removed: HashMap::new(), - }, + delta, }) } @@ -274,17 +226,19 @@ impl Backup { let mut delta = BackupDelta::new(); for (dir_in_tar, src_dir) in dirs { - let files = files(src_dir.clone())?; - let added_files = files - .iter() - // This explicit negation is because we wish to also include files for which we - // couldn't determine the last modified time - .filter(|p| !not_modified_since(previous.start_time, src_dir.join(p))) - .cloned() - .collect::>(); + let mut all_files: HashSet = HashSet::new(); + let mut added_files: HashSet = HashSet::new(); - for path in added_files.iter() { - ar.append_path_with_name(src_dir.join(path), dir_in_tar.join(path))?; + for entry in src_dir.read_dir_recursive()?.ignored("cache").files() { + let path = entry?.path(); + let stripped = path.strip_prefix(&src_dir).unwrap(); + + if !path.not_modified_since(previous.start_time) { + ar.append_path_with_name(&path, dir_in_tar.join(stripped))?; + added_files.insert(stripped.to_path_buf()); + } + + all_files.insert(stripped.to_path_buf()); } delta.added.insert(dir_in_tar.clone(), added_files); @@ -292,7 +246,7 @@ impl Backup { if let Some(previous_files) = previous_state.get(&dir_in_tar) { delta.removed.insert( dir_in_tar, - previous_files.difference(&files).cloned().collect(), + previous_files.difference(&all_files).cloned().collect(), ); } } diff --git a/src/server/path.rs b/src/server/path.rs index d9df799..b8b5ae9 100644 --- a/src/server/path.rs +++ b/src/server/path.rs @@ -1,19 +1,147 @@ -use chrono::Utc; +use chrono::{Local, Utc}; use std::collections::HashSet; -use std::path::PathBuf; +use std::ffi::OsString; +use std::fs::DirEntry; +use std::path::{Path, PathBuf}; use std::{fs, io}; -struct ReadDirRecursive { - ignored_dirs: HashSet, - read_dir: Option, - stack: Vec, +pub struct ReadDirRecursive { + ignored: HashSet, + read_dir: fs::ReadDir, + dir_stack: Vec, + files_only: bool, } impl ReadDirRecursive { - // pub fn new() + /// Start the iterator for a new directory + pub fn start>(path: P) -> io::Result { + let path = path.as_ref(); + let read_dir = path.read_dir()?; + + Ok(ReadDirRecursive { + ignored: HashSet::new(), + read_dir, + dir_stack: Vec::new(), + files_only: false, + }) + } + + pub fn ignored>(mut self, s: S) -> Self { + self.ignored.insert(s.into()); + + self + } + + pub fn files(mut self) -> Self { + self.files_only = true; + + self + } + + /// Tries to populate the `read_dir` field with a new `ReadDir` instance to consume. + fn next_read_dir(&mut self) -> io::Result { + if let Some(path) = self.dir_stack.pop() { + self.read_dir = path.read_dir()?; + + Ok(true) + } else { + Ok(false) + } + } + + /// Convenience method to add a new directory to the stack. + fn push_entry(&mut self, entry: &io::Result) { + if let Ok(entry) = entry { + if entry.path().is_dir() { + self.dir_stack.push(entry.path()); + } + } + } + + /// Determine whether an entry should be returned by the iterator. + fn should_return(&self, entry: &io::Result) -> bool { + if let Ok(entry) = entry { + let mut res = !self.ignored.contains(&entry.file_name()); + + // Please just let me combine these already + if self.files_only { + if let Ok(file_type) = entry.file_type() { + res = res && file_type.is_file(); + } + // We couldn't determine if it's a file, so we don't return it + else { + res = false; + } + } + + res + } else { + true + } + } } -trait PathExt { - fn modified_since(timestamp: chrono::DateTime) -> bool; - fn read_dir_recusive() -> ReadDirRecursive; +impl Iterator for ReadDirRecursive { + type Item = io::Result; + + fn next(&mut self) -> Option { + loop { + // First, we try to consume the current directory's items + while let Some(entry) = self.read_dir.next() { + self.push_entry(&entry); + + if self.should_return(&entry) { + return Some(entry); + } + } + + // If we get an error while setting up a new directory, we return this, otherwise we + // keep trying to consume the directories + match self.next_read_dir() { + Ok(true) => (), + // There's no more directories to traverse, so the iterator is done + Ok(false) => return None, + Err(e) => return Some(Err(e)), + } + } + } +} + +pub trait PathExt { + /// Confirm whether the file has not been modified since the given timestamp. + /// + /// This function will only return true if it can determine with certainty that the file hasn't + /// been modified. + /// + /// # Args + /// + /// * `timestamp` - Timestamp to compare modified time with + /// + /// # Returns + /// + /// True if the file has not been modified for sure, false otherwise. + fn not_modified_since(&self, timestamp: chrono::DateTime) -> bool; + + /// An extension of the `read_dir` command that runs through the entire underlying directory + /// structure using breadth-first search + fn read_dir_recursive(&self) -> io::Result; +} + +impl PathExt for Path { + fn not_modified_since(&self, timestamp: chrono::DateTime) -> bool { + if let Ok(metadata) = self.metadata() { + if let Ok(last_modified) = metadata.modified() { + let t: chrono::DateTime = last_modified.into(); + let t = t.with_timezone(&Local); + + return t < timestamp; + } + } + + false + } + + fn read_dir_recursive(&self) -> io::Result { + ReadDirRecursive::start(self) + } }