From ef631fab1d6c35998cfedbf8c4714bd41deff981 Mon Sep 17 00:00:00 2001
From: Chewing_Bever
Date: Mon, 19 Jun 2023 14:04:38 +0200
Subject: [PATCH] refactor: separate backup logic into own module

---
 src/backup/delta.rs            |  75 +++++++
 src/backup/manager.rs          | 128 ++++++++++++
 src/backup/mod.rs              | 150 ++++++++++++++
 src/{server => backup}/path.rs |   0
 src/main.rs                    |   3 +-
 src/server/backups.rs          | 345 ---------------------------------
 src/server/command.rs          |   3 +-
 src/server/mod.rs              |   3 -
 src/server/process.rs          |   2 +-
 9 files changed, 358 insertions(+), 351 deletions(-)
 create mode 100644 src/backup/delta.rs
 create mode 100644 src/backup/manager.rs
 create mode 100644 src/backup/mod.rs
 rename src/{server => backup}/path.rs (100%)
 delete mode 100644 src/server/backups.rs

diff --git a/src/backup/delta.rs b/src/backup/delta.rs
new file mode 100644
index 0000000..15f233b
--- /dev/null
+++ b/src/backup/delta.rs
@@ -0,0 +1,75 @@
+use serde::{Deserialize, Serialize};
+use std::collections::{HashMap, HashSet};
+use std::path::PathBuf;
+
+/// Represents the changes relative to the previous backup
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Delta {
+    /// What files were added/modified in each part of the tarball.
+    pub added: HashMap<PathBuf, HashSet<PathBuf>>,
+    /// What files were removed in this backup, in comparison to the previous backup. For full
+    /// backups, this will always be empty, as they do not consider previous backups.
+    /// The map stores a separate list for each top-level directory, as the contents of these
+    /// directories can come from different source directories.
+    pub removed: HashMap<PathBuf, HashSet<PathBuf>>,
+}
+
+impl Delta {
+    pub fn new() -> Self {
+        Self {
+            added: HashMap::new(),
+            removed: HashMap::new(),
+        }
+    }
+
+    /// Update the current state so that its result becomes the merge of itself and the other
+    /// state.
+    pub fn merge(&mut self, delta: &Self) {
+        for (dir, added) in delta.added.iter() {
+            // Files that were removed in the current state, but added in the new state, are no
+            // longer removed
+            if let Some(orig_removed) = self.removed.get_mut(dir) {
+                orig_removed.retain(|k| !added.contains(k));
+            }
+
+            // Newly added files are added to the state as well
+            if let Some(orig_added) = self.added.get_mut(dir) {
+                orig_added.extend(added.iter().cloned());
+            } else {
+                self.added.insert(dir.clone(), added.clone());
+            }
+        }
+
+        for (dir, removed) in delta.removed.iter() {
+            // Files that were originally added, but now deleted are removed from the added list
+            if let Some(orig_added) = self.added.get_mut(dir) {
+                orig_added.retain(|k| !removed.contains(k));
+            }
+
+            // Newly removed files are added to the state as well
+            if let Some(orig_removed) = self.removed.get_mut(dir) {
+                orig_removed.extend(removed.iter().cloned());
+            } else {
+                self.removed.insert(dir.clone(), removed.clone());
+            }
+        }
+    }
+
+    /// Modify the given state by applying this delta's changes to it
+    pub fn apply(&self, state: &mut HashMap<PathBuf, HashSet<PathBuf>>) {
+        // First we add new files, then we remove the old ones
+        for (dir, added) in self.added.iter() {
+            if let Some(current) = state.get_mut(dir) {
+                current.extend(added.iter().cloned());
+            } else {
+                state.insert(dir.clone(), added.clone());
+            }
+        }
+
+        for (dir, removed) in self.removed.iter() {
+            if let Some(current) = state.get_mut(dir) {
+                current.retain(|k| !removed.contains(k));
+            }
+        }
+    }
+}
diff --git a/src/backup/manager.rs b/src/backup/manager.rs
new file mode 100644
index 0000000..71633a3
--- /dev/null
+++ b/src/backup/manager.rs
@@ -0,0 +1,128 @@
+use super::Backup;
+use std::fs::File;
+use std::io;
+use std::path::PathBuf;
+
+pub struct Manager {
+    backup_dir: PathBuf,
+    config_dir: PathBuf,
+    world_dir: PathBuf,
+    chain_len: u64,
+    chains_to_keep: u64,
+    chains: Vec<Vec<Backup>>,
+}
+
+impl Manager {
+    const METADATA_FILE: &str = "alex.json";
+
+    /// Initialize a new instance of a `BackupManager`.
+    pub fn new(
+        backup_dir: PathBuf,
+        config_dir: PathBuf,
+        world_dir: PathBuf,
+        chain_len: u64,
+        chains_to_keep: u64,
+    ) -> Self {
+        Self {
+            backup_dir,
+            config_dir,
+            world_dir,
+            chain_len,
+            chains_to_keep,
+            chains: Vec::new(),
+        }
+    }
+
+    /// Create a new backup with the expected type.
+    pub fn create_backup(&mut self) -> io::Result<()> {
+        let dirs = vec![
+            (PathBuf::from("config"), self.config_dir.clone()),
+            (PathBuf::from("worlds"), self.world_dir.clone()),
+        ];
+
+        // I kinda hate this statement, please just let me combine let statements in if statements
+        // already
+        let backup = if let Some(current_chain) = self.chains.last() {
+            let current_chain_len: u64 = current_chain.len().try_into().unwrap();
+
+            if current_chain_len < self.chain_len {
+                if let Some(previous_backup) = current_chain.last() {
+                    let state = Backup::state(current_chain);
+
+                    Backup::create_from(state, previous_backup.start_time, &self.backup_dir, dirs)?
+                } else {
+                    Backup::create(&self.backup_dir, dirs)?
+                }
+            } else {
+                self.chains.push(Vec::new());
+
+                Backup::create(&self.backup_dir, dirs)?
+            }
+        } else {
+            self.chains.push(Vec::new());
+
+            Backup::create(&self.backup_dir, dirs)?
+        };
+
+        // The above statement always creates this element, so this unwrap is safe
+        self.chains.last_mut().unwrap().push(backup);
+
+        self.save()?;
+
+        Ok(())
+    }
+
+    /// Delete all backups associated with outdated chains, and forget those chains.
+    pub fn remove_old_backups(&mut self) -> std::io::Result<()> {
+        let chains_to_store: usize = self.chains_to_keep.try_into().unwrap();
+
+        if chains_to_store < self.chains.len() {
+            let mut remove_count: usize = self.chains.len() - chains_to_store;
+
+            // We only count finished chains towards the list of stored chains
+            let chain_len: usize = self.chain_len.try_into().unwrap();
+            if self.chains.last().unwrap().len() < chain_len {
+                remove_count -= 1;
+            }
+
+            for chain in self.chains.drain(..remove_count) {
+                for backup in chain {
+                    let path = Backup::path(&self.backup_dir, backup.start_time);
+                    std::fs::remove_file(path)?;
+                }
+            }
+        }
+
+        self.save()?;
+
+        Ok(())
+    }
+
+    /// Write the in-memory state to disk.
+    pub fn save(&self) -> std::io::Result<()> {
+        let json_file = File::create(self.backup_dir.join(Self::METADATA_FILE))?;
+        serde_json::to_writer(json_file, &self.chains)?;
+
+        Ok(())
+    }
+
+    /// Overwrite the in-memory state with the on-disk state.
+    pub fn load(&mut self) -> std::io::Result<()> {
+        let json_file = match File::open(self.backup_dir.join(Self::METADATA_FILE)) {
+            Ok(f) => f,
+            Err(e) => {
+                // Don't error out if the file isn't there, it will be created when necessary
+                if e.kind() == io::ErrorKind::NotFound {
+                    self.chains = Vec::new();
+
+                    return Ok(());
+                } else {
+                    return Err(e);
+                }
+            }
+        };
+        self.chains = serde_json::from_reader(json_file)?;
+
+        Ok(())
+    }
+}
diff --git a/src/backup/mod.rs b/src/backup/mod.rs
new file mode 100644
index 0000000..600ef6d
--- /dev/null
+++ b/src/backup/mod.rs
@@ -0,0 +1,150 @@
+mod delta;
+mod manager;
+mod path;
+
+use delta::Delta;
+pub use manager::Manager;
+
+use chrono::Utc;
+use flate2::write::GzEncoder;
+use flate2::Compression;
+use path::PathExt;
+use serde::{Deserialize, Serialize};
+use std::collections::{HashMap, HashSet};
+use std::fs::File;
+use std::io;
+use std::path::{Path, PathBuf};
+
+#[derive(Debug, PartialEq, Serialize, Deserialize)]
+pub enum BackupType {
+    Full,
+    Incremental,
+}
+
+/// Represents a successful backup
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Backup {
+    /// When the backup was started (also corresponds to the name)
+    pub start_time: chrono::DateTime<Utc>,
+    /// Type of the backup
+    pub type_: BackupType,
+    pub delta: Delta,
+}
+
+impl Backup {
+    const FILENAME_FORMAT: &str = "%Y-%m-%d_%H-%M-%S.tar.gz";
+
+    /// Resolve the state of the list of backups by applying their deltas in-order to an initially
+    /// empty state.
+    pub fn state(backups: &Vec<Backup>) -> HashMap<PathBuf, HashSet<PathBuf>> {
+        let mut state: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
+
+        for backup in backups {
+            backup.delta.apply(&mut state);
+        }
+
+        state
+    }
+
+    /// Return the path to a backup file by properly formatting the data.
+    pub fn path<P: AsRef<Path>>(backup_dir: P, start_time: chrono::DateTime<Utc>) -> PathBuf {
+        let backup_dir = backup_dir.as_ref();
+
+        let filename = format!("{}", start_time.format(Self::FILENAME_FORMAT));
+        backup_dir.join(filename)
+    }
+
+    /// Create a new Full backup, populated with the given directories.
+    ///
+    /// # Arguments
+    ///
+    /// * `backup_dir` - Directory to store archive in
+    /// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name
+    /// under which `src_dir`'s contents should be stored in the archive
+    ///
+    /// # Returns
+    ///
+    /// The `Backup` instance describing this new backup.
+    pub fn create<P: AsRef<Path>>(
+        backup_dir: P,
+        dirs: Vec<(PathBuf, PathBuf)>,
+    ) -> io::Result<Backup> {
+        let start_time = chrono::offset::Utc::now();
+
+        let path = Self::path(backup_dir, start_time);
+        let tar_gz = File::create(path)?;
+        let enc = GzEncoder::new(tar_gz, Compression::default());
+        let mut ar = tar::Builder::new(enc);
+
+        let mut delta = Delta::new();
+
+        for (dir_in_tar, src_dir) in dirs {
+            let mut added_files: HashSet<PathBuf> = HashSet::new();
+
+            for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
+                let path = entry?.path();
+                let stripped = path.strip_prefix(&src_dir).unwrap();
+
+                ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
+                added_files.insert(stripped.to_path_buf());
+            }
+
+            delta.added.insert(dir_in_tar, added_files);
+        }
+
+        Ok(Backup {
+            type_: BackupType::Full,
+            start_time,
+            delta,
+        })
+    }
+
+    /// Create a new incremental backup from a given previous backup
+    pub fn create_from<P: AsRef<Path>>(
+        previous_state: HashMap<PathBuf, HashSet<PathBuf>>,
+        previous_start_time: chrono::DateTime<Utc>,
+        backup_dir: P,
+        dirs: Vec<(PathBuf, PathBuf)>,
+    ) -> io::Result<Backup> {
+        let start_time = chrono::offset::Utc::now();
+
+        let path = Self::path(backup_dir, start_time);
+        let tar_gz = File::create(path)?;
+        let enc = GzEncoder::new(tar_gz, Compression::default());
+        let mut ar = tar::Builder::new(enc);
+
+        let mut delta = Delta::new();
+
+        for (dir_in_tar, src_dir) in dirs {
+            let mut all_files: HashSet<PathBuf> = HashSet::new();
+            let mut added_files: HashSet<PathBuf> = HashSet::new();
+
+            for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
+                let path = entry?.path();
+                let stripped = path.strip_prefix(&src_dir).unwrap();
+
+                if !path.not_modified_since(previous_start_time) {
+                    ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
+                    added_files.insert(stripped.to_path_buf());
+                }
+
+                all_files.insert(stripped.to_path_buf());
+            }
+
+            delta.added.insert(dir_in_tar.clone(), added_files);
+
+            if let Some(previous_files) = previous_state.get(&dir_in_tar) {
+                delta.removed.insert(
+                    dir_in_tar,
+                    previous_files.difference(&all_files).cloned().collect(),
+                );
+            }
+        }
+
+        Ok(Backup {
+            type_: BackupType::Incremental,
+            start_time,
+            delta,
+        })
+    }
+}
diff --git a/src/server/path.rs b/src/backup/path.rs
similarity index 100%
rename from src/server/path.rs
rename to src/backup/path.rs
diff --git a/src/main.rs b/src/main.rs
index 0aef676..40d112c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+mod backup;
 mod cli;
 mod server;
 mod signals;
@@ -59,7 +60,7 @@ fn command_run(cli: &Cli, args: &RunArgs) -> io::Result<()> {
 }
 
 fn commands_backup(cli: &Cli, _args: &BackupArgs) -> io::Result<()> {
-    let mut manager = server::BackupManager::new(
+    let mut manager = backup::Manager::new(
         cli.backup.clone(),
         cli.config.clone(),
         cli.world.clone(),
diff --git a/src/server/backups.rs b/src/server/backups.rs
deleted file mode 100644
index 5097746..0000000
--- a/src/server/backups.rs
+++ /dev/null
@@ -1,345 +0,0 @@
-use crate::server::path::PathExt;
-use chrono::Utc;
-use flate2::write::GzEncoder;
-use flate2::Compression;
-use serde::{Deserialize, Serialize};
-use std::collections::{HashMap, HashSet};
-use std::fs::File;
-use std::io;
-use std::path::{Path, PathBuf};
-
-#[link(name = "c")]
-extern "C" {
-    fn geteuid() -> u32;
-    fn getegid() -> u32;
-}
-
-#[derive(Debug, PartialEq, Serialize, Deserialize)]
-pub enum BackupType {
-    Full,
-    Incremental,
-}
-
-/// Represents the changes relative to the previous backup
-#[derive(Debug, Serialize, Deserialize)]
-pub struct BackupDelta {
-    /// What files were
added/modified in each part of the tarball. - pub added: HashMap>, - /// What files were removed in this backup, in comparison to the previous backup. For full - /// backups, this will always be empty, as they do not consider previous backups. - /// The map stores a separate list for each top-level directory, as the contents of these - /// directories can come for different source directories. - pub removed: HashMap>, -} - -impl BackupDelta { - pub fn new() -> Self { - BackupDelta { - added: HashMap::new(), - removed: HashMap::new(), - } - } - - /// Update the current state so that its result becomes the merge of itself and the other - /// state. - pub fn merge(&mut self, delta: &BackupDelta) { - for (dir, added) in delta.added.iter() { - // Files that were removed in the current state, but added in the new state, are no - // longer removed - if let Some(orig_removed) = self.removed.get_mut(dir) { - orig_removed.retain(|k| !added.contains(k)); - } - - // Newly added files are added to the state as well - if let Some(orig_added) = self.added.get_mut(dir) { - orig_added.extend(added.iter().cloned()); - } else { - self.added.insert(dir.clone(), added.clone()); - } - } - - for (dir, removed) in delta.removed.iter() { - // Files that were originally added, but now deleted are removed from the added list - if let Some(orig_added) = self.added.get_mut(dir) { - orig_added.retain(|k| !removed.contains(k)); - } - - // Newly removed files are added to the state as well - if let Some(orig_removed) = self.removed.get_mut(dir) { - orig_removed.extend(removed.iter().cloned()); - } else { - self.removed.insert(dir.clone(), removed.clone()); - } - } - } - - /// Modify the given state by applying this delta's changes to it - pub fn apply(&self, state: &mut HashMap>) { - // First we add new files, then we remove the old ones - for (dir, added) in self.added.iter() { - if let Some(current) = state.get_mut(dir) { - current.extend(added.iter().cloned()); - } else { - state.insert(dir.clone(), added.clone()); - } - } - - for (dir, removed) in self.removed.iter() { - if let Some(current) = state.get_mut(dir) { - current.retain(|k| !removed.contains(k)); - } - } - } -} - -/// Represents a successful backup -#[derive(Debug, Serialize, Deserialize)] -pub struct Backup { - /// When the backup was started (also corresponds to the name) - start_time: chrono::DateTime, - /// Type of the backup - type_: BackupType, - delta: BackupDelta, -} - -impl Backup { - const FILENAME_FORMAT: &str = "%Y-%m-%d_%H-%M-%S.tar.gz"; - - /// Resolve the state of the list of backups by applying their deltas in-order to an initially - /// empty state. - pub fn state(backups: &Vec) -> HashMap> { - let mut state: HashMap> = HashMap::new(); - - for backup in backups { - backup.delta.apply(&mut state); - } - - state - } - - /// Return the path to a backup file by properly formatting the data. - pub fn path>(backup_dir: P, start_time: chrono::DateTime) -> PathBuf { - let backup_dir = backup_dir.as_ref(); - - let filename = format!("{}", start_time.format(Self::FILENAME_FORMAT)); - backup_dir.join(filename) - } - - /// Create a new Full backup, populated with the given directories. - /// - /// # Arguments - /// - /// * `backup_dir` - Directory to store archive in - /// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name - /// under which `src_dir`'s contents should be stored in the archive - /// - /// # Returns - /// - /// The `Backup` instance describing this new backup. 
- pub fn create>( - backup_dir: P, - dirs: Vec<(PathBuf, PathBuf)>, - ) -> io::Result { - let start_time = chrono::offset::Utc::now(); - - let path = Self::path(backup_dir, start_time); - let tar_gz = File::create(path)?; - let enc = GzEncoder::new(tar_gz, Compression::default()); - let mut ar = tar::Builder::new(enc); - - let mut delta = BackupDelta::new(); - - for (dir_in_tar, src_dir) in dirs { - let mut added_files: HashSet = HashSet::new(); - - for entry in src_dir.read_dir_recursive()?.ignored("cache").files() { - let path = entry?.path(); - let stripped = path.strip_prefix(&src_dir).unwrap(); - - ar.append_path_with_name(&path, dir_in_tar.join(stripped))?; - added_files.insert(stripped.to_path_buf()); - } - - delta.added.insert(dir_in_tar, added_files); - } - - Ok(Backup { - type_: BackupType::Full, - start_time, - delta, - }) - } - - /// Create a new incremental backup from a given previous backup - pub fn create_from>( - previous_state: HashMap>, - previous_start_time: chrono::DateTime, - backup_dir: P, - dirs: Vec<(PathBuf, PathBuf)>, - ) -> io::Result { - let start_time = chrono::offset::Utc::now(); - - let path = Self::path(backup_dir, start_time); - let tar_gz = File::create(path)?; - let enc = GzEncoder::new(tar_gz, Compression::default()); - let mut ar = tar::Builder::new(enc); - - let mut delta = BackupDelta::new(); - - for (dir_in_tar, src_dir) in dirs { - let mut all_files: HashSet = HashSet::new(); - let mut added_files: HashSet = HashSet::new(); - - for entry in src_dir.read_dir_recursive()?.ignored("cache").files() { - let path = entry?.path(); - let stripped = path.strip_prefix(&src_dir).unwrap(); - - if !path.not_modified_since(previous_start_time) { - ar.append_path_with_name(&path, dir_in_tar.join(stripped))?; - added_files.insert(stripped.to_path_buf()); - } - - all_files.insert(stripped.to_path_buf()); - } - - delta.added.insert(dir_in_tar.clone(), added_files); - - if let Some(previous_files) = previous_state.get(&dir_in_tar) { - delta.removed.insert( - dir_in_tar, - previous_files.difference(&all_files).cloned().collect(), - ); - } - } - - Ok(Backup { - type_: BackupType::Incremental, - start_time, - delta, - }) - } -} - -pub struct BackupManager { - backup_dir: PathBuf, - config_dir: PathBuf, - world_dir: PathBuf, - chain_len: u64, - chains_to_keep: u64, - chains: Vec>, -} - -impl BackupManager { - const METADATA_FILE: &str = "alex.json"; - - /// Initialize a new instance of a `BackupManager`. - pub fn new( - backup_dir: PathBuf, - config_dir: PathBuf, - world_dir: PathBuf, - chain_len: u64, - chains_to_keep: u64, - ) -> Self { - BackupManager { - backup_dir, - config_dir, - world_dir, - chain_len, - chains_to_keep, - chains: Vec::new(), - } - } - - /// Create a new backup with the expected type. - pub fn create_backup(&mut self) -> io::Result<()> { - let dirs = vec![ - (PathBuf::from("config"), self.config_dir.clone()), - (PathBuf::from("worlds"), self.world_dir.clone()), - ]; - - // I kinda hate this statement, please just let me combine let statements in if statements - // already - let backup = if let Some(current_chain) = self.chains.last() { - let current_chain_len: u64 = current_chain.len().try_into().unwrap(); - - if current_chain_len < self.chain_len { - if let Some(previous_backup) = current_chain.last() { - let state = Backup::state(current_chain); - - Backup::create_from(state, previous_backup.start_time, &self.backup_dir, dirs)? - } else { - Backup::create(&self.backup_dir, dirs)? 
- } - } else { - self.chains.push(Vec::new()); - - Backup::create(&self.backup_dir, dirs)? - } - } else { - self.chains.push(Vec::new()); - - Backup::create(&self.backup_dir, dirs)? - }; - - // The above statement always creates this element, so this unwrap is safe - self.chains.last_mut().unwrap().push(backup); - - self.save()?; - - Ok(()) - } - - /// Delete all backups associated with outdated chains, and forget those chains. - pub fn remove_old_backups(&mut self) -> std::io::Result<()> { - let chains_to_store: usize = self.chains_to_keep.try_into().unwrap(); - - if chains_to_store < self.chains.len() { - let mut remove_count: usize = self.chains.len() - chains_to_store; - - // We only count finished chains towards the list of stored chains - let chain_len: usize = self.chain_len.try_into().unwrap(); - if self.chains.last().unwrap().len() < chain_len { - remove_count -= 1; - } - - for chain in self.chains.drain(..remove_count) { - for backup in chain { - let path = Backup::path(&self.backup_dir, backup.start_time); - std::fs::remove_file(path)?; - } - } - } - - self.save()?; - - Ok(()) - } - - /// Write the in-memory state to disk. - pub fn save(&self) -> std::io::Result<()> { - let json_file = File::create(self.backup_dir.join(Self::METADATA_FILE))?; - serde_json::to_writer(json_file, &self.chains)?; - - Ok(()) - } - - /// Overwrite the in-memory state with the on-disk state. - pub fn load(&mut self) -> std::io::Result<()> { - let json_file = match File::open(self.backup_dir.join(Self::METADATA_FILE)) { - Ok(f) => f, - Err(e) => { - // Don't error out if the file isn't there, it will be created when necessary - if e.kind() == io::ErrorKind::NotFound { - self.chains = Vec::new(); - - return Ok(()); - } else { - return Err(e); - } - } - }; - self.chains = serde_json::from_reader(json_file)?; - - Ok(()) - } -} diff --git a/src/server/command.rs b/src/server/command.rs index bd1f0e6..808d6bf 100644 --- a/src/server/command.rs +++ b/src/server/command.rs @@ -1,4 +1,5 @@ -use crate::server::{BackupManager, ServerProcess}; +use crate::backup::Manager as BackupManager; +use crate::server::ServerProcess; use clap::ValueEnum; use std::fmt; use std::fs::File; diff --git a/src/server/mod.rs b/src/server/mod.rs index 4c2beb2..e3e3131 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -1,8 +1,5 @@ -mod backups; mod command; -mod path; mod process; -pub use backups::BackupManager; pub use command::{ServerCommand, ServerType}; pub use process::ServerProcess; diff --git a/src/server/process.rs b/src/server/process.rs index 1e24373..3ac7beb 100644 --- a/src/server/process.rs +++ b/src/server/process.rs @@ -1,4 +1,4 @@ -use crate::server::BackupManager; +use crate::backup::Manager as BackupManager; use crate::server::ServerType; use std::io::Write; use std::process::Child;
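
After this refactor, callers no longer reach into `server` for backup handling; they drive it through `backup::Manager`, exactly as `commands_backup` in src/main.rs does. The sketch below shows one plausible call sequence for that API. It is illustrative only: the directories and chain parameters are placeholder literals standing in for whatever the CLI normally supplies.

    // Hypothetical driver inside the crate, mirroring commands_backup in src/main.rs.
    // The literal paths and numbers below are placeholders, not values from this patch.
    fn run_backup_cycle() -> std::io::Result<()> {
        let mut manager = crate::backup::Manager::new(
            std::path::PathBuf::from("backups"), // backup_dir: tarballs and alex.json live here
            std::path::PathBuf::from("config"),  // config_dir: archived under "config/"
            std::path::PathBuf::from("worlds"),  // world_dir: archived under "worlds/"
            4,                                   // chain_len: backups per chain
            2,                                   // chains_to_keep
        );

        // Restore chain metadata from alex.json; a missing file is treated as an empty state.
        manager.load()?;
        // Full backup when a new chain is started, incremental backup otherwise.
        manager.create_backup()?;
        // Delete the tarballs of chains that fall outside the retention window.
        manager.remove_old_backups()?;

        Ok(())
    }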
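
The `Delta` type carries the whole incremental scheme: every backup records, per top-level directory in the tarball, which files it added and which it removed, and `Backup::state` rebuilds the current file set by applying those deltas in order. The stand-alone snippet below re-implements that apply rule on plain standard-library maps purely to illustrate the semantics; the file names are invented.

    use std::collections::{HashMap, HashSet};
    use std::path::PathBuf;

    type FileState = HashMap<PathBuf, HashSet<PathBuf>>;

    // Same add-then-remove rule as Delta::apply, spelled out on plain maps.
    fn apply(state: &mut FileState, added: &FileState, removed: &FileState) {
        for (dir, files) in added {
            state.entry(dir.clone()).or_default().extend(files.iter().cloned());
        }
        for (dir, files) in removed {
            if let Some(current) = state.get_mut(dir) {
                current.retain(|f| !files.contains(f));
            }
        }
    }

    fn main() {
        let worlds = PathBuf::from("worlds");
        let set = |names: &[&str]| names.iter().copied().map(PathBuf::from).collect::<HashSet<_>>();

        // Full backup: everything present at the time counts as "added".
        let full_added: FileState =
            HashMap::from([(worlds.clone(), set(&["level.dat", "region-0.mca", "session.lock"]))]);

        // Incremental backup: region-0.mca changed, region-1.mca is new, session.lock disappeared.
        let incr_added: FileState =
            HashMap::from([(worlds.clone(), set(&["region-0.mca", "region-1.mca"]))]);
        let incr_removed: FileState = HashMap::from([(worlds.clone(), set(&["session.lock"]))]);

        let mut state = FileState::new();
        apply(&mut state, &full_added, &FileState::new());
        apply(&mut state, &incr_added, &incr_removed);

        // state["worlds"] is now {level.dat, region-0.mca, region-1.mca}.
        println!("{:?}", state[&worlds]);
    }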
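
One detail of `remove_old_backups` that is easy to miss: a chain that has not yet reached `chain_len` backups does not count towards `chains_to_keep`, so the in-progress chain is preserved on top of the configured number of finished chains. A small, hypothetical distillation of that arithmetic (not code from the patch):

    /// How many chains, oldest first, get their tarballs deleted.
    fn chains_to_remove(total_chains: usize, chains_to_keep: usize, last_chain_finished: bool) -> usize {
        if total_chains <= chains_to_keep {
            return 0;
        }
        let mut remove_count = total_chains - chains_to_keep;
        // An unfinished chain does not count towards the kept total,
        // so it survives in addition to `chains_to_keep` finished chains.
        if !last_chain_finished {
            remove_count -= 1;
        }
        remove_count
    }

    fn main() {
        // 4 chains on disk, keep 2, newest chain still being filled:
        // only the single oldest chain is deleted.
        assert_eq!(chains_to_remove(4, 2, false), 1);
        // Same situation once the newest chain is full: the two oldest go.
        assert_eq!(chains_to_remove(4, 2, true), 2);
        // Nothing to do while we are at or below the limit.
        assert_eq!(chains_to_remove(2, 2, false), 0);
        println!("retention arithmetic checks out");
    }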