refactor: separate backup logic into its own module
ci/woodpecker/push/clippy Pipeline failed
ci/woodpecker/push/lint Pipeline was successful
ci/woodpecker/push/build Pipeline was successful

incremental-backups
Jef Roosens 2023-06-19 14:04:38 +02:00
parent 74a0b91fd1
commit ef631fab1d
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
9 changed files with 358 additions and 351 deletions

src/backup/delta.rs 100644 (75 lines added)

@ -0,0 +1,75 @@
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
/// Represents the changes relative to the previous backup
#[derive(Debug, Serialize, Deserialize)]
pub struct Delta {
/// What files were added/modified in each part of the tarball.
pub added: HashMap<PathBuf, HashSet<PathBuf>>,
/// What files were removed in this backup, in comparison to the previous backup. For full
/// backups, this will always be empty, as they do not consider previous backups.
/// The map stores a separate list for each top-level directory, as the contents of these
/// directories can come from different source directories.
pub removed: HashMap<PathBuf, HashSet<PathBuf>>,
}
impl Delta {
pub fn new() -> Self {
Self {
added: HashMap::new(),
removed: HashMap::new(),
}
}
/// Merge the given delta into this one. Applying the merged delta is equivalent to
/// applying this delta and then the given one in sequence.
pub fn merge(&mut self, delta: &Self) {
for (dir, added) in delta.added.iter() {
// Files that were removed in the current state, but added in the new state, are no
// longer removed
if let Some(orig_removed) = self.removed.get_mut(dir) {
orig_removed.retain(|k| !added.contains(k));
}
// Newly added files are added to the state as well
if let Some(orig_added) = self.added.get_mut(dir) {
orig_added.extend(added.iter().cloned());
} else {
self.added.insert(dir.clone(), added.clone());
}
}
for (dir, removed) in delta.removed.iter() {
// Files that were originally added but are now deleted are removed from the added list
if let Some(orig_added) = self.added.get_mut(dir) {
orig_added.retain(|k| !removed.contains(k));
}
// Newly removed files are added to the state as well
if let Some(orig_removed) = self.removed.get_mut(dir) {
orig_removed.extend(removed.iter().cloned());
} else {
self.removed.insert(dir.clone(), removed.clone());
}
}
}
/// Modify the given state by applying this delta's changes to it
pub fn apply(&self, state: &mut HashMap<PathBuf, HashSet<PathBuf>>) {
// First we add new files, then we remove the old ones
for (dir, added) in self.added.iter() {
if let Some(current) = state.get_mut(dir) {
current.extend(added.iter().cloned());
} else {
state.insert(dir.clone(), added.clone());
}
}
for (dir, removed) in self.removed.iter() {
if let Some(current) = state.get_mut(dir) {
current.retain(|k| !removed.contains(k));
}
}
}
}
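
To make the delta semantics concrete, here is a minimal sketch (not part of the commit) of how `merge` and `apply` are intended to interact; the directory and file names are made up:

use std::collections::{HashMap, HashSet};
use std::path::PathBuf;

fn delta_example() {
    // d1 adds worlds/a.txt
    let mut d1 = Delta::new();
    d1.added
        .insert(PathBuf::from("worlds"), HashSet::from([PathBuf::from("a.txt")]));

    // d2 removes worlds/a.txt again and adds worlds/b.txt
    let mut d2 = Delta::new();
    d2.added
        .insert(PathBuf::from("worlds"), HashSet::from([PathBuf::from("b.txt")]));
    d2.removed
        .insert(PathBuf::from("worlds"), HashSet::from([PathBuf::from("a.txt")]));

    // After the merge, a.txt is dropped from `added` and recorded in `removed`
    d1.merge(&d2);

    // Applying the merged delta to an empty state leaves only b.txt, the same
    // result as applying d1 and then d2 one after the other
    let mut state: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
    d1.apply(&mut state);
    assert_eq!(
        state[&PathBuf::from("worlds")],
        HashSet::from([PathBuf::from("b.txt")])
    );
}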

src/backup/manager.rs 100644 (128 lines added)

@ -0,0 +1,128 @@
use super::Backup;
use std::fs::File;
use std::io;
use std::path::PathBuf;
pub struct Manager {
backup_dir: PathBuf,
config_dir: PathBuf,
world_dir: PathBuf,
chain_len: u64,
chains_to_keep: u64,
chains: Vec<Vec<Backup>>,
}
impl Manager {
const METADATA_FILE: &str = "alex.json";
/// Initialize a new instance of a `Manager`.
pub fn new(
backup_dir: PathBuf,
config_dir: PathBuf,
world_dir: PathBuf,
chain_len: u64,
chains_to_keep: u64,
) -> Self {
Self {
backup_dir,
config_dir,
world_dir,
chain_len,
chains_to_keep,
chains: Vec::new(),
}
}
/// Create a new backup, either full or incremental depending on the state of the current chain.
pub fn create_backup(&mut self) -> io::Result<()> {
let dirs = vec![
(PathBuf::from("config"), self.config_dir.clone()),
(PathBuf::from("worlds"), self.world_dir.clone()),
];
// I kinda hate this statement, please just let me combine let statements in if statements
// already
let backup = if let Some(current_chain) = self.chains.last() {
let current_chain_len: u64 = current_chain.len().try_into().unwrap();
if current_chain_len < self.chain_len {
if let Some(previous_backup) = current_chain.last() {
let state = Backup::state(current_chain);
Backup::create_from(state, previous_backup.start_time, &self.backup_dir, dirs)?
} else {
Backup::create(&self.backup_dir, dirs)?
}
} else {
self.chains.push(Vec::new());
Backup::create(&self.backup_dir, dirs)?
}
} else {
self.chains.push(Vec::new());
Backup::create(&self.backup_dir, dirs)?
};
// The above statement always creates this element, so this unwrap is safe
self.chains.last_mut().unwrap().push(backup);
self.save()?;
Ok(())
}
/// Delete all backups associated with outdated chains, and forget those chains.
pub fn remove_old_backups(&mut self) -> std::io::Result<()> {
let chains_to_store: usize = self.chains_to_keep.try_into().unwrap();
if chains_to_store < self.chains.len() {
let mut remove_count: usize = self.chains.len() - chains_to_store;
// We only count finished chains towards the list of stored chains
let chain_len: usize = self.chain_len.try_into().unwrap();
if self.chains.last().unwrap().len() < chain_len {
remove_count -= 1;
}
for chain in self.chains.drain(..remove_count) {
for backup in chain {
let path = Backup::path(&self.backup_dir, backup.start_time);
std::fs::remove_file(path)?;
}
}
}
self.save()?;
Ok(())
}
/// Write the in-memory state to disk.
pub fn save(&self) -> std::io::Result<()> {
let json_file = File::create(self.backup_dir.join(Self::METADATA_FILE))?;
serde_json::to_writer(json_file, &self.chains)?;
Ok(())
}
/// Overwrite the in-memory state with the on-disk state.
pub fn load(&mut self) -> std::io::Result<()> {
let json_file = match File::open(self.backup_dir.join(Self::METADATA_FILE)) {
Ok(f) => f,
Err(e) => {
// Don't error out if the file isn't there; it will be created when necessary
if e.kind() == io::ErrorKind::NotFound {
self.chains = Vec::new();
return Ok(());
} else {
return Err(e);
}
}
};
self.chains = serde_json::from_reader(json_file)?;
Ok(())
}
}
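
For context, a rough usage sketch of the manager's lifecycle, assuming a caller along the lines of the CLI wired up later in this commit; the directories and chain parameters here are invented:

use std::path::PathBuf;

fn backup_cycle() -> std::io::Result<()> {
    let mut manager = Manager::new(
        PathBuf::from("backups"), // also holds the alex.json metadata file
        PathBuf::from("config"),
        PathBuf::from("worlds"),
        4, // chain_len: one full backup followed by three incremental ones
        2, // chains_to_keep
    );

    // Pick up chain metadata persisted by a previous run, if any
    manager.load()?;
    // Full or incremental, depending on the length of the current chain
    manager.create_backup()?;
    // Prune whole chains once more than two finished ones exist
    manager.remove_old_backups()?;
    Ok(())
}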

src/backup/mod.rs 100644 (150 lines added)

@ -0,0 +1,150 @@
mod delta;
mod manager;
mod path;
use delta::Delta;
pub use manager::Manager;
use chrono::Utc;
use flate2::write::GzEncoder;
use flate2::Compression;
use path::PathExt;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub enum BackupType {
Full,
Incremental,
}
/// Represents a successful backup
#[derive(Debug, Serialize, Deserialize)]
pub struct Backup {
/// When the backup was started (the archive's file name is derived from this)
pub start_time: chrono::DateTime<Utc>,
/// Type of the backup
pub type_: BackupType,
pub delta: Delta,
}
impl Backup {
const FILENAME_FORMAT: &str = "%Y-%m-%d_%H-%M-%S.tar.gz";
/// Resolve the state of the list of backups by applying their deltas in order to an initially
/// empty state.
pub fn state(backups: &Vec<Backup>) -> HashMap<PathBuf, HashSet<PathBuf>> {
let mut state: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
for backup in backups {
backup.delta.apply(&mut state);
}
state
}
/// Return the path to a backup file by formatting its start time.
pub fn path<P: AsRef<Path>>(backup_dir: P, start_time: chrono::DateTime<Utc>) -> PathBuf {
let backup_dir = backup_dir.as_ref();
let filename = format!("{}", start_time.format(Self::FILENAME_FORMAT));
backup_dir.join(filename)
}
/// Create a new Full backup, populated with the given directories.
///
/// # Arguments
///
/// * `backup_dir` - Directory to store archive in
/// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name
/// under which `src_dir`'s contents should be stored in the archive
///
/// # Returns
///
/// The `Backup` instance describing this new backup.
pub fn create<P: AsRef<Path>>(
backup_dir: P,
dirs: Vec<(PathBuf, PathBuf)>,
) -> io::Result<Self> {
let start_time = chrono::offset::Utc::now();
let path = Self::path(backup_dir, start_time);
let tar_gz = File::create(path)?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut ar = tar::Builder::new(enc);
let mut delta = Delta::new();
for (dir_in_tar, src_dir) in dirs {
let mut added_files: HashSet<PathBuf> = HashSet::new();
for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
let path = entry?.path();
let stripped = path.strip_prefix(&src_dir).unwrap();
ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
added_files.insert(stripped.to_path_buf());
}
delta.added.insert(dir_in_tar, added_files);
}
Ok(Backup {
type_: BackupType::Full,
start_time,
delta,
})
}
/// Create a new incremental backup from a given previous backup
pub fn create_from<P: AsRef<Path>>(
previous_state: HashMap<PathBuf, HashSet<PathBuf>>,
previous_start_time: chrono::DateTime<Utc>,
backup_dir: P,
dirs: Vec<(PathBuf, PathBuf)>,
) -> io::Result<Self> {
let start_time = chrono::offset::Utc::now();
let path = Self::path(backup_dir, start_time);
let tar_gz = File::create(path)?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut ar = tar::Builder::new(enc);
let mut delta = Delta::new();
for (dir_in_tar, src_dir) in dirs {
let mut all_files: HashSet<PathBuf> = HashSet::new();
let mut added_files: HashSet<PathBuf> = HashSet::new();
for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
let path = entry?.path();
let stripped = path.strip_prefix(&src_dir).unwrap();
if !path.not_modified_since(previous_start_time) {
ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
added_files.insert(stripped.to_path_buf());
}
all_files.insert(stripped.to_path_buf());
}
delta.added.insert(dir_in_tar.clone(), added_files);
if let Some(previous_files) = previous_state.get(&dir_in_tar) {
delta.removed.insert(
dir_in_tar,
previous_files.difference(&all_files).cloned().collect(),
);
}
}
Ok(Backup {
type_: BackupType::Incremental,
start_time,
delta,
})
}
}
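
A final sketch (again not part of the commit) showing how a chain's current contents and archive locations fall out of the public API above; the chain and backup directory are assumed to come from a `Manager`:

use std::path::Path;

fn inspect_chain(chain: &Vec<Backup>, backup_dir: &Path) {
    // Replay every delta in order to find the files the chain currently tracks
    let state = Backup::state(chain);
    for (dir_in_tar, files) in &state {
        println!("{}: {} files", dir_in_tar.display(), files.len());
    }

    // Each archive's path is derived from its start time, e.g.
    // backups/2023-06-19_14-04-38.tar.gz
    for backup in chain {
        println!("{}", Backup::path(backup_dir, backup.start_time).display());
    }
}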

src/main.rs

@ -1,3 +1,4 @@
+mod backup;
mod cli;
mod server;
mod signals;
@ -59,7 +60,7 @@ fn command_run(cli: &Cli, args: &RunArgs) -> io::Result<()> {
}
fn commands_backup(cli: &Cli, _args: &BackupArgs) -> io::Result<()> {
-let mut manager = server::BackupManager::new(
+let mut manager = backup::Manager::new(
cli.backup.clone(),
cli.config.clone(),
cli.world.clone(),

src/server/backups.rs (deleted, 345 lines)

@ -1,345 +0,0 @@
use crate::server::path::PathExt;
use chrono::Utc;
use flate2::write::GzEncoder;
use flate2::Compression;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
#[link(name = "c")]
extern "C" {
fn geteuid() -> u32;
fn getegid() -> u32;
}
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub enum BackupType {
Full,
Incremental,
}
/// Represents the changes relative to the previous backup
#[derive(Debug, Serialize, Deserialize)]
pub struct BackupDelta {
/// What files were added/modified in each part of the tarball.
pub added: HashMap<PathBuf, HashSet<PathBuf>>,
/// What files were removed in this backup, in comparison to the previous backup. For full
/// backups, this will always be empty, as they do not consider previous backups.
/// The map stores a separate list for each top-level directory, as the contents of these
/// directories can come from different source directories.
pub removed: HashMap<PathBuf, HashSet<PathBuf>>,
}
impl BackupDelta {
pub fn new() -> Self {
BackupDelta {
added: HashMap::new(),
removed: HashMap::new(),
}
}
/// Merge the given delta into this one. Applying the merged delta is equivalent to
/// applying this delta and then the given one in sequence.
pub fn merge(&mut self, delta: &BackupDelta) {
for (dir, added) in delta.added.iter() {
// Files that were removed in the current state, but added in the new state, are no
// longer removed
if let Some(orig_removed) = self.removed.get_mut(dir) {
orig_removed.retain(|k| !added.contains(k));
}
// Newly added files are added to the state as well
if let Some(orig_added) = self.added.get_mut(dir) {
orig_added.extend(added.iter().cloned());
} else {
self.added.insert(dir.clone(), added.clone());
}
}
for (dir, removed) in delta.removed.iter() {
// Files that were originally added but are now deleted are removed from the added list
if let Some(orig_added) = self.added.get_mut(dir) {
orig_added.retain(|k| !removed.contains(k));
}
// Newly removed files are added to the state as well
if let Some(orig_removed) = self.removed.get_mut(dir) {
orig_removed.extend(removed.iter().cloned());
} else {
self.removed.insert(dir.clone(), removed.clone());
}
}
}
/// Modify the given state by applying this delta's changes to it
pub fn apply(&self, state: &mut HashMap<PathBuf, HashSet<PathBuf>>) {
// First we add new files, then we remove the old ones
for (dir, added) in self.added.iter() {
if let Some(current) = state.get_mut(dir) {
current.extend(added.iter().cloned());
} else {
state.insert(dir.clone(), added.clone());
}
}
for (dir, removed) in self.removed.iter() {
if let Some(current) = state.get_mut(dir) {
current.retain(|k| !removed.contains(k));
}
}
}
}
/// Represents a successful backup
#[derive(Debug, Serialize, Deserialize)]
pub struct Backup {
/// When the backup was started (the archive's file name is derived from this)
start_time: chrono::DateTime<Utc>,
/// Type of the backup
type_: BackupType,
delta: BackupDelta,
}
impl Backup {
const FILENAME_FORMAT: &str = "%Y-%m-%d_%H-%M-%S.tar.gz";
/// Resolve the state of the list of backups by applying their deltas in order to an initially
/// empty state.
pub fn state(backups: &Vec<Backup>) -> HashMap<PathBuf, HashSet<PathBuf>> {
let mut state: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
for backup in backups {
backup.delta.apply(&mut state);
}
state
}
/// Return the path to a backup file by formatting its start time.
pub fn path<P: AsRef<Path>>(backup_dir: P, start_time: chrono::DateTime<Utc>) -> PathBuf {
let backup_dir = backup_dir.as_ref();
let filename = format!("{}", start_time.format(Self::FILENAME_FORMAT));
backup_dir.join(filename)
}
/// Create a new Full backup, populated with the given directories.
///
/// # Arguments
///
/// * `backup_dir` - Directory to store archive in
/// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name
/// under which `src_dir`'s contents should be stored in the archive
///
/// # Returns
///
/// The `Backup` instance describing this new backup.
pub fn create<P: AsRef<Path>>(
backup_dir: P,
dirs: Vec<(PathBuf, PathBuf)>,
) -> io::Result<Self> {
let start_time = chrono::offset::Utc::now();
let path = Self::path(backup_dir, start_time);
let tar_gz = File::create(path)?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut ar = tar::Builder::new(enc);
let mut delta = BackupDelta::new();
for (dir_in_tar, src_dir) in dirs {
let mut added_files: HashSet<PathBuf> = HashSet::new();
for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
let path = entry?.path();
let stripped = path.strip_prefix(&src_dir).unwrap();
ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
added_files.insert(stripped.to_path_buf());
}
delta.added.insert(dir_in_tar, added_files);
}
Ok(Backup {
type_: BackupType::Full,
start_time,
delta,
})
}
/// Create a new incremental backup from a given previous backup
pub fn create_from<P: AsRef<Path>>(
previous_state: HashMap<PathBuf, HashSet<PathBuf>>,
previous_start_time: chrono::DateTime<Utc>,
backup_dir: P,
dirs: Vec<(PathBuf, PathBuf)>,
) -> io::Result<Self> {
let start_time = chrono::offset::Utc::now();
let path = Self::path(backup_dir, start_time);
let tar_gz = File::create(path)?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut ar = tar::Builder::new(enc);
let mut delta = BackupDelta::new();
for (dir_in_tar, src_dir) in dirs {
let mut all_files: HashSet<PathBuf> = HashSet::new();
let mut added_files: HashSet<PathBuf> = HashSet::new();
for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
let path = entry?.path();
let stripped = path.strip_prefix(&src_dir).unwrap();
if !path.not_modified_since(previous_start_time) {
ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
added_files.insert(stripped.to_path_buf());
}
all_files.insert(stripped.to_path_buf());
}
delta.added.insert(dir_in_tar.clone(), added_files);
if let Some(previous_files) = previous_state.get(&dir_in_tar) {
delta.removed.insert(
dir_in_tar,
previous_files.difference(&all_files).cloned().collect(),
);
}
}
Ok(Backup {
type_: BackupType::Incremental,
start_time,
delta,
})
}
}
pub struct BackupManager {
backup_dir: PathBuf,
config_dir: PathBuf,
world_dir: PathBuf,
chain_len: u64,
chains_to_keep: u64,
chains: Vec<Vec<Backup>>,
}
impl BackupManager {
const METADATA_FILE: &str = "alex.json";
/// Initialize a new instance of a `BackupManager`.
pub fn new(
backup_dir: PathBuf,
config_dir: PathBuf,
world_dir: PathBuf,
chain_len: u64,
chains_to_keep: u64,
) -> Self {
BackupManager {
backup_dir,
config_dir,
world_dir,
chain_len,
chains_to_keep,
chains: Vec::new(),
}
}
/// Create a new backup, either full or incremental depending on the state of the current chain.
pub fn create_backup(&mut self) -> io::Result<()> {
let dirs = vec![
(PathBuf::from("config"), self.config_dir.clone()),
(PathBuf::from("worlds"), self.world_dir.clone()),
];
// I kinda hate this statement, please just let me combine let statements in if statements
// already
let backup = if let Some(current_chain) = self.chains.last() {
let current_chain_len: u64 = current_chain.len().try_into().unwrap();
if current_chain_len < self.chain_len {
if let Some(previous_backup) = current_chain.last() {
let state = Backup::state(current_chain);
Backup::create_from(state, previous_backup.start_time, &self.backup_dir, dirs)?
} else {
Backup::create(&self.backup_dir, dirs)?
}
} else {
self.chains.push(Vec::new());
Backup::create(&self.backup_dir, dirs)?
}
} else {
self.chains.push(Vec::new());
Backup::create(&self.backup_dir, dirs)?
};
// The above statement always creates this element, so this unwrap is safe
self.chains.last_mut().unwrap().push(backup);
self.save()?;
Ok(())
}
/// Delete all backups associated with outdated chains, and forget those chains.
pub fn remove_old_backups(&mut self) -> std::io::Result<()> {
let chains_to_store: usize = self.chains_to_keep.try_into().unwrap();
if chains_to_store < self.chains.len() {
let mut remove_count: usize = self.chains.len() - chains_to_store;
// We only count finished chains towards the list of stored chains
let chain_len: usize = self.chain_len.try_into().unwrap();
if self.chains.last().unwrap().len() < chain_len {
remove_count -= 1;
}
for chain in self.chains.drain(..remove_count) {
for backup in chain {
let path = Backup::path(&self.backup_dir, backup.start_time);
std::fs::remove_file(path)?;
}
}
}
self.save()?;
Ok(())
}
/// Write the in-memory state to disk.
pub fn save(&self) -> std::io::Result<()> {
let json_file = File::create(self.backup_dir.join(Self::METADATA_FILE))?;
serde_json::to_writer(json_file, &self.chains)?;
Ok(())
}
/// Overwrite the in-memory state with the on-disk state.
pub fn load(&mut self) -> std::io::Result<()> {
let json_file = match File::open(self.backup_dir.join(Self::METADATA_FILE)) {
Ok(f) => f,
Err(e) => {
// Don't error out if the file isn't there; it will be created when necessary
if e.kind() == io::ErrorKind::NotFound {
self.chains = Vec::new();
return Ok(());
} else {
return Err(e);
}
}
};
self.chains = serde_json::from_reader(json_file)?;
Ok(())
}
}


@ -1,4 +1,5 @@
-use crate::server::{BackupManager, ServerProcess};
+use crate::backup::Manager as BackupManager;
+use crate::server::ServerProcess;
use clap::ValueEnum;
use std::fmt;
use std::fs::File;

src/server/mod.rs

@ -1,8 +1,5 @@
-mod backups;
mod command;
-mod path;
mod process;
-pub use backups::BackupManager;
pub use command::{ServerCommand, ServerType};
pub use process::ServerProcess;

src/server/process.rs

@ -1,4 +1,4 @@
-use crate::server::BackupManager;
+use crate::backup::Manager as BackupManager;
use crate::server::ServerType;
use std::io::Write;
use std::process::Child;