Compare commits


7 Commits

Author       SHA1        Message                                            Date
Jef Roosens  32d923e64b  refactor: this is fun                              2023-07-08 13:53:18 +02:00
Jef Roosens  1acfc9c422  refactor: have fun with rust's functional stuff    2023-07-08 13:39:51 +02:00
Jef Roosens  fc8e8d37d3  refactor: remove some code duplication             2023-07-08 10:32:56 +02:00
Jef Roosens  5567323473  feat: initially working export command             2023-07-07 23:12:07 +02:00
Jef Roosens  80b814bcff  feat: further use State abstraction                2023-07-07 18:06:15 +02:00
Jef Roosens  4ec336eb86  feat: abstract State                               2023-07-07 17:05:24 +02:00
Jef Roosens  6e216aa88f  feat: define delta difference & strict difference  2023-07-06 15:46:36 +02:00

All ci/woodpecker push pipelines (lint, clippy, build) succeeded for the four most recent commits.
8 changed files with 404 additions and 116 deletions

CHANGELOG.md

@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased](https://git.rustybever.be/Chewing_Bever/alex/src/branch/dev)
### Added
* Export command to export any backup as a new full backup
## [0.3.0](https://git.rustybever.be/Chewing_Bever/alex/src/tag/0.3.0)
### Added

src/backup/delta.rs

@@ -1,78 +1,146 @@
use super::State;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::borrow::Borrow;
use std::fmt;
use std::path::PathBuf;
/// Represents the changes relative to the previous backup
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Delta {
/// What files were added/modified in each part of the tarball.
pub added: HashMap<PathBuf, HashSet<PathBuf>>,
pub added: State,
/// What files were removed in this backup, in comparison to the previous backup. For full
/// backups, this will always be empty, as they do not consider previous backups.
/// The map stores a separate list for each top-level directory, as the contents of these
/// directories can come from different source directories.
pub removed: HashMap<PathBuf, HashSet<PathBuf>>,
pub removed: State,
}
impl Delta {
pub fn new() -> Self {
Self {
added: HashMap::new(),
removed: HashMap::new(),
added: Default::default(),
removed: Default::default(),
}
}
/// Update the current state so that its result becomes the merge of itself and the other
/// state.
#[allow(dead_code)]
pub fn merge(&mut self, delta: &Self) {
/// Calculate the union of this delta with another delta.
///
/// The union of two deltas is a delta that produces the same state as if you were to apply
/// both deltas in-order. Note that this operation is not commutative.
pub fn union(&self, delta: &Self) -> Self {
let mut out = self.clone();
for (dir, added) in delta.added.iter() {
// Files that were removed in the current state, but added in the new state, are no
// longer removed
if let Some(orig_removed) = self.removed.get_mut(dir) {
if let Some(orig_removed) = out.removed.get_mut(dir) {
orig_removed.retain(|k| !added.contains(k));
}
// Newly added files are added to the state as well
if let Some(orig_added) = self.added.get_mut(dir) {
if let Some(orig_added) = out.added.get_mut(dir) {
orig_added.extend(added.iter().cloned());
} else {
self.added.insert(dir.clone(), added.clone());
out.added.insert(dir.clone(), added.clone());
}
}
for (dir, removed) in delta.removed.iter() {
// Files that were originally added, but now deleted are removed from the added list
if let Some(orig_added) = self.added.get_mut(dir) {
if let Some(orig_added) = out.added.get_mut(dir) {
orig_added.retain(|k| !removed.contains(k));
}
// Newly removed files are added to the state as well
if let Some(orig_removed) = self.removed.get_mut(dir) {
if let Some(orig_removed) = out.removed.get_mut(dir) {
orig_removed.extend(removed.iter().cloned());
} else {
self.removed.insert(dir.clone(), removed.clone());
out.removed.insert(dir.clone(), removed.clone());
}
}
out
}
/// Modify the given state by applying this delta's changes to it
pub fn apply(&self, state: &mut HashMap<PathBuf, HashSet<PathBuf>>) {
// First we add new files, then we remove the old ones
for (dir, added) in self.added.iter() {
if let Some(current) = state.get_mut(dir) {
current.extend(added.iter().cloned());
} else {
state.insert(dir.clone(), added.clone());
/// Calculate the difference between this delta and the other delta.
///
/// The difference simply means removing all adds and removes that are also performed in the
/// other delta.
pub fn difference(&self, other: &Self) -> Self {
let mut out = self.clone();
for (dir, added) in out.added.iter_mut() {
// If files are added in the other delta, we don't add them in this delta
if let Some(other_added) = other.added.get(dir) {
added.retain(|k| !other_added.contains(k));
};
}
for (dir, removed) in out.removed.iter_mut() {
// If files are removed in the other delta, we don't remove them in this delta either
if let Some(other_removed) = other.removed.get(dir) {
removed.retain(|k| !other_removed.contains(k));
}
}
for (dir, removed) in self.removed.iter() {
if let Some(current) = state.get_mut(dir) {
current.retain(|k| !removed.contains(k));
out
}
/// Calculate the strict difference between this delta and the other delta.
///
/// The strict difference is a difference where all operations that would be overwritten by the
/// other delta are also removed (e.g. adding a file after removing it, or vice versa).
pub fn strict_difference(&self, other: &Self) -> Self {
let mut out = self.difference(other);
for (dir, added) in out.added.iter_mut() {
// Remove additions that are removed in the other delta
if let Some(other_removed) = other.removed.get(dir) {
added.retain(|k| !other_removed.contains(k));
}
}
for (dir, removed) in out.removed.iter_mut() {
// Remove removals that are re-added in the other delta
if let Some(other_added) = other.added.get(dir) {
removed.retain(|k| !other_added.contains(k));
}
}
out
}
/// Given a chain of deltas, ordered from last to first, calculate the "contribution" of each
/// delta.
///
/// The contribution of a delta in a given chain is defined as the parts of the state produced
/// by this chain that are actually provided by this delta. This comes down to calculating the
/// strict difference of this delta and all of its successive deltas.
pub fn contributions<I>(deltas: I) -> Vec<State>
where
I: IntoIterator,
I::Item: Borrow<Delta>,
{
let mut contributions: Vec<State> = Vec::new();
let mut deltas = deltas.into_iter();
if let Some(first_delta) = deltas.next() {
// From last to first, we calculate the strict difference of the delta with the union of all its
// following deltas. The list of added files of this difference is the contribution for
// that delta.
contributions.push(first_delta.borrow().added.clone());
let mut union_future = first_delta.borrow().clone();
for delta in deltas {
contributions.push(delta.borrow().strict_difference(&union_future).added);
union_future = union_future.union(delta.borrow());
}
}
// contributions.reverse();
contributions
}
}
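
Taken together, the new methods give Delta a small set algebra. The sketch below is illustrative only; the directory and file names are invented, a single top-level directory is used, and nothing beyond the union, difference and strict_difference definitions above is assumed.

use std::collections::HashSet;
use std::path::PathBuf;

fn set(files: &[&str]) -> HashSet<PathBuf> {
    files.iter().map(PathBuf::from).collect()
}

fn delta_algebra_sketch() {
    let dir = PathBuf::from("worlds");

    // d1 adds {a, b} and removes {c}; d2 is applied after d1 and adds {c}
    // while removing {b}.
    let mut d1 = Delta::new();
    d1.added.insert(dir.clone(), set(&["a", "b"]));
    d1.removed.insert(dir.clone(), set(&["c"]));
    let mut d2 = Delta::new();
    d2.added.insert(dir.clone(), set(&["c"]));
    d2.removed.insert(dir.clone(), set(&["b"]));

    // union == "apply d1, then d2": adds {a, c}, removes {b}
    let u = d1.union(&d2);
    assert_eq!(u.added[&dir], set(&["a", "c"]));
    assert_eq!(u.removed[&dir], set(&["b"]));

    // difference drops the operations d2 also performs; nothing overlaps
    // here, so d1 comes back unchanged
    let d = d1.difference(&d2);
    assert_eq!(d.added[&dir], set(&["a", "b"]));

    // strict_difference additionally drops operations d2 overwrites: the add
    // of "b" (removed again by d2) and the removal of "c" (re-added by d2)
    let s = d1.strict_difference(&d2);
    assert_eq!(s.added[&dir], set(&["a"]));
    assert!(s.removed[&dir].is_empty());
}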

src/backup/manager/meta.rs

@@ -4,7 +4,7 @@ use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::io;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
/// Manages a collection of backup layers, allowing them to be utilized as a single object.
pub struct MetaManager<T>
@@ -129,6 +129,17 @@ where
.map(|manager| manager.restore_backup(start_time, dirs))
}
pub fn export_backup<P: AsRef<Path>>(
&self,
layer: &str,
start_time: chrono::DateTime<Utc>,
output_path: P,
) -> Option<io::Result<()>> {
self.managers
.get(layer)
.map(|manager| manager.export_backup(start_time, output_path))
}
pub fn managers(&self) -> &HashMap<String, Manager<T>> {
&self.managers
}
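
Like restore_backup above it, the new wrapper encodes "unknown layer" as None and keeps I/O failures inside the inner Result. A hedged sketch of a call site, with the layer name, timestamp and output path invented for illustration:

// Inside a function with access to a MetaManager (`meta`) and a parsed
// chrono::DateTime<Utc> (`timestamp`):
match meta.export_backup("worlds", timestamp, "/tmp/worlds-full.tar.gz") {
    None => eprintln!("unknown layer"),                 // layer name not configured
    Some(Err(err)) => eprintln!("export failed: {}", err), // I/O error during export
    Some(Ok(())) => println!("exported"),
}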

src/backup/manager/mod.rs

@@ -4,15 +4,17 @@ mod meta;
pub use config::ManagerConfig;
pub use meta::MetaManager;
use super::Backup;
use super::{Backup, BackupType, Delta, State};
use crate::other;
use chrono::SubsecRound;
use chrono::Utc;
use flate2::write::GzEncoder;
use flate2::Compression;
use serde::Deserialize;
use serde::Serialize;
use std::fs::File;
use std::fs::{File, OpenOptions};
use std::io;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
/// Manages a single backup layer consisting of one or more chains of backups.
pub struct Manager<T>
@@ -72,10 +74,10 @@ where
let mut backup = if !current_chain.is_empty() {
let previous_backup = current_chain.last().unwrap();
let state = Backup::state(current_chain);
let previous_state = State::from(current_chain.iter().map(|b| &b.delta));
Backup::create_from(
state,
previous_state,
previous_backup.start_time,
&self.backup_dir,
&self.dirs,
@@ -151,13 +153,30 @@ where
/// Calculate the next time a backup should be created. If no backup has been created yet, the
/// current time is returned.
pub fn next_scheduled_time(&self) -> chrono::DateTime<Utc> {
if let Some(last_chain) = self.chains.last() {
if let Some(last_backup) = last_chain.last() {
return last_backup.start_time + self.frequency;
self.chains
.last()
.and_then(|last_chain| last_chain.last())
.map(|last_backup| last_backup.start_time + self.frequency)
.unwrap_or_else(chrono::offset::Utc::now)
}
/// Search for a chain containing a backup with the specified start time.
///
/// # Returns
///
/// A tuple (chain, index) with index being the index of the found backup in the returned
/// chain.
fn find(&self, start_time: chrono::DateTime<Utc>) -> Option<(&Vec<Backup<T>>, usize)> {
for chain in &self.chains {
if let Some(index) = chain
.iter()
.position(|b| b.start_time.trunc_subsecs(0) == start_time)
{
return Some((chain, index));
}
}
chrono::offset::Utc::now()
None
}
/// Restore the backup with the given start time by restoring its chain up to and including the
@@ -167,24 +186,57 @@ where
start_time: chrono::DateTime<Utc>,
dirs: &Vec<(PathBuf, PathBuf)>,
) -> io::Result<()> {
// Iterate over each chain, skipping elements until the element with the given start time
// is possibly found.
for chain in &self.chains {
// If we find the element in the chain, restore the entire chain up to and including
// the element
if let Some(index) = chain
.iter()
.position(|b| b.start_time.trunc_subsecs(0) == start_time)
{
self.find(start_time)
.ok_or_else(|| other("Unknown layer."))
.and_then(|(chain, index)| {
for backup in chain.iter().take(index + 1) {
backup.restore(&self.backup_dir, dirs)?;
}
return Ok(());
}
}
Ok(())
})
}
Err(other("Unknown backup."))
/// Export the backup with the given start time as a new full archive.
pub fn export_backup<P: AsRef<Path>>(
&self,
start_time: chrono::DateTime<Utc>,
output_path: P,
) -> io::Result<()> {
self.find(start_time)
.ok_or_else(|| other("Unknown layer."))
.and_then(|(chain, index)| {
match chain[index].type_ {
// A full backup is simply copied to the output path
BackupType::Full => std::fs::copy(
Backup::path(&self.backup_dir, chain[index].start_time),
output_path,
)
.map(|_| ()),
// Incremental backups are exported one by one according to their contribution
BackupType::Incremental => {
let contributions = Delta::contributions(
chain.iter().take(index + 1).map(|b| &b.delta).rev(),
);
let tar_gz = OpenOptions::new()
.write(true)
.create(true)
.open(output_path.as_ref())?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut ar = tar::Builder::new(enc);
for (contribution, backup) in
contributions.iter().rev().zip(chain.iter().take(index + 1))
{
backup.append(&self.backup_dir, contribution, &mut ar)?;
}
let mut enc = ar.into_inner()?;
enc.try_finish()
}
}
})
}
/// Get a reference to the underlying chains
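
The incremental branch is the interesting one: Delta::contributions receives the chain's deltas newest-first and answers, for each backup, which of its files survive into the exported state. A worked sketch under an invented three-backup chain (single top-level directory, file names made up):

use std::collections::HashSet;
use std::path::PathBuf;

fn contributions_sketch() {
    let dir = PathBuf::from("worlds");
    let delta = |added: &[&str], removed: &[&str]| {
        let mut d = Delta::new();
        d.added.insert(dir.clone(), added.iter().map(PathBuf::from).collect());
        d.removed.insert(dir.clone(), removed.iter().map(PathBuf::from).collect());
        d
    };

    // full adds {a, b}; inc1 rewrites b and adds c while dropping a;
    // inc2 adds d and drops c. Final state: {b, d}.
    let full = delta(&["a", "b"], &[]);
    let inc1 = delta(&["b", "c"], &["a"]);
    let inc2 = delta(&["d"], &["c"]);

    // Newest-first, exactly as export_backup passes them.
    let contributions = Delta::contributions([&inc2, &inc1, &full]);

    // inc2 contributes {d}; inc1 contributes {b} (its c is deleted again);
    // full contributes nothing (a is later removed, b is overshadowed by inc1).
    assert_eq!(contributions[0][&dir], HashSet::from([PathBuf::from("d")]));
    assert_eq!(contributions[1][&dir], HashSet::from([PathBuf::from("b")]));
    assert!(contributions[2][&dir].is_empty());
}

export_backup then zips these contributions back in chronological order, appending each backup's surviving entries to one fresh tar.gz, which yields the same archive a full backup of the final state would produce.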

src/backup/mod.rs

@@ -2,11 +2,13 @@ mod delta;
mod io_ext;
pub mod manager;
mod path;
mod state;
use delta::Delta;
pub use manager::Manager;
pub use manager::ManagerConfig;
pub use manager::MetaManager;
pub use state::State;
use chrono::Utc;
use flate2::read::GzDecoder;
@@ -14,7 +16,7 @@ use flate2::write::GzEncoder;
use flate2::Compression;
use path::PathExt;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::fmt;
use std::fs::File;
use std::io;
@@ -61,18 +63,6 @@ impl<T: Clone> Backup<T> {
self.metadata = Some(metadata);
}
/// Resolve the state of the list of backups by applying their deltas in-order to an initially
/// empty state.
pub fn state(backups: &Vec<Self>) -> HashMap<PathBuf, HashSet<PathBuf>> {
let mut state: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
for backup in backups {
backup.delta.apply(&mut state);
}
state
}
/// Create a new Full backup, populated with the given directories.
///
/// # Arguments
@@ -141,7 +131,7 @@ impl<T: Clone> Backup<T> {
///
/// The `Backup` instance describing this new backup.
pub fn create_from<P: AsRef<Path>>(
previous_state: HashMap<PathBuf, HashSet<PathBuf>>,
previous_state: State,
previous_start_time: chrono::DateTime<Utc>,
backup_dir: P,
dirs: &Vec<(PathBuf, PathBuf)>,
@@ -247,6 +237,36 @@ impl<T: Clone> Backup<T> {
Ok(())
}
pub fn open<P: AsRef<Path>>(&self, backup_dir: P) -> io::Result<tar::Archive<GzDecoder<File>>> {
let path = Backup::path(backup_dir, self.start_time);
let tar_gz = File::open(path)?;
let enc = GzDecoder::new(tar_gz);
Ok(tar::Archive::new(enc))
}
/// Open this backup's archive and append all its files that are part of the provided state to
/// the archive file.
pub fn append<P: AsRef<Path>>(
&self,
backup_dir: P,
state: &State,
ar: &mut tar::Builder<GzEncoder<File>>,
) -> io::Result<()> {
let mut own_ar = self.open(backup_dir)?;
for entry in own_ar.entries()? {
let entry = entry?;
let entry_path_in_tar = entry.path()?.to_path_buf();
if state.contains(&entry_path_in_tar) {
let header = entry.header().clone();
ar.append(&header, entry)?;
}
}
Ok(())
}
}
impl<T: Clone> fmt::Display for Backup<T> {

src/backup/path.rs

@@ -129,16 +129,15 @@ pub trait PathExt {
impl PathExt for Path {
fn not_modified_since(&self, timestamp: chrono::DateTime<Utc>) -> bool {
if let Ok(metadata) = self.metadata() {
if let Ok(last_modified) = metadata.modified() {
self.metadata()
.and_then(|m| m.modified())
.map(|last_modified| {
let t: chrono::DateTime<Utc> = last_modified.into();
let t = t.with_timezone(&Local);
return t < timestamp;
}
}
false
t < timestamp
})
.unwrap_or(false)
}
fn read_dir_recursive(&self) -> io::Result<ReadDirRecursive> {

src/backup/state.rs

@@ -0,0 +1,86 @@
use crate::backup::Delta;
use serde::{Deserialize, Serialize};
use std::borrow::Borrow;
use std::collections::{HashMap, HashSet};
use std::ops::{Deref, DerefMut};
use std::path::{Path, PathBuf};
/// Struct that represents a current state for a backup. This struct acts as a smart pointer around
/// a HashMap.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct State(HashMap<PathBuf, HashSet<PathBuf>>);
impl State {
pub fn new() -> Self {
State(HashMap::new())
}
/// Apply the delta to the current state.
pub fn apply(&mut self, delta: &Delta) {
// First we add new files, then we remove the old ones
for (dir, added) in delta.added.iter() {
if let Some(current) = self.0.get_mut(dir) {
current.extend(added.iter().cloned());
} else {
self.0.insert(dir.clone(), added.clone());
}
}
for (dir, removed) in delta.removed.iter() {
if let Some(current) = self.0.get_mut(dir) {
current.retain(|k| !removed.contains(k));
}
}
}
/// Returns whether the provided relative path is part of the given state.
pub fn contains<P: AsRef<Path>>(&self, path: P) -> bool {
let path = path.as_ref();
self.0.iter().any(|(dir, files)| {
path.starts_with(dir) && files.contains(path.strip_prefix(dir).unwrap())
})
}
}
impl<T> From<T> for State
where
T: IntoIterator,
T::Item: Borrow<Delta>,
{
fn from(deltas: T) -> Self {
let mut state = State::new();
for delta in deltas {
state.apply(delta.borrow());
}
state
}
}
impl AsRef<HashMap<PathBuf, HashSet<PathBuf>>> for State {
fn as_ref(&self) -> &HashMap<PathBuf, HashSet<PathBuf>> {
&self.0
}
}
impl Deref for State {
type Target = HashMap<PathBuf, HashSet<PathBuf>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for State {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl Default for State {
fn default() -> Self {
Self::new()
}
}
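
A short sketch of the intended usage: the From impl replaces the old Backup::state helper by folding a chain of deltas into the state it produces, and contains is what Backup::append uses to filter tar entries. The paths are invented:

use std::path::PathBuf;

fn state_sketch() {
    let mut d = Delta::new();
    d.added.insert(
        PathBuf::from("config"),
        [PathBuf::from("server.properties")].into_iter().collect(),
    );

    // Fold a (one-element) chain of deltas into the resulting state.
    let state = State::from([&d]);

    // `contains` takes a path relative to the backup root: the top-level
    // directory followed by the file's path inside it.
    assert!(state.contains("config/server.properties"));
    assert!(!state.contains("config/ops.json"));
}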

src/cli/backup.rs

@@ -4,7 +4,7 @@ use crate::other;
use chrono::{TimeZone, Utc};
use clap::{Args, Subcommand};
use std::io;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
#[derive(Subcommand)]
pub enum BackupCommands {
@@ -14,6 +14,8 @@ pub enum BackupCommands {
Create(BackupCreateArgs),
/// Restore a backup
Restore(BackupRestoreArgs),
/// Export a backup into a full archive
Export(BackupExportArgs),
}
#[derive(Args)]
@@ -36,13 +38,13 @@ pub struct BackupListArgs {
#[derive(Args)]
pub struct BackupRestoreArgs {
/// Path to the backup inside the backup directory
/// Path to the backup inside the backup directory to restore
path: PathBuf,
/// Directory to store config in
output_config: PathBuf,
/// Directory to store worlds in
output_worlds: PathBuf,
/// Whether to overwrite the contents of the existing directories
/// Whether to overwrite the contents of the output directories
#[arg(short, long, default_value_t = false)]
force: bool,
/// Create output directories if they don't exist
@@ -50,12 +52,24 @@ pub struct BackupRestoreArgs {
make: bool,
}
#[derive(Args)]
pub struct BackupExportArgs {
/// Path to the backup inside the backup directory to export
path: PathBuf,
/// Path to store the exported archive
output: PathBuf,
/// Create output directories if they don't exist
#[arg(short, long, default_value_t = false)]
make: bool,
}
impl BackupArgs {
pub fn run(&self, cli: &Cli) -> io::Result<()> {
match &self.command {
BackupCommands::Create(args) => args.run(cli),
BackupCommands::List(args) => args.run(cli),
BackupCommands::Restore(args) => args.run(cli),
BackupCommands::Export(args) => args.run(cli),
}
}
}
@@ -72,6 +86,63 @@ impl BackupCreateArgs {
}
}
impl BackupListArgs {
pub fn run(&self, cli: &Cli) -> io::Result<()> {
let meta = cli.meta()?;
// A bit scuffed? Sure
for (name, manager) in meta
.managers()
.iter()
.filter(|(name, _)| self.layer.is_none() || &self.layer.as_ref().unwrap() == name)
{
println!("{}", name);
for chain in manager.chains().iter().filter(|c| !c.is_empty()) {
let mut iter = chain.iter();
println!(" {}", iter.next().unwrap());
for backup in iter {
println!(" {}", backup);
}
}
}
Ok(())
}
}
/// Try to parse the given path as a backup file inside the backup directory, returning the
/// backup's layer and its start time.
fn parse_backup_path(
backup_dir: &Path,
backup_path: &Path,
) -> io::Result<(String, chrono::DateTime<Utc>)> {
if !backup_path.starts_with(backup_dir) {
return Err(other("Provided file is not inside the backup directory."));
}
let layer = if let Some(parent) = backup_path.parent() {
// Backup files should be stored nested inside a layer's folder
if parent != backup_dir {
parent.file_name().unwrap().to_string_lossy()
} else {
return Err(other("Invalid path."));
}
} else {
return Err(other("Invalid path."));
};
let timestamp = if let Some(filename) = backup_path.file_name() {
Utc.datetime_from_str(&filename.to_string_lossy(), Backup::FILENAME_FORMAT)
.map_err(|_| other("Invalid filename."))?
} else {
return Err(other("Invalid filename."));
};
Ok((layer.to_string(), timestamp))
}
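
A usage sketch for the extracted helper. The directory layout is the one implied by the checks above (<backup_dir>/<layer>/<file>), while the exact file-name format comes from Backup::FILENAME_FORMAT, which this diff does not show, so the file name below is only indicative:

use std::io;
use std::path::Path;

fn parse_sketch() -> io::Result<()> {
    let backup_dir = Path::new("/backups");
    // Hypothetical backup file inside the "paper" layer; the timestamp part
    // must match Backup::FILENAME_FORMAT for the parse to succeed.
    let backup_path = Path::new("/backups/paper/2023-07-07_23-12-07");
    let (layer, timestamp) = parse_backup_path(backup_dir, backup_path)?;
    assert_eq!(layer, "paper");
    println!("backup from layer {} started at {}", layer, timestamp);
    Ok(())
}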
impl BackupRestoreArgs {
pub fn run(&self, cli: &Cli) -> io::Result<()> {
let backup_dir = cli.backup.canonicalize()?;
@@ -86,29 +157,8 @@ impl BackupRestoreArgs {
let output_worlds = self.output_worlds.canonicalize()?;
// Parse input path
let path = self.path.canonicalize()?;
if !path.starts_with(&backup_dir) {
return Err(other("Provided file is not inside the backup directory."));
}
let layer = if let Some(parent) = path.parent() {
// Backup files should be stored nested inside a layer's folder
if parent != backup_dir {
parent.file_name().unwrap().to_string_lossy()
} else {
return Err(other("Invalid path."));
}
} else {
return Err(other("Invalid path."));
};
let timestamp = if let Some(filename) = path.file_name() {
Utc.datetime_from_str(&filename.to_string_lossy(), Backup::FILENAME_FORMAT)
.map_err(|_| other("Invalid filename."))?
} else {
return Err(other("Invalid filename."));
};
let backup_path = self.path.canonicalize()?;
let (layer, timestamp) = parse_backup_path(&backup_dir, &backup_path)?;
let meta = cli.meta()?;
@@ -146,28 +196,26 @@ impl BackupRestoreArgs {
}
}
impl BackupListArgs {
impl BackupExportArgs {
pub fn run(&self, cli: &Cli) -> io::Result<()> {
let meta = cli.meta()?;
let backup_dir = cli.backup.canonicalize()?;
// A bit scuffed? Sure
for (name, manager) in meta
.managers()
.iter()
.filter(|(name, _)| self.layer.is_none() || &self.layer.as_ref().unwrap() == name)
{
println!("{}", name);
for chain in manager.chains().iter().filter(|c| !c.is_empty()) {
let mut iter = chain.iter();
println!(" {}", iter.next().unwrap());
for backup in iter {
println!(" {}", backup);
}
if self.make {
if let Some(parent) = &self.output.parent() {
std::fs::create_dir_all(parent)?;
}
}
Ok(())
// Parse input path
let backup_path = self.path.canonicalize()?;
let (layer, timestamp) = parse_backup_path(&backup_dir, &backup_path)?;
let meta = cli.meta()?;
if let Some(res) = meta.export_backup(&layer, timestamp, &self.output) {
res
} else {
Err(other("Unknown layer"))
}
}
}