Compare commits

...

4 Commits

Author       SHA1        Message                                            Date
Jef Roosens  5567323473  feat: initially working export command             2023-07-07 23:12:07 +02:00
Jef Roosens  80b814bcff  feat: further use State abstraction                2023-07-07 18:06:15 +02:00
Jef Roosens  4ec336eb86  feat: abstract State                               2023-07-07 17:05:24 +02:00
Jef Roosens  6e216aa88f  feat: define delta difference & strict difference  2023-07-06 15:46:36 +02:00

All three CI pipelines (lint, clippy, build) passed for the head commit 5567323473.
6 changed files with 339 additions and 50 deletions

View File

@@ -1,78 +1,146 @@
use super::State;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::borrow::Borrow;
use std::fmt;
use std::path::PathBuf;
/// Represents the changes relative to the previous backup
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Delta {
/// What files were added/modified in each part of the tarball.
pub added: HashMap<PathBuf, HashSet<PathBuf>>,
pub added: State,
/// What files were removed in this backup, in comparison to the previous backup. For full
/// backups, this will always be empty, as they do not consider previous backups.
/// The map stores a separate list for each top-level directory, as the contents of these
/// directories can come from different source directories.
pub removed: HashMap<PathBuf, HashSet<PathBuf>>,
pub removed: State,
}
impl Delta {
pub fn new() -> Self {
Self {
added: HashMap::new(),
removed: HashMap::new(),
added: Default::default(),
removed: Default::default(),
}
}
/// Update the current state so that its result becomes the merge of itself and the other
/// state.
#[allow(dead_code)]
pub fn merge(&mut self, delta: &Self) {
/// Calculate the union of this delta with another delta.
///
/// The union of two deltas is a delta that produces the same state as if you were to apply
/// both deltas in-order. Note that this operation is not commutative.
pub fn union(&self, delta: &Self) -> Self {
let mut out = self.clone();
for (dir, added) in delta.added.iter() {
// Files that were removed in the current state, but added in the new state, are no
// longer removed
if let Some(orig_removed) = self.removed.get_mut(dir) {
if let Some(orig_removed) = out.removed.get_mut(dir) {
orig_removed.retain(|k| !added.contains(k));
}
// Newly added files are added to the state as well
if let Some(orig_added) = self.added.get_mut(dir) {
if let Some(orig_added) = out.added.get_mut(dir) {
orig_added.extend(added.iter().cloned());
} else {
self.added.insert(dir.clone(), added.clone());
out.added.insert(dir.clone(), added.clone());
}
}
for (dir, removed) in delta.removed.iter() {
// Files that were originally added, but now deleted are removed from the added list
if let Some(orig_added) = self.added.get_mut(dir) {
if let Some(orig_added) = out.added.get_mut(dir) {
orig_added.retain(|k| !removed.contains(k));
}
// Newly removed files are added to the state as well
if let Some(orig_removed) = self.removed.get_mut(dir) {
if let Some(orig_removed) = out.removed.get_mut(dir) {
orig_removed.extend(removed.iter().cloned());
} else {
self.removed.insert(dir.clone(), removed.clone());
out.removed.insert(dir.clone(), removed.clone());
}
}
out
}
/// Modify the given state by applying this delta's changes to it
pub fn apply(&self, state: &mut HashMap<PathBuf, HashSet<PathBuf>>) {
// First we add new files, then we remove the old ones
for (dir, added) in self.added.iter() {
if let Some(current) = state.get_mut(dir) {
current.extend(added.iter().cloned());
} else {
state.insert(dir.clone(), added.clone());
/// Calculate the difference between this delta and the other delta.
///
/// The difference is this delta with all additions and removals that are also
/// performed in the other delta removed.
pub fn difference(&self, other: &Self) -> Self {
let mut out = self.clone();
for (dir, added) in out.added.iter_mut() {
// If files are added in the other delta, we don't add them in this delta
if let Some(other_added) = other.added.get(dir) {
added.retain(|k| !other_added.contains(k));
};
}
for (dir, removed) in out.removed.iter_mut() {
// If files are removed in the other delta, we don't remove them in this delta either
if let Some(other_removed) = other.removed.get(dir) {
removed.retain(|k| !other_removed.contains(k));
}
}
for (dir, removed) in self.removed.iter() {
if let Some(current) = state.get_mut(dir) {
current.retain(|k| !removed.contains(k));
out
}
/// Calculate the strict difference between this delta and the other delta.
///
/// The strict difference is the difference, with all operations that would be undone by the
/// other delta also removed (e.g. adding a file that the other delta removes again, or vice versa).
pub fn strict_difference(&self, other: &Self) -> Self {
let mut out = self.difference(other);
for (dir, added) in out.added.iter_mut() {
// Remove additions that are removed in the other delta
if let Some(other_removed) = other.removed.get(dir) {
added.retain(|k| !other_removed.contains(k));
}
}
for (dir, removed) in out.removed.iter_mut() {
// Remove removals that are re-added in the other delta
if let Some(other_added) = other.added.get(dir) {
removed.retain(|k| !other_added.contains(k));
}
}
out
}
/// Given a chain of deltas, ordered from last to first, calculate the "contribution" of each
/// delta.
///
/// The contribution of a delta in a given chain is defined as the part of the final state
/// produced by this chain that is actually provided by this delta. This comes down to
/// calculating the strict difference between this delta and the union of all deltas that
/// follow it in the chain.
pub fn contributions<I>(deltas: I) -> Vec<State>
where
I: IntoIterator,
I::Item: Borrow<Delta>,
{
let mut contributions: Vec<State> = Vec::new();
let mut deltas = deltas.into_iter();
if let Some(first_delta) = deltas.next() {
// From last to first, we calculate the strict difference of the delta with the union of all its
// following deltas. The list of added files of this difference is the contribution for
// that delta.
contributions.push(first_delta.borrow().added.clone());
let mut union_future = first_delta.borrow().clone();
for delta in deltas {
contributions.push(delta.borrow().strict_difference(&union_future).added);
union_future = union_future.union(delta.borrow());
}
}
// contributions.reverse();
contributions
}
}
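A minimal sketch of these semantics, assuming the Delta type above and the State wrapper introduced in state.rs further down; the files helper and demo function are purely illustrative:

use std::collections::HashSet;
use std::path::PathBuf;

// Illustrative helper: build a set of paths from string literals.
fn files(names: &[&str]) -> HashSet<PathBuf> {
    names.iter().map(PathBuf::from).collect()
}

fn demo() {
    let dir = PathBuf::from("worlds");

    // Delta `a` adds two files; delta `b` then removes one of them.
    let mut a = Delta::new();
    a.added.insert(dir.clone(), files(&["a.txt", "b.txt"]));
    let mut b = Delta::new();
    b.removed.insert(dir.clone(), files(&["a.txt"]));

    // Applying `a` then `b` leaves only b.txt added, and swapping the
    // operands gives a different result: union is not commutative.
    assert_eq!(a.union(&b).added[&dir], files(&["b.txt"]));
    assert_eq!(b.union(&a).added[&dir], files(&["a.txt", "b.txt"]));

    // The strict difference of `a` with `b` drops a.txt from the additions
    // as well, since `b` undoes that addition afterwards.
    assert_eq!(a.strict_difference(&b).added[&dir], files(&["b.txt"]));
}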

View File

@@ -4,7 +4,7 @@ use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::io;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
/// Manages a collection of backup layers, allowing them to be utilized as a single object.
pub struct MetaManager<T>
@@ -129,6 +129,17 @@ where
.map(|manager| manager.restore_backup(start_time, dirs))
}
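/// Export the backup in the given layer with the given start time as a single archive,
/// returning None if no layer with that name exists.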
pub fn export_backup<P: AsRef<Path>>(
&self,
layer: &str,
start_time: chrono::DateTime<Utc>,
output_path: P,
) -> Option<io::Result<()>> {
self.managers
.get(layer)
.map(|manager| manager.export_backup(start_time, output_path))
}
pub fn managers(&self) -> &HashMap<String, Manager<T>> {
&self.managers
}
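export_backup deliberately mirrors the restore_backup delegation above: the outer Option distinguishes an unknown layer from an I/O failure inside a known layer. A hedged sketch of a call site (meta, timestamp, and the output path are illustrative):

match meta.export_backup("worlds", timestamp, "/tmp/worlds-export.tar.gz") {
    // The layer exists; propagate any I/O error from the export itself
    Some(res) => res?,
    // No layer with that name is configured
    None => return Err(other("Unknown layer")),
}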

View File

@@ -4,15 +4,17 @@ mod meta;
pub use config::ManagerConfig;
pub use meta::MetaManager;
use super::Backup;
use super::{Backup, Delta, State};
use crate::other;
use chrono::SubsecRound;
use chrono::Utc;
use flate2::write::GzEncoder;
use flate2::Compression;
use serde::Deserialize;
use serde::Serialize;
use std::fs::File;
use std::fs::{File, OpenOptions};
use std::io;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
/// Manages a single backup layer consisting of one or more chains of backups.
pub struct Manager<T>
@@ -72,10 +74,10 @@ where
let mut backup = if !current_chain.is_empty() {
let previous_backup = current_chain.last().unwrap();
let state = Backup::state(current_chain);
let previous_state = State::from(current_chain.iter().map(|b| &b.delta));
Backup::create_from(
state,
previous_state,
previous_backup.start_time,
&self.backup_dir,
&self.dirs,
@@ -187,6 +189,47 @@ where
Err(other("Unknown backup."))
}
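/// Export the backup with the given start time as a single self-contained archive,
/// containing the full state of the backup at that point in the chain.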
pub fn export_backup<P: AsRef<Path>>(
&self,
start_time: chrono::DateTime<Utc>,
output_path: P,
) -> io::Result<()> {
// Iterate over each chain, looking for the backup with the given start time
for chain in &self.chains {
// If we find the backup in this chain, export it along with the
// contributions of every backup that precedes it in the chain
if let Some(index) = chain
.iter()
.position(|b| b.start_time.trunc_subsecs(0) == start_time)
{
let contributions =
Delta::contributions(chain.iter().take(index + 1).map(|b| &b.delta).rev());
let tar_gz = OpenOptions::new()
.write(true)
.create(true)
.open(output_path.as_ref())?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut ar = tar::Builder::new(enc);
for (contribution, backup) in
contributions.iter().rev().zip(chain.iter().take(index + 1))
{
backup.append(&self.backup_dir, contribution, &mut ar)?;
}
let mut enc = ar.into_inner()?;
enc.try_finish()?;
enc.finish()?;
return Ok(());
}
}
Err(other("Unknown backup."))
}
/// Get a reference to the underlying chains
pub fn chains(&self) -> &Vec<Vec<Backup<T>>> {
&self.chains
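To see why zipping each backup with its contribution yields a correct export, consider a two-backup chain in which the incremental backup overwrites a file from the full backup. A minimal sketch, reusing the illustrative files helper from the delta.rs sketch above:

let dir = PathBuf::from("worlds");

// The full backup ships old.txt and keep.txt; the incremental backup then
// writes a newer version of old.txt.
let mut full = Delta::new();
full.added.insert(dir.clone(), files(&["old.txt", "keep.txt"]));
let mut inc = Delta::new();
inc.added.insert(dir.clone(), files(&["old.txt"]));

// `contributions` expects the chain ordered from last to first, and returns
// the contributions in that same order.
let contributions = Delta::contributions([&inc, &full]);

// The newest backup contributes everything it added; the full backup's
// contribution shrinks to keep.txt. Each path is therefore pulled from
// exactly one archive: the newest backup that wrote it.
assert_eq!(contributions[0][&dir], files(&["old.txt"]));
assert_eq!(contributions[1][&dir], files(&["keep.txt"]));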

View File

@@ -2,11 +2,13 @@ mod delta;
mod io_ext;
pub mod manager;
mod path;
mod state;
use delta::Delta;
pub use manager::Manager;
pub use manager::ManagerConfig;
pub use manager::MetaManager;
pub use state::State;
use chrono::Utc;
use flate2::read::GzDecoder;
@@ -14,7 +16,7 @@ use flate2::write::GzEncoder;
use flate2::Compression;
use path::PathExt;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::fmt;
use std::fs::File;
use std::io;
@@ -61,18 +63,6 @@ impl<T: Clone> Backup<T> {
self.metadata = Some(metadata);
}
/// Resolve the state of the list of backups by applying their deltas in-order to an initially
/// empty state.
pub fn state(backups: &Vec<Self>) -> HashMap<PathBuf, HashSet<PathBuf>> {
let mut state: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
for backup in backups {
backup.delta.apply(&mut state);
}
state
}
/// Create a new Full backup, populated with the given directories.
///
/// # Arguments
@@ -141,7 +131,7 @@ impl<T: Clone> Backup<T> {
///
/// The `Backup` instance describing this new backup.
pub fn create_from<P: AsRef<Path>>(
previous_state: HashMap<PathBuf, HashSet<PathBuf>>,
previous_state: State,
previous_start_time: chrono::DateTime<Utc>,
backup_dir: P,
dirs: &Vec<(PathBuf, PathBuf)>,
@@ -247,6 +237,38 @@ impl<T: Clone> Backup<T> {
Ok(())
}
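/// Open this backup's archive file as a readable tar archive.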
pub fn open<P: AsRef<Path>>(&self, backup_dir: P) -> io::Result<tar::Archive<GzDecoder<File>>> {
let path = Backup::path(backup_dir, self.start_time);
let tar_gz = File::open(path)?;
let enc = GzDecoder::new(tar_gz);
Ok(tar::Archive::new(enc))
}
/// Open this backup's archive and append every file in it that is part of the provided state
/// to the given archive builder.
pub fn append<P: AsRef<Path>>(
&self,
backup_dir: P,
state: &State,
ar: &mut tar::Builder<GzEncoder<File>>,
) -> io::Result<()> {
let mut own_ar = self.open(backup_dir)?;
for entry in own_ar.entries()? {
let entry = entry?;
let entry_path_in_tar = entry.path()?.to_path_buf();
if state.contains(&entry_path_in_tar) {
let header = entry.header().clone();
ar.append(&header, entry)?;
}
}
Ok(())
}
}
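append relies on the fact that tar's Builder::append accepts any Read for the entry body, and an Entry is itself a reader, so files are streamed from one compressed archive into the other without being unpacked to disk. A self-contained sketch of that technique, with illustrative paths and function name:

use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;
use std::fs::File;
use std::io;

// Copy every entry of `src` that is part of `wanted` into `dst`, streaming
// the entry bodies directly between the two compressed archives.
fn copy_matching(src: &str, dst: &str, wanted: &State) -> io::Result<()> {
    let mut src_ar = tar::Archive::new(GzDecoder::new(File::open(src)?));
    let enc = GzEncoder::new(File::create(dst)?, Compression::default());
    let mut dst_ar = tar::Builder::new(enc);

    for entry in src_ar.entries()? {
        let entry = entry?;
        let path = entry.path()?.to_path_buf();
        if wanted.contains(&path) {
            // Clone the header; the entry itself acts as the Read
            // implementation for the file's contents.
            let header = entry.header().clone();
            dst_ar.append(&header, entry)?;
        }
    }
    // Finish the tar stream, then the gzip stream.
    dst_ar.into_inner()?.finish()?;
    Ok(())
}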
impl<T: Clone> fmt::Display for Backup<T> {

View File

@@ -0,0 +1,86 @@
use crate::backup::Delta;
use serde::{Deserialize, Serialize};
use std::borrow::Borrow;
use std::collections::{HashMap, HashSet};
use std::ops::{Deref, DerefMut};
use std::path::{Path, PathBuf};
/// Represents the state of a backup: a mapping from each top-level directory to the set of
/// files it contains. This struct acts as a smart pointer around a HashMap.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct State(HashMap<PathBuf, HashSet<PathBuf>>);
impl State {
pub fn new() -> Self {
State(HashMap::new())
}
/// Apply the delta to the current state.
pub fn apply(&mut self, delta: &Delta) {
// First we add new files, then we remove the old ones
for (dir, added) in delta.added.iter() {
if let Some(current) = self.0.get_mut(dir) {
current.extend(added.iter().cloned());
} else {
self.0.insert(dir.clone(), added.clone());
}
}
for (dir, removed) in delta.removed.iter() {
if let Some(current) = self.0.get_mut(dir) {
current.retain(|k| !removed.contains(k));
}
}
}
/// Returns whether the provided path, relative to the tarball root, is part of this state.
pub fn contains<P: AsRef<Path>>(&self, path: P) -> bool {
let path = path.as_ref();
self.0.iter().any(|(dir, files)| {
path.starts_with(dir) && files.contains(path.strip_prefix(dir).unwrap())
})
}
}
impl<T> From<T> for State
where
T: IntoIterator,
T::Item: Borrow<Delta>,
{
fn from(deltas: T) -> Self {
let mut state = State::new();
for delta in deltas {
state.apply(delta.borrow());
}
state
}
}
impl AsRef<HashMap<PathBuf, HashSet<PathBuf>>> for State {
fn as_ref(&self) -> &HashMap<PathBuf, HashSet<PathBuf>> {
&self.0
}
}
impl Deref for State {
type Target = HashMap<PathBuf, HashSet<PathBuf>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for State {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl Default for State {
fn default() -> Self {
Self::new()
}
}
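A minimal sketch of the State semantics, again reusing the illustrative files helper from the delta.rs sketch:

use std::path::Path;

let mut delta = Delta::new();
delta.added.insert(PathBuf::from("worlds"), files(&["region.mca"]));

// Applying the delta to an empty state yields exactly the delta's additions.
let mut state = State::new();
state.apply(&delta);

// `contains` takes a path as it appears inside a tarball: a top-level
// directory followed by a path relative to that directory.
assert!(state.contains(Path::new("worlds/region.mca")));
assert!(!state.contains(Path::new("config/server.properties")));

// The From impl folds a whole chain of deltas into a single state.
assert_eq!(State::from([&delta]), state);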

View File

@@ -14,6 +14,8 @@ pub enum BackupCommands {
Create(BackupCreateArgs),
/// Restore a backup
Restore(BackupRestoreArgs),
/// Export a backup into a full archive
Export(BackupExportArgs),
}
#[derive(Args)]
@@ -36,13 +38,13 @@ pub struct BackupListArgs {
#[derive(Args)]
pub struct BackupRestoreArgs {
/// Path to the backup inside the backup directory
/// Path to the backup inside the backup directory to restore
path: PathBuf,
/// Directory to store config in
output_config: PathBuf,
/// Directory to store worlds in
output_worlds: PathBuf,
/// Whether to overwrite the contents of the existing directories
/// Whether to overwrite the contents of the output directories
#[arg(short, long, default_value_t = false)]
force: bool,
/// Create output directories if they don't exist
@@ -50,12 +52,24 @@ pub struct BackupRestoreArgs {
make: bool,
}
#[derive(Args)]
pub struct BackupExportArgs {
/// Path to the backup inside the backup directory to export
path: PathBuf,
/// Path to store the exported archive
output: PathBuf,
/// Create output directories if they don't exist
#[arg(short, long, default_value_t = false)]
make: bool,
}
impl BackupArgs {
pub fn run(&self, cli: &Cli) -> io::Result<()> {
match &self.command {
BackupCommands::Create(args) => args.run(cli),
BackupCommands::List(args) => args.run(cli),
BackupCommands::Restore(args) => args.run(cli),
BackupCommands::Export(args) => args.run(cli),
}
}
}
@@ -171,3 +185,48 @@ impl BackupListArgs {
Ok(())
}
}
impl BackupExportArgs {
pub fn run(&self, cli: &Cli) -> io::Result<()> {
let backup_dir = cli.backup.canonicalize()?;
if self.make {
if let Some(parent) = &self.output.parent() {
std::fs::create_dir_all(parent)?;
}
}
// Parse input path
let path = self.path.canonicalize()?;
if !path.starts_with(&backup_dir) {
return Err(other("Provided file is not inside the backup directory."));
}
let layer = if let Some(parent) = path.parent() {
// Backup files should be stored nested inside a layer's folder
if parent != backup_dir {
parent.file_name().unwrap().to_string_lossy()
} else {
return Err(other("Invalid path."));
}
} else {
return Err(other("Invalid path."));
};
let timestamp = if let Some(filename) = path.file_name() {
Utc.datetime_from_str(&filename.to_string_lossy(), Backup::FILENAME_FORMAT)
.map_err(|_| other("Invalid filename."))?
} else {
return Err(other("Invalid filename."));
};
let meta = cli.meta()?;
if let Some(res) = meta.export_backup(&layer, timestamp, &self.output) {
res
} else {
Err(other("Unknown layer"))
}
}
}
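For illustration (all values hypothetical): with a backup directory of /backups, an input path of /backups/worlds/2023-07-07_23-12-07 resolves to the layer "worlds", taken from the name of the parent directory, and the timestamp is parsed from the file name using Backup::FILENAME_FORMAT, whose exact format string is defined elsewhere in the crate.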