feat: possible incremental backup implementation using new abstraction

incremental-backups
Jef Roosens 2023-06-14 21:47:59 +02:00
parent b7a678e32f
commit fcc111b4ef
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
1 changed files with 204 additions and 28 deletions

View File

@ -5,6 +5,7 @@ use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::Arc;
#[link(name = "c")]
extern "C" {
@ -14,27 +15,6 @@ extern "C" {
/// `strftime`-style pattern used to derive an archive's file name from the time its backup was
/// started; the resulting name ends in `.tar.gz`.
const FILENAME_FORMAT: &str = "%Y-%m-%d_%H-%M-%S.tar.gz";
/// The kind of a backup.
pub enum BackupType {
/// A backup that does not consider previous backups.
Full,
/// A backup taken relative to a previous backup (NOTE(review): inferred from the variant name
/// and the `previous` link on `Backup` — confirm).
Incremental,
}
/// Represents a successful backup
pub struct Backup {
/// The preceding backup, if there is one (presumably the backup an incremental backup is
/// relative to — confirm against the code constructing this struct).
previous: Option<Box<Backup>>,
/// When the backup was started (also corresponds to the name)
start_time: chrono::DateTime<Utc>,
/// Type of the backup
type_: BackupType,
/// What files were added/modified in each part of the tarball.
pub added: HashMap<PathBuf, HashSet<PathBuf>>,
/// What files were removed in this backup, in comparison to the previous backup. For full
/// backups, this will always be empty, as they do not consider previous backups.
/// The map stores a separate list for each top-level directory, as the contents of these
/// directories can come from different source directories.
pub removed: HashMap<PathBuf, HashSet<PathBuf>>,
}
fn files(src_dir: PathBuf) -> io::Result<HashSet<PathBuf>> {
let mut dirs = vec![src_dir.clone()];
let mut files: HashSet<PathBuf> = HashSet::new();
@ -58,8 +38,148 @@ fn files(src_dir: PathBuf) -> io::Result<HashSet<PathBuf>> {
Ok(files)
}
/// Return false only if we can say with certainty that the file wasn't modified since the given
/// timestamp, true otherwise.
///
/// # Arguments
///
/// * `time` - Timestamp to compare the file's modification time against
/// * `path` - Path of the file to inspect; it is resolved like any other filesystem path, so a
///   relative path is interpreted relative to the current working directory
///
/// # Returns
///
/// `true` if the file's modification time is at or after `time`, or if the modification time
/// could not be determined; `false` only when the file is known to be older than `time`.
fn modified_since<T: AsRef<Path>>(time: chrono::DateTime<Utc>, path: T) -> bool {
    match path
        .as_ref()
        .metadata()
        .and_then(|metadata| metadata.modified())
    {
        // Both values denote absolute instants, so no timezone conversion is needed to compare
        // them.
        Ok(last_modified) => chrono::DateTime::<Utc>::from(last_modified) >= time,
        // Without a usable modification time we cannot prove the file is unchanged, so we err on
        // the side of treating it as modified rather than silently leaving it out of a backup.
        Err(_) => true,
    }
}
/// The kind of a backup.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackupType {
    /// A backup whose own delta describes its complete contents; it does not depend on any
    /// previous backup.
    Full,
    /// A backup that only stores the changes relative to its previous backup.
    Incremental,
}
/// Errors that can occur while working with the backup chain.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackupError {
    /// An incremental backup was encountered whose chain of previous backups does not end in a
    /// full backup, so its state cannot be reconstructed.
    NoFullAncestor,
}

impl std::fmt::Display for BackupError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            BackupError::NoFullAncestor => {
                f.write_str("incremental backup has no full backup among its ancestors")
            }
        }
    }
}

// Implementing the standard error trait lets this type be boxed as `dyn Error` and used as the
// source of other error types such as `io::Error`.
impl std::error::Error for BackupError {}

/// Shorthand for results whose error type is [`BackupError`].
type BackupResult<T> = Result<T, BackupError>;
/// Represents the changes relative to the previous backup
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct BackupDelta {
    /// What files were added/modified in each part of the tarball.
    pub added: HashMap<PathBuf, HashSet<PathBuf>>,
    /// What files were removed in this backup, in comparison to the previous backup. For full
    /// backups, this will always be empty, as they do not consider previous backups.
    /// The map stores a separate list for each top-level directory, as the contents of these
    /// directories can come from different source directories.
    pub removed: HashMap<PathBuf, HashSet<PathBuf>>,
}

impl BackupDelta {
    /// Create an empty delta, i.e. one that records no added and no removed files.
    pub fn new() -> Self {
        Self::default()
    }

    /// Update the current state so that its result becomes the merge of itself and the other
    /// state.
    ///
    /// `delta` is treated as the more recent of the two: a file it adds is no longer considered
    /// removed, and a file it removes is no longer considered added.
    pub fn merge(&mut self, delta: &BackupDelta) {
        for (dir, added) in &delta.added {
            // Files that were removed in the current state, but added in the new state, are no
            // longer removed
            if let Some(orig_removed) = self.removed.get_mut(dir) {
                orig_removed.retain(|k| !added.contains(k));
            }

            // Newly added files are added to the state as well
            self.added
                .entry(dir.clone())
                .or_default()
                .extend(added.iter().cloned());
        }

        for (dir, removed) in &delta.removed {
            // Files that were originally added, but now deleted are removed from the added list
            if let Some(orig_added) = self.added.get_mut(dir) {
                orig_added.retain(|k| !removed.contains(k));
            }

            // Newly removed files are added to the state as well
            self.removed
                .entry(dir.clone())
                .or_default()
                .extend(removed.iter().cloned());
        }
    }

    /// Modify the given state by applying this delta's changes to it
    ///
    /// `state` maps each top-level directory to the set of files it contains. Directories that
    /// only appear in the removed list of this delta are left untouched, as there is nothing to
    /// remove from them.
    pub fn apply(&self, state: &mut HashMap<PathBuf, HashSet<PathBuf>>) {
        // First we add new files, then we remove the old ones
        for (dir, added) in &self.added {
            state
                .entry(dir.clone())
                .or_default()
                .extend(added.iter().cloned());
        }

        for (dir, removed) in &self.removed {
            if let Some(current) = state.get_mut(dir) {
                current.retain(|k| !removed.contains(k));
            }
        }
    }
}
/// Represents a successful backup
pub struct Backup {
/// The backup this one was created from; `None` for a backup that has no predecessor.
previous: Option<Arc<Backup>>,
/// When the backup was started (also corresponds to the name)
start_time: chrono::DateTime<Utc>,
/// Type of the backup
type_: BackupType,
/// The files added/modified and removed by this backup, relative to the previous backup.
delta: BackupDelta,
}
impl Backup {
/// Calculate the full state of the backup by applying all its ancestors' deltas in order,
/// starting from the last full ancestor.
///
/// # Returns
///
/// A map from each top-level directory in the archive to the set of files it contains.
///
/// # Errors
///
/// Returns [`BackupError::NoFullAncestor`] if the chain of previous backups ends before a
/// full backup is reached.
pub fn state(&self) -> BackupResult<HashMap<PathBuf, HashSet<PathBuf>>> {
    // Walk back to the closest full backup, remembering every backup passed on the way. This
    // is done iteratively so the stack depth does not grow with the length of the chain.
    let mut chain = vec![self];
    let mut current = self;

    while current.type_ != BackupType::Full {
        current = current
            .previous
            .as_deref()
            .ok_or(BackupError::NoFullAncestor)?;
        chain.push(current);
    }

    // Replay the deltas from oldest (the full backup) to newest (this backup)
    let mut state = HashMap::new();

    for backup in chain.into_iter().rev() {
        backup.delta.apply(&mut state);
    }

    Ok(state)
}
/// Create a new Full backup, populated with the given directories.
///
/// # Arguments
///
/// * `backup_dir` - Directory to store archive in
/// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name
/// under which `src_dir`'s contents should be stored in the archive
///
/// # Returns
///
/// The `Backup` instance describing this new backup.
pub fn create<P: AsRef<Path>>(
backup_dir: P,
dirs: Vec<(PathBuf, PathBuf)>,
@ -79,7 +199,7 @@ impl Backup {
let files = files(src_dir.clone())?;
for path in &files {
ar.append_path_with_name(dir_in_tar.join(&path), src_dir.join(&path))?;
ar.append_path_with_name(dir_in_tar.join(path), src_dir.join(path))?;
}
added.insert(dir_in_tar, files);
@ -89,8 +209,58 @@ impl Backup {
previous: None,
type_: BackupType::Full,
start_time,
delta: BackupDelta {
added,
removed: HashMap::new(),
},
})
}
/// Create a new incremental backup from a given previous backup
///
/// Only files that were (possibly) modified since the previous backup was started are written
/// to the new archive. Files that are part of the previous backup's state but no longer
/// present in their source directory are recorded as removed.
///
/// # Arguments
///
/// * `previous` - Backup the new backup is relative to
/// * `backup_dir` - Directory to store archive in
/// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name
///   under which `src_dir`'s contents should be stored in the archive
///
/// # Returns
///
/// The `Backup` instance describing this new backup.
///
/// # Errors
///
/// Returns an error if the state of `previous` cannot be determined, or if reading the source
/// directories or writing the archive fails.
pub fn create_from<P: AsRef<Path>>(
    previous: Arc<Backup>,
    backup_dir: P,
    dirs: Vec<(PathBuf, PathBuf)>,
) -> io::Result<Self> {
    // Resolve the previous state before touching the filesystem, so a broken backup chain is
    // reported as an error without leaving an empty archive behind.
    let previous_state = previous
        .state()
        .map_err(|err| io::Error::new(io::ErrorKind::Other, format!("{:?}", err)))?;

    let start_time = chrono::offset::Utc::now();
    let filename = start_time.format(FILENAME_FORMAT).to_string();
    let path = backup_dir.as_ref().join(filename);
    let tar_gz = File::create(path)?;
    let enc = GzEncoder::new(tar_gz, Compression::default());
    let mut ar = tar::Builder::new(enc);

    let mut delta = BackupDelta::new();

    for (dir_in_tar, src_dir) in dirs {
        let current_files = files(src_dir.clone())?;

        // The collected paths are relative to `src_dir`, so they have to be joined with it
        // before the file on disk can be inspected.
        let added_files: HashSet<PathBuf> = current_files
            .iter()
            .filter(|p| modified_since(previous.start_time, src_dir.join(p)))
            .cloned()
            .collect();

        for path in &added_files {
            // First argument is the file on disk, second is its name inside the archive
            ar.append_path_with_name(src_dir.join(path), dir_in_tar.join(path))?;
        }

        if let Some(previous_files) = previous_state.get(&dir_in_tar) {
            delta.removed.insert(
                dir_in_tar.clone(),
                previous_files.difference(&current_files).cloned().collect(),
            );
        }

        delta.added.insert(dir_in_tar, added_files);
    }

    // Finish the tar and gzip streams explicitly; when they are merely dropped, any error
    // while writing the trailing data is silently discarded.
    ar.into_inner()?.finish()?;

    Ok(Backup {
        previous: Some(previous),
        type_: BackupType::Incremental,
        start_time,
        delta,
    })
}
}
@ -100,7 +270,7 @@ pub struct BackupManager {
config_dir: PathBuf,
world_dir: PathBuf,
max_backups: u64,
last_backup: Option<Backup>,
last_backup: Option<Arc<Backup>>,
}
impl BackupManager {
@ -124,10 +294,16 @@ impl BackupManager {
(PathBuf::from("config"), self.config_dir.clone()),
(PathBuf::from("worlds"), self.world_dir.clone()),
];
if let Some(last_backup) = &self.last_backup {
todo!();
let clone = last_backup.clone();
self.last_backup = Some(Arc::new(Backup::create_from(
clone,
&self.backup_dir,
dirs,
)?));
} else {
self.last_backup = Some(Backup::create(&self.backup_dir, dirs)?);
self.last_backup = Some(Arc::new(Backup::create(&self.backup_dir, dirs)?));
}
Ok(())