alex/src/backup/mod.rs

315 lines
10 KiB
Rust

mod delta;
mod io_ext;
pub mod manager;
mod path;
mod state;
use delta::Delta;
pub use manager::Manager;
pub use manager::ManagerConfig;
pub use manager::MetaManager;
pub use state::State;
use chrono::Utc;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;
use path::PathExt;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::fmt;
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
const BYTE_SUFFIXES: [&str; 5] = ["B", "KiB", "MiB", "GiB", "TiB"];
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub enum BackupType {
Full,
Incremental,
}
/// Represents a successful backup
#[derive(Serialize, Deserialize, Debug)]
pub struct Backup<T: Clone> {
/// When the backup was started (also corresponds to the name)
pub start_time: chrono::DateTime<Utc>,
/// When the backup finished
pub end_time: chrono::DateTime<Utc>,
pub size: usize,
/// Type of the backup
pub type_: BackupType,
pub delta: Delta,
/// Additional metadata that can be associated with a given backup
pub metadata: Option<T>,
}
impl Backup<()> {
pub const FILENAME_FORMAT: &str = "%Y-%m-%d_%H-%M-%S.tar.gz";
/// Return the path to a backup file by properly formatting the data.
pub fn path<P: AsRef<Path>>(backup_dir: P, start_time: chrono::DateTime<Utc>) -> PathBuf {
let backup_dir = backup_dir.as_ref();
let filename = format!("{}", start_time.format(Self::FILENAME_FORMAT));
backup_dir.join(filename)
}
/// Extract an archive.
///
/// # Arguments
///
/// * `backup_path` - Path to the archive to extract
/// * `dirs` - list of tuples `(path_in_tar, dst_dir)` with `dst_dir` the directory on-disk
/// where the files stored under `path_in_tar` inside the tarball should be extracted to.
pub fn extract_archive<P: AsRef<Path>>(
archive_path: P,
dirs: &Vec<(PathBuf, PathBuf)>,
) -> io::Result<()> {
let tar_gz = File::open(archive_path)?;
let enc = GzDecoder::new(tar_gz);
let mut ar = tar::Archive::new(enc);
// Unpack each file by matching it with one of the destination directories and extracting
// it to the right path
for entry in ar.entries()? {
let mut entry = entry?;
let entry_path_in_tar = entry.path()?.to_path_buf();
for (path_in_tar, dst_dir) in dirs {
if entry_path_in_tar.starts_with(path_in_tar) {
let dst_path =
dst_dir.join(entry_path_in_tar.strip_prefix(path_in_tar).unwrap());
// Ensure all parent directories are present
std::fs::create_dir_all(dst_path.parent().unwrap())?;
entry.unpack(dst_path)?;
break;
}
}
}
Ok(())
}
}
impl<T: Clone> Backup<T> {
/// Set the backup's metadata.
pub fn set_metadata(&mut self, metadata: T) {
self.metadata = Some(metadata);
}
/// Create a new Full backup, populated with the given directories.
///
/// # Arguments
///
/// * `backup_dir` - Directory to store archive in
/// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name
/// under which `src_dir`'s contents should be stored in the archive
///
/// # Returns
///
/// The `Backup` instance describing this new backup.
pub fn create<P: AsRef<Path>>(
backup_dir: P,
dirs: &Vec<(PathBuf, PathBuf)>,
) -> io::Result<Self> {
let start_time = chrono::offset::Utc::now();
let path = Backup::path(backup_dir, start_time);
let tar_gz = io_ext::CountingWrite::new(File::create(path)?);
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut ar = tar::Builder::new(enc);
let mut delta = Delta::new();
for (dir_in_tar, src_dir) in dirs {
let mut added_files: HashSet<PathBuf> = HashSet::new();
for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
let path = entry?.path();
let stripped = path.strip_prefix(src_dir).unwrap();
ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
added_files.insert(stripped.to_path_buf());
}
delta.added.insert(dir_in_tar.to_path_buf(), added_files);
}
let mut enc = ar.into_inner()?;
// The docs recommend running try_finish before unwrapping using finish
enc.try_finish()?;
let tar_gz = enc.finish()?;
Ok(Backup {
type_: BackupType::Full,
start_time,
end_time: chrono::Utc::now(),
size: tar_gz.bytes_written(),
delta,
metadata: None,
})
}
/// Create a new Incremental backup from the given state, populated with the given directories.
///
/// # Arguments
///
/// * `previous_state` - State the file system was in during the previous backup in the chain
/// * `previous_start_time` - Start time of the previous backup; used to filter files
/// * `backup_dir` - Directory to store archive in
/// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name
/// under which `src_dir`'s contents should be stored in the archive
///
/// # Returns
///
/// The `Backup` instance describing this new backup.
pub fn create_from<P: AsRef<Path>>(
previous_state: State,
previous_start_time: chrono::DateTime<Utc>,
backup_dir: P,
dirs: &Vec<(PathBuf, PathBuf)>,
) -> io::Result<Self> {
let start_time = chrono::offset::Utc::now();
let path = Backup::path(backup_dir, start_time);
let tar_gz = io_ext::CountingWrite::new(File::create(path)?);
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut ar = tar::Builder::new(enc);
let mut delta = Delta::new();
for (dir_in_tar, src_dir) in dirs {
let mut all_files: HashSet<PathBuf> = HashSet::new();
let mut added_files: HashSet<PathBuf> = HashSet::new();
for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
let path = entry?.path();
let stripped = path.strip_prefix(src_dir).unwrap();
if !path.not_modified_since(previous_start_time) {
ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
added_files.insert(stripped.to_path_buf());
}
all_files.insert(stripped.to_path_buf());
}
delta.added.insert(dir_in_tar.clone(), added_files);
if let Some(previous_files) = previous_state.get(dir_in_tar) {
delta.removed.insert(
dir_in_tar.to_path_buf(),
previous_files.difference(&all_files).cloned().collect(),
);
}
}
let mut enc = ar.into_inner()?;
// The docs recommend running try_finish before unwrapping using finish
enc.try_finish()?;
let tar_gz = enc.finish()?;
Ok(Backup {
type_: BackupType::Incremental,
start_time,
end_time: chrono::Utc::now(),
size: tar_gz.bytes_written(),
delta,
metadata: None,
})
}
/// Restore the backup by extracting its contents to the respective directories.
///
/// # Arguments
///
/// * `backup_dir` - Backup directory where the file is stored
/// * `dirs` - list of tuples `(path_in_tar, dst_dir)` with `dst_dir` the directory on-disk
/// where the files stored under `path_in_tar` inside the tarball should be extracted to.
pub fn restore<P: AsRef<Path>>(
&self,
backup_dir: P,
dirs: &Vec<(PathBuf, PathBuf)>,
) -> io::Result<()> {
let backup_path = Backup::path(backup_dir, self.start_time);
Backup::extract_archive(backup_path, dirs)?;
// Remove any files
for (path_in_tar, dst_dir) in dirs {
if let Some(removed) = self.delta.removed.get(path_in_tar) {
for path in removed {
let dst_path = dst_dir.join(path);
std::fs::remove_file(dst_path)?;
}
}
}
Ok(())
}
pub fn open<P: AsRef<Path>>(&self, backup_dir: P) -> io::Result<tar::Archive<GzDecoder<File>>> {
let path = Backup::path(backup_dir, self.start_time);
let tar_gz = File::open(path)?;
let enc = GzDecoder::new(tar_gz);
Ok(tar::Archive::new(enc))
}
/// Open this backup's archive and append all its files that are part of the provided state to
/// the archive file.
pub fn append<P: AsRef<Path>>(
&self,
backup_dir: P,
state: &State,
ar: &mut tar::Builder<GzEncoder<File>>,
) -> io::Result<()> {
let mut own_ar = self.open(backup_dir)?;
for entry in own_ar.entries()? {
let entry = entry?;
let entry_path_in_tar = entry.path()?.to_path_buf();
if state.contains(&entry_path_in_tar) {
let header = entry.header().clone();
ar.append(&header, entry)?;
}
}
Ok(())
}
}
impl<T: Clone> fmt::Display for Backup<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let letter = match self.type_ {
BackupType::Full => 'F',
BackupType::Incremental => 'I',
};
// Pretty-print size
// If your backup is a petabyte or larger, this will crash and you need to re-evaluate your
// life choices
let index = self.size.ilog(1024) as usize;
let size = self.size as f64 / (1024.0_f64.powi(index as i32));
let duration = self.end_time - self.start_time;
write!(
f,
"{} ({}, {}m{}s, {:.2}{}, {})",
self.start_time.format(Backup::FILENAME_FORMAT),
letter,
duration.num_seconds() / 60,
duration.num_seconds() % 60,
size,
BYTE_SUFFIXES[index],
self.delta
)
}
}