mod delta;
mod io_ext;
pub mod manager;
mod path;
mod state;

use delta::Delta;
pub use manager::Manager;
pub use manager::ManagerConfig;
pub use manager::MetaManager;
pub use state::State;

use chrono::Utc;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;
use path::PathExt;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::fmt;
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};

const BYTE_SUFFIXES: [&str; 5] = ["B", "KiB", "MiB", "GiB", "TiB"];

#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub enum BackupType {
    Full,
    Incremental,
}

/// Represents a successful backup
#[derive(Serialize, Deserialize, Debug)]
pub struct Backup<T> {
    /// When the backup was started (also corresponds to the name)
    pub start_time: chrono::DateTime<Utc>,
    /// When the backup finished
    pub end_time: chrono::DateTime<Utc>,
    pub size: usize,
    /// Type of the backup
    pub type_: BackupType,
    pub delta: Delta,
    /// Additional metadata that can be associated with a given backup
    pub metadata: Option<T>,
}

impl Backup<()> {
    pub const FILENAME_FORMAT: &str = "%Y-%m-%d_%H-%M-%S.tar.gz";

    /// Return the path to a backup file by properly formatting the data.
    pub fn path<P: AsRef<Path>>(backup_dir: P, start_time: chrono::DateTime<Utc>) -> PathBuf {
        let backup_dir = backup_dir.as_ref();

        let filename = format!("{}", start_time.format(Self::FILENAME_FORMAT));

        backup_dir.join(filename)
    }

    /// Extract an archive.
    ///
    /// # Arguments
    ///
    /// * `archive_path` - Path to the archive to extract
    /// * `dirs` - list of tuples `(path_in_tar, dst_dir)` with `dst_dir` the directory on-disk
    ///   where the files stored under `path_in_tar` inside the tarball should be extracted to.
    pub fn extract_archive<P: AsRef<Path>>(
        archive_path: P,
        dirs: &Vec<(PathBuf, PathBuf)>,
    ) -> io::Result<()> {
        let tar_gz = File::open(archive_path)?;
        let enc = GzDecoder::new(tar_gz);
        let mut ar = tar::Archive::new(enc);

        // Unpack each file by matching it with one of the destination directories and extracting
        // it to the right path
        for entry in ar.entries()? {
            let mut entry = entry?;
            let entry_path_in_tar = entry.path()?.to_path_buf();

            for (path_in_tar, dst_dir) in dirs {
                if entry_path_in_tar.starts_with(path_in_tar) {
                    let dst_path =
                        dst_dir.join(entry_path_in_tar.strip_prefix(path_in_tar).unwrap());

                    // Ensure all parent directories are present
                    std::fs::create_dir_all(dst_path.parent().unwrap())?;

                    entry.unpack(dst_path)?;

                    break;
                }
            }
        }

        Ok(())
    }
}

impl<T> Backup<T> {
    /// Set the backup's metadata.
    pub fn set_metadata(&mut self, metadata: T) {
        self.metadata = Some(metadata);
    }

    /// Create a new `Full` backup, populated with the given directories.
    ///
    /// # Arguments
    ///
    /// * `backup_dir` - Directory to store the archive in
    /// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name
    ///   under which `src_dir`'s contents should be stored in the archive
    ///
    /// # Returns
    ///
    /// The `Backup` instance describing this new backup.
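    ///
    /// # Example
    ///
    /// A minimal usage sketch (marked `ignore`: the backup directory and source
    /// paths below are illustrative, not part of this crate):
    ///
    /// ```ignore
    /// use std::path::PathBuf;
    ///
    /// // Store the contents of ./world under "world" inside the tarball
    /// let dirs = vec![(PathBuf::from("world"), PathBuf::from("./world"))];
    /// let backup: Backup<()> = Backup::create("./backups", &dirs)?;
    /// println!("{}", backup);
    /// ```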
    pub fn create<P: AsRef<Path>>(
        backup_dir: P,
        dirs: &Vec<(PathBuf, PathBuf)>,
    ) -> io::Result<Self> {
        let start_time = chrono::offset::Utc::now();

        let path = Backup::path(backup_dir, start_time);
        let tar_gz = io_ext::CountingWrite::new(File::create(path)?);
        let enc = GzEncoder::new(tar_gz, Compression::default());
        let mut ar = tar::Builder::new(enc);

        let mut delta = Delta::new();

        for (dir_in_tar, src_dir) in dirs {
            let mut added_files: HashSet<PathBuf> = HashSet::new();

            for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
                let path = entry?.path();
                let stripped = path.strip_prefix(src_dir).unwrap();

                ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
                added_files.insert(stripped.to_path_buf());
            }

            delta.added.insert(dir_in_tar.to_path_buf(), added_files);
        }

        let mut enc = ar.into_inner()?;

        // The docs recommend running try_finish before unwrapping using finish
        enc.try_finish()?;
        let tar_gz = enc.finish()?;

        Ok(Backup {
            type_: BackupType::Full,
            start_time,
            end_time: chrono::Utc::now(),
            size: tar_gz.bytes_written(),
            delta,
            metadata: None,
        })
    }

    /// Create a new `Incremental` backup from the given state, populated with the given
    /// directories.
    ///
    /// # Arguments
    ///
    /// * `previous_state` - State the file system was in during the previous backup in the chain
    /// * `previous_start_time` - Start time of the previous backup; used to filter files
    /// * `backup_dir` - Directory to store the archive in
    /// * `dirs` - list of tuples `(path_in_tar, src_dir)` with `path_in_tar` the directory name
    ///   under which `src_dir`'s contents should be stored in the archive
    ///
    /// # Returns
    ///
    /// The `Backup` instance describing this new backup.
    pub fn create_from<P: AsRef<Path>>(
        previous_state: State,
        previous_start_time: chrono::DateTime<Utc>,
        backup_dir: P,
        dirs: &Vec<(PathBuf, PathBuf)>,
    ) -> io::Result<Self> {
        let start_time = chrono::offset::Utc::now();

        let path = Backup::path(backup_dir, start_time);
        let tar_gz = io_ext::CountingWrite::new(File::create(path)?);
        let enc = GzEncoder::new(tar_gz, Compression::default());
        let mut ar = tar::Builder::new(enc);

        let mut delta = Delta::new();

        for (dir_in_tar, src_dir) in dirs {
            let mut all_files: HashSet<PathBuf> = HashSet::new();
            let mut added_files: HashSet<PathBuf> = HashSet::new();

            for entry in src_dir.read_dir_recursive()?.ignored("cache").files() {
                let path = entry?.path();
                let stripped = path.strip_prefix(src_dir).unwrap();

                // Only archive files modified since the previous backup started
                if !path.not_modified_since(previous_start_time) {
                    ar.append_path_with_name(&path, dir_in_tar.join(stripped))?;
                    added_files.insert(stripped.to_path_buf());
                }

                all_files.insert(stripped.to_path_buf());
            }

            delta.added.insert(dir_in_tar.clone(), added_files);

            // Files present in the previous state but no longer on disk were removed since then
            if let Some(previous_files) = previous_state.get(dir_in_tar) {
                delta.removed.insert(
                    dir_in_tar.to_path_buf(),
                    previous_files.difference(&all_files).cloned().collect(),
                );
            }
        }

        let mut enc = ar.into_inner()?;

        // The docs recommend running try_finish before unwrapping using finish
        enc.try_finish()?;
        let tar_gz = enc.finish()?;

        Ok(Backup {
            type_: BackupType::Incremental,
            start_time,
            end_time: chrono::Utc::now(),
            size: tar_gz.bytes_written(),
            delta,
            metadata: None,
        })
    }

    /// Restore the backup by extracting its contents to the respective directories.
    ///
    /// # Arguments
    ///
    /// * `backup_dir` - Backup directory where the file is stored
    /// * `dirs` - list of tuples `(path_in_tar, dst_dir)` with `dst_dir` the directory on-disk
    ///   where the files stored under `path_in_tar` inside the tarball should be extracted to.
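    ///
    /// # Example
    ///
    /// A sketch of creating and then restoring a backup (marked `ignore`; the
    /// paths are illustrative, not part of this crate):
    ///
    /// ```ignore
    /// use std::path::PathBuf;
    ///
    /// let dirs = vec![(PathBuf::from("world"), PathBuf::from("./world"))];
    /// let backup: Backup<()> = Backup::create("./backups", &dirs)?;
    ///
    /// // Later: extract the archive again and delete files the delta marks as removed
    /// backup.restore("./backups", &dirs)?;
    /// ```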
    pub fn restore<P: AsRef<Path>>(
        &self,
        backup_dir: P,
        dirs: &Vec<(PathBuf, PathBuf)>,
    ) -> io::Result<()> {
        let backup_path = Backup::path(backup_dir, self.start_time);
        Backup::extract_archive(backup_path, dirs)?;

        // Remove any files that this backup's delta marks as removed
        for (path_in_tar, dst_dir) in dirs {
            if let Some(removed) = self.delta.removed.get(path_in_tar) {
                for path in removed {
                    let dst_path = dst_dir.join(path);

                    std::fs::remove_file(dst_path)?;
                }
            }
        }

        Ok(())
    }

    /// Open this backup's archive for reading.
    pub fn open<P: AsRef<Path>>(
        &self,
        backup_dir: P,
    ) -> io::Result<tar::Archive<GzDecoder<File>>> {
        let path = Backup::path(backup_dir, self.start_time);
        let tar_gz = File::open(path)?;
        let enc = GzDecoder::new(tar_gz);

        Ok(tar::Archive::new(enc))
    }

    /// Open this backup's archive and append all its files that are part of the provided state to
    /// the archive file.
    pub fn append<P: AsRef<Path>>(
        &self,
        backup_dir: P,
        state: &State,
        ar: &mut tar::Builder<GzEncoder<File>>,
    ) -> io::Result<()> {
        let mut own_ar = self.open(backup_dir)?;

        for entry in own_ar.entries()? {
            let entry = entry?;
            let entry_path_in_tar = entry.path()?.to_path_buf();

            if state.contains(&entry_path_in_tar) {
                let header = entry.header().clone();

                ar.append(&header, entry)?;
            }
        }

        Ok(())
    }
}

impl<T> fmt::Display for Backup<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let letter = match self.type_ {
            BackupType::Full => 'F',
            BackupType::Incremental => 'I',
        };

        // Pretty-print size
        // If your backup is a petabyte or larger, this will crash and you need to re-evaluate your
        // life choices
        let index = self.size.ilog(1024) as usize;
        let size = self.size as f64 / 1024.0_f64.powi(index as i32);

        let duration = self.end_time - self.start_time;

        write!(
            f,
            "{} ({}, {}m{}s, {:.2}{}, {})",
            self.start_time.format(Backup::FILENAME_FORMAT),
            letter,
            duration.num_seconds() / 60,
            duration.num_seconds() % 60,
            size,
            BYTE_SUFFIXES[index],
            self.delta
        )
    }
}
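
#[cfg(test)]
mod tests {
    use super::*;

    // A small sanity check of the byte-suffix math in the `Display` impl above;
    // a sketch under the assumption that `Delta::new()` yields an empty delta.
    // Only the size fragment is asserted, since `Delta`'s own `Display` output
    // is defined elsewhere.
    #[test]
    fn display_formats_size_with_binary_suffix() {
        let backup: Backup<()> = Backup {
            start_time: Utc::now(),
            end_time: Utc::now(),
            size: 2048,
            type_: BackupType::Full,
            delta: Delta::new(),
            metadata: None,
        };

        // 2048 bytes => index 1 => 2048 / 1024 = 2.00 KiB
        assert!(format!("{}", backup).contains("2.00KiB"));
    }
}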