From 67b4640e569e477196e7e5b2eddf18425e2e0a7a Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sat, 15 Jun 2024 18:12:14 +0200 Subject: [PATCH] feat: add package cleaning --- server/src/db/query/package.rs | 47 +++++++++++++++++++++++++++------- server/src/repo/manager2.rs | 34 +++++++++++++++++++----- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/server/src/db/query/package.rs b/server/src/db/query/package.rs index 5e400ea..0115f5b 100644 --- a/server/src/db/query/package.rs +++ b/server/src/db/query/package.rs @@ -2,7 +2,7 @@ use crate::db::{self, *}; use futures::Stream; use sea_orm::{sea_query::IntoCondition, *}; -use sea_query::{Alias, Expr, Query}; +use sea_query::{Alias, Expr, Query, SelectStatement}; use serde::Deserialize; #[derive(Deserialize)] @@ -218,11 +218,15 @@ pub async fn full(conn: &DbConn, id: i32) -> Result> { #[derive(FromQueryResult)] pub struct PkgToRemove { - repo_id: i32, - id: i32, + pub repo_id: i32, + pub id: i32, } -pub fn to_be_removed_query(conn: &DbConn) -> SelectorRaw> { +fn stale_pkgs_query(include_repo: bool) -> SelectStatement { + // In each repository, only one version of a package can exist for any given arch. Because ids + // are monotonically increasing, we know that the row that represents the actual package + // currently in the repository is the row with the largest id whose state is "committed". This + // query finds this id for each (repo, arch, name) tuple. let mut max_id_query = Query::select(); max_id_query .from(db::package::Entity) @@ -243,12 +247,23 @@ pub fn to_be_removed_query(conn: &DbConn) -> SelectorRaw SelectorRaw SelectorRaw> { + let query = stale_pkgs_query(true); let builder = conn.get_database_backend(); let sql = builder.build(&query); PkgToRemove::find_by_statement(sql) } + +pub async fn delete_stale_pkgs(conn: &DbConn, max_id: i32) -> crate::Result<()> { + Ok(db::Package::delete_many() + .filter(db::package::Column::Id.lte(max_id)) + .filter(db::package::Column::Id.in_subquery(stale_pkgs_query(false))) + .exec(conn) + .await + .map(|_| ())?) +} diff --git a/server/src/repo/manager2.rs b/server/src/repo/manager2.rs index b0df209..67d36eb 100644 --- a/server/src/repo/manager2.rs +++ b/server/src/repo/manager2.rs @@ -1,5 +1,5 @@ use super::{archive, package}; -use crate::db; +use crate::db::{self, query::package::delete_stale_pkgs}; use std::{ collections::HashMap, @@ -103,16 +103,38 @@ impl RepoMgr { } /// Clean any remaining old package files from the database and file system - pub async fn clean(&self) -> crate::Result<()> { - let mut pkgs = db::query::package::to_be_removed_query(&self.conn) + pub async fn remove_stale_pkgs(&self) -> crate::Result<()> { + let mut pkgs = db::query::package::stale_pkgs(&self.conn) .stream(&self.conn) .await?; + let mut max_id = -1; + let mut removed_pkgs = 0; + + // TODO track largest ID seen, then perform similar query to above except we remove the + // matched IDs, but only if they're smaller than or equal to the largest seen ID so we + // don't remove newly added packages while let Some(pkg) = pkgs.next().await.transpose()? { - // TODO remove package from file system and database + // Failing to remove the package file isn't the biggest problem + let _ = tokio::fs::remove_file( + self.repos_dir + .join(pkg.repo_id.to_string()) + .join(pkg.id.to_string()), + ) + .await; + + if pkg.id > max_id { + max_id = pkg.id; + } + + removed_pkgs += 1; } - // TODO log indicating how many packages were cleaned + if removed_pkgs > 0 { + db::query::package::delete_stale_pkgs(&self.conn, max_id).await?; + } + + tracing::info!("Removed {removed_pkgs} stale package(s)"); Ok(()) } @@ -233,7 +255,7 @@ impl RepoMgr { // TODO move this so that we only clean if entire queue is empty, not just // queue for specific repo - let _ = self.clean().await; + let _ = self.remove_stale_pkgs().await; } } }