From e3b0f4f0a1c7cf55c8c35d3e370877b7c4920411 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Thu, 27 Jun 2024 11:39:04 +0200 Subject: [PATCH 1/2] feat: chunk large database inserts --- server/src/db/query/package.rs | 44 ++++++++++++++++++++++------------ server/src/main.rs | 1 + server/src/util.rs | 23 ++++++++++++++++++ 3 files changed, 53 insertions(+), 15 deletions(-) create mode 100644 server/src/util.rs diff --git a/server/src/db/query/package.rs b/server/src/db/query/package.rs index ad9d74a..9a8be5f 100644 --- a/server/src/db/query/package.rs +++ b/server/src/db/query/package.rs @@ -4,6 +4,9 @@ use sea_orm::{sea_query::IntoCondition, *}; use sea_query::{Alias, Expr, Query, SelectStatement}; use serde::Deserialize; +/// How many fields may be inserted at once into the database. +const PACKAGE_INSERT_LIMIT: usize = 1000; + #[derive(Deserialize)] pub struct Filter { repo: Option, @@ -160,23 +163,34 @@ pub async fn insert( .iter() .map(|s| (PackageRelatedEnum::Optdepend, s)), ); + let related = crate::util::Chunked::new(related, PACKAGE_INSERT_LIMIT); - PackageRelated::insert_many(related.map(|(t, s)| package_related::ActiveModel { - package_id: Set(pkg_entry.id), - r#type: Set(t), - name: Set(s.to_string()), - })) - .on_empty_do_nothing() - .exec(&txn) - .await?; + for chunk in related { + PackageRelated::insert_many( + chunk + .into_iter() + .map(|(t, s)| package_related::ActiveModel { + package_id: Set(pkg_entry.id), + r#type: Set(t), + name: Set(s.to_string()), + }), + ) + .on_empty_do_nothing() + .exec(&txn) + .await?; + } - PackageFile::insert_many(pkg.files.iter().map(|s| package_file::ActiveModel { - package_id: Set(pkg_entry.id), - path: Set(s.display().to_string()), - })) - .on_empty_do_nothing() - .exec(&txn) - .await?; + let files = crate::util::Chunked::new(pkg.files, PACKAGE_INSERT_LIMIT); + + for chunk in files { + PackageFile::insert_many(chunk.into_iter().map(|s| package_file::ActiveModel { + package_id: Set(pkg_entry.id), + path: Set(s.display().to_string()), + })) + .on_empty_do_nothing() + .exec(&txn) + .await?; + } txn.commit().await?; diff --git a/server/src/main.rs b/server/src/main.rs index 5a91fdb..cb66668 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -3,6 +3,7 @@ mod config; pub mod db; mod error; mod repo; +mod util; mod web; pub use config::{Config, DbConfig, FsConfig}; diff --git a/server/src/util.rs b/server/src/util.rs new file mode 100644 index 0000000..9aad122 --- /dev/null +++ b/server/src/util.rs @@ -0,0 +1,23 @@ +pub struct Chunked { + iter: I, + chunk_size: usize, +} + +impl Chunked { + pub fn new>(into: T, chunk_size: usize) -> Self { + Self { + iter: into.into_iter(), + chunk_size, + } + } +} + +// https://users.rust-lang.org/t/how-to-breakup-an-iterator-into-chunks/87915/5 +impl Iterator for Chunked { + type Item = Vec; + + fn next(&mut self) -> Option { + Some(self.iter.by_ref().take(self.chunk_size).collect()) + .filter(|chunk: &Vec<_>| !chunk.is_empty()) + } +} From 86ab143271ecb866a8cce1db2234e5d0d587dbd3 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Thu, 27 Jun 2024 13:52:07 +0200 Subject: [PATCH 2/2] fix(package): ignore all files that start with a dot --- server/src/repo/package.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/server/src/repo/package.rs b/server/src/repo/package.rs index 70466ba..996f933 100644 --- a/server/src/repo/package.rs +++ b/server/src/repo/package.rs @@ -13,8 +13,6 @@ use libarchive::{ }; use sea_orm::ActiveValue::Set; -const IGNORED_FILES: [&str; 5] = [".BUILDINFO", ".INSTALL", ".MTREE", ".PKGINFO", ".CHANGELOG"]; - #[derive(Debug, Clone)] pub struct Package { pub path: PathBuf, @@ -158,11 +156,9 @@ impl Package { let entry = entry?; let path_name = entry.pathname(); - if !IGNORED_FILES.iter().any(|p| p == &path_name) { + if !path_name.starts_with('.') { files.push(PathBuf::from(path_name)); - } - - if path_name == ".PKGINFO" { + } else if path_name == ".PKGINFO" { info = Some(PkgInfo::parse(entry)?); } }