From 358896c7d5a372dd8c13cfcfdf1b17254e074d3d Mon Sep 17 00:00:00 2001 From: Froidefond Julien Date: Thu, 12 Mar 2026 22:15:41 +0100 Subject: [PATCH] =?UTF-8?q?perf(indexer):=20=C3=A9liminer=20le=20pre-count?= =?UTF-8?q?=20WalkDir=20en=20mode=20incr=C3=A9mental=20+=20concurrence=20a?= =?UTF-8?q?daptative?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Incremental rebuild: remplace le WalkDir de comptage par un COUNT(*) SQL → incrémental 67s → 25s (-62%) sur disque externe - Full rebuild: conserve le WalkDir (DB vidée avant le comptage) - Concurrence par défaut: num_cpus/2 clampé [2,8] au lieu de 2 fixe - Ajoute num_cpus comme dépendance workspace - Backoffice jobs: un seul formulaire avec formAction par bouton (icônes rétablies) - infra/perf.sh: corrige l'endpoint /index/jobs/:id (pas /details), exporte BASE_API/TOKEN Co-Authored-By: Claude Sonnet 4.6 --- Cargo.lock | 17 +++++ Cargo.toml | 1 + apps/backoffice/app/jobs/page.tsx | 114 +++++++++--------------------- apps/indexer/Cargo.toml | 1 + apps/indexer/src/analyzer.rs | 5 +- apps/indexer/src/job.rs | 55 ++++++++------ 6 files changed, 92 insertions(+), 101 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2cee2d..694b41f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -863,6 +863,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -1171,6 +1177,7 @@ dependencies = [ "futures", "image", "notify", + "num_cpus", "parsers", "rand 0.8.5", "rayon", @@ -1639,6 +1646,16 @@ dependencies = [ "libm", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.21.3" diff --git a/Cargo.toml b/Cargo.toml index 6064a7d..2744d41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } uuid = { version = "1.12", features = ["serde", "v4"] } natord = "1.0" +num_cpus = "1.16" pdfium-render = { version = "0.8", default-features = false, features = ["pdfium_latest", "image_latest", "thread_safe"] } unrar = "0.5" walkdir = "2.5" diff --git a/apps/backoffice/app/jobs/page.tsx b/apps/backoffice/app/jobs/page.tsx index 8944f43..8dea8dc 100644 --- a/apps/backoffice/app/jobs/page.tsx +++ b/apps/backoffice/app/jobs/page.tsx @@ -2,7 +2,7 @@ import { revalidatePath } from "next/cache"; import { redirect } from "next/navigation"; import { listJobs, fetchLibraries, rebuildIndex, rebuildThumbnails, regenerateThumbnails, IndexJobDto, LibraryDto } from "../../lib/api"; import { JobsList } from "../components/JobsList"; -import { Card, CardHeader, CardTitle, CardDescription, CardContent, Button, FormField, FormSelect, FormRow } from "../components/ui"; +import { Card, CardHeader, CardTitle, CardContent, Button, FormField, FormSelect, FormRow } from "../components/ui"; export const dynamic = "force-dynamic"; @@ -57,100 +57,54 @@ export default async function JobsPage({ searchParams }: { searchParams: Promise Index Jobs - + Queue New Job - Rebuild index, full rebuild, generate missing thumbnails, or regenerate all thumbnails - -
+ + - + {libraries.map((lib) => ( - + ))} - - - - -
- - - - - {libraries.map((lib) => ( - - ))} - - - - -
- -
- - - - - {libraries.map((lib) => ( - - ))} - - - - -
- -
- - - - - {libraries.map((lib) => ( - - ))} - - - +
+ + + + +
- - ThumbnailConfig { } async fn load_thumbnail_concurrency(pool: &sqlx::PgPool) -> usize { - let default_concurrency = 2; + // Default: half the logical CPUs, clamped between 2 and 8. + // Archive extraction is I/O bound but benefits from moderate parallelism. + let cpus = num_cpus::get(); + let default_concurrency = (cpus / 2).clamp(2, 8); let row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'limits'"#) .fetch_optional(pool) .await; diff --git a/apps/indexer/src/job.rs b/apps/indexer/src/job.rs index c1758b0..2096d8f 100644 --- a/apps/indexer/src/job.rs +++ b/apps/indexer/src/job.rs @@ -238,27 +238,42 @@ pub async fn process_job( .await? }; - // Count total files for progress estimation - let library_paths: Vec = libraries - .iter() - .map(|library| { - crate::utils::remap_libraries_path(&library.get::("root_path")) - }) - .collect(); + // Count total files for progress estimation. + // For incremental rebuilds, use the DB count (instant) — the filesystem will be walked + // once during discovery anyway, no need for a second full WalkDir pass. + // For full rebuilds, the DB is already cleared, so we must walk the filesystem. + let library_ids: Vec = libraries.iter().map(|r| r.get("id")).collect(); - let total_files: usize = library_paths - .par_iter() - .map(|root_path| { - walkdir::WalkDir::new(root_path) - .into_iter() - .filter_map(Result::ok) - .filter(|entry| { - entry.file_type().is_file() - && parsers::detect_format(entry.path()).is_some() - }) - .count() - }) - .sum(); + let total_files: usize = if !is_full_rebuild { + let count: i64 = sqlx::query_scalar( + "SELECT COUNT(*) FROM book_files bf JOIN books b ON b.id = bf.book_id WHERE b.library_id = ANY($1)" + ) + .bind(&library_ids) + .fetch_one(&state.pool) + .await + .unwrap_or(0); + count as usize + } else { + let library_paths: Vec = libraries + .iter() + .map(|library| { + crate::utils::remap_libraries_path(&library.get::("root_path")) + }) + .collect(); + library_paths + .par_iter() + .map(|root_path| { + walkdir::WalkDir::new(root_path) + .into_iter() + .filter_map(Result::ok) + .filter(|entry| { + entry.file_type().is_file() + && parsers::detect_format(entry.path()).is_some() + }) + .count() + }) + .sum() + }; info!( "[JOB] Found {} libraries, {} total files to index",