Compare commits


3 Commits

SHA1 Message Date
539dc77d57 feat: enhance thumbnail management with full rebuild functionality
- Extend thumbnail regeneration logic to support full rebuilds, including deletion of orphaned thumbnail files.
- Clear thumbnail paths in the database during both regeneration and full rebuilds.
- Log the number of deleted thumbnail files and cleared database entries.
- Refactor thumbnail file handling for better organization and clarity.
2026-03-08 21:10:34 +01:00
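
The orphan cleanup this commit adds (visible in the second diff below) boils down to: list every `<uuid>.webp` file in the thumbnail directory and delete the ones whose UUID no longer matches an existing book. A minimal standalone sketch of that check, with `delete_orphaned_thumbnails` as a hypothetical helper name — the real job inlines this logic and logs each failed delete with `warn!`:

```rust
use std::collections::HashSet;
use std::path::Path;
use uuid::Uuid;

/// Hypothetical helper mirroring the full_rebuild cleanup: delete every
/// `<uuid>.webp` in `dir` whose UUID no longer matches an existing book.
fn delete_orphaned_thumbnails(dir: &Path, existing: &HashSet<Uuid>) -> std::io::Result<usize> {
    let mut deleted = 0;
    for entry in std::fs::read_dir(dir)?.flatten() {
        let name = entry.file_name();
        // Only files named `<uuid>.webp` are candidates.
        let Some(stem) = name.to_str().and_then(|n| n.strip_suffix(".webp")) else {
            continue;
        };
        let Ok(book_id) = Uuid::parse_str(stem) else {
            continue;
        };
        // The real job warns on failed deletes instead of silently skipping.
        if !existing.contains(&book_id) && std::fs::remove_file(entry.path()).is_ok() {
            deleted += 1;
        }
    }
    Ok(deleted)
}
```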
9c7120c3dc feat: enhance library scanning and metadata parsing
- Collect book file information into a structured list before parsing.
- Parse metadata in parallel to improve performance.
- Refactor file handling around a new FileInfo struct for better organization.
- Use the collected file information for batch database inserts.
- Improve logging to report progress through the scan and parse phases.
2026-03-08 21:07:03 +01:00
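
The scanning change follows a collect-then-parse shape: walk the tree cheaply on one thread, fan the CPU-bound `parse_metadata` calls out over rayon's thread pool, then process results back on one thread for batching. A minimal self-contained sketch of that pattern, with toy stand-ins for the indexer's `FileInfo` and `ParsedMetadata`:

```rust
use rayon::prelude::*;
use std::path::PathBuf;

// Toy stand-ins for the indexer's FileInfo and ParsedMetadata.
struct FileInfo { path: PathBuf }
struct Parsed { title: String }

// Stand-in for the CPU-bound parse_metadata call.
fn parse(info: &FileInfo) -> Result<Parsed, String> {
    Ok(Parsed { title: info.path.display().to_string() })
}

fn main() {
    // Step 1: collect file info cheaply on one thread (the directory walk).
    let infos = vec![
        FileInfo { path: "a.cbz".into() },
        FileInfo { path: "b.epub".into() },
    ];

    // Step 2: fan the heavy parsing out over rayon's thread pool,
    // keeping each result paired with its FileInfo.
    let parsed: Vec<(FileInfo, Result<Parsed, String>)> = infos
        .into_par_iter()
        .map(|info| {
            let result = parse(&info);
            (info, result)
        })
        .collect();

    // Step 3: handle results sequentially, where the batch inserts would go.
    for (info, result) in parsed {
        match result {
            Ok(p) => println!("{}: {}", info.path.display(), p.title),
            Err(e) => eprintln!("{}: parse failed: {}", info.path.display(), e),
        }
    }
}
```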
b1844a4f01 feat: enhance concurrency settings for rendering and thumbnail generation
- Load concurrent render limits dynamically from the database for both page rendering and thumbnail generation.
- Update the API to use the loaded limits, defaulting to 8 for page renders and 4 for thumbnails.
- Update the front-end settings page to reflect the new limits and explain their impact.
- Note clearly in the UI that changes to limits require a server restart to take effect.
2026-03-08 21:03:04 +01:00
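
The loaders added below expect an `app_settings` row with key `limits` whose JSON value carries a numeric `concurrent_renders` field (e.g. `{"concurrent_renders": 8}`); both page rendering and thumbnail generation read that same field. A minimal sketch, assuming the same tokio runtime the codebase uses, of how a `Semaphore` sized from the loaded value bounds parallel work:

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

#[tokio::main]
async fn main() {
    // Stand-in for `load_concurrent_renders(&pool).await` in the diff below.
    let concurrent_renders = 8;
    let limit = Arc::new(Semaphore::new(concurrent_renders));

    let mut handles = Vec::new();
    for i in 0..32 {
        let limit = Arc::clone(&limit);
        handles.push(tokio::spawn(async move {
            // At most `concurrent_renders` tasks hold a permit at once;
            // the permit is released when `_permit` drops at task end.
            let _permit = limit.acquire().await.expect("semaphore closed");
            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
            println!("render {i} done");
        }));
    }
    for h in handles {
        h.await.expect("task panicked");
    }
}
```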
4 changed files with 261 additions and 89 deletions

View File

@@ -32,6 +32,8 @@ use stripstream_core::config::ApiConfig;
use sqlx::postgres::PgPoolOptions;
use tokio::sync::{Mutex, Semaphore};
use tracing::info;
use sqlx::{Pool, Postgres, Row};
use serde_json::Value;
#[derive(Clone)]
struct AppState {
@@ -66,6 +68,25 @@ impl Metrics {
}
}
async fn load_concurrent_renders(pool: &Pool<Postgres>) -> usize {
let default_concurrency = 8;
let row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'limits'"#)
.fetch_optional(pool)
.await;
match row {
Ok(Some(row)) => {
let value: Value = row.get("value");
value
.get("concurrent_renders")
.and_then(|v: &Value| v.as_u64())
.map(|v| v as usize)
.unwrap_or(default_concurrency)
}
_ => default_concurrency,
}
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
@@ -80,13 +101,17 @@ async fn main() -> anyhow::Result<()> {
.connect(&config.database_url)
.await?;
// Load concurrent_renders from settings, default to 8
let concurrent_renders = load_concurrent_renders(&pool).await;
info!("Using concurrent_renders limit: {}", concurrent_renders);
let state = AppState {
pool,
bootstrap_token: Arc::from(config.api_bootstrap_token),
meili_url: Arc::from(config.meili_url),
meili_master_key: Arc::from(config.meili_master_key),
page_cache: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(512).expect("non-zero")))),
page_render_limit: Arc::new(Semaphore::new(8)),
page_render_limit: Arc::new(Semaphore::new(concurrent_renders)),
metrics: Arc::new(Metrics::new()),
read_rate_limit: Arc::new(Mutex::new(ReadRateLimit {
window_started_at: Instant::now(),

View File

@@ -1,4 +1,6 @@
use std::path::Path;
use std::sync::atomic::{AtomicI32, Ordering};
use std::sync::Arc;
use anyhow::Context;
use axum::{
@@ -6,6 +8,7 @@ use axum::{
http::StatusCode,
Json,
};
use futures::stream::{self, StreamExt};
use image::GenericImageView;
use serde::Deserialize;
use sqlx::Row;
@@ -24,6 +27,25 @@ struct ThumbnailConfig {
directory: String,
}
async fn load_thumbnail_concurrency(pool: &sqlx::PgPool) -> usize {
let default_concurrency = 4;
let row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'limits'"#)
.fetch_optional(pool)
.await;
match row {
Ok(Some(row)) => {
let value: serde_json::Value = row.get("value");
value
.get("concurrent_renders")
.and_then(|v| v.as_u64())
.map(|v| v as usize)
.unwrap_or(default_concurrency)
}
_ => default_concurrency,
}
}
async fn load_thumbnail_config(pool: &sqlx::PgPool) -> ThumbnailConfig {
let fallback = ThumbnailConfig {
enabled: true,
@@ -115,8 +137,74 @@ async fn run_checkup(state: AppState, job_id: Uuid) {
}
};
// Regenerate: clear existing thumbnails in scope so they get regenerated
if job_type == "thumbnail_regenerate" {
// Regenerate or full_rebuild: clear existing thumbnails in scope so they get regenerated
if job_type == "thumbnail_regenerate" || job_type == "full_rebuild" {
let config = load_thumbnail_config(pool).await;
if job_type == "full_rebuild" {
// For full_rebuild: delete orphaned thumbnail files (books were deleted, new ones have new UUIDs)
// Get all existing book IDs to keep their thumbnails
let existing_book_ids: std::collections::HashSet<Uuid> = sqlx::query_scalar(
r#"SELECT id FROM books WHERE (library_id = $1 OR $1 IS NULL)"#,
)
.bind(library_id)
.fetch_all(pool)
.await
.unwrap_or_default()
.into_iter()
.collect();
// Delete thumbnail files that don't correspond to existing books
let thumbnail_dir = Path::new(&config.directory);
if thumbnail_dir.exists() {
let mut deleted_count = 0;
if let Ok(entries) = std::fs::read_dir(thumbnail_dir) {
for entry in entries.flatten() {
if let Some(file_name) = entry.file_name().to_str() {
if file_name.ends_with(".webp") {
if let Some(book_id_str) = file_name.strip_suffix(".webp") {
if let Ok(book_id) = Uuid::parse_str(book_id_str) {
if !existing_book_ids.contains(&book_id) {
if let Err(e) = std::fs::remove_file(entry.path()) {
warn!("Failed to delete orphaned thumbnail {}: {}", entry.path().display(), e);
} else {
deleted_count += 1;
}
}
}
}
}
}
}
}
info!("thumbnails full_rebuild: deleted {} orphaned thumbnail files", deleted_count);
}
} else {
// For regenerate: delete thumbnail files for books with thumbnails
let book_ids_to_clear: Vec<Uuid> = sqlx::query_scalar(
r#"SELECT id FROM books WHERE (library_id = $1 OR $1 IS NULL) AND thumbnail_path IS NOT NULL"#,
)
.bind(library_id)
.fetch_all(pool)
.await
.unwrap_or_default();
let mut deleted_count = 0;
for book_id in &book_ids_to_clear {
let filename = format!("{}.webp", book_id);
let thumbnail_path = Path::new(&config.directory).join(&filename);
if thumbnail_path.exists() {
if let Err(e) = std::fs::remove_file(&thumbnail_path) {
warn!("Failed to delete thumbnail file {}: {}", thumbnail_path.display(), e);
} else {
deleted_count += 1;
}
}
}
info!("thumbnails regenerate: deleted {} thumbnail files", deleted_count);
}
// Clear thumbnail_path in database
let cleared = sqlx::query(
r#"UPDATE books SET thumbnail_path = NULL WHERE (library_id = $1 OR $1 IS NULL)"#,
)
@@ -124,7 +212,7 @@ async fn run_checkup(state: AppState, job_id: Uuid) {
.execute(pool)
.await;
if let Ok(res) = cleared {
info!("thumbnails regenerate: cleared {} books", res.rows_affected());
info!("thumbnails {}: cleared {} books in database", job_type, res.rows_affected());
}
}
@@ -156,7 +244,24 @@ async fn run_checkup(state: AppState, job_id: Uuid) {
.execute(pool)
.await;
for (i, &book_id) in book_ids.iter().enumerate() {
let concurrency = load_thumbnail_concurrency(pool).await;
let processed_count = Arc::new(AtomicI32::new(0));
let pool_clone = pool.clone();
let job_id_clone = job_id;
let config_clone = config.clone();
let state_clone = state.clone();
let total_clone = total;
stream::iter(book_ids)
.for_each_concurrent(concurrency, |book_id| {
let processed_count = processed_count.clone();
let pool = pool_clone.clone();
let job_id = job_id_clone;
let config = config_clone.clone();
let state = state_clone.clone();
let total = total_clone;
async move {
match pages::render_book_page_1(&state, book_id, config.width, config.quality).await {
Ok(page_bytes) => {
match generate_thumbnail(&page_bytes, &config) {
@@ -165,19 +270,19 @@ async fn run_checkup(state: AppState, job_id: Uuid) {
if sqlx::query("UPDATE books SET thumbnail_path = $1 WHERE id = $2")
.bind(&path)
.bind(book_id)
.execute(pool)
.execute(&pool)
.await
.is_ok()
{
let processed = (i + 1) as i32;
let percent = ((i + 1) as f64 / total as f64 * 100.0) as i32;
let processed = processed_count.fetch_add(1, Ordering::Relaxed) + 1;
let percent = (processed as f64 / total as f64 * 100.0) as i32;
let _ = sqlx::query(
"UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
)
.bind(job_id)
.bind(processed)
.bind(percent)
.execute(pool)
.execute(&pool)
.await;
}
}
@@ -188,6 +293,8 @@ async fn run_checkup(state: AppState, job_id: Uuid) {
Err(e) => warn!("render page 1 failed for book {}: {:?}", book_id, e),
}
}
})
.await;
let _ = sqlx::query(
"UPDATE index_jobs SET status = 'success', finished_at = NOW(), progress_percent = 100, current_file = NULL WHERE id = $1",

View File

@@ -247,7 +247,7 @@ export default function SettingsPage({ initialSettings, initialCacheStats, initi
<Icon name="performance" size="md" />
Performance Limits
</CardTitle>
<CardDescription>Configure API performance and rate limiting</CardDescription>
<CardDescription>Configure API performance, rate limiting, and thumbnail generation concurrency</CardDescription>
</CardHeader>
<CardContent>
<div className="space-y-4">
@@ -266,6 +266,9 @@ export default function SettingsPage({ initialSettings, initialCacheStats, initi
}}
onBlur={() => handleUpdateSetting("limits", settings.limits)}
/>
<p className="text-xs text-muted-foreground mt-1">
Maximum number of page renders and thumbnail generations running in parallel
</p>
</FormField>
<FormField className="flex-1">
<label className="text-sm font-medium text-muted-foreground mb-1 block">Timeout (seconds)</label>
@@ -299,7 +302,7 @@ export default function SettingsPage({ initialSettings, initialCacheStats, initi
</FormField>
</FormRow>
<p className="text-sm text-muted-foreground">
Note: Changes to limits require a server restart to take effect.
Note: Changes to limits require a server restart to take effect. The "Concurrent Renders" setting controls both page rendering and thumbnail generation parallelism.
</p>
</div>
</CardContent>
@@ -424,7 +427,7 @@ export default function SettingsPage({ initialSettings, initialCacheStats, initi
</div>
<p className="text-sm text-muted-foreground">
Note: Thumbnail settings are used during indexing. Existing thumbnails will not be regenerated automatically.
Note: Thumbnail settings are used during indexing. Existing thumbnails will not be regenerated automatically. The concurrency for thumbnail generation is controlled by the "Concurrent Renders" setting in Performance Limits above.
</p>
</div>
</CardContent>

View File

@@ -3,13 +3,13 @@ use axum::{extract::State, routing::get, Json, Router};
use chrono::{DateTime, Utc};
use axum::http::StatusCode;
use notify::{Event, RecommendedWatcher, RecursiveMode, Watcher};
use parsers::{detect_format, parse_metadata, BookFormat};
use parsers::{detect_format, parse_metadata, BookFormat, ParsedMetadata};
use rayon::prelude::*;
use serde::Serialize;
use sha2::{Digest, Sha256};
use sqlx::{postgres::PgPoolOptions, Row};
use std::{collections::HashMap, path::Path, time::Duration};
use stripstream_core::config::{IndexerConfig, ThumbnailConfig};
use stripstream_core::config::IndexerConfig;
use tokio::sync::mpsc;
use tracing::{error, info, trace, warn};
use uuid::Uuid;
@@ -38,7 +38,6 @@ struct AppState {
pool: sqlx::PgPool,
meili_url: String,
meili_master_key: String,
thumbnail_config: ThumbnailConfig,
api_base_url: String,
api_bootstrap_token: String,
}
@@ -69,7 +68,6 @@ async fn main() -> anyhow::Result<()> {
pool,
meili_url: config.meili_url.clone(),
meili_master_key: config.meili_master_key.clone(),
thumbnail_config: config.thumbnail_config.clone(),
api_base_url: config.api_base_url.clone(),
api_bootstrap_token: config.api_bootstrap_token.clone(),
};
@@ -878,28 +876,78 @@ async fn scan_library(
let mut files_to_insert: Vec<FileInsert> = Vec::with_capacity(BATCH_SIZE);
let mut errors_to_insert: Vec<ErrorInsert> = Vec::with_capacity(BATCH_SIZE);
// Step 1: Collect all book files first
#[derive(Clone)]
struct FileInfo {
path: std::path::PathBuf,
format: BookFormat,
abs_path: String,
file_name: String,
metadata: std::fs::Metadata,
mtime: DateTime<Utc>,
fingerprint: String,
lookup_path: String,
}
let mut file_infos: Vec<FileInfo> = Vec::new();
for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
if !entry.file_type().is_file() {
continue;
}
let path = entry.path();
let Some(format) = detect_format(path) else {
let path = entry.path().to_path_buf();
let Some(format) = detect_format(&path) else {
trace!("[SCAN] Skipping non-book file: {}", path.display());
continue;
};
info!("[SCAN] Found book file: {} (format: {:?})", path.display(), format);
stats.scanned_files += 1;
library_processed_count += 1;
*total_processed_count += 1;
let abs_path_local = path.to_string_lossy().to_string();
let abs_path = unmap_libraries_path(&abs_path_local);
let file_name = path.file_name()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_else(|| abs_path.clone());
let start_time = std::time::Instant::now();
let metadata = std::fs::metadata(&path)
.with_context(|| format!("cannot stat {}", path.display()))?;
let mtime: DateTime<Utc> = metadata
.modified()
.map(DateTime::<Utc>::from)
.unwrap_or_else(|_| Utc::now());
let fingerprint = compute_fingerprint(&path, metadata.len(), &mtime)?;
let lookup_path = remap_libraries_path(&abs_path);
file_infos.push(FileInfo {
path,
format,
abs_path,
file_name,
metadata,
mtime,
fingerprint,
lookup_path,
});
}
info!("[SCAN] Collected {} files, starting parallel parsing", file_infos.len());
// Step 2: Parse metadata in parallel
let parsed_results: Vec<(FileInfo, anyhow::Result<ParsedMetadata>)> = file_infos
.into_par_iter()
.map(|file_info| {
let parse_result = parse_metadata(&file_info.path, file_info.format, root);
(file_info, parse_result)
})
.collect();
info!("[SCAN] Completed parallel parsing, processing {} results", parsed_results.len());
// Step 3: Process results sequentially for batch inserts
for (file_info, parse_result) in parsed_results {
library_processed_count += 1;
*total_processed_count += 1;
// Update progress in DB every 1 second or every 10 files
let should_update_progress = last_progress_update.elapsed() > Duration::from_secs(1) || library_processed_count % 10 == 0;
@@ -914,7 +962,7 @@ async fn scan_library(
"UPDATE index_jobs SET current_file = $2, processed_files = $3, progress_percent = $4 WHERE id = $1"
)
.bind(job_id)
.bind(&file_name)
.bind(&file_info.file_name)
.bind(*total_processed_count)
.bind(progress_percent)
.execute(&state.pool)
@@ -935,32 +983,23 @@ async fn scan_library(
}
}
let seen_key = remap_libraries_path(&abs_path);
let seen_key = remap_libraries_path(&file_info.abs_path);
seen.insert(seen_key.clone(), true);
let metadata = std::fs::metadata(path)
.with_context(|| format!("cannot stat {}", path.display()))?;
let mtime: DateTime<Utc> = metadata
.modified()
.map(DateTime::<Utc>::from)
.unwrap_or_else(|_| Utc::now());
let fingerprint = compute_fingerprint(path, metadata.len(), &mtime)?;
let lookup_path = remap_libraries_path(&abs_path);
if let Some((file_id, book_id, old_fingerprint)) = existing.get(&lookup_path).cloned() {
if !is_full_rebuild && old_fingerprint == fingerprint {
trace!("[PROCESS] Skipping unchanged file: {}", file_name);
if let Some((file_id, book_id, old_fingerprint)) = existing.get(&file_info.lookup_path).cloned() {
if !is_full_rebuild && old_fingerprint == file_info.fingerprint {
trace!("[PROCESS] Skipping unchanged file: {}", file_info.file_name);
continue;
}
info!("[PROCESS] Updating existing file: {} (full_rebuild={}, fingerprint_match={})", file_name, is_full_rebuild, old_fingerprint == fingerprint);
info!("[PROCESS] Updating existing file: {} (full_rebuild={}, fingerprint_match={})", file_info.file_name, is_full_rebuild, old_fingerprint == file_info.fingerprint);
match parse_metadata(path, format, root) {
match parse_result {
Ok(parsed) => {
books_to_update.push(BookUpdate {
book_id,
title: parsed.title,
kind: kind_from_format(format).to_string(),
kind: kind_from_format(file_info.format).to_string(),
series: parsed.series,
volume: parsed.volume,
page_count: parsed.page_count,
@@ -968,29 +1007,29 @@ async fn scan_library(
files_to_update.push(FileUpdate {
file_id,
format: format.as_str().to_string(),
size_bytes: metadata.len() as i64,
mtime,
fingerprint,
format: file_info.format.as_str().to_string(),
size_bytes: file_info.metadata.len() as i64,
mtime: file_info.mtime,
fingerprint: file_info.fingerprint,
});
stats.indexed_files += 1;
}
Err(err) => {
warn!("[PARSER] Failed to parse {}: {}", file_name, err);
warn!("[PARSER] Failed to parse {}: {}", file_info.file_name, err);
stats.errors += 1;
files_to_update.push(FileUpdate {
file_id,
format: format.as_str().to_string(),
size_bytes: metadata.len() as i64,
mtime,
fingerprint: fingerprint.clone(),
format: file_info.format.as_str().to_string(),
size_bytes: file_info.metadata.len() as i64,
mtime: file_info.mtime,
fingerprint: file_info.fingerprint.clone(),
});
errors_to_insert.push(ErrorInsert {
job_id,
file_path: abs_path.clone(),
file_path: file_info.abs_path.clone(),
error_message: err.to_string(),
});
@@ -1014,17 +1053,17 @@ async fn scan_library(
}
// New file (thumbnails generated by API after job handoff)
info!("[PROCESS] Inserting new file: {}", file_name);
info!("[PROCESS] Inserting new file: {}", file_info.file_name);
let book_id = Uuid::new_v4();
match parse_metadata(path, format, root) {
match parse_result {
Ok(parsed) => {
let file_id = Uuid::new_v4();
books_to_insert.push(BookInsert {
book_id,
library_id,
kind: kind_from_format(format).to_string(),
kind: kind_from_format(file_info.format).to_string(),
title: parsed.title,
series: parsed.series,
volume: parsed.volume,
@@ -1035,11 +1074,11 @@ async fn scan_library(
files_to_insert.push(FileInsert {
file_id,
book_id,
format: format.as_str().to_string(),
abs_path: abs_path.clone(),
size_bytes: metadata.len() as i64,
mtime,
fingerprint,
format: file_info.format.as_str().to_string(),
abs_path: file_info.abs_path.clone(),
size_bytes: file_info.metadata.len() as i64,
mtime: file_info.mtime,
fingerprint: file_info.fingerprint,
parse_status: "ok".to_string(),
parse_error: None,
});
@@ -1047,7 +1086,7 @@ async fn scan_library(
stats.indexed_files += 1;
}
Err(err) => {
warn!("[PARSER] Failed to parse {}: {}", file_name, err);
warn!("[PARSER] Failed to parse {}: {}", file_info.file_name, err);
stats.errors += 1;
let book_id = Uuid::new_v4();
let file_id = Uuid::new_v4();
@@ -1055,8 +1094,8 @@ async fn scan_library(
books_to_insert.push(BookInsert {
book_id,
library_id,
kind: kind_from_format(format).to_string(),
title: file_display_name(path),
kind: kind_from_format(file_info.format).to_string(),
title: file_display_name(&file_info.path),
series: None,
volume: None,
page_count: None,
@@ -1066,18 +1105,18 @@ async fn scan_library(
files_to_insert.push(FileInsert {
file_id,
book_id,
format: format.as_str().to_string(),
abs_path: abs_path.clone(),
size_bytes: metadata.len() as i64,
mtime,
fingerprint,
format: file_info.format.as_str().to_string(),
abs_path: file_info.abs_path.clone(),
size_bytes: file_info.metadata.len() as i64,
mtime: file_info.mtime,
fingerprint: file_info.fingerprint,
parse_status: "error".to_string(),
parse_error: Some(err.to_string()),
});
errors_to_insert.push(ErrorInsert {
job_id,
file_path: abs_path,
file_path: file_info.abs_path,
error_message: err.to_string(),
});
}
@@ -1087,8 +1126,6 @@ async fn scan_library(
if books_to_insert.len() >= BATCH_SIZE || files_to_insert.len() >= BATCH_SIZE {
flush_all_batches(&state.pool, &mut books_to_update, &mut files_to_update, &mut books_to_insert, &mut files_to_insert, &mut errors_to_insert).await?;
}
trace!("[DONE] Processed file {} (total time: {:?})", file_name, start_time.elapsed());
}
// Final flush of any remaining items