perf(api,indexer): optimiser pages, thumbnails, watcher et robustesse fd

- Pages: mode Original (zero-transcoding), ETag/304, cache index CBZ, préfetch next 2 pages, filtre Triangle par défaut - Thumbnails: DCT scaling JPEG via jpeg-decoder (decode 7x plus rapide), img.thumbnail() pour resize, support format Original, fix JPEG RGBA8 - API fallback thumbnail: OutputFormat::Original + DCT scaling au lieu de WebP full-decode, retour (bytes, content_type) dynamique - Watcher: remplacement notify par poll léger sans inotify/fd, skip poll quand job actif, snapshots en mémoire - Jobs: mutex exclusif corrigé (tous statuts actifs, tous types exclusifs) - Robustesse: suppression fs::canonicalize (problèmes fd Docker), list_folders avec erreurs explicites, has_children default true - Backoffice: FormRow items-start pour alignement inputs avec helper text, labels settings clarifiés Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 23:07:42 +01:00
parent fe54f55f47
commit 6947af10fe
15 changed files with 711 additions and 395 deletions
--- a/apps/indexer/src/analyzer.rs
+++ b/apps/indexer/src/analyzer.rs
@@ -1,6 +1,6 @@
 use anyhow::Result;
 use futures::stream::{self, StreamExt};
-use image::GenericImageView;
+use image::{GenericImageView, ImageEncoder};
 use parsers::{analyze_book, BookFormat};
 use sqlx::Row;
 use std::path::Path;
@@ -14,6 +14,7 @@ use crate::{job::is_job_cancelled, utils, AppState};
 #[derive(Clone)]
 struct ThumbnailConfig {
    enabled: bool,
+    format: Option<String>,
    width: u32,
    height: u32,
    quality: u8,
@@ -24,6 +25,7 @@ struct ThumbnailConfig {
 async fn load_thumbnail_config(pool: &sqlx::PgPool) -> ThumbnailConfig {
    let fallback = ThumbnailConfig {
        enabled: true,
+        format: Some("webp".to_string()),
        width: 300,
        height: 400,
        quality: 80,
@@ -51,6 +53,11 @@ async fn load_thumbnail_config(pool: &sqlx::PgPool) -> ThumbnailConfig {
                    .get("enabled")
                    .and_then(|v| v.as_bool())
                    .unwrap_or(fallback.enabled),
+                format: value
+                    .get("format")
+                    .and_then(|v| v.as_str())
+                    .map(|s| s.to_string())
+                    .or_else(|| fallback.format.clone()),
                width: value
                    .get("width")
                    .and_then(|v| v.as_u64())
@@ -100,22 +107,143 @@ async fn load_thumbnail_concurrency(pool: &sqlx::PgPool) -> usize {
    }
 }

+/// Detect the image format from raw bytes and return the corresponding file extension.
+fn detect_image_ext(data: &[u8]) -> &'static str {
+    match image::guess_format(data) {
+        Ok(image::ImageFormat::Png) => "png",
+        Ok(image::ImageFormat::WebP) => "webp",
+        _ => "jpg", // JPEG is the most common in comic archives
+    }
+}
+
+/// Fast JPEG decode with DCT scaling: decodes directly at reduced resolution (1/8, 1/4, 1/2).
+/// Returns (DynamicImage, original_width, original_height) or None if not JPEG / decode fails.
+fn fast_jpeg_decode(image_bytes: &[u8], target_w: u32, target_h: u32) -> Option<(image::DynamicImage, u32, u32)> {
+    // Only attempt for JPEG
+    if image::guess_format(image_bytes).ok()? != image::ImageFormat::Jpeg {
+        return None;
+    }
+
+    let mut decoder = jpeg_decoder::Decoder::new(std::io::Cursor::new(image_bytes));
+    // Read header to get original dimensions
+    decoder.read_info().ok()?;
+    let info = decoder.info()?;
+    let orig_w = info.width as u32;
+    let orig_h = info.height as u32;
+
+    // Request DCT-scaled decode (picks smallest scale >= requested size)
+    decoder.scale(target_w as u16, target_h as u16).ok()?;
+
+    let pixels = decoder.decode().ok()?;
+    let info = decoder.info()?;
+    let dec_w = info.width as u32;
+    let dec_h = info.height as u32;
+
+    let img = match info.pixel_format {
+        jpeg_decoder::PixelFormat::RGB24 => {
+            let buf = image::RgbImage::from_raw(dec_w, dec_h, pixels)?;
+            image::DynamicImage::ImageRgb8(buf)
+        }
+        jpeg_decoder::PixelFormat::L8 => {
+            let buf = image::GrayImage::from_raw(dec_w, dec_h, pixels)?;
+            image::DynamicImage::ImageLuma8(buf)
+        }
+        _ => return None,
+    };
+    Some((img, orig_w, orig_h))
+}
+
 fn generate_thumbnail(image_bytes: &[u8], config: &ThumbnailConfig) -> anyhow::Result<Vec<u8>> {
-    let img = image::load_from_memory(image_bytes)
-        .map_err(|e| anyhow::anyhow!("failed to load image: {}", e))?;
-    let (orig_w, orig_h) = img.dimensions();
-    let ratio_w = config.width as f32 / orig_w as f32;
-    let ratio_h = config.height as f32 / orig_h as f32;
-    let ratio = ratio_w.min(ratio_h);
-    let new_w = (orig_w as f32 * ratio) as u32;
-    let new_h = (orig_h as f32 * ratio) as u32;
-    let resized = img.resize(new_w, new_h, image::imageops::FilterType::Triangle);
-    let rgba = resized.to_rgba8();
-    let (w, h) = rgba.dimensions();
-    let rgb_data: Vec<u8> = rgba.pixels().flat_map(|p| [p[0], p[1], p[2]]).collect();
-    let quality = config.quality as f32;
-    let webp_data = webp::Encoder::new(&rgb_data, webp::PixelLayout::Rgb, w, h).encode(quality);
-    Ok(webp_data.to_vec())
+    let t0 = std::time::Instant::now();
+
+    // Try fast JPEG DCT-scaled decode first (decodes directly at ~target size)
+    let (img, orig_w, orig_h) = if let Some(result) = fast_jpeg_decode(image_bytes, config.width, config.height) {
+        result
+    } else {
+        // Fallback for PNG/WebP/other formats
+        let img = image::load_from_memory(image_bytes)
+            .map_err(|e| anyhow::anyhow!("failed to load image: {}", e))?;
+        let (ow, oh) = img.dimensions();
+        (img, ow, oh)
+    };
+    let t_decode = t0.elapsed();
+
+    // Don't upscale — clamp to original size
+    let target_w = config.width.min(orig_w);
+    let target_h = config.height.min(orig_h);
+
+    let t1 = std::time::Instant::now();
+    // thumbnail() is optimized for large downscale ratios (uses fast sampling)
+    let resized = img.thumbnail(target_w, target_h);
+    let (w, h) = resized.dimensions();
+    let t_resize = t1.elapsed();
+
+    let format = config.format.as_deref().unwrap_or("webp");
+    info!(
+        "[THUMBNAIL] {}x{} -> {}x{}  decode={:.0}ms  resize={:.0}ms  encode_format={}",
+        orig_w, orig_h, w, h,
+        t_decode.as_secs_f64() * 1000.0,
+        t_resize.as_secs_f64() * 1000.0,
+        format,
+    );
+
+    let t2 = std::time::Instant::now();
+    let result = match format {
+        "original" => {
+            // Re-encode in source format (fast JPEG encode instead of slow WebP)
+            let source_format = image::guess_format(image_bytes).unwrap_or(image::ImageFormat::Jpeg);
+            match source_format {
+                image::ImageFormat::Png => {
+                    let rgba = resized.to_rgba8();
+                    let mut buf = Vec::new();
+                    let encoder = image::codecs::png::PngEncoder::new(&mut buf);
+                    encoder.write_image(&rgba, w, h, image::ColorType::Rgba8.into())
+                        .map_err(|e| anyhow::anyhow!("png encode failed: {}", e))?;
+                    Ok(buf)
+                }
+                _ => {
+                    let rgb = resized.to_rgb8();
+                    let mut buf = Vec::new();
+                    let mut encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut buf, config.quality);
+                    encoder.encode(&rgb, w, h, image::ColorType::Rgb8.into())
+                        .map_err(|e| anyhow::anyhow!("jpeg encode failed: {}", e))?;
+                    Ok(buf)
+                }
+            }
+        }
+        "jpeg" | "jpg" => {
+            let rgb = resized.to_rgb8();
+            let mut buf = Vec::new();
+            let mut encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut buf, config.quality);
+            encoder.encode(&rgb, w, h, image::ColorType::Rgb8.into())
+                .map_err(|e| anyhow::anyhow!("jpeg encode failed: {}", e))?;
+            Ok(buf)
+        }
+        "png" => {
+            let rgba = resized.to_rgba8();
+            let mut buf = Vec::new();
+            let encoder = image::codecs::png::PngEncoder::new(&mut buf);
+            encoder.write_image(&rgba, w, h, image::ColorType::Rgba8.into())
+                .map_err(|e| anyhow::anyhow!("png encode failed: {}", e))?;
+            Ok(buf)
+        }
+        _ => {
+            // WebP (default)
+            let rgb = resized.to_rgb8();
+            let rgb_data: &[u8] = rgb.as_raw();
+            let quality = config.quality as f32;
+            let webp_data = webp::Encoder::new(rgb_data, webp::PixelLayout::Rgb, w, h).encode(quality);
+            Ok(webp_data.to_vec())
+        }
+    };
+    let t_encode = t2.elapsed();
+    info!(
+        "[THUMBNAIL] encode={:.0}ms  total={:.0}ms  output_size={}KB",
+        t_encode.as_secs_f64() * 1000.0,
+        t0.elapsed().as_secs_f64() * 1000.0,
+        result.as_ref().map(|b| b.len() / 1024).unwrap_or(0),
+    );
+    result
 }

 /// Save raw image bytes (as extracted from the archive) without any processing.
@@ -127,23 +255,32 @@ fn save_raw_image(book_id: Uuid, raw_bytes: &[u8], directory: &str) -> anyhow::R
    Ok(path.to_string_lossy().to_string())
 }

-/// Resize the raw image and save it as a WebP thumbnail, overwriting the raw file.
-fn resize_raw_to_webp(
+/// Resize the raw image and save it as a thumbnail, overwriting the raw file.
+fn resize_raw_to_thumbnail(
    book_id: Uuid,
    raw_path: &str,
    config: &ThumbnailConfig,
 ) -> anyhow::Result<String> {
    let raw_bytes = std::fs::read(raw_path)
        .map_err(|e| anyhow::anyhow!("failed to read raw image {}: {}", raw_path, e))?;
-    let webp_bytes = generate_thumbnail(&raw_bytes, config)?;
+    info!("[THUMBNAIL] book={} raw_size={}KB", book_id, raw_bytes.len() / 1024);
+    let thumb_bytes = generate_thumbnail(&raw_bytes, config)?;

-    let webp_path = Path::new(&config.directory).join(format!("{}.webp", book_id));
-    std::fs::write(&webp_path, &webp_bytes)?;
+    let format = config.format.as_deref().unwrap_or("webp");
+    let ext = match format {
+        "original" => detect_image_ext(&raw_bytes),
+        "jpeg" | "jpg" => "jpg",
+        "png" => "png",
+        _ => "webp",
+    };

-    // Delete the raw file now that the WebP is written
+    let thumb_path = Path::new(&config.directory).join(format!("{}.{}", book_id, ext));
+    std::fs::write(&thumb_path, &thumb_bytes)?;
+
+    // Delete the raw file now that the thumbnail is written
    let _ = std::fs::remove_file(raw_path);

-    Ok(webp_path.to_string_lossy().to_string())
+    Ok(thumb_path.to_string_lossy().to_string())
 }

 fn book_format_from_str(s: &str) -> Option<BookFormat> {
@@ -465,7 +602,7 @@ pub async fn analyze_library_books(

                let raw_path_clone = raw_path.clone();
                let thumb_result = tokio::task::spawn_blocking(move || {
-                    resize_raw_to_webp(book_id, &raw_path_clone, &config)
+                    resize_raw_to_thumbnail(book_id, &raw_path_clone, &config)
                })
                .await;

@@ -554,18 +691,17 @@ pub async fn regenerate_thumbnails(

    let mut deleted_count = 0usize;
    for book_id in &book_ids_to_clear {
-        // Delete WebP thumbnail
-        let webp_path = Path::new(&config.directory).join(format!("{}.webp", book_id));
-        if webp_path.exists() {
-            if let Err(e) = std::fs::remove_file(&webp_path) {
-                warn!("[ANALYZER] Failed to delete thumbnail {}: {}", webp_path.display(), e);
-            } else {
-                deleted_count += 1;
+        // Delete thumbnail in any format (webp, jpg, png) + raw
+        for ext in &["webp", "jpg", "png", "raw"] {
+            let path = Path::new(&config.directory).join(format!("{}.{}", book_id, ext));
+            if path.exists() {
+                if let Err(e) = std::fs::remove_file(&path) {
+                    warn!("[ANALYZER] Failed to delete thumbnail {}: {}", path.display(), e);
+                } else if *ext != "raw" {
+                    deleted_count += 1;
+                }
            }
        }
-        // Delete raw file if it exists (interrupted previous run)
-        let raw_path = Path::new(&config.directory).join(format!("{}.raw", book_id));
-        let _ = std::fs::remove_file(&raw_path);
    }
    info!("[ANALYZER] Deleted {} thumbnail files for regeneration", deleted_count);

@@ -599,14 +735,10 @@ pub async fn cleanup_orphaned_thumbnails(state: &AppState) -> Result<()> {
        for entry in entries.flatten() {
            let file_name = entry.file_name();
            let file_name = file_name.to_string_lossy();
-            // Clean up both .webp and orphaned .raw files
-            let stem = if let Some(s) = file_name.strip_suffix(".webp") {
-                Some(s.to_string())
-            } else if let Some(s) = file_name.strip_suffix(".raw") {
-                Some(s.to_string())
-            } else {
-                None
-            };
+            // Clean up all thumbnail formats and orphaned .raw files
+            let stem = [".webp", ".jpg", ".png", ".raw"]
+                .iter()
+                .find_map(|ext| file_name.strip_suffix(ext).map(|s| s.to_string()));
            if let Some(book_id_str) = stem {
                if let Ok(book_id) = Uuid::parse_str(&book_id_str) {
                    if !existing_book_ids.contains(&book_id) {
--- a/apps/indexer/src/job.rs
+++ b/apps/indexer/src/job.rs
@@ -37,27 +37,57 @@ pub async fn cleanup_stale_jobs(pool: &PgPool) -> Result<()> {
    Ok(())
 }

+/// Job types that modify book/thumbnail data and must not run concurrently.
+const EXCLUSIVE_JOB_TYPES: &[&str] = &[
+    "rebuild",
+    "full_rebuild",
+    "scan",
+    "thumbnail_rebuild",
+    "thumbnail_regenerate",
+];
+
+/// Active statuses (job is still in progress, not just queued).
+const ACTIVE_STATUSES: &[&str] = &[
+    "running",
+    "extracting_pages",
+    "generating_thumbnails",
+];
+
 pub async fn claim_next_job(pool: &PgPool) -> Result<Option<(Uuid, Option<Uuid>)>> {
    let mut tx = pool.begin().await?;

+    // Check if any exclusive job is currently active
+    let has_active_exclusive: bool = sqlx::query_scalar(
+        r#"
+        SELECT EXISTS(
+            SELECT 1 FROM index_jobs
+            WHERE status = ANY($1)
+              AND type = ANY($2)
+        )
+        "#,
+    )
+    .bind(ACTIVE_STATUSES)
+    .bind(EXCLUSIVE_JOB_TYPES)
+    .fetch_one(&mut *tx)
+    .await?;
+
    let row = sqlx::query(
        r#"
        SELECT j.id, j.type, j.library_id
        FROM index_jobs j
        WHERE j.status = 'pending'
          AND (
-            (j.type IN ('rebuild', 'full_rebuild') AND NOT EXISTS (
-                SELECT 1 FROM index_jobs
-                WHERE status = 'running'
-                  AND type IN ('rebuild', 'full_rebuild')
-            ))
+            -- Exclusive jobs: only if no other exclusive job is active
+            (j.type = ANY($1) AND NOT $2::bool)
            OR
-            j.type NOT IN ('rebuild', 'full_rebuild')
+            -- Non-exclusive jobs (cbr_to_cbz): can always run
+            j.type != ALL($1)
          )
        ORDER BY
            CASE j.type
                WHEN 'full_rebuild' THEN 1
                WHEN 'rebuild' THEN 2
+                WHEN 'scan' THEN 2
                ELSE 3
            END,
            j.created_at ASC
@@ -65,6 +95,8 @@ pub async fn claim_next_job(pool: &PgPool) -> Result<Option<(Uuid, Option<Uuid>)
        LIMIT 1
        "#,
    )
+    .bind(EXCLUSIVE_JOB_TYPES)
+    .bind(has_active_exclusive)
    .fetch_optional(&mut *tx)
    .await?;

@@ -74,30 +106,8 @@ pub async fn claim_next_job(pool: &PgPool) -> Result<Option<(Uuid, Option<Uuid>)
    };

    let id: Uuid = row.get("id");
-    let job_type: String = row.get("type");
    let library_id: Option<Uuid> = row.get("library_id");

-    if job_type == "rebuild" || job_type == "full_rebuild" {
-        let has_running_rebuild: bool = sqlx::query_scalar(
-            r#"
-            SELECT EXISTS(
-                SELECT 1 FROM index_jobs
-                WHERE status = 'running'
-                  AND type IN ('rebuild', 'full_rebuild')
-                  AND id != $1
-            )
-            "#,
-        )
-        .bind(id)
-        .fetch_one(&mut *tx)
-        .await?;
-
-        if has_running_rebuild {
-            tx.rollback().await?;
-            return Ok(None);
-        }
-    }
-
    sqlx::query(
        "UPDATE index_jobs SET status = 'running', started_at = NOW(), error_opt = NULL WHERE id = $1",
    )
--- a/apps/indexer/src/watcher.rs
+++ b/apps/indexer/src/watcher.rs
@@ -1,147 +1,179 @@
 use anyhow::Result;
-use notify::{Event, RecommendedWatcher, RecursiveMode, Watcher};
 use sqlx::Row;
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
+use std::path::Path;
 use std::time::Duration;
-use tokio::sync::mpsc;
-use tracing::{error, info, trace};
+use tracing::{error, info, trace, warn};
 use uuid::Uuid;
+use walkdir::WalkDir;

 use crate::utils;
 use crate::AppState;

-pub async fn run_file_watcher(state: AppState) -> Result<()> {
-    let (tx, mut rx) = mpsc::channel::<(Uuid, String)>(100);
+/// Check if any indexing job is currently active.
+async fn has_active_jobs(pool: &sqlx::PgPool) -> bool {
+    sqlx::query_scalar::<_, bool>(
+        "SELECT EXISTS(SELECT 1 FROM index_jobs WHERE status IN ('pending', 'running', 'extracting_pages', 'generating_thumbnails'))",
+    )
+    .fetch_one(pool)
+    .await
+    .unwrap_or(false)
+}

-    // Start watcher refresh loop
-    let refresh_interval = Duration::from_secs(30);
+/// Snapshot: set of book file paths found in a library.
+type LibrarySnapshot = HashSet<String>;
+
+/// Walk a library directory and collect all book file paths.
+/// Walks sequentially to limit open file descriptors.
+fn snapshot_library(root_path: &str) -> LibrarySnapshot {
+    let mut files = HashSet::new();
+    let walker = WalkDir::new(root_path)
+        .follow_links(true)
+        .into_iter()
+        .filter_map(|e| e.ok());
+
+    for entry in walker {
+        if entry.file_type().is_file() && parsers::detect_format(entry.path()).is_some() {
+            files.insert(entry.path().to_string_lossy().to_string());
+        }
+    }
+    files
+}
+
+pub async fn run_file_watcher(state: AppState) -> Result<()> {
+    let poll_interval = Duration::from_secs(30);
    let pool = state.pool.clone();

-    tokio::spawn(async move {
-        let mut watched_libraries: HashMap<Uuid, String> = HashMap::new();
+    // Stored snapshots per library — used to detect additions/removals between polls
+    let mut snapshots: HashMap<Uuid, LibrarySnapshot> = HashMap::new();
+    // Track which libraries we're watching (to detect config changes)
+    let mut watched_libraries: HashMap<Uuid, String> = HashMap::new();

-        loop {
-            // Get libraries with watcher enabled
-            match sqlx::query(
-                "SELECT id, root_path FROM libraries WHERE watcher_enabled = TRUE AND enabled = TRUE"
-            )
-            .fetch_all(&pool)
-            .await
-            {
-                Ok(rows) => {
-                    let current_libraries: HashMap<Uuid, String> = rows
-                        .into_iter()
-                        .map(|row| {
-                            let id: Uuid = row.get("id");
-                            let root_path: String = row.get("root_path");
-                            let local_path = utils::remap_libraries_path(&root_path);
-                            (id, local_path)
-                        })
-                        .collect();
+    loop {
+        tokio::time::sleep(poll_interval).await;

-                    // Check if we need to recreate watcher
-                    let needs_restart = watched_libraries.len() != current_libraries.len()
-                        || watched_libraries.iter().any(|(id, path)| {
-                            current_libraries.get(id) != Some(path)
-                        });
-
-                    if needs_restart {
-                        info!("[WATCHER] Restarting watcher for {} libraries", current_libraries.len());
-
-                        if !current_libraries.is_empty() {
-                            let tx_clone = tx.clone();
-                            let libraries_clone = current_libraries.clone();
-
-                            match setup_watcher(libraries_clone, tx_clone) {
-                                Ok(_new_watcher) => {
-                                    watched_libraries = current_libraries;
-                                    info!("[WATCHER] Watching {} libraries", watched_libraries.len());
-                                }
-                                Err(err) => {
-                                    error!("[WATCHER] Failed to setup watcher: {}", err);
-                                }
-                            }
-                        }
-                    }
-                }
-                Err(err) => {
-                    error!("[WATCHER] Failed to fetch libraries: {}", err);
-                }
-            }
-
-            tokio::time::sleep(refresh_interval).await;
+        // Skip if any job is active — avoid competing for file descriptors
+        if has_active_jobs(&pool).await {
+            trace!("[WATCHER] Skipping poll — job active");
+            continue;
        }
-    });

-    // Process watcher events
-    while let Some((library_id, file_path)) = rx.recv().await {
-        info!("[WATCHER] File changed in library {}: {}", library_id, file_path);
-
-        // Check if there's already a pending job for this library
-        match sqlx::query_scalar::<_, bool>(
-            "SELECT EXISTS(SELECT 1 FROM index_jobs WHERE library_id = $1 AND status IN ('pending', 'running'))"
+        // Fetch enabled libraries with watcher
+        let rows = match sqlx::query(
+            "SELECT id, root_path FROM libraries WHERE watcher_enabled = TRUE AND enabled = TRUE",
        )
-        .bind(library_id)
-        .fetch_one(&state.pool)
+        .fetch_all(&pool)
        .await
        {
-            Ok(exists) => {
-                if !exists {
-                    // Create a quick scan job
+            Ok(rows) => rows,
+            Err(err) => {
+                error!("[WATCHER] Failed to fetch libraries: {}", err);
+                continue;
+            }
+        };
+
+        let current_libraries: HashMap<Uuid, String> = rows
+            .into_iter()
+            .map(|row| {
+                let id: Uuid = row.get("id");
+                let root_path: String = row.get("root_path");
+                let local_path = utils::remap_libraries_path(&root_path);
+                (id, local_path)
+            })
+            .collect();
+
+        // If library config changed, reset snapshots for removed/changed libraries
+        if current_libraries != watched_libraries {
+            let removed: Vec<Uuid> = watched_libraries
+                .keys()
+                .filter(|id| !current_libraries.contains_key(id))
+                .copied()
+                .collect();
+            for id in removed {
+                snapshots.remove(&id);
+            }
+            if !current_libraries.is_empty() {
+                info!(
+                    "[WATCHER] Watching {} libraries (lightweight poll, {}s interval)",
+                    current_libraries.len(),
+                    poll_interval.as_secs()
+                );
+            } else {
+                info!("[WATCHER] No libraries to watch");
+            }
+            watched_libraries = current_libraries.clone();
+        }
+
+        // Poll each library sequentially to limit concurrent file descriptor usage
+        for (library_id, root_path) in &current_libraries {
+            if !Path::new(root_path).is_dir() {
+                warn!("[WATCHER] Library {} path not accessible: {}", library_id, root_path);
+                continue;
+            }
+
+            // Re-check between libraries in case a job was created
+            if has_active_jobs(&pool).await {
+                trace!("[WATCHER] Job became active during poll, stopping");
+                break;
+            }
+
+            let root_owned = root_path.clone();
+            let new_snapshot = tokio::task::spawn_blocking(move || snapshot_library(&root_owned))
+                .await
+                .unwrap_or_default();
+
+            let changed = match snapshots.get(library_id) {
+                Some(old_snapshot) => *old_snapshot != new_snapshot,
+                None => {
+                    // First scan — store baseline, don't trigger a job
+                    trace!(
+                        "[WATCHER] Initial snapshot for library {}: {} files",
+                        library_id,
+                        new_snapshot.len()
+                    );
+                    snapshots.insert(*library_id, new_snapshot);
+                    continue;
+                }
+            };
+
+            if changed {
+                info!(
+                    "[WATCHER] Changes detected in library {} ({})",
+                    library_id, root_path
+                );
+
+                // Check if a job already exists for this library
+                let job_exists = sqlx::query_scalar::<_, bool>(
+                    "SELECT EXISTS(SELECT 1 FROM index_jobs WHERE library_id = $1 AND status IN ('pending', 'running', 'extracting_pages', 'generating_thumbnails'))",
+                )
+                .bind(library_id)
+                .fetch_one(&pool)
+                .await
+                .unwrap_or(true);
+
+                if !job_exists {
                    let job_id = Uuid::new_v4();
                    match sqlx::query(
-                        "INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, 'rebuild', 'pending')"
+                        "INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, 'rebuild', 'pending')",
                    )
                    .bind(job_id)
                    .bind(library_id)
-                    .execute(&state.pool)
+                    .execute(&pool)
                    .await
                    {
-                        Ok(_) => info!("[WATCHER] Created job {} for library {}", job_id, library_id),
+                        Ok(_) => info!(
+                            "[WATCHER] Created rebuild job {} for library {}",
+                            job_id, library_id
+                        ),
                        Err(err) => error!("[WATCHER] Failed to create job: {}", err),
                    }
                } else {
-                    trace!("[WATCHER] Job already pending for library {}, skipping", library_id);
+                    trace!("[WATCHER] Job already active for library {}, skipping", library_id);
                }
            }
-            Err(err) => error!("[WATCHER] Failed to check existing jobs: {}", err),
+
+            // Update snapshot
+            snapshots.insert(*library_id, new_snapshot);
        }
    }
-
-    Ok(())
-}
-
-fn setup_watcher(
-    libraries: HashMap<Uuid, String>,
-    tx: mpsc::Sender<(Uuid, String)>,
-) -> Result<RecommendedWatcher> {
-    let libraries_for_closure = libraries.clone();
-    
-    let mut watcher = notify::recommended_watcher(move |res: Result<Event, notify::Error>| {
-        match res {
-            Ok(event) => {
-                if event.kind.is_modify() || event.kind.is_create() || event.kind.is_remove() {
-                    for path in event.paths {
-                        if let Some((library_id, _)) = libraries_for_closure.iter().find(|(_, root)| {
-                            path.starts_with(root)
-                        }) {
-                            let path_str = path.to_string_lossy().to_string();
-                            if parsers::detect_format(&path).is_some() {
-                                let _ = tx.try_send((*library_id, path_str));
-                            }
-                        }
-                    }
-                }
-            }
-            Err(err) => error!("[WATCHER] Event error: {}", err),
-        }
-    })?;
-
-    // Actually watch the library directories
-    for root_path in libraries.values() {
-        info!("[WATCHER] Watching directory: {}", root_path);
-        watcher.watch(std::path::Path::new(root_path), RecursiveMode::Recursive)?;
-    }
-
-    Ok(watcher)
 }