Parse EPUB structure (container.xml → OPF → spine → XHTML) to extract images in reading order. Zero new dependencies — reuses zip + regex crates with pre-compiled regexes and per-file index cache for performance. Falls back to CBZ-style image listing when spine contains no images. Includes DB migration, API/indexer/backoffice updates. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
838 lines
32 KiB
Rust
838 lines
32 KiB
Rust
use anyhow::Result;
|
|
use futures::stream::{self, StreamExt};
|
|
use image::{GenericImageView, ImageEncoder};
|
|
use parsers::{analyze_book, BookFormat};
|
|
use sqlx::Row;
|
|
use std::path::Path;
|
|
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
|
|
use std::sync::Arc;
|
|
use tracing::{debug, info, warn};
|
|
use uuid::Uuid;
|
|
|
|
use crate::{job::is_job_cancelled, utils, AppState};
|
|
|
|
/// Thumbnail-generation settings, loaded from the `app_settings` table
/// (key `'thumbnail'`, with `timeout_secs` sourced from the `'limits'` key).
#[derive(Clone)]
struct ThumbnailConfig {
    // Master switch: when false, the analysis phase is skipped entirely.
    enabled: bool,
    // Output encoding: "webp" (default), "jpeg"/"jpg", "png", or "original"
    // (re-encode using the source image's own format). `None` means webp.
    format: Option<String>,
    // Target bounding box for the thumbnail; images are never upscaled
    // beyond their source dimensions.
    width: u32,
    height: u32,
    // Lossy encode quality for JPEG/WebP output.
    quality: u8,
    // Directory where thumbnails and intermediate `.raw` files are written.
    directory: String,
    // Per-book timeout for archive analysis ('limits'.timeout_seconds).
    timeout_secs: u64,
}
|
|
|
|
async fn load_thumbnail_config(pool: &sqlx::PgPool) -> ThumbnailConfig {
|
|
let fallback = ThumbnailConfig {
|
|
enabled: true,
|
|
format: Some("webp".to_string()),
|
|
width: 300,
|
|
height: 400,
|
|
quality: 80,
|
|
directory: "/data/thumbnails".to_string(),
|
|
timeout_secs: 120,
|
|
};
|
|
let thumb_row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'thumbnail'"#)
|
|
.fetch_optional(pool)
|
|
.await;
|
|
let limits_row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'limits'"#)
|
|
.fetch_optional(pool)
|
|
.await;
|
|
|
|
let timeout_secs = limits_row
|
|
.ok()
|
|
.flatten()
|
|
.and_then(|r| r.get::<serde_json::Value, _>("value").get("timeout_seconds").and_then(|v| v.as_u64()))
|
|
.unwrap_or(fallback.timeout_secs);
|
|
|
|
match thumb_row {
|
|
Ok(Some(row)) => {
|
|
let value: serde_json::Value = row.get("value");
|
|
ThumbnailConfig {
|
|
enabled: value
|
|
.get("enabled")
|
|
.and_then(|v| v.as_bool())
|
|
.unwrap_or(fallback.enabled),
|
|
format: value
|
|
.get("format")
|
|
.and_then(|v| v.as_str())
|
|
.map(|s| s.to_string())
|
|
.or_else(|| fallback.format.clone()),
|
|
width: value
|
|
.get("width")
|
|
.and_then(|v| v.as_u64())
|
|
.map(|v| v as u32)
|
|
.unwrap_or(fallback.width),
|
|
height: value
|
|
.get("height")
|
|
.and_then(|v| v.as_u64())
|
|
.map(|v| v as u32)
|
|
.unwrap_or(fallback.height),
|
|
quality: value
|
|
.get("quality")
|
|
.and_then(|v| v.as_u64())
|
|
.map(|v| v as u8)
|
|
.unwrap_or(fallback.quality),
|
|
directory: value
|
|
.get("directory")
|
|
.and_then(|v| v.as_str())
|
|
.map(|s| s.to_string())
|
|
.unwrap_or_else(|| fallback.directory.clone()),
|
|
timeout_secs,
|
|
}
|
|
}
|
|
_ => ThumbnailConfig { timeout_secs, ..fallback },
|
|
}
|
|
}
|
|
|
|
/// Determine how many books to process concurrently during analysis.
///
/// Reads `'limits'.concurrent_renders` from `app_settings`; when absent (or
/// the query fails) falls back to a CPU-derived default.
async fn load_thumbnail_concurrency(pool: &sqlx::PgPool) -> usize {
    // Default: half the logical CPUs, clamped between 1 and 2.
    // Kept deliberately low — archive extraction holds whole raw images in
    // memory, and this phase is I/O bound anyway; raise via the
    // 'concurrent_renders' setting if needed.
    let cpus = num_cpus::get();
    let default_concurrency = (cpus / 2).clamp(1, 2);
    let row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'limits'"#)
        .fetch_optional(pool)
        .await;

    match row {
        Ok(Some(row)) => {
            let value: serde_json::Value = row.get("value");
            value
                .get("concurrent_renders")
                .and_then(|v| v.as_u64())
                .map(|v| v as usize)
                .unwrap_or(default_concurrency)
        }
        _ => default_concurrency,
    }
}
|
|
|
|
/// Detect the image format from raw bytes and return the corresponding file extension.
|
|
fn detect_image_ext(data: &[u8]) -> &'static str {
|
|
match image::guess_format(data) {
|
|
Ok(image::ImageFormat::Png) => "png",
|
|
Ok(image::ImageFormat::WebP) => "webp",
|
|
_ => "jpg", // JPEG is the most common in comic archives
|
|
}
|
|
}
|
|
|
|
/// Fast JPEG decode with DCT scaling: decodes directly at reduced resolution (1/8, 1/4, 1/2).
/// Returns (DynamicImage, original_width, original_height) or None if not JPEG / decode fails.
fn fast_jpeg_decode(image_bytes: &[u8], target_w: u32, target_h: u32) -> Option<(image::DynamicImage, u32, u32)> {
    // Only attempt for JPEG
    if image::guess_format(image_bytes).ok()? != image::ImageFormat::Jpeg {
        return None;
    }

    let mut decoder = jpeg_decoder::Decoder::new(std::io::Cursor::new(image_bytes));
    // Read header to get original dimensions
    decoder.read_info().ok()?;
    let info = decoder.info()?;
    let orig_w = info.width as u32;
    let orig_h = info.height as u32;

    // Request DCT-scaled decode (picks smallest scale >= requested size).
    // NOTE: call order matters for jpeg_decoder: read_info() first, then
    // scale(), then decode().
    decoder.scale(target_w as u16, target_h as u16).ok()?;

    let pixels = decoder.decode().ok()?;
    // Re-read info: after decode() the reported dimensions are those of the
    // scaled output, not the original header values captured above.
    let info = decoder.info()?;
    let dec_w = info.width as u32;
    let dec_h = info.height as u32;

    // Wrap the raw pixel buffer in a DynamicImage. Only RGB and grayscale
    // are handled here; any other pixel format (e.g. CMYK) returns None so
    // the caller falls back to the generic decoder.
    let img = match info.pixel_format {
        jpeg_decoder::PixelFormat::RGB24 => {
            let buf = image::RgbImage::from_raw(dec_w, dec_h, pixels)?;
            image::DynamicImage::ImageRgb8(buf)
        }
        jpeg_decoder::PixelFormat::L8 => {
            let buf = image::GrayImage::from_raw(dec_w, dec_h, pixels)?;
            image::DynamicImage::ImageLuma8(buf)
        }
        _ => return None,
    };
    Some((img, orig_w, orig_h))
}
|
|
|
|
fn generate_thumbnail(image_bytes: &[u8], config: &ThumbnailConfig) -> anyhow::Result<Vec<u8>> {
|
|
let t0 = std::time::Instant::now();
|
|
|
|
// Try fast JPEG DCT-scaled decode first (decodes directly at ~target size)
|
|
let (img, orig_w, orig_h) = if let Some(result) = fast_jpeg_decode(image_bytes, config.width, config.height) {
|
|
result
|
|
} else {
|
|
// Fallback for PNG/WebP/other formats
|
|
let img = image::load_from_memory(image_bytes)
|
|
.map_err(|e| anyhow::anyhow!("failed to load image: {}", e))?;
|
|
let (ow, oh) = img.dimensions();
|
|
(img, ow, oh)
|
|
};
|
|
let t_decode = t0.elapsed();
|
|
|
|
// Don't upscale — clamp to original size
|
|
let target_w = config.width.min(orig_w);
|
|
let target_h = config.height.min(orig_h);
|
|
|
|
let t1 = std::time::Instant::now();
|
|
// thumbnail() is optimized for large downscale ratios (uses fast sampling)
|
|
let resized = img.thumbnail(target_w, target_h);
|
|
let (w, h) = resized.dimensions();
|
|
let t_resize = t1.elapsed();
|
|
|
|
let format = config.format.as_deref().unwrap_or("webp");
|
|
debug!(
|
|
target: "thumbnail",
|
|
"[THUMBNAIL] {}x{} -> {}x{} decode={:.0}ms resize={:.0}ms encode_format={}",
|
|
orig_w, orig_h, w, h,
|
|
t_decode.as_secs_f64() * 1000.0,
|
|
t_resize.as_secs_f64() * 1000.0,
|
|
format,
|
|
);
|
|
|
|
let t2 = std::time::Instant::now();
|
|
let result = match format {
|
|
"original" => {
|
|
// Re-encode in source format (fast JPEG encode instead of slow WebP)
|
|
let source_format = image::guess_format(image_bytes).unwrap_or(image::ImageFormat::Jpeg);
|
|
match source_format {
|
|
image::ImageFormat::Png => {
|
|
let rgba = resized.to_rgba8();
|
|
let mut buf = Vec::new();
|
|
let encoder = image::codecs::png::PngEncoder::new(&mut buf);
|
|
encoder.write_image(&rgba, w, h, image::ColorType::Rgba8.into())
|
|
.map_err(|e| anyhow::anyhow!("png encode failed: {}", e))?;
|
|
Ok(buf)
|
|
}
|
|
_ => {
|
|
let rgb = resized.to_rgb8();
|
|
let mut buf = Vec::new();
|
|
let mut encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut buf, config.quality);
|
|
encoder.encode(&rgb, w, h, image::ColorType::Rgb8.into())
|
|
.map_err(|e| anyhow::anyhow!("jpeg encode failed: {}", e))?;
|
|
Ok(buf)
|
|
}
|
|
}
|
|
}
|
|
"jpeg" | "jpg" => {
|
|
let rgb = resized.to_rgb8();
|
|
let mut buf = Vec::new();
|
|
let mut encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut buf, config.quality);
|
|
encoder.encode(&rgb, w, h, image::ColorType::Rgb8.into())
|
|
.map_err(|e| anyhow::anyhow!("jpeg encode failed: {}", e))?;
|
|
Ok(buf)
|
|
}
|
|
"png" => {
|
|
let rgba = resized.to_rgba8();
|
|
let mut buf = Vec::new();
|
|
let encoder = image::codecs::png::PngEncoder::new(&mut buf);
|
|
encoder.write_image(&rgba, w, h, image::ColorType::Rgba8.into())
|
|
.map_err(|e| anyhow::anyhow!("png encode failed: {}", e))?;
|
|
Ok(buf)
|
|
}
|
|
_ => {
|
|
// WebP (default)
|
|
let rgb = resized.to_rgb8();
|
|
let rgb_data: &[u8] = rgb.as_raw();
|
|
let quality = config.quality as f32;
|
|
let webp_data = webp::Encoder::new(rgb_data, webp::PixelLayout::Rgb, w, h).encode(quality);
|
|
Ok(webp_data.to_vec())
|
|
}
|
|
};
|
|
let t_encode = t2.elapsed();
|
|
debug!(
|
|
target: "thumbnail",
|
|
"[THUMBNAIL] encode={:.0}ms total={:.0}ms output_size={}KB",
|
|
t_encode.as_secs_f64() * 1000.0,
|
|
t0.elapsed().as_secs_f64() * 1000.0,
|
|
result.as_ref().map(|b| b.len() / 1024).unwrap_or(0),
|
|
);
|
|
result
|
|
}
|
|
|
|
/// Save raw image bytes (as extracted from the archive) without any processing.
|
|
fn save_raw_image(book_id: Uuid, raw_bytes: &[u8], directory: &str) -> anyhow::Result<String> {
|
|
let dir = Path::new(directory);
|
|
std::fs::create_dir_all(dir)?;
|
|
let path = dir.join(format!("{}.raw", book_id));
|
|
std::fs::write(&path, raw_bytes)?;
|
|
Ok(path.to_string_lossy().to_string())
|
|
}
|
|
|
|
/// Resize the raw image and save it as a thumbnail, overwriting the raw file.
|
|
fn resize_raw_to_thumbnail(
|
|
book_id: Uuid,
|
|
raw_path: &str,
|
|
config: &ThumbnailConfig,
|
|
) -> anyhow::Result<String> {
|
|
let raw_bytes = std::fs::read(raw_path)
|
|
.map_err(|e| anyhow::anyhow!("failed to read raw image {}: {}", raw_path, e))?;
|
|
debug!(target: "thumbnail", "[THUMBNAIL] book={} raw_size={}KB", book_id, raw_bytes.len() / 1024);
|
|
let thumb_bytes = generate_thumbnail(&raw_bytes, config)?;
|
|
|
|
let format = config.format.as_deref().unwrap_or("webp");
|
|
let ext = match format {
|
|
"original" => detect_image_ext(&raw_bytes),
|
|
"jpeg" | "jpg" => "jpg",
|
|
"png" => "png",
|
|
_ => "webp",
|
|
};
|
|
|
|
let thumb_path = Path::new(&config.directory).join(format!("{}.{}", book_id, ext));
|
|
std::fs::write(&thumb_path, &thumb_bytes)?;
|
|
|
|
// Delete the raw file now that the thumbnail is written
|
|
let _ = std::fs::remove_file(raw_path);
|
|
|
|
Ok(thumb_path.to_string_lossy().to_string())
|
|
}
|
|
|
|
fn book_format_from_str(s: &str) -> Option<BookFormat> {
|
|
match s {
|
|
"cbz" => Some(BookFormat::Cbz),
|
|
"cbr" => Some(BookFormat::Cbr),
|
|
"pdf" => Some(BookFormat::Pdf),
|
|
"epub" => Some(BookFormat::Epub),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Phase 2 — Two-sub-phase analysis:
///
/// **Sub-phase A (extracting_pages)**: open each archive once, extract (page_count, raw_image_bytes),
/// save the raw bytes to `{directory}/{book_id}.raw`. I/O bound — runs at `concurrent_renders`.
///
/// **Sub-phase B (generating_thumbnails)**: load each `.raw` file, resize and encode in the
/// configured format (WebP by default), writing `{directory}/{book_id}.{ext}`. CPU bound —
/// runs at `concurrent_renders`.
///
/// `thumbnail_only` = true: only process books missing thumbnail (page_count may already be set).
/// `thumbnail_only` = false: process books missing page_count.
///
/// Progress is reported through `index_jobs`: sub-phase A maps to 0–50% and
/// sub-phase B to 50–100%. A background poller checks for job cancellation
/// every 2s; cancellation returns `Err("Job cancelled by user")`.
pub async fn analyze_library_books(
    state: &AppState,
    job_id: Uuid,
    library_id: Option<Uuid>,
    thumbnail_only: bool,
) -> Result<()> {
    let config = load_thumbnail_config(&state.pool).await;

    if !config.enabled {
        info!("[ANALYZER] Thumbnails disabled, skipping analysis phase");
        return Ok(());
    }

    let concurrency = load_thumbnail_concurrency(&state.pool).await;

    // Work selection depends on the mode: thumbnail-only runs look at
    // missing thumbnails; full runs look at missing page counts.
    let query_filter = if thumbnail_only {
        "b.thumbnail_path IS NULL"
    } else {
        "b.page_count IS NULL"
    };

    // NOTE: query_filter is one of two hard-coded literals above, so this
    // format! cannot inject user input into the SQL.
    let sql = format!(
        r#"
        SELECT b.id AS book_id, bf.abs_path, bf.format, (b.thumbnail_path IS NULL) AS needs_thumbnail
        FROM books b
        JOIN book_files bf ON bf.book_id = b.id
        WHERE (b.library_id = $1 OR $1 IS NULL)
          AND {}
        "#,
        query_filter
    );

    let rows = sqlx::query(&sql)
        .bind(library_id)
        .fetch_all(&state.pool)
        .await?;

    if rows.is_empty() {
        info!("[ANALYZER] No books to analyze");
        return Ok(());
    }

    let total = rows.len() as i32;
    info!(
        "[ANALYZER] Analyzing {} books (thumbnail_only={}, concurrency={})",
        total, thumbnail_only, concurrency
    );

    // Background poller: flips `cancelled_flag` when the job row is marked
    // cancelled, so both processing phases can bail out between items.
    let cancelled_flag = Arc::new(AtomicBool::new(false));
    let cancel_pool = state.pool.clone();
    let cancel_flag_for_poller = cancelled_flag.clone();
    let cancel_handle = tokio::spawn(async move {
        loop {
            tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
            match is_job_cancelled(&cancel_pool, job_id).await {
                Ok(true) => {
                    cancel_flag_for_poller.store(true, Ordering::Relaxed);
                    break;
                }
                Ok(false) => {}
                // DB error while polling: stop polling; the job continues.
                Err(_) => break,
            }
        }
    });

    // Per-book unit of work pulled out of the query rows.
    #[derive(Clone)]
    struct BookTask {
        book_id: Uuid,
        abs_path: String,
        format: String,
        needs_thumbnail: bool,
    }

    let tasks: Vec<BookTask> = rows
        .into_iter()
        .map(|row| BookTask {
            book_id: row.get("book_id"),
            abs_path: row.get("abs_path"),
            format: row.get("format"),
            needs_thumbnail: row.get("needs_thumbnail"),
        })
        .collect();

    // -------------------------------------------------------------------------
    // Sub-phase A: extract first page from each archive and store raw image
    // Processed in batches to limit memory — raw_bytes are freed between batches.
    // The collected results (Uuid, String, i32) are lightweight (~100 bytes each).
    // -------------------------------------------------------------------------
    const BATCH_SIZE: usize = 200;

    let phase_a_start = std::time::Instant::now();
    let _ = sqlx::query(
        "UPDATE index_jobs SET status = 'extracting_pages', total_files = $2, processed_files = 0, current_file = NULL WHERE id = $1",
    )
    .bind(job_id)
    .bind(total)
    .execute(&state.pool)
    .await;

    let extracted_count = Arc::new(AtomicI32::new(0));
    let mut all_extracted: Vec<(Uuid, String, i32)> = Vec::new();

    // Ceiling division: number of BATCH_SIZE-sized chunks.
    let num_batches = (tasks.len() + BATCH_SIZE - 1) / BATCH_SIZE;
    let task_chunks: Vec<Vec<BookTask>> = tasks
        .into_iter()
        .collect::<Vec<_>>()
        .chunks(BATCH_SIZE)
        .map(|c| c.to_vec())
        .collect();

    for (batch_idx, batch_tasks) in task_chunks.into_iter().enumerate() {
        if cancelled_flag.load(Ordering::Relaxed) {
            break;
        }

        info!(
            "[ANALYZER] Extraction batch {}/{} — {} books",
            batch_idx + 1, num_batches, batch_tasks.len()
        );

        // Each task yields Some((book_id, raw_path, page_count)) only when it
        // still needs a thumbnail and extraction + raw save + DB update all
        // succeeded; every failure/skip path returns None after logging.
        let batch_extracted: Vec<(Uuid, String, i32)> = stream::iter(batch_tasks)
            .map(|task| {
                let pool = state.pool.clone();
                let config = config.clone();
                let cancelled = cancelled_flag.clone();
                let extracted_count = extracted_count.clone();

                async move {
                    if cancelled.load(Ordering::Relaxed) {
                        return None;
                    }

                    let local_path = utils::remap_libraries_path(&task.abs_path);
                    let path = std::path::Path::new(&local_path);
                    let book_id = task.book_id;
                    let needs_thumbnail = task.needs_thumbnail;

                    // Remove macOS Apple Double resource fork files (._*) that were indexed before the scanner filter was added
                    if path
                        .file_name()
                        .and_then(|n| n.to_str())
                        .map(|n| n.starts_with("._"))
                        .unwrap_or(false)
                    {
                        warn!("[ANALYZER] Removing macOS resource fork from DB: {}", local_path);
                        let _ = sqlx::query("DELETE FROM book_files WHERE book_id = $1")
                            .bind(book_id)
                            .execute(&pool)
                            .await;
                        // Delete the book row only if no other file rows remain for it.
                        let _ = sqlx::query(
                            "DELETE FROM books WHERE id = $1 AND NOT EXISTS (SELECT 1 FROM book_files WHERE book_id = $1)",
                        )
                        .bind(book_id)
                        .execute(&pool)
                        .await;
                        return None;
                    }

                    let format = match book_format_from_str(&task.format) {
                        Some(f) => f,
                        None => {
                            warn!("[ANALYZER] Unknown format '{}' for book {}", task.format, book_id);
                            return None;
                        }
                    };

                    let pdf_scale = config.width.max(config.height);
                    let path_owned = path.to_path_buf();
                    let timeout_secs = config.timeout_secs;
                    let file_name = path.file_name()
                        .map(|n| n.to_string_lossy().to_string())
                        .unwrap_or_else(|| local_path.clone());

                    debug!(target: "extraction", "[EXTRACTION] Starting: {} ({})", file_name, task.format);
                    let extract_start = std::time::Instant::now();

                    // analyze_book is blocking (archive I/O + parsing), so it runs
                    // on the blocking pool, wrapped in the per-book timeout.
                    let analyze_result = tokio::time::timeout(
                        std::time::Duration::from_secs(timeout_secs),
                        tokio::task::spawn_blocking(move || analyze_book(&path_owned, format, pdf_scale)),
                    )
                    .await;

                    // Triple-nested result: timeout → join → analyze_book's own Result.
                    let (page_count, raw_bytes) = match analyze_result {
                        Ok(Ok(Ok(result))) => result,
                        Ok(Ok(Err(e))) => {
                            warn!(target: "extraction", "[EXTRACTION] Failed: {} — {}", file_name, e);
                            let _ = sqlx::query(
                                "UPDATE book_files SET parse_status = 'error', parse_error_opt = $2 WHERE book_id = $1",
                            )
                            .bind(book_id)
                            .bind(e.to_string())
                            .execute(&pool)
                            .await;
                            return None;
                        }
                        Ok(Err(e)) => {
                            warn!(target: "extraction", "[EXTRACTION] spawn error: {} — {}", file_name, e);
                            return None;
                        }
                        Err(_) => {
                            warn!(target: "extraction", "[EXTRACTION] Timeout ({}s): {}", timeout_secs, file_name);
                            let _ = sqlx::query(
                                "UPDATE book_files SET parse_status = 'error', parse_error_opt = $2 WHERE book_id = $1",
                            )
                            .bind(book_id)
                            .bind(format!("analyze_book timed out after {}s", timeout_secs))
                            .execute(&pool)
                            .await;
                            return None;
                        }
                    };

                    let extract_elapsed = extract_start.elapsed();
                    debug!(
                        target: "extraction",
                        "[EXTRACTION] Done: {} — {} pages, image={}KB in {:.0}ms",
                        file_name, page_count, raw_bytes.len() / 1024,
                        extract_elapsed.as_secs_f64() * 1000.0,
                    );

                    // If thumbnail already exists, just update page_count and skip thumbnail generation
                    if !needs_thumbnail {
                        debug!(target: "extraction", "[EXTRACTION] Page count only: {} — {} pages", file_name, page_count);
                        if let Err(e) = sqlx::query("UPDATE books SET page_count = $1 WHERE id = $2")
                            .bind(page_count)
                            .bind(book_id)
                            .execute(&pool)
                            .await
                        {
                            warn!(target: "extraction", "[EXTRACTION] DB page_count update failed for {}: {}", file_name, e);
                        }
                        let processed = extracted_count.fetch_add(1, Ordering::Relaxed) + 1;
                        // Sub-phase A owns the 0–50% range of job progress.
                        let percent = (processed as f64 / total as f64 * 50.0) as i32;
                        let _ = sqlx::query(
                            "UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
                        )
                        .bind(job_id)
                        .bind(processed)
                        .bind(percent)
                        .execute(&pool)
                        .await;

                        if processed % 25 == 0 || processed == total {
                            info!(
                                target: "extraction",
                                "[EXTRACTION] Progress: {}/{} books extracted ({}%)",
                                processed, total, percent
                            );
                        }
                        return None; // don't enqueue for thumbnail sub-phase
                    }

                    // Save raw bytes to disk (no resize, no encode) — moves raw_bytes, no clone
                    let raw_path = match tokio::task::spawn_blocking({
                        let dir = config.directory.clone();
                        move || save_raw_image(book_id, &raw_bytes, &dir)
                    })
                    .await
                    {
                        Ok(Ok(p)) => p,
                        Ok(Err(e)) => {
                            warn!("[ANALYZER] save_raw_image failed for book {}: {}", book_id, e);
                            return None;
                        }
                        Err(e) => {
                            warn!("[ANALYZER] spawn_blocking save_raw error for book {}: {}", book_id, e);
                            return None;
                        }
                    };

                    // Update page_count in DB
                    if let Err(e) = sqlx::query("UPDATE books SET page_count = $1 WHERE id = $2")
                        .bind(page_count)
                        .bind(book_id)
                        .execute(&pool)
                        .await
                    {
                        warn!("[ANALYZER] DB page_count update failed for book {}: {}", book_id, e);
                        return None;
                    }

                    let processed = extracted_count.fetch_add(1, Ordering::Relaxed) + 1;
                    let percent = (processed as f64 / total as f64 * 50.0) as i32; // first 50%
                    let _ = sqlx::query(
                        "UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
                    )
                    .bind(job_id)
                    .bind(processed)
                    .bind(percent)
                    .execute(&pool)
                    .await;

                    if processed % 25 == 0 || processed == total {
                        info!(
                            target: "extraction",
                            "[EXTRACTION] Progress: {}/{} books extracted ({}%)",
                            processed, total, percent
                        );
                    }

                    Some((book_id, raw_path, page_count))
                }
            })
            .buffer_unordered(concurrency)
            .filter_map(|x| async move { x })
            .collect()
            .await;

        // Collect lightweight results; raw_bytes already saved to disk and freed
        all_extracted.extend(batch_extracted);

        // Log RSS to track memory growth between batches
        // (Linux-only: /proc/self/status is absent elsewhere, in which case
        // this silently does nothing.)
        if let Ok(status) = std::fs::read_to_string("/proc/self/status") {
            for line in status.lines() {
                if line.starts_with("VmRSS:") {
                    info!("[ANALYZER] Memory after batch {}/{}: {}", batch_idx + 1, num_batches, line.trim());
                    break;
                }
            }
        }
    }

    if cancelled_flag.load(Ordering::Relaxed) {
        cancel_handle.abort();
        info!("[ANALYZER] Job {} cancelled during extraction phase", job_id);
        return Err(anyhow::anyhow!("Job cancelled by user"));
    }

    let extracted_total = all_extracted.len() as i32;
    let phase_a_elapsed = phase_a_start.elapsed();
    info!(
        "[ANALYZER] Sub-phase A complete: {}/{} books extracted in {:.1}s ({:.0} ms/book, {} batches)",
        extracted_total,
        total,
        phase_a_elapsed.as_secs_f64(),
        if extracted_total > 0 { phase_a_elapsed.as_millis() as f64 / extracted_total as f64 } else { 0.0 },
        num_batches,
    );

    // -------------------------------------------------------------------------
    // Sub-phase B: resize raw images and encode as thumbnails
    // CPU bound — can run at higher concurrency than I/O phase
    // -------------------------------------------------------------------------
    let phase_b_start = std::time::Instant::now();
    let _ = sqlx::query(
        "UPDATE index_jobs SET status = 'generating_thumbnails', generating_thumbnails_started_at = NOW(), total_files = $2, processed_files = 0, current_file = NULL WHERE id = $1",
    )
    .bind(job_id)
    .bind(extracted_total)
    .execute(&state.pool)
    .await;

    let resize_count = Arc::new(AtomicI32::new(0));

    stream::iter(all_extracted)
        .for_each_concurrent(concurrency, |(book_id, raw_path, page_count)| {
            let pool = state.pool.clone();
            let config = config.clone();
            let cancelled = cancelled_flag.clone();
            let resize_count = resize_count.clone();

            async move {
                if cancelled.load(Ordering::Relaxed) {
                    return;
                }

                // Resize + encode is CPU-heavy: off to the blocking pool.
                let raw_path_clone = raw_path.clone();
                let thumb_result = tokio::task::spawn_blocking(move || {
                    resize_raw_to_thumbnail(book_id, &raw_path_clone, &config)
                })
                .await;

                let thumb_path = match thumb_result {
                    Ok(Ok(p)) => p,
                    Ok(Err(e)) => {
                        warn!("[ANALYZER] resize_raw_to_webp failed for book {}: {}", book_id, e);
                        // page_count is already set; thumbnail stays NULL
                        return;
                    }
                    Err(e) => {
                        warn!("[ANALYZER] spawn_blocking resize error for book {}: {}", book_id, e);
                        return;
                    }
                };

                if let Err(e) = sqlx::query(
                    "UPDATE books SET page_count = $1, thumbnail_path = $2 WHERE id = $3",
                )
                .bind(page_count)
                .bind(&thumb_path)
                .bind(book_id)
                .execute(&pool)
                .await
                {
                    warn!("[ANALYZER] DB thumbnail update failed for book {}: {}", book_id, e);
                    return;
                }

                let processed = resize_count.fetch_add(1, Ordering::Relaxed) + 1;
                let percent =
                    50 + (processed as f64 / extracted_total as f64 * 50.0) as i32; // last 50%
                let _ = sqlx::query(
                    "UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
                )
                .bind(job_id)
                .bind(processed)
                .bind(percent)
                .execute(&pool)
                .await;

                if processed % 25 == 0 || processed == extracted_total {
                    info!(
                        target: "thumbnail",
                        "[THUMBNAIL] Progress: {}/{} thumbnails generated ({}%)",
                        processed, extracted_total, percent
                    );
                }
            }
        })
        .await;

    cancel_handle.abort();

    if cancelled_flag.load(Ordering::Relaxed) {
        info!("[ANALYZER] Job {} cancelled during resize phase", job_id);
        return Err(anyhow::anyhow!("Job cancelled by user"));
    }

    let final_count = resize_count.load(Ordering::Relaxed);
    let phase_b_elapsed = phase_b_start.elapsed();
    info!(
        "[ANALYZER] Sub-phase B complete: {}/{} thumbnails generated in {:.1}s ({:.0} ms/book)",
        final_count,
        extracted_total,
        phase_b_elapsed.as_secs_f64(),
        if final_count > 0 { phase_b_elapsed.as_millis() as f64 / final_count as f64 } else { 0.0 }
    );
    info!(
        "[ANALYZER] Total: {:.1}s (extraction {:.1}s + resize {:.1}s)",
        (phase_a_elapsed + phase_b_elapsed).as_secs_f64(),
        phase_a_elapsed.as_secs_f64(),
        phase_b_elapsed.as_secs_f64(),
    );

    Ok(())
}
|
|
|
|
/// Clear thumbnail files and DB references for books in scope, then re-analyze.
|
|
pub async fn regenerate_thumbnails(
|
|
state: &AppState,
|
|
job_id: Uuid,
|
|
library_id: Option<Uuid>,
|
|
) -> Result<()> {
|
|
let config = load_thumbnail_config(&state.pool).await;
|
|
|
|
let book_ids_to_clear: Vec<Uuid> = sqlx::query_scalar(
|
|
r#"SELECT id FROM books WHERE (library_id = $1 OR $1 IS NULL) AND thumbnail_path IS NOT NULL"#,
|
|
)
|
|
.bind(library_id)
|
|
.fetch_all(&state.pool)
|
|
.await
|
|
.unwrap_or_default();
|
|
|
|
let mut deleted_count = 0usize;
|
|
for book_id in &book_ids_to_clear {
|
|
// Delete thumbnail in any format (webp, jpg, png) + raw
|
|
for ext in &["webp", "jpg", "png", "raw"] {
|
|
let path = Path::new(&config.directory).join(format!("{}.{}", book_id, ext));
|
|
if path.exists() {
|
|
if let Err(e) = std::fs::remove_file(&path) {
|
|
warn!("[ANALYZER] Failed to delete thumbnail {}: {}", path.display(), e);
|
|
} else if *ext != "raw" {
|
|
deleted_count += 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
info!("[ANALYZER] Deleted {} thumbnail files for regeneration", deleted_count);
|
|
|
|
sqlx::query(r#"UPDATE books SET thumbnail_path = NULL WHERE (library_id = $1 OR $1 IS NULL)"#)
|
|
.bind(library_id)
|
|
.execute(&state.pool)
|
|
.await?;
|
|
|
|
analyze_library_books(state, job_id, library_id, true).await
|
|
}
|
|
|
|
/// Delete orphaned thumbnail files (books deleted in full_rebuild get new UUIDs).
|
|
pub async fn cleanup_orphaned_thumbnails(state: &AppState) -> Result<()> {
|
|
let config = load_thumbnail_config(&state.pool).await;
|
|
|
|
let existing_book_ids: std::collections::HashSet<Uuid> =
|
|
sqlx::query_scalar(r#"SELECT id FROM books"#)
|
|
.fetch_all(&state.pool)
|
|
.await
|
|
.unwrap_or_default()
|
|
.into_iter()
|
|
.collect();
|
|
|
|
let thumbnail_dir = Path::new(&config.directory);
|
|
if !thumbnail_dir.exists() {
|
|
return Ok(());
|
|
}
|
|
|
|
let mut deleted_count = 0usize;
|
|
if let Ok(entries) = std::fs::read_dir(thumbnail_dir) {
|
|
for entry in entries.flatten() {
|
|
let file_name = entry.file_name();
|
|
let file_name = file_name.to_string_lossy();
|
|
// Clean up all thumbnail formats and orphaned .raw files
|
|
let stem = [".webp", ".jpg", ".png", ".raw"]
|
|
.iter()
|
|
.find_map(|ext| file_name.strip_suffix(ext).map(|s| s.to_string()));
|
|
if let Some(book_id_str) = stem {
|
|
if let Ok(book_id) = Uuid::parse_str(&book_id_str) {
|
|
if !existing_book_ids.contains(&book_id) {
|
|
if let Err(e) = std::fs::remove_file(entry.path()) {
|
|
warn!("Failed to delete orphaned file {}: {}", entry.path().display(), e);
|
|
} else {
|
|
deleted_count += 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
info!("[ANALYZER] Deleted {} orphaned thumbnail files", deleted_count);
|
|
Ok(())
|
|
}
|