- Added a new job status 'extracting_pages' to represent the first sub-phase of thumbnail generation. - Updated the database schema to include a timestamp for when thumbnail generation starts. - Enhanced job progress components to handle the new status, including UI updates for displaying progress and status labels. - Refactored job-related logic to accommodate the two-phase process: extracting pages and generating thumbnails. - Adjusted SQL queries and job detail responses to include the new fields and statuses. This change improves the clarity of job processing states and enhances user feedback during the thumbnail generation process.
551 lines · 20 KiB · Rust
use anyhow::Result;
|
|
use futures::stream::{self, StreamExt};
|
|
use image::GenericImageView;
|
|
use parsers::{analyze_book, BookFormat};
|
|
use sqlx::Row;
|
|
use std::path::Path;
|
|
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
|
|
use std::sync::Arc;
|
|
use tracing::{info, warn};
|
|
use uuid::Uuid;
|
|
|
|
use crate::{job::is_job_cancelled, utils, AppState};
|
|
|
|
/// Thumbnail-generation settings, loaded from the `app_settings` table
/// (key = 'thumbnail') with hard-coded fallbacks — see `load_thumbnail_config`.
#[derive(Clone)]
struct ThumbnailConfig {
    // Master switch: when false the whole analysis phase is skipped.
    enabled: bool,
    // Maximum thumbnail width in pixels (aspect ratio is preserved on resize).
    width: u32,
    // Maximum thumbnail height in pixels (aspect ratio is preserved on resize).
    height: u32,
    // WebP encoding quality passed to the encoder as f32 — presumably 0–100;
    // TODO confirm against the webp crate docs.
    quality: u8,
    // Directory where `{book_id}.raw` and `{book_id}.webp` files are written.
    directory: String,
}
|
|
|
|
async fn load_thumbnail_config(pool: &sqlx::PgPool) -> ThumbnailConfig {
|
|
let fallback = ThumbnailConfig {
|
|
enabled: true,
|
|
width: 300,
|
|
height: 400,
|
|
quality: 80,
|
|
directory: "/data/thumbnails".to_string(),
|
|
};
|
|
let row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'thumbnail'"#)
|
|
.fetch_optional(pool)
|
|
.await;
|
|
|
|
match row {
|
|
Ok(Some(row)) => {
|
|
let value: serde_json::Value = row.get("value");
|
|
ThumbnailConfig {
|
|
enabled: value
|
|
.get("enabled")
|
|
.and_then(|v| v.as_bool())
|
|
.unwrap_or(fallback.enabled),
|
|
width: value
|
|
.get("width")
|
|
.and_then(|v| v.as_u64())
|
|
.map(|v| v as u32)
|
|
.unwrap_or(fallback.width),
|
|
height: value
|
|
.get("height")
|
|
.and_then(|v| v.as_u64())
|
|
.map(|v| v as u32)
|
|
.unwrap_or(fallback.height),
|
|
quality: value
|
|
.get("quality")
|
|
.and_then(|v| v.as_u64())
|
|
.map(|v| v as u8)
|
|
.unwrap_or(fallback.quality),
|
|
directory: value
|
|
.get("directory")
|
|
.and_then(|v| v.as_str())
|
|
.map(|s| s.to_string())
|
|
.unwrap_or_else(|| fallback.directory.clone()),
|
|
}
|
|
}
|
|
_ => fallback,
|
|
}
|
|
}
|
|
|
|
async fn load_thumbnail_concurrency(pool: &sqlx::PgPool) -> usize {
|
|
let default_concurrency = 2;
|
|
let row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'limits'"#)
|
|
.fetch_optional(pool)
|
|
.await;
|
|
|
|
match row {
|
|
Ok(Some(row)) => {
|
|
let value: serde_json::Value = row.get("value");
|
|
value
|
|
.get("concurrent_renders")
|
|
.and_then(|v| v.as_u64())
|
|
.map(|v| v as usize)
|
|
.unwrap_or(default_concurrency)
|
|
}
|
|
_ => default_concurrency,
|
|
}
|
|
}
|
|
|
|
fn generate_thumbnail(image_bytes: &[u8], config: &ThumbnailConfig) -> anyhow::Result<Vec<u8>> {
|
|
let img = image::load_from_memory(image_bytes)
|
|
.map_err(|e| anyhow::anyhow!("failed to load image: {}", e))?;
|
|
let (orig_w, orig_h) = img.dimensions();
|
|
let ratio_w = config.width as f32 / orig_w as f32;
|
|
let ratio_h = config.height as f32 / orig_h as f32;
|
|
let ratio = ratio_w.min(ratio_h);
|
|
let new_w = (orig_w as f32 * ratio) as u32;
|
|
let new_h = (orig_h as f32 * ratio) as u32;
|
|
let resized = img.resize(new_w, new_h, image::imageops::FilterType::Triangle);
|
|
let rgba = resized.to_rgba8();
|
|
let (w, h) = rgba.dimensions();
|
|
let rgb_data: Vec<u8> = rgba.pixels().flat_map(|p| [p[0], p[1], p[2]]).collect();
|
|
let quality = config.quality as f32;
|
|
let webp_data = webp::Encoder::new(&rgb_data, webp::PixelLayout::Rgb, w, h).encode(quality);
|
|
Ok(webp_data.to_vec())
|
|
}
|
|
|
|
/// Save raw image bytes (as extracted from the archive) without any processing.
|
|
fn save_raw_image(book_id: Uuid, raw_bytes: &[u8], directory: &str) -> anyhow::Result<String> {
|
|
let dir = Path::new(directory);
|
|
std::fs::create_dir_all(dir)?;
|
|
let path = dir.join(format!("{}.raw", book_id));
|
|
std::fs::write(&path, raw_bytes)?;
|
|
Ok(path.to_string_lossy().to_string())
|
|
}
|
|
|
|
/// Resize the raw image and save it as a WebP thumbnail, overwriting the raw file.
|
|
fn resize_raw_to_webp(
|
|
book_id: Uuid,
|
|
raw_path: &str,
|
|
config: &ThumbnailConfig,
|
|
) -> anyhow::Result<String> {
|
|
let raw_bytes = std::fs::read(raw_path)
|
|
.map_err(|e| anyhow::anyhow!("failed to read raw image {}: {}", raw_path, e))?;
|
|
let webp_bytes = generate_thumbnail(&raw_bytes, config)?;
|
|
|
|
let webp_path = Path::new(&config.directory).join(format!("{}.webp", book_id));
|
|
std::fs::write(&webp_path, &webp_bytes)?;
|
|
|
|
// Delete the raw file now that the WebP is written
|
|
let _ = std::fs::remove_file(raw_path);
|
|
|
|
Ok(webp_path.to_string_lossy().to_string())
|
|
}
|
|
|
|
fn book_format_from_str(s: &str) -> Option<BookFormat> {
|
|
match s {
|
|
"cbz" => Some(BookFormat::Cbz),
|
|
"cbr" => Some(BookFormat::Cbr),
|
|
"pdf" => Some(BookFormat::Pdf),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Phase 2 — Two-sub-phase analysis:
///
/// **Sub-phase A (extracting_pages)**: open each archive once, extract (page_count, raw_image_bytes),
/// save the raw bytes to `{directory}/{book_id}.raw`. I/O bound — runs at `concurrent_renders`.
///
/// **Sub-phase B (generating_thumbnails)**: load each `.raw` file, resize and encode as WebP,
/// overwrite as `{directory}/{book_id}.webp`. CPU bound — runs at `concurrent_renders`.
///
/// `thumbnail_only` = true: only process books missing thumbnail (page_count may already be set).
/// `thumbnail_only` = false: process books missing page_count.
///
/// Progress reporting: sub-phase A covers 0–50% of `progress_percent`, sub-phase B 50–100%.
/// Cancellation: a background task polls `is_job_cancelled` every 2 s and sets a shared
/// flag; in-flight work is allowed to finish, and the function returns `Err` after either
/// sub-phase if the flag was set.
///
/// # Errors
/// Returns `Err` if the initial book query fails or if the job was cancelled by the user.
/// Per-book extraction/resize failures are logged and skipped, not propagated.
pub async fn analyze_library_books(
    state: &AppState,
    job_id: Uuid,
    library_id: Option<Uuid>,
    thumbnail_only: bool,
) -> Result<()> {
    let config = load_thumbnail_config(&state.pool).await;

    // Thumbnails disabled means the whole analysis phase is a no-op.
    if !config.enabled {
        info!("[ANALYZER] Thumbnails disabled, skipping analysis phase");
        return Ok(());
    }

    let concurrency = load_thumbnail_concurrency(&state.pool).await;

    // Scope selection: regenerate-style runs look for missing thumbnails,
    // first-pass runs look for missing page counts.
    let query_filter = if thumbnail_only {
        "b.thumbnail_path IS NULL"
    } else {
        "b.page_count IS NULL"
    };

    // NOTE: `query_filter` is one of two hard-coded literals above, so this
    // format! cannot inject user-controlled SQL; `library_id` stays bound.
    let sql = format!(
        r#"
        SELECT b.id AS book_id, bf.abs_path, bf.format
        FROM books b
        JOIN book_files bf ON bf.book_id = b.id
        WHERE (b.library_id = $1 OR $1 IS NULL)
          AND {}
        "#,
        query_filter
    );

    let rows = sqlx::query(&sql)
        .bind(library_id)
        .fetch_all(&state.pool)
        .await?;

    if rows.is_empty() {
        info!("[ANALYZER] No books to analyze");
        return Ok(());
    }

    // `total` > 0 here (rows is non-empty), so the percent division below is safe.
    let total = rows.len() as i32;
    info!(
        "[ANALYZER] Analyzing {} books (thumbnail_only={}, concurrency={})",
        total, thumbnail_only, concurrency
    );

    // Background cancellation poller: checks the job row every 2 s and flips
    // the shared flag. Workers check the flag before starting each item;
    // in-flight items are not interrupted. Poller exits on DB error too
    // (treated as "stop polling", not "cancelled").
    let cancelled_flag = Arc::new(AtomicBool::new(false));
    let cancel_pool = state.pool.clone();
    let cancel_flag_for_poller = cancelled_flag.clone();
    let cancel_handle = tokio::spawn(async move {
        loop {
            tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
            match is_job_cancelled(&cancel_pool, job_id).await {
                Ok(true) => {
                    cancel_flag_for_poller.store(true, Ordering::Relaxed);
                    break;
                }
                Ok(false) => {}
                Err(_) => break,
            }
        }
    });

    // Owned copy of each row so tasks can move into the stream closures.
    struct BookTask {
        book_id: Uuid,
        abs_path: String,
        format: String,
    }

    let tasks: Vec<BookTask> = rows
        .into_iter()
        .map(|row| BookTask {
            book_id: row.get("book_id"),
            abs_path: row.get("abs_path"),
            format: row.get("format"),
        })
        .collect();

    // -------------------------------------------------------------------------
    // Sub-phase A: extract first page from each archive and store raw image
    // I/O bound — limited by HDD throughput, runs at `concurrency`
    // -------------------------------------------------------------------------
    let phase_a_start = std::time::Instant::now();
    // Switch the job into the new 'extracting_pages' status and reset counters.
    let _ = sqlx::query(
        "UPDATE index_jobs SET status = 'extracting_pages', total_files = $2, processed_files = 0, current_file = NULL WHERE id = $1",
    )
    .bind(job_id)
    .bind(total)
    .execute(&state.pool)
    .await;

    let extracted_count = Arc::new(AtomicI32::new(0));

    // Collected results: (book_id, raw_path, page_count)
    let extracted: Vec<(Uuid, String, i32)> = stream::iter(tasks)
        .map(|task| {
            let pool = state.pool.clone();
            let config = config.clone();
            let cancelled = cancelled_flag.clone();
            let extracted_count = extracted_count.clone();

            async move {
                // Skip (don't abort) remaining items once cancellation is seen.
                if cancelled.load(Ordering::Relaxed) {
                    return None;
                }

                let local_path = utils::remap_libraries_path(&task.abs_path);
                let path = std::path::Path::new(&local_path);
                let book_id = task.book_id;

                let format = match book_format_from_str(&task.format) {
                    Some(f) => f,
                    None => {
                        warn!("[ANALYZER] Unknown format '{}' for book {}", task.format, book_id);
                        return None;
                    }
                };

                // Render hint for PDF first-page rasterization; presumably a
                // target pixel size — TODO confirm against analyze_book's contract.
                let pdf_scale = config.width.max(config.height);
                let path_owned = path.to_path_buf();
                // Archive/PDF parsing is blocking work; keep it off the async runtime.
                let analyze_result =
                    tokio::task::spawn_blocking(move || analyze_book(&path_owned, format, pdf_scale))
                        .await;

                let (page_count, raw_bytes) = match analyze_result {
                    Ok(Ok(result)) => result,
                    Ok(Err(e)) => {
                        // Parse failure: record it on the file row and skip the book.
                        warn!("[ANALYZER] analyze_book failed for book {}: {}", book_id, e);
                        let _ = sqlx::query(
                            "UPDATE book_files SET parse_status = 'error', parse_error_opt = $2 WHERE book_id = $1",
                        )
                        .bind(book_id)
                        .bind(e.to_string())
                        .execute(&pool)
                        .await;
                        return None;
                    }
                    Err(e) => {
                        warn!("[ANALYZER] spawn_blocking error for book {}: {}", book_id, e);
                        return None;
                    }
                };

                // Save raw bytes to disk (no resize, no encode)
                let raw_path = match tokio::task::spawn_blocking({
                    let dir = config.directory.clone();
                    let bytes = raw_bytes.clone();
                    move || save_raw_image(book_id, &bytes, &dir)
                })
                .await
                {
                    Ok(Ok(p)) => p,
                    Ok(Err(e)) => {
                        warn!("[ANALYZER] save_raw_image failed for book {}: {}", book_id, e);
                        return None;
                    }
                    Err(e) => {
                        warn!("[ANALYZER] spawn_blocking save_raw error for book {}: {}", book_id, e);
                        return None;
                    }
                };

                // Update page_count in DB
                if let Err(e) = sqlx::query("UPDATE books SET page_count = $1 WHERE id = $2")
                    .bind(page_count)
                    .bind(book_id)
                    .execute(&pool)
                    .await
                {
                    warn!("[ANALYZER] DB page_count update failed for book {}: {}", book_id, e);
                    return None;
                }

                // fetch_add returns the previous value, hence the +1.
                let processed = extracted_count.fetch_add(1, Ordering::Relaxed) + 1;
                let percent = (processed as f64 / total as f64 * 50.0) as i32; // first 50%
                // Best-effort progress write; concurrent workers may interleave,
                // which is acceptable for a monotonic-ish progress display.
                let _ = sqlx::query(
                    "UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
                )
                .bind(job_id)
                .bind(processed)
                .bind(percent)
                .execute(&pool)
                .await;

                Some((book_id, raw_path, page_count))
            }
        })
        .buffer_unordered(concurrency)
        .filter_map(|x| async move { x })
        .collect()
        .await;

    if cancelled_flag.load(Ordering::Relaxed) {
        cancel_handle.abort();
        info!("[ANALYZER] Job {} cancelled during extraction phase", job_id);
        return Err(anyhow::anyhow!("Job cancelled by user"));
    }

    let extracted_total = extracted.len() as i32;
    let phase_a_elapsed = phase_a_start.elapsed();
    info!(
        "[ANALYZER] Sub-phase A complete: {}/{} books extracted in {:.1}s ({:.0} ms/book)",
        extracted_total,
        total,
        phase_a_elapsed.as_secs_f64(),
        if extracted_total > 0 { phase_a_elapsed.as_millis() as f64 / extracted_total as f64 } else { 0.0 }
    );

    // -------------------------------------------------------------------------
    // Sub-phase B: resize raw images and encode as WebP
    // CPU bound — currently runs at the same `concurrency` limit as phase A.
    // -------------------------------------------------------------------------
    let phase_b_start = std::time::Instant::now();
    // Flip status to 'generating_thumbnails' and restart the counters for the
    // second 50% of the progress bar; records the phase-start timestamp.
    let _ = sqlx::query(
        "UPDATE index_jobs SET status = 'generating_thumbnails', generating_thumbnails_started_at = NOW(), total_files = $2, processed_files = 0, current_file = NULL WHERE id = $1",
    )
    .bind(job_id)
    .bind(extracted_total)
    .execute(&state.pool)
    .await;

    let resize_count = Arc::new(AtomicI32::new(0));

    stream::iter(extracted)
        .for_each_concurrent(concurrency, |(book_id, raw_path, page_count)| {
            let pool = state.pool.clone();
            let config = config.clone();
            let cancelled = cancelled_flag.clone();
            let resize_count = resize_count.clone();

            async move {
                if cancelled.load(Ordering::Relaxed) {
                    return;
                }

                let raw_path_clone = raw_path.clone();
                // CPU-heavy decode/resize/encode runs off the async runtime.
                let thumb_result = tokio::task::spawn_blocking(move || {
                    resize_raw_to_webp(book_id, &raw_path_clone, &config)
                })
                .await;

                let thumb_path = match thumb_result {
                    Ok(Ok(p)) => p,
                    Ok(Err(e)) => {
                        warn!("[ANALYZER] resize_raw_to_webp failed for book {}: {}", book_id, e);
                        // page_count is already set; thumbnail stays NULL
                        return;
                    }
                    Err(e) => {
                        warn!("[ANALYZER] spawn_blocking resize error for book {}: {}", book_id, e);
                        return;
                    }
                };

                // page_count is written again here (already set in phase A) so a
                // single statement covers both columns.
                if let Err(e) = sqlx::query(
                    "UPDATE books SET page_count = $1, thumbnail_path = $2 WHERE id = $3",
                )
                .bind(page_count)
                .bind(&thumb_path)
                .bind(book_id)
                .execute(&pool)
                .await
                {
                    warn!("[ANALYZER] DB thumbnail update failed for book {}: {}", book_id, e);
                    return;
                }

                let processed = resize_count.fetch_add(1, Ordering::Relaxed) + 1;
                let percent =
                    50 + (processed as f64 / extracted_total as f64 * 50.0) as i32; // last 50%
                let _ = sqlx::query(
                    "UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
                )
                .bind(job_id)
                .bind(processed)
                .bind(percent)
                .execute(&pool)
                .await;
            }
        })
        .await;

    // Both phases are done; stop the cancellation poller either way.
    cancel_handle.abort();

    if cancelled_flag.load(Ordering::Relaxed) {
        info!("[ANALYZER] Job {} cancelled during resize phase", job_id);
        return Err(anyhow::anyhow!("Job cancelled by user"));
    }

    let final_count = resize_count.load(Ordering::Relaxed);
    let phase_b_elapsed = phase_b_start.elapsed();
    info!(
        "[ANALYZER] Sub-phase B complete: {}/{} thumbnails generated in {:.1}s ({:.0} ms/book)",
        final_count,
        extracted_total,
        phase_b_elapsed.as_secs_f64(),
        if final_count > 0 { phase_b_elapsed.as_millis() as f64 / final_count as f64 } else { 0.0 }
    );
    info!(
        "[ANALYZER] Total: {:.1}s (extraction {:.1}s + resize {:.1}s)",
        (phase_a_elapsed + phase_b_elapsed).as_secs_f64(),
        phase_a_elapsed.as_secs_f64(),
        phase_b_elapsed.as_secs_f64(),
    );

    Ok(())
}
|
|
|
|
/// Clear thumbnail files and DB references for books in scope, then re-analyze.
|
|
pub async fn regenerate_thumbnails(
|
|
state: &AppState,
|
|
job_id: Uuid,
|
|
library_id: Option<Uuid>,
|
|
) -> Result<()> {
|
|
let config = load_thumbnail_config(&state.pool).await;
|
|
|
|
let book_ids_to_clear: Vec<Uuid> = sqlx::query_scalar(
|
|
r#"SELECT id FROM books WHERE (library_id = $1 OR $1 IS NULL) AND thumbnail_path IS NOT NULL"#,
|
|
)
|
|
.bind(library_id)
|
|
.fetch_all(&state.pool)
|
|
.await
|
|
.unwrap_or_default();
|
|
|
|
let mut deleted_count = 0usize;
|
|
for book_id in &book_ids_to_clear {
|
|
// Delete WebP thumbnail
|
|
let webp_path = Path::new(&config.directory).join(format!("{}.webp", book_id));
|
|
if webp_path.exists() {
|
|
if let Err(e) = std::fs::remove_file(&webp_path) {
|
|
warn!("[ANALYZER] Failed to delete thumbnail {}: {}", webp_path.display(), e);
|
|
} else {
|
|
deleted_count += 1;
|
|
}
|
|
}
|
|
// Delete raw file if it exists (interrupted previous run)
|
|
let raw_path = Path::new(&config.directory).join(format!("{}.raw", book_id));
|
|
let _ = std::fs::remove_file(&raw_path);
|
|
}
|
|
info!("[ANALYZER] Deleted {} thumbnail files for regeneration", deleted_count);
|
|
|
|
sqlx::query(r#"UPDATE books SET thumbnail_path = NULL WHERE (library_id = $1 OR $1 IS NULL)"#)
|
|
.bind(library_id)
|
|
.execute(&state.pool)
|
|
.await?;
|
|
|
|
analyze_library_books(state, job_id, library_id, true).await
|
|
}
|
|
|
|
/// Delete orphaned thumbnail files (books deleted in full_rebuild get new UUIDs).
|
|
pub async fn cleanup_orphaned_thumbnails(state: &AppState) -> Result<()> {
|
|
let config = load_thumbnail_config(&state.pool).await;
|
|
|
|
let existing_book_ids: std::collections::HashSet<Uuid> =
|
|
sqlx::query_scalar(r#"SELECT id FROM books"#)
|
|
.fetch_all(&state.pool)
|
|
.await
|
|
.unwrap_or_default()
|
|
.into_iter()
|
|
.collect();
|
|
|
|
let thumbnail_dir = Path::new(&config.directory);
|
|
if !thumbnail_dir.exists() {
|
|
return Ok(());
|
|
}
|
|
|
|
let mut deleted_count = 0usize;
|
|
if let Ok(entries) = std::fs::read_dir(thumbnail_dir) {
|
|
for entry in entries.flatten() {
|
|
let file_name = entry.file_name();
|
|
let file_name = file_name.to_string_lossy();
|
|
// Clean up both .webp and orphaned .raw files
|
|
let stem = if let Some(s) = file_name.strip_suffix(".webp") {
|
|
Some(s.to_string())
|
|
} else if let Some(s) = file_name.strip_suffix(".raw") {
|
|
Some(s.to_string())
|
|
} else {
|
|
None
|
|
};
|
|
if let Some(book_id_str) = stem {
|
|
if let Ok(book_id) = Uuid::parse_str(&book_id_str) {
|
|
if !existing_book_ids.contains(&book_id) {
|
|
if let Err(e) = std::fs::remove_file(entry.path()) {
|
|
warn!("Failed to delete orphaned file {}: {}", entry.path().display(), e);
|
|
} else {
|
|
deleted_count += 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
info!("[ANALYZER] Deleted {} orphaned thumbnail files", deleted_count);
|
|
Ok(())
|
|
}
|