feat: two-phase indexation with direct thumbnail generation in indexer
Phase 1 (discovery): walkdir + filename-only metadata, zero archive I/O. Books are visible immediately in the UI while Phase 2 runs in the background.

Phase 2 (analysis): open each archive once via analyze_book() to extract page_count and the first page's bytes, then generate the WebP thumbnail directly in the indexer — removing the HTTP round trip to the API checkup endpoint.

- Add parse_metadata_fast() (infallible, no archive I/O)
- Add analyze_book() returning (page_count, first_page_bytes) in one pass
- Add looks_like_image() magic-bytes check to validate `unrar p` stdout
- Add lsar fallback in list_cbr_images() for UTF-16BE encoded filenames
- Add directory_mtimes table to skip unchanged dirs on incremental scans
- Add analyzer.rs: generate_thumbnail, analyze_library_books, regenerate_thumbnails
- Remove run_checkup() from API; indexer handles thumbnail jobs directly
- Remove api_base_url/api_bootstrap_token from IndexerConfig and AppState
- Add unar + poppler-utils to indexer Dockerfile
- Fix smoke.sh: wait for job completion, check thumbnail_url field

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,310 +1,12 @@
|
||||
use std::path::Path;
|
||||
use std::sync::atomic::{AtomicI32, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use axum::{
|
||||
extract::{Path as AxumPath, State},
|
||||
http::StatusCode,
|
||||
extract::State,
|
||||
Json,
|
||||
};
|
||||
use futures::stream::{self, StreamExt};
|
||||
use image::GenericImageView;
|
||||
use serde::Deserialize;
|
||||
use sqlx::Row;
|
||||
use tracing::{info, warn};
|
||||
use uuid::Uuid;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::{error::ApiError, index_jobs, pages, state::AppState};
|
||||
|
||||
/// Thumbnail generation settings, read from the `thumbnail` row of `app_settings`
/// (see `load_thumbnail_config`), with built-in defaults when the row is absent.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ThumbnailConfig {
    /// When `false`, the checkup job finishes without generating anything.
    enabled: bool,
    /// Width of the bounding box the first page is scaled into, in pixels.
    width: u32,
    /// Height of the bounding box the first page is scaled into, in pixels.
    height: u32,
    /// Requested encoder quality.
    quality: u8,
    /// Directory the `<book_id>.webp` files are written to.
    directory: String,
}
|
||||
|
||||
async fn load_thumbnail_concurrency(pool: &sqlx::PgPool) -> usize {
|
||||
let default_concurrency = 4;
|
||||
let row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'limits'"#)
|
||||
.fetch_optional(pool)
|
||||
.await;
|
||||
|
||||
match row {
|
||||
Ok(Some(row)) => {
|
||||
let value: serde_json::Value = row.get("value");
|
||||
value
|
||||
.get("concurrent_renders")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as usize)
|
||||
.unwrap_or(default_concurrency)
|
||||
}
|
||||
_ => default_concurrency,
|
||||
}
|
||||
}
|
||||
|
||||
async fn load_thumbnail_config(pool: &sqlx::PgPool) -> ThumbnailConfig {
|
||||
let fallback = ThumbnailConfig {
|
||||
enabled: true,
|
||||
width: 300,
|
||||
height: 400,
|
||||
quality: 80,
|
||||
directory: "/data/thumbnails".to_string(),
|
||||
};
|
||||
let row = sqlx::query(r#"SELECT value FROM app_settings WHERE key = 'thumbnail'"#)
|
||||
.fetch_optional(pool)
|
||||
.await;
|
||||
|
||||
match row {
|
||||
Ok(Some(row)) => {
|
||||
let value: serde_json::Value = row.get("value");
|
||||
ThumbnailConfig {
|
||||
enabled: value
|
||||
.get("enabled")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(fallback.enabled),
|
||||
width: value
|
||||
.get("width")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as u32)
|
||||
.unwrap_or(fallback.width),
|
||||
height: value
|
||||
.get("height")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as u32)
|
||||
.unwrap_or(fallback.height),
|
||||
quality: value
|
||||
.get("quality")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as u8)
|
||||
.unwrap_or(fallback.quality),
|
||||
directory: value
|
||||
.get("directory")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| fallback.directory.clone()),
|
||||
}
|
||||
}
|
||||
_ => fallback,
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_thumbnail(image_bytes: &[u8], config: &ThumbnailConfig) -> anyhow::Result<Vec<u8>> {
|
||||
let img = image::load_from_memory(image_bytes).context("failed to load image")?;
|
||||
let (orig_w, orig_h) = img.dimensions();
|
||||
let ratio_w = config.width as f32 / orig_w as f32;
|
||||
let ratio_h = config.height as f32 / orig_h as f32;
|
||||
let ratio = ratio_w.min(ratio_h);
|
||||
let new_w = (orig_w as f32 * ratio) as u32;
|
||||
let new_h = (orig_h as f32 * ratio) as u32;
|
||||
let resized = img.resize(new_w, new_h, image::imageops::FilterType::Lanczos3);
|
||||
let rgba = resized.to_rgba8();
|
||||
let (w, h) = rgba.dimensions();
|
||||
let rgb_data: Vec<u8> = rgba.pixels().flat_map(|p| [p[0], p[1], p[2]]).collect();
|
||||
let quality = f32::max(config.quality as f32, 85.0);
|
||||
let webp_data =
|
||||
webp::Encoder::new(&rgb_data, webp::PixelLayout::Rgb, w, h).encode(quality);
|
||||
Ok(webp_data.to_vec())
|
||||
}
|
||||
|
||||
fn save_thumbnail(book_id: Uuid, thumbnail_bytes: &[u8], config: &ThumbnailConfig) -> anyhow::Result<String> {
|
||||
let dir = Path::new(&config.directory);
|
||||
std::fs::create_dir_all(dir)?;
|
||||
let filename = format!("{}.webp", book_id);
|
||||
let path = dir.join(&filename);
|
||||
std::fs::write(&path, thumbnail_bytes)?;
|
||||
Ok(path.to_string_lossy().to_string())
|
||||
}
|
||||
|
||||
async fn run_checkup(state: AppState, job_id: Uuid) {
|
||||
let pool = &state.pool;
|
||||
let row = sqlx::query("SELECT library_id, type FROM index_jobs WHERE id = $1")
|
||||
.bind(job_id)
|
||||
.fetch_optional(pool)
|
||||
.await;
|
||||
|
||||
let (library_id, job_type) = match row {
|
||||
Ok(Some(r)) => (
|
||||
r.get::<Option<Uuid>, _>("library_id"),
|
||||
r.get::<String, _>("type"),
|
||||
),
|
||||
_ => {
|
||||
warn!("thumbnails checkup: job {} not found", job_id);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Regenerate or full_rebuild: clear existing thumbnails in scope so they get regenerated
|
||||
if job_type == "thumbnail_regenerate" || job_type == "full_rebuild" {
|
||||
let config = load_thumbnail_config(pool).await;
|
||||
|
||||
if job_type == "full_rebuild" {
|
||||
// For full_rebuild: delete orphaned thumbnail files (books were deleted, new ones have new UUIDs)
|
||||
// Get all existing book IDs to keep their thumbnails
|
||||
let existing_book_ids: std::collections::HashSet<Uuid> = sqlx::query_scalar(
|
||||
r#"SELECT id FROM books WHERE (library_id = $1 OR $1 IS NULL)"#,
|
||||
)
|
||||
.bind(library_id)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// Delete thumbnail files that don't correspond to existing books
|
||||
let thumbnail_dir = Path::new(&config.directory);
|
||||
if thumbnail_dir.exists() {
|
||||
let mut deleted_count = 0;
|
||||
if let Ok(entries) = std::fs::read_dir(thumbnail_dir) {
|
||||
for entry in entries.flatten() {
|
||||
if let Some(file_name) = entry.file_name().to_str() {
|
||||
if file_name.ends_with(".webp") {
|
||||
if let Some(book_id_str) = file_name.strip_suffix(".webp") {
|
||||
if let Ok(book_id) = Uuid::parse_str(book_id_str) {
|
||||
if !existing_book_ids.contains(&book_id) {
|
||||
if let Err(e) = std::fs::remove_file(entry.path()) {
|
||||
warn!("Failed to delete orphaned thumbnail {}: {}", entry.path().display(), e);
|
||||
} else {
|
||||
deleted_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
info!("thumbnails full_rebuild: deleted {} orphaned thumbnail files", deleted_count);
|
||||
}
|
||||
} else {
|
||||
// For regenerate: delete thumbnail files for books with thumbnails
|
||||
let book_ids_to_clear: Vec<Uuid> = sqlx::query_scalar(
|
||||
r#"SELECT id FROM books WHERE (library_id = $1 OR $1 IS NULL) AND thumbnail_path IS NOT NULL"#,
|
||||
)
|
||||
.bind(library_id)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut deleted_count = 0;
|
||||
for book_id in &book_ids_to_clear {
|
||||
let filename = format!("{}.webp", book_id);
|
||||
let thumbnail_path = Path::new(&config.directory).join(&filename);
|
||||
if thumbnail_path.exists() {
|
||||
if let Err(e) = std::fs::remove_file(&thumbnail_path) {
|
||||
warn!("Failed to delete thumbnail file {}: {}", thumbnail_path.display(), e);
|
||||
} else {
|
||||
deleted_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
info!("thumbnails regenerate: deleted {} thumbnail files", deleted_count);
|
||||
}
|
||||
|
||||
// Clear thumbnail_path in database
|
||||
let cleared = sqlx::query(
|
||||
r#"UPDATE books SET thumbnail_path = NULL WHERE (library_id = $1 OR $1 IS NULL)"#,
|
||||
)
|
||||
.bind(library_id)
|
||||
.execute(pool)
|
||||
.await;
|
||||
if let Ok(res) = cleared {
|
||||
info!("thumbnails {}: cleared {} books in database", job_type, res.rows_affected());
|
||||
}
|
||||
}
|
||||
|
||||
let book_ids: Vec<Uuid> = sqlx::query_scalar(
|
||||
r#"SELECT id FROM books WHERE (library_id = $1 OR $1 IS NULL) AND thumbnail_path IS NULL"#,
|
||||
)
|
||||
.bind(library_id)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let config = load_thumbnail_config(pool).await;
|
||||
if !config.enabled || book_ids.is_empty() {
|
||||
let _ = sqlx::query(
|
||||
"UPDATE index_jobs SET status = 'success', finished_at = NOW(), progress_percent = 100, current_file = NULL WHERE id = $1",
|
||||
)
|
||||
.bind(job_id)
|
||||
.execute(pool)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
|
||||
let total = book_ids.len() as i32;
|
||||
let _ = sqlx::query(
|
||||
"UPDATE index_jobs SET status = 'generating_thumbnails', total_files = $2, processed_files = 0, current_file = NULL WHERE id = $1",
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(total)
|
||||
.execute(pool)
|
||||
.await;
|
||||
|
||||
let concurrency = load_thumbnail_concurrency(pool).await;
|
||||
let processed_count = Arc::new(AtomicI32::new(0));
|
||||
let pool_clone = pool.clone();
|
||||
let job_id_clone = job_id;
|
||||
let config_clone = config.clone();
|
||||
let state_clone = state.clone();
|
||||
|
||||
let total_clone = total;
|
||||
stream::iter(book_ids)
|
||||
.for_each_concurrent(concurrency, |book_id| {
|
||||
let processed_count = processed_count.clone();
|
||||
let pool = pool_clone.clone();
|
||||
let job_id = job_id_clone;
|
||||
let config = config_clone.clone();
|
||||
let state = state_clone.clone();
|
||||
let total = total_clone;
|
||||
|
||||
async move {
|
||||
match pages::render_book_page_1(&state, book_id, config.width, config.quality).await {
|
||||
Ok(page_bytes) => {
|
||||
match generate_thumbnail(&page_bytes, &config) {
|
||||
Ok(thumb_bytes) => {
|
||||
if let Ok(path) = save_thumbnail(book_id, &thumb_bytes, &config) {
|
||||
if sqlx::query("UPDATE books SET thumbnail_path = $1 WHERE id = $2")
|
||||
.bind(&path)
|
||||
.bind(book_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
let processed = processed_count.fetch_add(1, Ordering::Relaxed) + 1;
|
||||
let percent = (processed as f64 / total as f64 * 100.0) as i32;
|
||||
let _ = sqlx::query(
|
||||
"UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(processed)
|
||||
.bind(percent)
|
||||
.execute(&pool)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => warn!("thumbnail generate failed for book {}: {:?}", book_id, e),
|
||||
}
|
||||
}
|
||||
Err(e) => warn!("render page 1 failed for book {}: {:?}", book_id, e),
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
let _ = sqlx::query(
|
||||
"UPDATE index_jobs SET status = 'success', finished_at = NOW(), progress_percent = 100, current_file = NULL WHERE id = $1",
|
||||
)
|
||||
.bind(job_id)
|
||||
.execute(pool)
|
||||
.await;
|
||||
|
||||
info!("thumbnails checkup finished for job {} ({} books)", job_id, total);
|
||||
}
|
||||
use crate::{error::ApiError, index_jobs, state::AppState};
|
||||
|
||||
#[derive(Deserialize, ToSchema)]
|
||||
pub struct ThumbnailsRebuildRequest {
|
||||
@@ -312,7 +14,7 @@ pub struct ThumbnailsRebuildRequest {
|
||||
pub library_id: Option<Uuid>,
|
||||
}
|
||||
|
||||
/// POST /index/thumbnails/rebuild — create a job and generate thumbnails for books that don't have one (optional library scope).
|
||||
/// POST /index/thumbnails/rebuild — create a job to generate thumbnails for books that don't have one.
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/index/thumbnails/rebuild",
|
||||
@@ -346,7 +48,7 @@ pub async fn start_thumbnails_rebuild(
|
||||
Ok(Json(index_jobs::map_row(row)))
|
||||
}
|
||||
|
||||
/// POST /index/thumbnails/regenerate — create a job and regenerate all thumbnails in scope (clears then regenerates).
|
||||
/// POST /index/thumbnails/regenerate — create a job to regenerate all thumbnails (clears then regenerates).
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/index/thumbnails/regenerate",
|
||||
@@ -379,13 +81,3 @@ pub async fn start_thumbnails_regenerate(
|
||||
|
||||
Ok(Json(index_jobs::map_row(row)))
|
||||
}
|
||||
|
||||
/// POST /index/jobs/:id/thumbnails/checkup — start thumbnail generation for books missing thumbnails (called by indexer at end of build).
|
||||
pub async fn start_checkup(
|
||||
State(state): State<AppState>,
|
||||
AxumPath(job_id): AxumPath<Uuid>,
|
||||
) -> Result<StatusCode, ApiError> {
|
||||
let state = state.clone();
|
||||
tokio::spawn(async move { run_checkup(state, job_id).await });
|
||||
Ok(StatusCode::ACCEPTED)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user