perf(api,indexer): optimiser pages, thumbnails, watcher et robustesse fd

- Pages: mode Original (zero-transcoding), ETag/304, cache index CBZ,
  préfetch next 2 pages, filtre Triangle par défaut
- Thumbnails: DCT scaling JPEG via jpeg-decoder (decode 7x plus rapide),
  img.thumbnail() pour resize, support format Original, fix JPEG RGBA8
- API fallback thumbnail: OutputFormat::Original + DCT scaling au lieu
  de WebP full-decode, retour (bytes, content_type) dynamique
- Watcher: remplacement notify par poll léger sans inotify/fd,
  skip poll quand job actif, snapshots en mémoire
- Jobs: mutex exclusif corrigé (tous statuts actifs, tous types exclusifs)
- Robustesse: suppression fs::canonicalize (problèmes fd Docker),
  list_folders avec erreurs explicites, has_children default true
- Backoffice: FormRow items-start pour alignement inputs avec helper text,
  labels settings clarifiés

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-14 23:07:42 +01:00
parent fe54f55f47
commit 6947af10fe
15 changed files with 711 additions and 395 deletions

View File

@@ -584,6 +584,17 @@ use axum::{
response::IntoResponse,
};
/// Detect the HTTP content type of a stored thumbnail from its file extension.
///
/// The extension is the text after the last `.` in `path` and is compared
/// case-insensitively, so `cover.JPG` is reported as JPEG just like
/// `cover.jpg`. Anything that is not recognised as JPEG or PNG (including a
/// path without any dot) is reported as `image/webp`.
fn detect_thumbnail_content_type(path: &str) -> &'static str {
    // No dot at all means no extension; fall through to the WebP default.
    let extension = path.rsplit_once('.').map_or("", |(_, ext)| ext);

    if extension.eq_ignore_ascii_case("jpg") || extension.eq_ignore_ascii_case("jpeg") {
        "image/jpeg"
    } else if extension.eq_ignore_ascii_case("png") {
        "image/png"
    } else {
        "image/webp"
    }
}
/// Get book thumbnail image
#[utoipa::path(
get,
@@ -612,9 +623,12 @@ pub async fn get_thumbnail(
let row = row.ok_or_else(|| ApiError::not_found("book not found"))?;
let thumbnail_path: Option<String> = row.get("thumbnail_path");
let data = if let Some(ref path) = thumbnail_path {
let (data, content_type) = if let Some(ref path) = thumbnail_path {
match std::fs::read(path) {
Ok(bytes) => bytes,
Ok(bytes) => {
let ct = detect_thumbnail_content_type(path);
(bytes, ct)
}
Err(_) => {
// File missing on disk (e.g. different mount in dev) — fall back to live render
crate::pages::render_book_page_1(&state, book_id, 300, 80).await?
@@ -626,7 +640,7 @@ pub async fn get_thumbnail(
};
let mut headers = HeaderMap::new();
headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("image/webp"));
headers.insert(header::CONTENT_TYPE, HeaderValue::from_static(content_type));
headers.insert(
header::CACHE_CONTROL,
HeaderValue::from_static("public, max-age=31536000, immutable"),

View File

@@ -246,9 +246,9 @@ pub async fn list_folders(
base_path.to_path_buf()
};
// Ensure the path is within the libraries root
let canonical_target = target_path.canonicalize().unwrap_or(target_path.clone());
let canonical_base = base_path.canonicalize().unwrap_or(base_path.to_path_buf());
// Ensure the path is within the libraries root (avoid canonicalize — burns fd on Docker mounts)
let canonical_target = target_path.clone();
let canonical_base = base_path.to_path_buf();
if !canonical_target.starts_with(&canonical_base) {
return Err(ApiError::bad_request("Path is outside libraries root"));
@@ -263,19 +263,31 @@ pub async fn list_folders(
0
};
if let Ok(entries) = std::fs::read_dir(&canonical_target) {
for entry in entries.flatten() {
if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false) {
let entries = std::fs::read_dir(&canonical_target)
.map_err(|e| ApiError::internal(format!("cannot read directory {}: {}", canonical_target.display(), e)))?;
for entry in entries {
let entry = match entry {
Ok(e) => e,
Err(e) => {
tracing::warn!("[FOLDERS] entry error in {}: {}", canonical_target.display(), e);
continue;
}
};
let is_dir = match entry.file_type() {
Ok(ft) => ft.is_dir(),
Err(e) => {
tracing::warn!("[FOLDERS] cannot stat {}: {}", entry.path().display(), e);
continue;
}
};
if is_dir {
let name = entry.file_name().to_string_lossy().to_string();
// Check if this folder has children
let has_children = if let Ok(sub_entries) = std::fs::read_dir(entry.path()) {
sub_entries.flatten().any(|e| {
e.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
})
} else {
false
};
// Check if this folder has children (best-effort, default to true on error)
let has_children = std::fs::read_dir(entry.path())
.map(|sub| sub.flatten().any(|e| e.file_type().map(|ft| ft.is_dir()).unwrap_or(false)))
.unwrap_or(true);
// Calculate the full path relative to libraries root
let full_path = if let Ok(relative) = entry.path().strip_prefix(&canonical_base) {
@@ -290,7 +302,6 @@ pub async fn list_folders(
depth,
has_children,
});
}
}
}

View File

@@ -156,14 +156,19 @@ fn canonicalize_library_root(root_path: &str) -> Result<PathBuf, ApiError> {
return Err(ApiError::bad_request("root_path must be absolute"));
}
let canonical = std::fs::canonicalize(path)
.map_err(|_| ApiError::bad_request("root_path does not exist or is inaccessible"))?;
if !canonical.is_dir() {
// Avoid fs::canonicalize — it opens extra file descriptors to resolve symlinks
// and can fail on Docker volume mounts (ro, cached) when fd limits are low.
if !path.exists() {
return Err(ApiError::bad_request(format!(
"root_path does not exist: {}",
root_path
)));
}
if !path.is_dir() {
return Err(ApiError::bad_request("root_path must point to a directory"));
}
Ok(canonical)
Ok(path.to_path_buf())
}
use crate::index_jobs::{IndexJobResponse, RebuildRequest};

View File

@@ -16,7 +16,7 @@ use serde::Deserialize;
use utoipa::ToSchema;
use sha2::{Digest, Sha256};
use sqlx::Row;
use tracing::{debug, error, info, instrument, warn};
use tracing::{error, info, instrument, warn};
use uuid::Uuid;
use crate::{error::ApiError, state::AppState};
@@ -32,9 +32,9 @@ fn remap_libraries_path(path: &str) -> String {
fn parse_filter(s: &str) -> image::imageops::FilterType {
match s {
"triangle" => image::imageops::FilterType::Triangle,
"lanczos3" => image::imageops::FilterType::Lanczos3,
"nearest" => image::imageops::FilterType::Nearest,
_ => image::imageops::FilterType::Lanczos3,
_ => image::imageops::FilterType::Triangle, // Triangle (bilinear) is fast and good enough for comics
}
}
@@ -64,7 +64,7 @@ fn write_to_disk_cache(cache_path: &Path, data: &[u8]) -> Result<(), std::io::Er
}
let mut file = std::fs::File::create(cache_path)?;
file.write_all(data)?;
file.sync_data()?;
// No sync_data() — this is a cache, durability is not critical
Ok(())
}
@@ -80,6 +80,8 @@ pub struct PageQuery {
#[derive(Clone, Copy, Debug)]
enum OutputFormat {
/// Serve raw bytes from the archive — no decode, no re-encode.
Original,
Jpeg,
Png,
Webp,
@@ -87,16 +89,19 @@ enum OutputFormat {
impl OutputFormat {
fn parse(value: Option<&str>) -> Result<Self, ApiError> {
match value.unwrap_or("webp") {
"jpeg" | "jpg" => Ok(Self::Jpeg),
"png" => Ok(Self::Png),
"webp" => Ok(Self::Webp),
_ => Err(ApiError::bad_request("format must be webp|jpeg|png")),
match value {
None => Ok(Self::Original),
Some("original") => Ok(Self::Original),
Some("jpeg") | Some("jpg") => Ok(Self::Jpeg),
Some("png") => Ok(Self::Png),
Some("webp") => Ok(Self::Webp),
_ => Err(ApiError::bad_request("format must be original|webp|jpeg|png")),
}
}
fn content_type(&self) -> &'static str {
match self {
Self::Original => "application/octet-stream", // will be overridden by detected type
Self::Jpeg => "image/jpeg",
Self::Png => "image/png",
Self::Webp => "image/webp",
@@ -105,6 +110,7 @@ impl OutputFormat {
fn extension(&self) -> &'static str {
match self {
Self::Original => "orig",
Self::Jpeg => "jpg",
Self::Png => "png",
Self::Webp => "webp",
@@ -112,6 +118,17 @@ impl OutputFormat {
}
}
/// Map raw image bytes to an HTTP content type by sniffing their format.
///
/// JPEG, PNG, WebP and AVIF are recognised; any other format, or bytes whose
/// format cannot be guessed, yield `application/octet-stream`.
fn detect_content_type(data: &[u8]) -> &'static str {
    const FALLBACK: &str = "application/octet-stream";

    // Guard clause: unrecognisable bytes get the generic binary type.
    let Ok(sniffed) = image::guess_format(data) else {
        return FALLBACK;
    };

    match sniffed {
        ImageFormat::Jpeg => "image/jpeg",
        ImageFormat::Png => "image/png",
        ImageFormat::WebP => "image/webp",
        ImageFormat::Avif => "image/avif",
        _ => FALLBACK,
    }
}
/// Get a specific page image from a book with optional format conversion
#[utoipa::path(
get,
@@ -132,44 +149,38 @@ impl OutputFormat {
),
security(("Bearer" = []))
)]
#[instrument(skip(state), fields(book_id = %book_id, page = n))]
#[instrument(skip(state, headers), fields(book_id = %book_id, page = n))]
pub async fn get_page(
State(state): State<AppState>,
AxumPath((book_id, n)): AxumPath<(Uuid, u32)>,
Query(query): Query<PageQuery>,
headers: HeaderMap,
) -> Result<Response, ApiError> {
info!("Processing image request");
if n == 0 {
warn!("Invalid page number: 0");
return Err(ApiError::bad_request("page index starts at 1"));
}
let (default_format, default_quality, max_width, filter_str, timeout_secs, cache_dir) = {
let (default_quality, max_width, filter_str, timeout_secs, cache_dir) = {
let s = state.settings.read().await;
(s.image_format.clone(), s.image_quality, s.image_max_width, s.image_filter.clone(), s.timeout_seconds, s.cache_directory.clone())
(s.image_quality, s.image_max_width, s.image_filter.clone(), s.timeout_seconds, s.cache_directory.clone())
};
let format_str = query.format.as_deref().unwrap_or(default_format.as_str());
let format = OutputFormat::parse(Some(format_str))?;
let format = OutputFormat::parse(query.format.as_deref())?;
let quality = query.quality.unwrap_or(default_quality).clamp(1, 100);
let width = query.width.unwrap_or(0);
if width > max_width {
warn!("Invalid width: {}", width);
return Err(ApiError::bad_request(format!("width must be <= {}", max_width)));
}
let filter = parse_filter(&filter_str);
let cache_dir_path = std::path::PathBuf::from(&cache_dir);
let memory_cache_key = format!("{book_id}:{n}:{}:{quality}:{width}", format.extension());
if let Some(cached) = state.page_cache.lock().await.get(&memory_cache_key).cloned() {
state.metrics.page_cache_hits.fetch_add(1, Ordering::Relaxed);
debug!("Memory cache hit for key: {}", memory_cache_key);
return Ok(image_response(cached, format.content_type(), None));
return Ok(image_response(cached, format, None, &headers));
}
state.metrics.page_cache_misses.fetch_add(1, Ordering::Relaxed);
debug!("Memory cache miss for key: {}", memory_cache_key);
let row = sqlx::query(
r#"
@@ -191,27 +202,30 @@ pub async fn get_page(
let row = match row {
Some(r) => r,
None => {
error!("Book file not found for book_id: {}", book_id);
return Err(ApiError::not_found("book file not found"));
}
};
let abs_path: String = row.get("abs_path");
let abs_path = remap_libraries_path(&abs_path);
let input_format: String = row.get("format");
info!("Processing book file: {} (format: {})", abs_path, input_format);
let disk_cache_key = get_cache_key(&abs_path, n, format.extension(), quality, width);
let cache_path = get_cache_path(&disk_cache_key, &format, &cache_dir_path);
// If-None-Match: return 304 if the client already has this version
if let Some(if_none_match) = headers.get(header::IF_NONE_MATCH) {
let expected_etag = format!("\"{}\"", disk_cache_key);
if if_none_match.as_bytes() == expected_etag.as_bytes() {
return Ok(StatusCode::NOT_MODIFIED.into_response());
}
}
if let Some(cached_bytes) = read_from_disk_cache(&cache_path) {
info!("Disk cache hit for: {}", cache_path.display());
let bytes = Arc::new(cached_bytes);
state.page_cache.lock().await.put(memory_cache_key, bytes.clone());
return Ok(image_response(bytes, format.content_type(), Some(&disk_cache_key)));
return Ok(image_response(bytes, format, Some(&disk_cache_key), &headers));
}
debug!("Disk cache miss for: {}", cache_path.display());
let _permit = state
.page_render_limit
@@ -223,11 +237,10 @@ pub async fn get_page(
ApiError::internal("render limiter unavailable")
})?;
info!("Rendering page {} from {}", n, abs_path);
let abs_path_clone = abs_path.clone();
let format_clone = format;
let start_time = std::time::Instant::now();
let bytes = tokio::time::timeout(
Duration::from_secs(timeout_secs),
tokio::task::spawn_blocking(move || {
@@ -243,23 +256,32 @@ pub async fn get_page(
error!("Render task panicked for {} page {}: {}", abs_path, n, e);
ApiError::internal(format!("render task failed: {e}"))
})?;
let duration = start_time.elapsed();
match bytes {
Ok(data) => {
info!("Successfully rendered page {} in {:?}", n, duration);
info!("Rendered page {} in {:?}", n, duration);
if let Err(e) = write_to_disk_cache(&cache_path, &data) {
warn!("Failed to write to disk cache: {}", e);
} else {
info!("Cached rendered image to: {}", cache_path.display());
}
let bytes = Arc::new(data);
state.page_cache.lock().await.put(memory_cache_key, bytes.clone());
state.page_cache.lock().await.put(memory_cache_key.clone(), bytes.clone());
Ok(image_response(bytes, format.content_type(), Some(&disk_cache_key)))
// Prefetch next 2 pages in background (fire-and-forget)
for next_page in [n + 1, n + 2] {
let state2 = state.clone();
let abs_path2 = abs_path.clone();
let cache_dir2 = cache_dir_path.clone();
let format2 = format;
tokio::spawn(async move {
prefetch_page(state2, book_id, &abs_path2, next_page, format2, quality, width, filter, timeout_secs, &cache_dir2).await;
});
}
Ok(image_response(bytes, format, Some(&disk_cache_key), &headers))
}
Err(e) => {
error!("Failed to render page {} from {}: {:?}", n, abs_path, e);
@@ -268,11 +290,72 @@ pub async fn get_page(
}
}
fn image_response(bytes: Arc<Vec<u8>>, content_type: &str, etag_suffix: Option<&str>) -> Response {
let mut headers = HeaderMap::new();
headers.insert(header::CONTENT_TYPE, HeaderValue::from_str(content_type).unwrap_or(HeaderValue::from_static("application/octet-stream")));
headers.insert(header::CACHE_CONTROL, HeaderValue::from_static("public, max-age=31536000, immutable"));
/// Warm the disk and memory caches with a single page (best-effort).
///
/// Returns silently, without rendering, when the page is already in the
/// memory cache, when its cache file already exists on disk, when no render
/// permit is obtained within 100 ms, or when the file extension is not one of
/// `cbz`, `cbr` or `pdf`. Render errors, task failures and timeouts are
/// ignored as well; a failed disk-cache write does not prevent the memory
/// cache from being filled.
async fn prefetch_page(
    state: AppState,
    book_id: Uuid,
    abs_path: &str,
    page: u32,
    format: OutputFormat,
    quality: u8,
    width: u32,
    filter: image::imageops::FilterType,
    timeout_secs: u64,
    cache_dir: &Path,
) {
    let memory_key = format!("{book_id}:{page}:{}:{quality}:{width}", format.extension());

    // Nothing to do when the page already sits in the in-memory cache.
    if state.page_cache.lock().await.contains(&memory_key) {
        return;
    }

    // Nothing to do either when a rendered copy already exists on disk.
    let disk_key = get_cache_key(abs_path, page, format.extension(), quality, width);
    let target = get_cache_path(&disk_key, &format, cache_dir);
    if target.exists() {
        return;
    }

    // Wait at most 100 ms for a render permit; if the limiter is busy, skip.
    // The permit is held until the end of the function.
    let acquire = state.page_render_limit.clone().acquire_owned();
    let Ok(Ok(_permit)) = tokio::time::timeout(Duration::from_millis(100), acquire).await else {
        return;
    };

    // Shortcut: take the book format from the path extension.
    let extension = abs_path.rsplit('.').next().map(|ext| ext.to_ascii_lowercase());
    let source_kind = match extension.as_deref() {
        Some("cbz") => "cbz",
        Some("cbr") => "cbr",
        Some("pdf") => "pdf",
        _ => return,
    }
    .to_string();

    // Rendering is blocking work, so it runs on the blocking pool with owned inputs.
    let owned_path = abs_path.to_string();
    let out_format = format;
    let render = tokio::task::spawn_blocking(move || {
        render_page(&owned_path, &source_kind, page, &out_format, quality, width, filter)
    });
    let outcome = tokio::time::timeout(Duration::from_secs(timeout_secs), render).await;

    // Timeout, join failure and render failure are all ignored here.
    let Ok(Ok(Ok(data))) = outcome else {
        return;
    };

    let _ = write_to_disk_cache(&target, &data);
    state.page_cache.lock().await.put(memory_key, Arc::new(data));
}
fn image_response(bytes: Arc<Vec<u8>>, format: OutputFormat, etag_suffix: Option<&str>, req_headers: &HeaderMap) -> Response {
let content_type = match format {
OutputFormat::Original => detect_content_type(&bytes),
_ => format.content_type(),
};
let etag = if let Some(suffix) = etag_suffix {
format!("\"{}\"", suffix)
} else {
@@ -280,20 +363,38 @@ fn image_response(bytes: Arc<Vec<u8>>, content_type: &str, etag_suffix: Option<&
hasher.update(&*bytes);
format!("\"{:x}\"", hasher.finalize())
};
// Check If-None-Match for 304
if let Some(if_none_match) = req_headers.get(header::IF_NONE_MATCH) {
if if_none_match.as_bytes() == etag.as_bytes() {
let mut headers = HeaderMap::new();
headers.insert(header::CACHE_CONTROL, HeaderValue::from_static("public, max-age=31536000, immutable"));
if let Ok(v) = HeaderValue::from_str(&etag) {
headers.insert(header::ETAG, v);
}
return (StatusCode::NOT_MODIFIED, headers).into_response();
}
}
let mut headers = HeaderMap::new();
headers.insert(header::CONTENT_TYPE, HeaderValue::from_str(content_type).unwrap_or(HeaderValue::from_static("application/octet-stream")));
headers.insert(header::CACHE_CONTROL, HeaderValue::from_static("public, max-age=31536000, immutable"));
if let Ok(v) = HeaderValue::from_str(&etag) {
headers.insert(header::ETAG, v);
}
(StatusCode::OK, headers, Body::from((*bytes).clone())).into_response()
// Move the Vec out of the Arc when this is the last reference; if the Arc is still shared (e.g. the page cache holds a clone), unwrap_or_clone copies the data instead
let body_bytes = axum::body::Bytes::from(Arc::unwrap_or_clone(bytes));
(StatusCode::OK, headers, Body::from(body_bytes)).into_response()
}
/// Render page 1 of a book (for thumbnail fallback or thumbnail checkup). Uses thumbnail dimensions by default.
/// Render page 1 as a thumbnail fallback. Returns (bytes, content_type).
pub async fn render_book_page_1(
state: &AppState,
book_id: Uuid,
width: u32,
quality: u8,
) -> Result<Vec<u8>, ApiError> {
) -> Result<(Vec<u8>, &'static str), ApiError> {
let row = sqlx::query(
r#"SELECT abs_path, format FROM book_files WHERE book_id = $1 ORDER BY updated_at DESC LIMIT 1"#,
)
@@ -328,7 +429,7 @@ pub async fn render_book_page_1(
&abs_path_clone,
&input_format,
1,
&OutputFormat::Webp,
&OutputFormat::Original,
quality,
width,
filter,
@@ -339,7 +440,9 @@ pub async fn render_book_page_1(
.map_err(|_| ApiError::internal("page rendering timeout"))?
.map_err(|e| ApiError::internal(format!("render task failed: {e}")))?;
bytes
let bytes = bytes?;
let content_type = detect_content_type(&bytes);
Ok((bytes, content_type))
}
fn render_page(
@@ -370,43 +473,93 @@ fn render_page(
ApiError::internal(format!("page extraction failed: {e}"))
})?;
// Original mode or source matches output with no resize → return raw bytes (zero transcoding)
if matches!(out_format, OutputFormat::Original) && width == 0 {
return Ok(page_bytes);
}
if width == 0 {
if let Ok(source_fmt) = image::guess_format(&page_bytes) {
if format_matches(&source_fmt, out_format) {
return Ok(page_bytes);
}
}
}
transcode_image(&page_bytes, out_format, quality, width, filter)
}
/// Fast JPEG decode with DCT scaling: decodes directly at reduced resolution.
///
/// `target_w` / `target_h` are the requested dimensions handed to the
/// decoder's `scale` call; values above `u16::MAX` are clamped to `u16::MAX`
/// because the decoder only accepts 16-bit dimensions.
///
/// Returns `None` when the input is not detected as JPEG, when any decoder
/// step fails, when the decoded buffer does not match the reported
/// dimensions, or when the pixel format is neither RGB24 nor L8. Callers are
/// expected to fall back to a regular decode in that case.
fn fast_jpeg_decode(input: &[u8], target_w: u32, target_h: u32) -> Option<image::DynamicImage> {
    if image::guess_format(input).ok()? != ImageFormat::Jpeg {
        return None;
    }

    // Clamp before narrowing: a plain `as u16` would wrap (65536 -> 0,
    // 70000 -> 4464) and request a far smaller decode than intended.
    let max_dim = u32::from(u16::MAX);
    let requested_w = target_w.min(max_dim) as u16;
    let requested_h = target_h.min(max_dim) as u16;

    let mut decoder = jpeg_decoder::Decoder::new(std::io::Cursor::new(input));
    decoder.read_info().ok()?;
    decoder.scale(requested_w, requested_h).ok()?;
    let pixels = decoder.decode().ok()?;

    // Read the dimensions back from the decoder rather than assuming the
    // requested size was honoured.
    let info = decoder.info()?;
    let w = u32::from(info.width);
    let h = u32::from(info.height);

    match info.pixel_format {
        jpeg_decoder::PixelFormat::RGB24 => {
            let buf = image::RgbImage::from_raw(w, h, pixels)?;
            Some(image::DynamicImage::ImageRgb8(buf))
        }
        jpeg_decoder::PixelFormat::L8 => {
            let buf = image::GrayImage::from_raw(w, h, pixels)?;
            Some(image::DynamicImage::ImageLuma8(buf))
        }
        // Other pixel formats are left to the caller's fallback decode path.
        _ => None,
    }
}
fn transcode_image(input: &[u8], out_format: &OutputFormat, quality: u8, width: u32, filter: image::imageops::FilterType) -> Result<Vec<u8>, ApiError> {
debug!("Transcoding image: {} bytes, format: {:?}, quality: {}, width: {}", input.len(), out_format, quality, width);
let source_format = image::guess_format(input).ok();
debug!("Source format detected: {:?}", source_format);
let needs_transcode = source_format.map(|f| !format_matches(&f, out_format)).unwrap_or(true);
// Resolve "Original" to the actual source format for encoding
let effective_format = match out_format {
OutputFormat::Original => match source_format {
Some(ImageFormat::Png) => OutputFormat::Png,
Some(ImageFormat::WebP) => OutputFormat::Webp,
_ => OutputFormat::Jpeg, // default to JPEG for original resize
},
other => *other,
};
let needs_transcode = source_format.map(|f| !format_matches(&f, &effective_format)).unwrap_or(true);
if width == 0 && !needs_transcode {
debug!("No transcoding needed, returning original");
return Ok(input.to_vec());
}
debug!("Loading image from memory...");
let mut image = image::load_from_memory(input).map_err(|e| {
error!("Failed to load image from memory: {} (input size: {} bytes)", e, input.len());
ApiError::internal(format!("invalid source image: {e}"))
})?;
// For JPEG with resize: use DCT scaling to decode at ~target size (much faster)
let mut image = if width > 0 {
fast_jpeg_decode(input, width, u32::MAX)
.unwrap_or_else(|| {
image::load_from_memory(input).unwrap_or_default()
})
} else {
image::load_from_memory(input).map_err(|e| {
ApiError::internal(format!("invalid source image: {e}"))
})?
};
if width > 0 {
debug!("Resizing image to width: {}", width);
image = image.resize(width, u32::MAX, filter);
}
debug!("Converting to RGBA...");
let rgba = image.to_rgba8();
let (w, h) = rgba.dimensions();
debug!("Image dimensions: {}x{}", w, h);
let mut out = Vec::new();
match out_format {
OutputFormat::Jpeg => {
match effective_format {
OutputFormat::Jpeg | OutputFormat::Original => {
// JPEG doesn't support alpha — convert RGBA to RGB
let rgb = image::DynamicImage::ImageRgba8(rgba.clone()).to_rgb8();
let mut encoder = JpegEncoder::new_with_quality(&mut out, quality);
encoder
.encode(&rgba, w, h, ColorType::Rgba8.into())
.encode(&rgb, w, h, ColorType::Rgb8.into())
.map_err(|e| ApiError::internal(format!("jpeg encode failed: {e}")))?;
}
OutputFormat::Png => {
@@ -421,7 +574,7 @@ fn transcode_image(input: &[u8], out_format: &OutputFormat, quality: u8, width:
.flat_map(|p| [p[0], p[1], p[2]])
.collect();
let webp_data = webp::Encoder::new(&rgb_data, webp::PixelLayout::Rgb, w, h)
.encode(f32::max(quality as f32, 85.0));
.encode(quality as f32);
out.extend_from_slice(&webp_data);
}
}

View File

@@ -39,7 +39,7 @@ impl Default for DynamicSettings {
timeout_seconds: 12,
image_format: "webp".to_string(),
image_quality: 85,
image_filter: "lanczos3".to_string(),
image_filter: "triangle".to_string(),
image_max_width: 2160,
cache_directory: std::env::var("IMAGE_CACHE_DIR")
.unwrap_or_else(|_| "/tmp/stripstream-image-cache".to_string()),