From b14accbbe0b57f3205e4f952b7afd89dcff2c4c0 Mon Sep 17 00:00:00 2001 From: Froidefond Julien Date: Mon, 16 Mar 2026 12:08:03 +0100 Subject: [PATCH] =?UTF-8?q?fix(books):=20tri=20des=20s=C3=A9ries=20par=20v?= =?UTF-8?q?olume=20+=20suppression=20de=20l'ancienne=20extract=5Fpage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Ajout de `b.volume NULLS LAST` comme première clé de tri dans list_books et dans tous les ROW_NUMBER() OVER (...) des CTEs series, pour corriger l'ordre des volumes dont les titres varient en format (ex: "Round" vs "R") - Suppression de l'ancienne extract_page publique et de ses 4 helpers (extract_cbz_page_n, extract_cbz_page_n_streaming, extract_cbr_page_n, extract_pdf_page_n) remplacés par la nouvelle implémentation avec cache - Suppression de archive_index_cache dans AppState (remplacé par le cache statique CBZ_INDEX_CACHE dans parsers), import StdMutex nettoyé Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/books.rs | 5 +- apps/api/src/main.rs | 2 - apps/api/src/state.rs | 4 - crates/parsers/src/lib.rs | 247 -------------------------------------- 4 files changed, 4 insertions(+), 254 deletions(-) diff --git a/apps/api/src/books.rs b/apps/api/src/books.rs index 3b3df33..3f72879 100644 --- a/apps/api/src/books.rs +++ b/apps/api/src/books.rs @@ -141,7 +141,7 @@ pub async fn list_books( let order_clause = if query.sort.as_deref() == Some("latest") { "b.updated_at DESC".to_string() } else { - "REGEXP_REPLACE(LOWER(b.title), '[0-9]+', '', 'g'), COALESCE((REGEXP_MATCH(LOWER(b.title), '\\d+'))[1]::int, 0), b.title ASC".to_string() + "b.volume NULLS LAST, REGEXP_REPLACE(LOWER(b.title), '[0-9]+', '', 'g'), COALESCE((REGEXP_MATCH(LOWER(b.title), '\\d+'))[1]::int, 0), b.title ASC".to_string() }; // DATA: mêmes params filtre, puis $N+1=limit $N+2=offset @@ -400,6 +400,7 @@ pub async fn list_series( ROW_NUMBER() OVER ( PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified') ORDER BY + volume NULLS LAST, REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'), COALESCE((REGEXP_MATCH(LOWER(title), '\d+'))[1]::int, 0), title ASC @@ -586,6 +587,7 @@ pub async fn list_all_series( ROW_NUMBER() OVER ( PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified') ORDER BY + volume NULLS LAST, REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'), COALESCE((REGEXP_MATCH(LOWER(title), '\d+'))[1]::int, 0), title ASC @@ -714,6 +716,7 @@ pub async fn ongoing_series( ROW_NUMBER() OVER ( PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified') ORDER BY + volume NULLS LAST, REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'), COALESCE((REGEXP_MATCH(LOWER(title), '\d+'))[1]::int, 0), title ASC diff --git a/apps/api/src/main.rs b/apps/api/src/main.rs index e34febf..9892451 100644 --- a/apps/api/src/main.rs +++ b/apps/api/src/main.rs @@ -30,7 +30,6 @@ use std::num::NonZeroUsize; use stripstream_core::config::ApiConfig; use sqlx::postgres::PgPoolOptions; use tokio::sync::{Mutex, RwLock, Semaphore}; -use std::sync::Mutex as StdMutex; use tracing::info; use crate::state::{load_concurrent_renders, load_dynamic_settings, AppState, Metrics, ReadRateLimit}; @@ -78,7 +77,6 @@ async fn main() -> anyhow::Result<()> { requests_in_window: 0, })), settings: Arc::new(RwLock::new(dynamic_settings)), - archive_index_cache: Arc::new(StdMutex::new(LruCache::new(NonZeroUsize::new(256).expect("non-zero")))), }; let admin_routes = Router::new() diff --git a/apps/api/src/state.rs b/apps/api/src/state.rs index 7620314..4648233 100644 --- a/apps/api/src/state.rs +++ b/apps/api/src/state.rs @@ -1,7 +1,6 @@ use std::sync::{ atomic::AtomicU64, Arc, - Mutex as StdMutex, }; use std::time::Instant; @@ -20,9 +19,6 @@ pub struct AppState { pub metrics: Arc, pub read_rate_limit: Arc>, pub settings: Arc>, - /// Sorted image name list per archive path — avoids re-enumerating entries on every cold render. - /// Uses StdMutex (not tokio) so it's accessible from spawn_blocking. - pub archive_index_cache: Arc>>>>, } #[derive(Clone)] diff --git a/crates/parsers/src/lib.rs b/crates/parsers/src/lib.rs index 50d8d85..6f8083e 100644 --- a/crates/parsers/src/lib.rs +++ b/crates/parsers/src/lib.rs @@ -704,253 +704,6 @@ fn extract_cbr_by_name(path: &Path, image_name: &str) -> Result> { )) } -/// Extract a specific page (1-indexed) from a book archive. -/// Returns raw image bytes (original format, not transcoded). -/// `render_width` is only used for PDF; 0 means default (1200px). -/// Error message contains "out of range" when the page doesn't exist. -pub fn extract_page(path: &Path, format: BookFormat, page_number: u32, render_width: u32) -> Result> { - match format { - BookFormat::Cbz => extract_cbz_page_n(path, page_number, true), - BookFormat::Cbr => extract_cbr_page_n(path, page_number, true), - BookFormat::Pdf => extract_pdf_page_n(path, page_number, render_width), - } -} - -fn extract_cbz_page_n(path: &Path, page_number: u32, allow_fallback: bool) -> Result> { - let file = std::fs::File::open(path) - .with_context(|| format!("cannot open cbz: {}", path.display()))?; - let mut archive = match zip::ZipArchive::new(file) { - Ok(a) => a, - Err(zip_err) => { - if allow_fallback { - if let Ok(data) = extract_cbr_page_n(path, page_number, false) { - return Ok(data); - } - return extract_cbz_page_n_streaming(path, page_number); - } - return Err(anyhow::anyhow!("invalid cbz for {}: {}", path.display(), zip_err)); - } - }; - - let mut image_names: Vec = Vec::new(); - for i in 0..archive.len() { - let entry = match archive.by_index(i) { - Ok(e) => e, - Err(_) => continue, - }; - let name = entry.name().to_ascii_lowercase(); - if is_image_name(&name) { - image_names.push(entry.name().to_string()); - } - } - image_names.sort_by(|a, b| natord::compare(a, b)); - - let index = page_number as usize - 1; - let selected = image_names - .get(index) - .ok_or_else(|| { - anyhow::anyhow!( - "page {} out of range (total: {})", - page_number, - image_names.len() - ) - })? - .clone(); - - let mut entry = archive - .by_name(&selected) - .with_context(|| format!("cannot open entry {} in {}", selected, path.display()))?; - let mut buf = Vec::new(); - entry - .read_to_end(&mut buf) - .with_context(|| format!("cannot read entry {} in {}", selected, path.display()))?; - Ok(buf) -} - -fn extract_cbz_page_n_streaming(path: &Path, page_number: u32) -> Result> { - // Pass 1: collect image names via local file headers (no central directory needed) - let file = std::fs::File::open(path) - .with_context(|| format!("cannot open cbz for streaming: {}", path.display()))?; - let mut reader = std::io::BufReader::new(file); - let mut image_names: Vec = Vec::new(); - - loop { - match zip::read::read_zipfile_from_stream(&mut reader) { - Ok(Some(mut entry)) => { - let name = entry.name().to_string(); - if is_image_name(&name.to_ascii_lowercase()) { - image_names.push(name); - } - std::io::copy(&mut entry, &mut std::io::sink())?; - } - Ok(None) => break, - Err(_) => { - if !image_names.is_empty() { - break; - } - return Err(anyhow::anyhow!( - "cbz streaming read failed for {}", - path.display() - )); - } - } - } - - image_names.sort_by(|a, b| natord::compare(a, b)); - let index = page_number as usize - 1; - let target = image_names - .get(index) - .ok_or_else(|| { - anyhow::anyhow!( - "page {} out of range (total: {})", - page_number, - image_names.len() - ) - })? - .clone(); - - // Pass 2: extract the target page - let file2 = std::fs::File::open(path) - .with_context(|| format!("cannot reopen cbz: {}", path.display()))?; - let mut reader2 = std::io::BufReader::new(file2); - - loop { - match zip::read::read_zipfile_from_stream(&mut reader2) { - Ok(Some(mut entry)) => { - if entry.name() == target { - let mut buf = Vec::new(); - entry.read_to_end(&mut buf)?; - return Ok(buf); - } - std::io::copy(&mut entry, &mut std::io::sink())?; - } - Ok(None) => break, - Err(_) => break, - } - } - - Err(anyhow::anyhow!( - "page {} not found in streaming cbz: {}", - page_number, - path.display() - )) -} - -fn extract_cbr_page_n(path: &Path, page_number: u32, allow_fallback: bool) -> Result> { - let index = page_number as usize - 1; - - // Pass 1: list all image names - let mut image_names: Vec = { - let archive = unrar::Archive::new(path) - .open_for_listing() - .map_err(|e| anyhow::anyhow!("unrar listing failed for {}: {}", path.display(), e)); - let archive = match archive { - Ok(a) => a, - Err(e) => { - let e_str = e.to_string(); - if allow_fallback - && (e_str.contains("Not a RAR archive") || e_str.contains("bad archive")) - { - return extract_cbz_page_n(path, page_number, false); - } - return Err(e); - } - }; - let mut names = Vec::new(); - for entry in archive { - let entry = entry.map_err(|e| anyhow::anyhow!("unrar entry error: {}", e))?; - let name = entry.filename.to_string_lossy().to_string(); - if is_image_name(&name.to_ascii_lowercase()) { - names.push(name); - } - } - names - }; - - image_names.sort_by(|a, b| natord::compare(a, b)); - let target = image_names - .get(index) - .ok_or_else(|| { - anyhow::anyhow!( - "page {} out of range (total: {})", - page_number, - image_names.len() - ) - })? - .clone(); - - // Pass 2: extract only the target page - let mut archive = unrar::Archive::new(path) - .open_for_processing() - .map_err(|e| { - anyhow::anyhow!( - "unrar open for processing failed for {}: {}", - path.display(), - e - ) - })?; - - while let Some(header) = archive - .read_header() - .map_err(|e| anyhow::anyhow!("unrar read header: {}", e))? - { - let entry_name = header.entry().filename.to_string_lossy().to_string(); - if entry_name == target { - let (data, _) = header - .read() - .map_err(|e| anyhow::anyhow!("unrar read data: {}", e))?; - return Ok(data); - } - archive = header - .skip() - .map_err(|e| anyhow::anyhow!("unrar skip: {}", e))?; - } - - Err(anyhow::anyhow!( - "page {} not found in cbr: {}", - page_number, - path.display() - )) -} - -fn extract_pdf_page_n(path: &Path, page_number: u32, render_width: u32) -> Result> { - use pdfium_render::prelude::*; - - let pdfium = Pdfium::new( - Pdfium::bind_to_system_library() - .map_err(|e| anyhow::anyhow!("pdfium library not available: {:?}", e))?, - ); - - let document = pdfium - .load_pdf_from_file(path, None) - .map_err(|e| anyhow::anyhow!("pdfium load failed for {}: {:?}", path.display(), e))?; - - let page_index = (page_number - 1) as u16; - let page = document - .pages() - .get(page_index) - .map_err(|_| anyhow::anyhow!("page {} out of range", page_number))?; - - let w = if render_width > 0 { - render_width as i32 - } else { - 1200 - }; - let config = PdfRenderConfig::new().set_target_width(w); - - let bitmap = page - .render_with_config(&config) - .map_err(|e| anyhow::anyhow!("pdfium render failed for {}: {:?}", path.display(), e))?; - - let image = bitmap.as_image(); - let mut buf = std::io::Cursor::new(Vec::new()); - image - .write_to(&mut buf, image::ImageFormat::Png) - .context("failed to encode rendered PDF page as PNG")?; - - Ok(buf.into_inner()) -} - pub fn extract_first_page(path: &Path, format: BookFormat) -> Result> { extract_page(path, format, 1, 0) }