From b14accbbe0b57f3205e4f952b7afd89dcff2c4c0 Mon Sep 17 00:00:00 2001
From: Froidefond Julien <julienfroidefond@gmail.com>
Date: Mon, 16 Mar 2026 12:08:03 +0100
Subject: [PATCH] =?UTF-8?q?fix(books):=20tri=20des=20s=C3=A9ries=20par=20v?=
 =?UTF-8?q?olume=20+=20suppression=20de=20l'ancienne=20extract=5Fpage?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Ajout de `b.volume NULLS LAST` comme première clé de tri dans list_books
  et dans tous les ROW_NUMBER() OVER (...) des CTE de séries (list_series,
  list_all_series, ongoing_series), pour corriger l'ordre des volumes dont
  les titres varient en format (ex. : "Round" vs "R")
- Suppression de l'ancienne extract_page publique et de ses 4 helpers
  (extract_cbz_page_n, extract_cbz_page_n_streaming, extract_cbr_page_n,
  extract_pdf_page_n) remplacés par la nouvelle implémentation avec cache
- Suppression de archive_index_cache dans AppState (remplacé par le cache
  statique CBZ_INDEX_CACHE dans parsers), import StdMutex nettoyé

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 apps/api/src/books.rs     |   5 +-
 apps/api/src/main.rs      |   2 -
 apps/api/src/state.rs     |   4 -
 crates/parsers/src/lib.rs | 247 --------------------------------------
 4 files changed, 4 insertions(+), 254 deletions(-)

diff --git a/apps/api/src/books.rs b/apps/api/src/books.rs
index 3b3df33..3f72879 100644
--- a/apps/api/src/books.rs
+++ b/apps/api/src/books.rs
@@ -141,7 +141,7 @@ pub async fn list_books(
     let order_clause = if query.sort.as_deref() == Some("latest") {
         "b.updated_at DESC".to_string()
     } else {
-        "REGEXP_REPLACE(LOWER(b.title), '[0-9]+', '', 'g'), COALESCE((REGEXP_MATCH(LOWER(b.title), '\\d+'))[1]::int, 0), b.title ASC".to_string()
+        "b.volume NULLS LAST, REGEXP_REPLACE(LOWER(b.title), '[0-9]+', '', 'g'), COALESCE((REGEXP_MATCH(LOWER(b.title), '\\d+'))[1]::int, 0), b.title ASC".to_string()
     };
 
     // DATA: mêmes params filtre, puis $N+1=limit $N+2=offset
@@ -400,6 +400,7 @@ pub async fn list_series(
                 ROW_NUMBER() OVER (
                     PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified')
                     ORDER BY
+                        volume NULLS LAST,
                         REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'),
                         COALESCE((REGEXP_MATCH(LOWER(title), '\d+'))[1]::int, 0),
                         title ASC
@@ -586,6 +587,7 @@ pub async fn list_all_series(
                 ROW_NUMBER() OVER (
                     PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified')
                     ORDER BY
+                        volume NULLS LAST,
                         REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'),
                         COALESCE((REGEXP_MATCH(LOWER(title), '\d+'))[1]::int, 0),
                         title ASC
@@ -714,6 +716,7 @@ pub async fn ongoing_series(
                 ROW_NUMBER() OVER (
                     PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified')
                     ORDER BY
+                        volume NULLS LAST,
                         REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'),
                         COALESCE((REGEXP_MATCH(LOWER(title), '\d+'))[1]::int, 0),
                         title ASC
diff --git a/apps/api/src/main.rs b/apps/api/src/main.rs
index e34febf..9892451 100644
--- a/apps/api/src/main.rs
+++ b/apps/api/src/main.rs
@@ -30,7 +30,6 @@ use std::num::NonZeroUsize;
 use stripstream_core::config::ApiConfig;
 use sqlx::postgres::PgPoolOptions;
 use tokio::sync::{Mutex, RwLock, Semaphore};
-use std::sync::Mutex as StdMutex;
 use tracing::info;
 
 use crate::state::{load_concurrent_renders, load_dynamic_settings, AppState, Metrics, ReadRateLimit};
@@ -78,7 +77,6 @@ async fn main() -> anyhow::Result<()> {
             requests_in_window: 0,
         })),
         settings: Arc::new(RwLock::new(dynamic_settings)),
-        archive_index_cache: Arc::new(StdMutex::new(LruCache::new(NonZeroUsize::new(256).expect("non-zero")))),
     };
 
     let admin_routes = Router::new()
diff --git a/apps/api/src/state.rs b/apps/api/src/state.rs
index 7620314..4648233 100644
--- a/apps/api/src/state.rs
+++ b/apps/api/src/state.rs
@@ -1,7 +1,6 @@
 use std::sync::{
     atomic::AtomicU64,
     Arc,
-    Mutex as StdMutex,
 };
 use std::time::Instant;
 
@@ -20,9 +19,6 @@ pub struct AppState {
     pub metrics: Arc<Metrics>,
     pub read_rate_limit: Arc<Mutex<ReadRateLimit>>,
     pub settings: Arc<RwLock<DynamicSettings>>,
-    /// Sorted image name list per archive path — avoids re-enumerating entries on every cold render.
-    /// Uses StdMutex (not tokio) so it's accessible from spawn_blocking.
-    pub archive_index_cache: Arc<StdMutex<LruCache<String, Arc<Vec<String>>>>>,
 }
 
 #[derive(Clone)]
diff --git a/crates/parsers/src/lib.rs b/crates/parsers/src/lib.rs
index 50d8d85..6f8083e 100644
--- a/crates/parsers/src/lib.rs
+++ b/crates/parsers/src/lib.rs
@@ -704,253 +704,6 @@ fn extract_cbr_by_name(path: &Path, image_name: &str) -> Result<Vec<u8>> {
     ))
 }
 
-/// Extract a specific page (1-indexed) from a book archive.
-/// Returns raw image bytes (original format, not transcoded).
-/// `render_width` is only used for PDF; 0 means default (1200px).
-/// Error message contains "out of range" when the page doesn't exist.
-pub fn extract_page(path: &Path, format: BookFormat, page_number: u32, render_width: u32) -> Result<Vec<u8>> {
-    match format {
-        BookFormat::Cbz => extract_cbz_page_n(path, page_number, true),
-        BookFormat::Cbr => extract_cbr_page_n(path, page_number, true),
-        BookFormat::Pdf => extract_pdf_page_n(path, page_number, render_width),
-    }
-}
-
-fn extract_cbz_page_n(path: &Path, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>> {
-    let file = std::fs::File::open(path)
-        .with_context(|| format!("cannot open cbz: {}", path.display()))?;
-    let mut archive = match zip::ZipArchive::new(file) {
-        Ok(a) => a,
-        Err(zip_err) => {
-            if allow_fallback {
-                if let Ok(data) = extract_cbr_page_n(path, page_number, false) {
-                    return Ok(data);
-                }
-                return extract_cbz_page_n_streaming(path, page_number);
-            }
-            return Err(anyhow::anyhow!("invalid cbz for {}: {}", path.display(), zip_err));
-        }
-    };
-
-    let mut image_names: Vec<String> = Vec::new();
-    for i in 0..archive.len() {
-        let entry = match archive.by_index(i) {
-            Ok(e) => e,
-            Err(_) => continue,
-        };
-        let name = entry.name().to_ascii_lowercase();
-        if is_image_name(&name) {
-            image_names.push(entry.name().to_string());
-        }
-    }
-    image_names.sort_by(|a, b| natord::compare(a, b));
-
-    let index = page_number as usize - 1;
-    let selected = image_names
-        .get(index)
-        .ok_or_else(|| {
-            anyhow::anyhow!(
-                "page {} out of range (total: {})",
-                page_number,
-                image_names.len()
-            )
-        })?
-        .clone();
-
-    let mut entry = archive
-        .by_name(&selected)
-        .with_context(|| format!("cannot open entry {} in {}", selected, path.display()))?;
-    let mut buf = Vec::new();
-    entry
-        .read_to_end(&mut buf)
-        .with_context(|| format!("cannot read entry {} in {}", selected, path.display()))?;
-    Ok(buf)
-}
-
-fn extract_cbz_page_n_streaming(path: &Path, page_number: u32) -> Result<Vec<u8>> {
-    // Pass 1: collect image names via local file headers (no central directory needed)
-    let file = std::fs::File::open(path)
-        .with_context(|| format!("cannot open cbz for streaming: {}", path.display()))?;
-    let mut reader = std::io::BufReader::new(file);
-    let mut image_names: Vec<String> = Vec::new();
-
-    loop {
-        match zip::read::read_zipfile_from_stream(&mut reader) {
-            Ok(Some(mut entry)) => {
-                let name = entry.name().to_string();
-                if is_image_name(&name.to_ascii_lowercase()) {
-                    image_names.push(name);
-                }
-                std::io::copy(&mut entry, &mut std::io::sink())?;
-            }
-            Ok(None) => break,
-            Err(_) => {
-                if !image_names.is_empty() {
-                    break;
-                }
-                return Err(anyhow::anyhow!(
-                    "cbz streaming read failed for {}",
-                    path.display()
-                ));
-            }
-        }
-    }
-
-    image_names.sort_by(|a, b| natord::compare(a, b));
-    let index = page_number as usize - 1;
-    let target = image_names
-        .get(index)
-        .ok_or_else(|| {
-            anyhow::anyhow!(
-                "page {} out of range (total: {})",
-                page_number,
-                image_names.len()
-            )
-        })?
-        .clone();
-
-    // Pass 2: extract the target page
-    let file2 = std::fs::File::open(path)
-        .with_context(|| format!("cannot reopen cbz: {}", path.display()))?;
-    let mut reader2 = std::io::BufReader::new(file2);
-
-    loop {
-        match zip::read::read_zipfile_from_stream(&mut reader2) {
-            Ok(Some(mut entry)) => {
-                if entry.name() == target {
-                    let mut buf = Vec::new();
-                    entry.read_to_end(&mut buf)?;
-                    return Ok(buf);
-                }
-                std::io::copy(&mut entry, &mut std::io::sink())?;
-            }
-            Ok(None) => break,
-            Err(_) => break,
-        }
-    }
-
-    Err(anyhow::anyhow!(
-        "page {} not found in streaming cbz: {}",
-        page_number,
-        path.display()
-    ))
-}
-
-fn extract_cbr_page_n(path: &Path, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>> {
-    let index = page_number as usize - 1;
-
-    // Pass 1: list all image names
-    let mut image_names: Vec<String> = {
-        let archive = unrar::Archive::new(path)
-            .open_for_listing()
-            .map_err(|e| anyhow::anyhow!("unrar listing failed for {}: {}", path.display(), e));
-        let archive = match archive {
-            Ok(a) => a,
-            Err(e) => {
-                let e_str = e.to_string();
-                if allow_fallback
-                    && (e_str.contains("Not a RAR archive") || e_str.contains("bad archive"))
-                {
-                    return extract_cbz_page_n(path, page_number, false);
-                }
-                return Err(e);
-            }
-        };
-        let mut names = Vec::new();
-        for entry in archive {
-            let entry = entry.map_err(|e| anyhow::anyhow!("unrar entry error: {}", e))?;
-            let name = entry.filename.to_string_lossy().to_string();
-            if is_image_name(&name.to_ascii_lowercase()) {
-                names.push(name);
-            }
-        }
-        names
-    };
-
-    image_names.sort_by(|a, b| natord::compare(a, b));
-    let target = image_names
-        .get(index)
-        .ok_or_else(|| {
-            anyhow::anyhow!(
-                "page {} out of range (total: {})",
-                page_number,
-                image_names.len()
-            )
-        })?
-        .clone();
-
-    // Pass 2: extract only the target page
-    let mut archive = unrar::Archive::new(path)
-        .open_for_processing()
-        .map_err(|e| {
-            anyhow::anyhow!(
-                "unrar open for processing failed for {}: {}",
-                path.display(),
-                e
-            )
-        })?;
-
-    while let Some(header) = archive
-        .read_header()
-        .map_err(|e| anyhow::anyhow!("unrar read header: {}", e))?
-    {
-        let entry_name = header.entry().filename.to_string_lossy().to_string();
-        if entry_name == target {
-            let (data, _) = header
-                .read()
-                .map_err(|e| anyhow::anyhow!("unrar read data: {}", e))?;
-            return Ok(data);
-        }
-        archive = header
-            .skip()
-            .map_err(|e| anyhow::anyhow!("unrar skip: {}", e))?;
-    }
-
-    Err(anyhow::anyhow!(
-        "page {} not found in cbr: {}",
-        page_number,
-        path.display()
-    ))
-}
-
-fn extract_pdf_page_n(path: &Path, page_number: u32, render_width: u32) -> Result<Vec<u8>> {
-    use pdfium_render::prelude::*;
-
-    let pdfium = Pdfium::new(
-        Pdfium::bind_to_system_library()
-            .map_err(|e| anyhow::anyhow!("pdfium library not available: {:?}", e))?,
-    );
-
-    let document = pdfium
-        .load_pdf_from_file(path, None)
-        .map_err(|e| anyhow::anyhow!("pdfium load failed for {}: {:?}", path.display(), e))?;
-
-    let page_index = (page_number - 1) as u16;
-    let page = document
-        .pages()
-        .get(page_index)
-        .map_err(|_| anyhow::anyhow!("page {} out of range", page_number))?;
-
-    let w = if render_width > 0 {
-        render_width as i32
-    } else {
-        1200
-    };
-    let config = PdfRenderConfig::new().set_target_width(w);
-
-    let bitmap = page
-        .render_with_config(&config)
-        .map_err(|e| anyhow::anyhow!("pdfium render failed for {}: {:?}", path.display(), e))?;
-
-    let image = bitmap.as_image();
-    let mut buf = std::io::Cursor::new(Vec::new());
-    image
-        .write_to(&mut buf, image::ImageFormat::Png)
-        .context("failed to encode rendered PDF page as PNG")?;
-
-    Ok(buf.into_inner())
-}
-
 pub fn extract_first_page(path: &Path, format: BookFormat) -> Result<Vec<u8>> {
     extract_page(path, format, 1, 0)
 }