fix(parsers): corriger la génération de thumbnails CBR/CBZ/PDF

- CBR: contourner le bug XADRegexException de unar en appelant unar avec un symlink à nom neutre (archive.cbr) au lieu du chemin réel, qui peut contenir des caractères regex spéciaux comme [ ] ( )
- CBR/CBZ: remplacer le tri lexicographique par natord (tri naturel) pour que page2.jpg soit trié avant page10.jpg
- PDF: brancher pdftoppm -scale-to sur config.width.max(config.height) au lieu d'une valeur hardcodée (800px → 400px par défaut)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
fix(db): ajouter 'cancelled' à la contrainte CHECK de index_jobs.status
2026-03-11 16:17:20 +01:00 · 2026-03-11 15:58:03 +01:00
7 changed files with 59 additions and 34 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1485,6 +1485,12 @@ dependencies = [
 "pxfm",
 ]

+[[package]]
+name = "natord"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c"
+
 [[package]]
 name = "nom"
 version = "7.1.3"
@@ -1627,6 +1633,7 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "lopdf",
+ "natord",
 "regex",
 "uuid",
 "walkdir",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,6 +32,7 @@ tower = { version = "0.5", features = ["limit"] }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
 uuid = { version = "1.12", features = ["serde", "v4"] }
+natord = "1.0"
 walkdir = "2.5"
 webp = "0.3"
 utoipa = "4.0"
--- a/apps/backoffice/app/components/JobsList.tsx
+++ b/apps/backoffice/app/components/JobsList.tsx
@@ -85,18 +85,14 @@ export function JobsList({ initialJobs, libraries, highlightJobId }: JobsListPro
  }, []);

  const handleCancel = async (id: string) => {
-    try {
-      const response = await fetch(`/api/jobs/${id}/cancel`, {
-        method: "POST",
-      });
-
+    const response = await fetch(`/api/jobs/${id}/cancel`, { method: "POST" });
    if (response.ok) {
      setJobs(jobs.map(job =>
        job.id === id ? { ...job, status: "cancelled" } : job
      ));
-      }
-    } catch (error) {
-      console.error("Failed to cancel job:", error);
+    } else {
+      const data = await response.json().catch(() => ({}));
+      console.error("Failed to cancel job:", data?.error ?? response.status);
    }
  };

--- a/apps/indexer/src/analyzer.rs
+++ b/apps/indexer/src/analyzer.rs
@@ -247,8 +247,9 @@ pub async fn analyze_library_books(
                // Run blocking archive I/O on a thread pool
                let book_id = task.book_id;
                let path_owned = path.to_path_buf();
+                let pdf_scale = config.width.max(config.height);
                let analyze_result = tokio::task::spawn_blocking(move || {
-                    analyze_book(&path_owned, format)
+                    analyze_book(&path_owned, format, pdf_scale)
                })
                .await;

--- a/crates/parsers/Cargo.toml
+++ b/crates/parsers/Cargo.toml
@@ -6,6 +6,7 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
+natord.workspace = true
 lopdf = "0.35"
 regex = "1"
 uuid.workspace = true
--- a/crates/parsers/src/lib.rs
+++ b/crates/parsers/src/lib.rs
@@ -153,11 +153,12 @@ pub fn parse_metadata(

 /// Open an archive once and return (page_count, first_page_bytes).
 /// This is more efficient than calling parse_metadata + extract_first_page separately.
-pub fn analyze_book(path: &Path, format: BookFormat) -> Result<(i32, Vec<u8>)> {
+/// `pdf_render_scale`: max dimension (width or height) used by pdftoppm; 0 means use default (400).
+pub fn analyze_book(path: &Path, format: BookFormat, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
    match format {
        BookFormat::Cbz => analyze_cbz(path),
        BookFormat::Cbr => analyze_cbr(path),
-        BookFormat::Pdf => analyze_pdf(path),
+        BookFormat::Pdf => analyze_pdf(path, pdf_render_scale),
    }
 }

@@ -174,7 +175,7 @@ fn analyze_cbz(path: &Path) -> Result<(i32, Vec<u8>)> {
            image_names.push(entry.name().to_string());
        }
    }
-    image_names.sort();
+    image_names.sort_by(|a, b| natord::compare(a, b));

    let count = image_names.len() as i32;
    let first_image = image_names.first().context("no images found in cbz")?;
@@ -198,12 +199,13 @@ fn list_cbr_images(path: &Path) -> Result<Vec<String>> {

    if output.status.success() {
        let stdout = String::from_utf8_lossy(&output.stdout);
-        let images: Vec<String> = stdout
+        let mut images: Vec<String> = stdout
            .lines()
            .map(|l| l.trim().to_string())
            .filter(|line| is_image_name(&line.to_ascii_lowercase()))
            .collect();
        if !images.is_empty() {
+            images.sort_by(|a, b| natord::compare(a, b));
            return Ok(images);
        }
    }
@@ -223,12 +225,13 @@ fn list_cbr_images(path: &Path) -> Result<Vec<String>> {

    let stdout = String::from_utf8_lossy(&lsar_output.stdout);
    // lsar output: first line is archive info, then one file per line (indented)
-    let images: Vec<String> = stdout
+    let mut images: Vec<String> = stdout
        .lines()
        .skip(1) // skip the archive header line
        .map(|l| l.trim().to_string())
        .filter(|line| is_image_name(&line.to_ascii_lowercase()))
        .collect();
+    images.sort_by(|a, b| natord::compare(a, b));

    Ok(images)
 }
@@ -281,9 +284,9 @@ fn looks_like_image(bytes: &[u8]) -> bool {
    false
 }

-fn analyze_pdf(path: &Path) -> Result<(i32, Vec<u8>)> {
+fn analyze_pdf(path: &Path, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
    let count = parse_pdf_page_count(path)?;
-    let image_bytes = extract_pdf_first_page(path)?;
+    let image_bytes = extract_pdf_first_page(path, pdf_render_scale)?;
    Ok((count, image_bytes))
 }

@@ -354,7 +357,7 @@ pub fn extract_first_page(path: &Path, format: BookFormat) -> Result<Vec<u8>> {
            let first_name = image_names.into_iter().next().context("no images found in cbr")?;
            extract_cbr_first_page(path, &first_name)
        }
-        BookFormat::Pdf => extract_pdf_first_page(path),
+        BookFormat::Pdf => extract_pdf_first_page(path, 0),
    }
 }

@@ -371,7 +374,7 @@ fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> {
            image_names.push(entry.name().to_string());
        }
    }
-    image_names.sort();
+    image_names.sort_by(|a, b| natord::compare(a, b));

    let first_image = image_names.first().context("no images found in cbz")?;

@@ -383,26 +386,36 @@ fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> {
    Ok(buf)
 }

-fn extract_cbr_first_page(path: &Path, first_name: &str) -> Result<Vec<u8>> {
-    let tmp_dir = std::env::temp_dir().join(format!("stripstream-cbr-thumb-{}", Uuid::new_v4()));
-    std::fs::create_dir_all(&tmp_dir).context("cannot create temp dir")?;
+fn extract_cbr_first_page(path: &Path, _first_name: &str) -> Result<Vec<u8>> {
+    let work_dir = std::env::temp_dir().join(format!("stripstream-cbr-thumb-{}", Uuid::new_v4()));
+    let extract_dir = work_dir.join("out");
+    std::fs::create_dir_all(&extract_dir).context("cannot create temp dir")?;
+
+    // unar constructs internal regexes from (archive_path + "/" + internal_path).
+    // Archive filenames containing regex special chars like `[`, `]`, `(`, `)` cause
+    // XADRegexException. Work around by giving unar a safe symlink name.
+    let safe_path = work_dir.join("archive.cbr");
+    if std::os::unix::fs::symlink(path, &safe_path).is_err() {
+        // Symlink creation failed (e.g. temp filesystem without symlink support): fall back to a copy (slower but safe)
+        std::fs::copy(path, &safe_path).context("cannot copy cbr to temp dir")?;
+    }

    let output = std::process::Command::new("env")
        .args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
-        .arg(&tmp_dir)
-        .arg(path)
+        .arg(&extract_dir)
+        .arg(&safe_path)
        .output()
        .context("unar failed")?;

    if !output.status.success() {
-        let _ = std::fs::remove_dir_all(&tmp_dir);
+        let _ = std::fs::remove_dir_all(&work_dir);
        return Err(anyhow::anyhow!(
            "unar extract failed: {:?}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }

-    let mut image_files: Vec<_> = WalkDir::new(&tmp_dir)
+    let mut image_files: Vec<_> = WalkDir::new(&extract_dir)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| {
@@ -411,19 +424,21 @@ fn extract_cbr_first_page(path: &Path, first_name: &str) -> Result<Vec<u8>> {
        })
        .collect();

-    image_files.sort_by_key(|e| e.path().to_string_lossy().to_lowercase());
+    image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));

    let first_image = image_files.first().context("no images found in cbr")?;

    let data = std::fs::read(first_image.path())?;
-    let _ = std::fs::remove_dir_all(&tmp_dir);
+    let _ = std::fs::remove_dir_all(&work_dir);
    Ok(data)
 }

-fn extract_pdf_first_page(path: &Path) -> Result<Vec<u8>> {
+fn extract_pdf_first_page(path: &Path, pdf_render_scale: u32) -> Result<Vec<u8>> {
    let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-thumb-{}", Uuid::new_v4()));
    std::fs::create_dir_all(&tmp_dir)?;
    let output_prefix = tmp_dir.join("page");
+    let scale = if pdf_render_scale == 0 { 400 } else { pdf_render_scale };
+    let scale_str = scale.to_string();

    let output = Command::new("pdftoppm")
        .args([
@@ -432,7 +447,7 @@ fn extract_pdf_first_page(path: &Path) -> Result<Vec<u8>> {
            "-singlefile",
            "-png",
            "-scale-to",
-            "800",
+            &scale_str,
            path.to_str().unwrap(),
            output_prefix.to_str().unwrap(),
        ])
@@ -511,7 +526,7 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
            is_image_name(&name)
        })
        .collect();
-    image_files.sort_by_key(|e| e.path().to_string_lossy().to_lowercase());
+    image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));

    let image_count = image_files.len();
    if image_count == 0 {
--- a/infra/migrations/0017_add_cancelled_status.sql
+++ b/infra/migrations/0017_add_cancelled_status.sql
@@ -0,0 +1,4 @@
+ALTER TABLE index_jobs
+  DROP CONSTRAINT IF EXISTS index_jobs_status_check,
+  ADD CONSTRAINT index_jobs_status_check
+    CHECK (status IN ('pending', 'running', 'generating_thumbnails', 'success', 'failed', 'cancelled'));