From 6abaa96fba9e19d8de4b42b2d4015f2f767f11ce Mon Sep 17 00:00:00 2001 From: Froidefond Julien Date: Wed, 11 Mar 2026 16:46:43 +0100 Subject: [PATCH] perf(parsers): remplacer tous les subprocesses par des libs in-process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CBR: remplace unrar/unar CLI par le crate `unrar` (bindings libunrar vendorisé, zéro dépendance système). Supprime XADRegexException, les forks de processus et les dossiers temporaires. PDF: remplace pdfinfo + pdftoppm par pdfium-render. Le PDF est ouvert une seule fois pour obtenir le nombre de pages ET rasteriser la première page. lopdf reste pour parse_metadata (page count seul). convert_cbr_to_cbz: réécrit sans subprocess ni dossier temporaire — les images sont lues en mémoire via unrar puis packées directement en ZIP. Dockerfile indexer: retire unrar-free, unar, poppler-utils. Télécharge libpdfium.so depuis bblanchon/pdfium-binaries au build. Co-Authored-By: Claude Sonnet 4.6 --- Cargo.lock | 151 ++++++++++++++- Cargo.toml | 2 + apps/indexer/Dockerfile | 17 +- crates/parsers/Cargo.toml | 7 +- crates/parsers/src/lib.rs | 378 +++++++++++++------------------------- 5 files changed, 299 insertions(+), 256 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6fd2548..e438589 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -369,6 +369,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + +[[package]] +name = "console_log" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be8aed40e4edbf4d3b4431ab260b63fdc40f5780a4766824329ea0f1eefe3c0f" +dependencies = [ + "log", + "web-sys", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -1224,6 +1244,15 @@ dependencies =
[ "serde", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -1291,6 +1320,16 @@ version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +[[package]] +name = "libloading" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libm" version = "0.2.16" @@ -1404,6 +1443,12 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "maybe-owned" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" + [[package]] name = "md-5" version = "0.10.6" @@ -1632,11 +1677,12 @@ name = "parsers" version = "0.1.0" dependencies = [ "anyhow", + "image", "lopdf", "natord", + "pdfium-render", "regex", - "uuid", - "walkdir", + "unrar", "zip 2.4.2", ] @@ -1651,6 +1697,32 @@ dependencies = [ "subtle", ] +[[package]] +name = "pdfium-render" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6553f6604a52b3203db7b4e9d51eb4dd193cf455af9e56d40cab6575b547b679" +dependencies = [ + "bitflags 2.11.0", + "bytemuck", + "bytes", + "chrono", + "console_error_panic_hook", + "console_log", + "image", + "itertools", + "js-sys", + "libloading", + "log", + "maybe-owned", + "once_cell", + "utf16string", + "vecmath", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "pem-rfc7468" 
version = "0.7.0" @@ -1678,6 +1750,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piston-float" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590" + [[package]] name = "pkcs1" version = "0.7.5" @@ -2940,6 +3018,29 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unrar" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ec61343a630d2b50d13216dea5125e157d3fc180a7d3f447d22fe146b648fc" +dependencies = [ + "bitflags 2.11.0", + "regex", + "unrar_sys", + "widestring", +] + +[[package]] +name = "unrar_sys" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b77675b883cfbe6bf41e6b7a5cd6008e0a83ba497de3d96e41a064bbeead765" +dependencies = [ + "cc", + "libc", + "winapi", +] + [[package]] name = "untrusted" version = "0.9.0" @@ -2958,6 +3059,15 @@ dependencies = [ "serde", ] +[[package]] +name = "utf16string" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b62a1e85e12d5d712bf47a85f426b73d303e2d00a90de5f3004df3596e9d216" +dependencies = [ + "byteorder", +] + [[package]] name = "utf8_iter" version = "1.0.4" @@ -3028,6 +3138,15 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vecmath" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "956ae1e0d85bca567dee1dcf87fb1ca2e792792f66f87dced8381f99cd91156a" +dependencies = [ + "piston-float", +] + [[package]] name = 
"version_check" version = "0.9.5" @@ -3240,6 +3359,28 @@ dependencies = [ "wasite", ] +[[package]] +name = "widestring" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -3249,6 +3390,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.62.2" diff --git a/Cargo.toml b/Cargo.toml index cd9d4a5..6064a7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,8 @@ tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } uuid = { version = "1.12", features = ["serde", "v4"] } natord = "1.0" +pdfium-render = { version = "0.8", default-features = false, features = ["pdfium_latest", "image_latest", "thread_safe"] } +unrar = "0.5" walkdir = "2.5" webp = "0.3" utoipa = "4.0" diff --git a/apps/indexer/Dockerfile b/apps/indexer/Dockerfile index 3f97b12..9d9af9e 100644 --- a/apps/indexer/Dockerfile +++ b/apps/indexer/Dockerfile @@ -21,11 +21,24 @@ RUN --mount=type=cache,target=/sccache \ cargo build --release -p indexer FROM debian:bookworm-slim + RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates wget 
\ - unrar-free unar \ - poppler-utils \ && rm -rf /var/lib/apt/lists/* + +# Download pdfium shared library (replaces pdftoppm + pdfinfo subprocesses) +RUN ARCH=$(dpkg --print-architecture) && \ + case "$ARCH" in \ + amd64) PDFIUM_ARCH="linux-x64" ;; \ + arm64) PDFIUM_ARCH="linux-arm64" ;; \ + *) echo "Unsupported arch: $ARCH" && exit 1 ;; \ + esac && \ + wget -q "https://github.com/bblanchon/pdfium-binaries/releases/latest/download/pdfium-${PDFIUM_ARCH}.tgz" -O /tmp/pdfium.tgz && \ + tar -xzf /tmp/pdfium.tgz -C /tmp && \ + cp /tmp/lib/libpdfium.so /usr/local/lib/ && \ + rm -rf /tmp/pdfium.tgz /tmp/lib /tmp/include && \ + ldconfig + COPY --from=builder /app/target/release/indexer /usr/local/bin/indexer EXPOSE 7081 CMD ["/usr/local/bin/indexer"] diff --git a/crates/parsers/Cargo.toml b/crates/parsers/Cargo.toml index e00d9d4..29a02f3 100644 --- a/crates/parsers/Cargo.toml +++ b/crates/parsers/Cargo.toml @@ -6,9 +6,10 @@ license.workspace = true [dependencies] anyhow.workspace = true -natord.workspace = true +image.workspace = true lopdf = "0.35" +natord.workspace = true +pdfium-render.workspace = true regex = "1" -uuid.workspace = true -walkdir.workspace = true +unrar.workspace = true zip = { version = "2.2", default-features = false, features = ["deflate"] } diff --git a/crates/parsers/src/lib.rs b/crates/parsers/src/lib.rs index de3dbbb..20536fd 100644 --- a/crates/parsers/src/lib.rs +++ b/crates/parsers/src/lib.rs @@ -1,10 +1,7 @@ use anyhow::{Context, Result}; use std::io::{Read, Write}; use std::path::{Path, PathBuf}; -use std::process::Command; use std::sync::OnceLock; -use uuid::Uuid; -use walkdir::WalkDir; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BookFormat { @@ -152,8 +149,7 @@ pub fn parse_metadata( } /// Open an archive once and return (page_count, first_page_bytes). -/// This is more efficient than calling parse_metadata + extract_first_page separately. 
-/// `pdf_render_scale`: max dimension (width or height) used by pdftoppm; 0 means use default (400). +/// `pdf_render_scale`: max dimension used for PDF rasterization; 0 means use default (400). pub fn analyze_book(path: &Path, format: BookFormat, pdf_render_scale: u32) -> Result<(i32, Vec)> { match format { BookFormat::Cbz => analyze_cbz(path), @@ -189,105 +185,98 @@ fn analyze_cbz(path: &Path) -> Result<(i32, Vec)> { Ok((count, buf)) } -fn list_cbr_images(path: &Path) -> Result> { - // Try unrar lb first (fast) - let output = std::process::Command::new("unrar") - .arg("lb") - .arg(path) - .output() - .with_context(|| format!("failed to execute unrar lb for {}", path.display()))?; - - if output.status.success() { - let stdout = String::from_utf8_lossy(&output.stdout); - let mut images: Vec = stdout - .lines() - .map(|l| l.trim().to_string()) - .filter(|line| is_image_name(&line.to_ascii_lowercase())) - .collect(); - if !images.is_empty() { - images.sort_by(|a, b| natord::compare(a, b)); - return Ok(images); - } - } - - // Fallback: lsar (from unar package) handles UTF-16BE encoded filenames - let lsar_output = std::process::Command::new("lsar") - .arg(path) - .output() - .with_context(|| format!("failed to execute lsar for {}", path.display()))?; - - if !lsar_output.status.success() { - return Err(anyhow::anyhow!( - "both unrar lb and lsar failed for {}", - path.display() - )); - } - - let stdout = String::from_utf8_lossy(&lsar_output.stdout); - // lsar output: first line is archive info, then one file per line (indented) - let mut images: Vec = stdout - .lines() - .skip(1) // skip the archive header line - .map(|l| l.trim().to_string()) - .filter(|line| is_image_name(&line.to_ascii_lowercase())) - .collect(); - images.sort_by(|a, b| natord::compare(a, b)); - - Ok(images) -} - fn analyze_cbr(path: &Path) -> Result<(i32, Vec)> { - let mut image_names = list_cbr_images(path)?; - image_names.sort(); + // Pass 1: list all image names via unrar (in-process, no 
subprocess) + let mut image_names: Vec = { + let archive = unrar::Archive::new(path) + .open_for_listing() + .map_err(|e| anyhow::anyhow!("unrar listing failed for {}: {}", path.display(), e))?; + let mut names = Vec::new(); + for entry in archive { + let entry = entry.map_err(|e| anyhow::anyhow!("unrar entry error: {}", e))?; + let name = entry.filename.to_string_lossy().to_string(); + if is_image_name(&name.to_ascii_lowercase()) { + names.push(name); + } + } + names + }; - let count = image_names.len() as i32; - if count == 0 { + if image_names.is_empty() { return Err(anyhow::anyhow!("no images found in cbr: {}", path.display())); } - let first_name = &image_names[0]; + image_names.sort_by(|a, b| natord::compare(a, b)); + let count = image_names.len() as i32; + let first_name = image_names[0].clone(); - // Try unrar p to extract first image to stdout (faster — no temp dir) - let p_output = std::process::Command::new("unrar") - .args(["p", "-inul"]) - .arg(path) - .arg(first_name) - .output(); + // Pass 2: extract first image to memory + let mut archive = unrar::Archive::new(path) + .open_for_processing() + .map_err(|e| anyhow::anyhow!("unrar open for processing failed for {}: {}", path.display(), e))?; - match p_output { - Ok(out) if out.status.success() && looks_like_image(&out.stdout) => Ok((count, out.stdout)), - _ => { - // Fallback: targeted extraction with unar (handles special chars, encoding issues) - let image_bytes = extract_cbr_first_page(path, first_name)?; - Ok((count, image_bytes)) + while let Some(header) = archive + .read_header() + .map_err(|e| anyhow::anyhow!("unrar read header: {}", e))? 
+ { + let entry_name = header.entry().filename.to_string_lossy().to_string(); + if entry_name == first_name { + let (data, _) = header + .read() + .map_err(|e| anyhow::anyhow!("unrar read data: {}", e))?; + return Ok((count, data)); } + archive = header + .skip() + .map_err(|e| anyhow::anyhow!("unrar skip: {}", e))?; } -} -/// Check image magic bytes to validate that bytes are a real image before decoding. -fn looks_like_image(bytes: &[u8]) -> bool { - if bytes.len() < 12 { - return false; - } - // JPEG: FF D8 FF - if bytes.starts_with(&[0xFF, 0xD8, 0xFF]) { - return true; - } - // PNG: 89 50 4E 47 0D 0A 1A 0A - if bytes.starts_with(&[0x89, 0x50, 0x4E, 0x47]) { - return true; - } - // WebP: RIFF....WEBP - if &bytes[0..4] == b"RIFF" && &bytes[8..12] == b"WEBP" { - return true; - } - false + Err(anyhow::anyhow!( + "could not find '{}' in {}", + first_name, + path.display() + )) } fn analyze_pdf(path: &Path, pdf_render_scale: u32) -> Result<(i32, Vec)> { - let count = parse_pdf_page_count(path)?; - let image_bytes = extract_pdf_first_page(path, pdf_render_scale)?; - Ok((count, image_bytes)) + use pdfium_render::prelude::*; + + // Open PDF once — get page count and render first page in a single pass + let pdfium = Pdfium::new( + Pdfium::bind_to_system_library() + .map_err(|e| anyhow::anyhow!("pdfium library not available: {:?}", e))?, + ); + + let document = pdfium + .load_pdf_from_file(path, None) + .map_err(|e| anyhow::anyhow!("pdfium load failed for {}: {:?}", path.display(), e))?; + + let count = document.pages().len() as i32; + if count == 0 { + return Err(anyhow::anyhow!("PDF has no pages: {}", path.display())); + } + + let scale = if pdf_render_scale == 0 { 400 } else { pdf_render_scale } as i32; + let config = PdfRenderConfig::new() + .set_target_width(scale) + .set_maximum_height(scale); + + let page = document + .pages() + .get(0) + .map_err(|e| anyhow::anyhow!("cannot get first page of {}: {:?}", path.display(), e))?; + + let bitmap = page + 
.render_with_config(&config) + .map_err(|e| anyhow::anyhow!("pdfium render failed for {}: {:?}", path.display(), e))?; + + let image = bitmap.as_image(); + let mut buf = std::io::Cursor::new(Vec::new()); + image + .write_to(&mut buf, image::ImageFormat::Png) + .context("failed to encode rendered PDF page as PNG")?; + + Ok((count, buf.into_inner())) } fn parse_cbz_page_count(path: &Path) -> Result { @@ -306,34 +295,23 @@ fn parse_cbz_page_count(path: &Path) -> Result { } fn parse_cbr_page_count(path: &Path) -> Result { - let images = list_cbr_images(path)?; - Ok(images.len() as i32) + let archive = unrar::Archive::new(path) + .open_for_listing() + .map_err(|e| anyhow::anyhow!("unrar listing failed for {}: {}", path.display(), e))?; + let count = archive + .filter(|r| { + r.as_ref() + .map(|e| is_image_name(&e.filename.to_string_lossy().to_ascii_lowercase())) + .unwrap_or(false) + }) + .count() as i32; + Ok(count) } fn parse_pdf_page_count(path: &Path) -> Result { - let output = std::process::Command::new("pdfinfo") - .arg(path) - .output() - .with_context(|| format!("failed to execute pdfinfo for {}", path.display()))?; - - if !output.status.success() { - return Err(anyhow::anyhow!("pdfinfo failed for {}", path.display())); - } - - let stdout = String::from_utf8_lossy(&output.stdout); - for line in stdout.lines() { - if line.starts_with("Pages:") { - if let Some(pages_str) = line.split_whitespace().nth(1) { - return pages_str - .parse::() - .with_context(|| format!("cannot parse page count: {}", pages_str)); - } - } - } - - Err(anyhow::anyhow!( - "could not find page count in pdfinfo output" - )) + let doc = lopdf::Document::load(path) + .with_context(|| format!("cannot open pdf: {}", path.display()))?; + Ok(doc.get_pages().len() as i32) } fn is_image_name(name: &str) -> bool { @@ -351,13 +329,8 @@ fn is_image_name(name: &str) -> bool { pub fn extract_first_page(path: &Path, format: BookFormat) -> Result> { match format { BookFormat::Cbz => 
extract_cbz_first_page(path), - BookFormat::Cbr => { - let mut image_names = list_cbr_images(path)?; - image_names.sort(); - let first_name = image_names.into_iter().next().context("no images found in cbr")?; - extract_cbr_first_page(path, &first_name) - } - BookFormat::Pdf => extract_pdf_first_page(path, 0), + BookFormat::Cbr => analyze_cbr(path).map(|(_, bytes)| bytes), + BookFormat::Pdf => analyze_pdf(path, 0).map(|(_, bytes)| bytes), } } @@ -386,98 +359,13 @@ fn extract_cbz_first_page(path: &Path) -> Result> { Ok(buf) } -fn extract_cbr_first_page(path: &Path, _first_name: &str) -> Result> { - let work_dir = std::env::temp_dir().join(format!("stripstream-cbr-thumb-{}", Uuid::new_v4())); - let extract_dir = work_dir.join("out"); - std::fs::create_dir_all(&extract_dir).context("cannot create temp dir")?; - - // unar constructs internal regexes from (archive_path + "/" + internal_path). - // Archive filenames containing regex special chars like `[`, `]`, `(`, `)` cause - // XADRegexException. Work around by giving unar a safe symlink name. 
- let safe_path = work_dir.join("archive.cbr"); - if std::os::unix::fs::symlink(path, &safe_path).is_err() { - // Cross-filesystem fallback: copy (slower but safe) - std::fs::copy(path, &safe_path).context("cannot copy cbr to temp dir")?; - } - - let output = std::process::Command::new("env") - .args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"]) - .arg(&extract_dir) - .arg(&safe_path) - .output() - .context("unar failed")?; - - if !output.status.success() { - let _ = std::fs::remove_dir_all(&work_dir); - return Err(anyhow::anyhow!( - "unar extract failed: {:?}", - String::from_utf8_lossy(&output.stderr) - )); - } - - let mut image_files: Vec<_> = WalkDir::new(&extract_dir) - .into_iter() - .filter_map(|e| e.ok()) - .filter(|e| { - let name = e.file_name().to_string_lossy().to_lowercase(); - is_image_name(&name) - }) - .collect(); - - image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy())); - - let first_image = image_files.first().context("no images found in cbr")?; - - let data = std::fs::read(first_image.path())?; - let _ = std::fs::remove_dir_all(&work_dir); - Ok(data) -} - -fn extract_pdf_first_page(path: &Path, pdf_render_scale: u32) -> Result> { - let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-thumb-{}", Uuid::new_v4())); - std::fs::create_dir_all(&tmp_dir)?; - let output_prefix = tmp_dir.join("page"); - let scale = if pdf_render_scale == 0 { 400 } else { pdf_render_scale }; - let scale_str = scale.to_string(); - - let output = Command::new("pdftoppm") - .args([ - "-f", - "1", - "-singlefile", - "-png", - "-scale-to", - &scale_str, - path.to_str().unwrap(), - output_prefix.to_str().unwrap(), - ]) - .output() - .context("pdftoppm failed")?; - - if !output.status.success() { - let _ = std::fs::remove_dir_all(&tmp_dir); - return Err(anyhow::anyhow!("pdftoppm failed")); - } - - let image_path = output_prefix.with_extension("png"); - let data = std::fs::read(&image_path)?; - let _ = 
std::fs::remove_dir_all(&tmp_dir); - Ok(data) -} - /// Convert a CBR file to CBZ in-place (same directory, same stem). /// /// The conversion is safe: a `.cbz.tmp` file is written first, verified, then /// atomically renamed to `.cbz`. The original CBR is **not** deleted by this -/// function — the caller is responsible for removing it after a successful DB -/// update. +/// function — the caller is responsible for removing it after a successful DB update. /// /// Returns the path of the newly created `.cbz` file. -/// -/// # Errors -/// - Returns an error if a `.cbz` file with the same stem already exists. -/// - Returns an error if extraction, packing, or verification fails. -/// - Returns an error if `cbr_path` has no parent directory or no file stem. pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result { let parent = cbr_path .parent() @@ -489,7 +377,6 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result { let cbz_path = parent.join(format!("{}.cbz", stem.to_string_lossy())); let tmp_path = parent.join(format!("{}.cbz.tmp", stem.to_string_lossy())); - // Refuse if target CBZ already exists if cbz_path.exists() { return Err(anyhow::anyhow!( "CBZ file already exists: {}", @@ -497,46 +384,45 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result { )); } - // Extract CBR to a temp dir - let tmp_dir = - std::env::temp_dir().join(format!("stripstream-cbr-convert-{}", Uuid::new_v4())); - std::fs::create_dir_all(&tmp_dir).context("cannot create temp dir")?; + // Extract all images from CBR into memory using unrar crate (no subprocess) + let mut images: Vec<(String, Vec)> = Vec::new(); + let mut archive = unrar::Archive::new(cbr_path) + .open_for_processing() + .map_err(|e| anyhow::anyhow!("unrar open failed for {}: {}", cbr_path.display(), e))?; - let output = std::process::Command::new("env") - .args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"]) - .arg(&tmp_dir) - .arg(cbr_path) - .output() - .context("unar failed to start")?; + while let Some(header) 
= archive + .read_header() + .map_err(|e| anyhow::anyhow!("unrar read header: {}", e))? + { + let entry_name = header.entry().filename.to_string_lossy().to_string(); + let file_name = Path::new(&entry_name) + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_else(|| entry_name.clone()); - if !output.status.success() { - let _ = std::fs::remove_dir_all(&tmp_dir); - return Err(anyhow::anyhow!( - "unar extraction failed: {}", - String::from_utf8_lossy(&output.stderr) - )); + if is_image_name(&entry_name.to_ascii_lowercase()) { + let (data, next) = header + .read() + .map_err(|e| anyhow::anyhow!("unrar read: {}", e))?; + images.push((file_name, data)); + archive = next; + } else { + archive = header + .skip() + .map_err(|e| anyhow::anyhow!("unrar skip: {}", e))?; + } } - // Collect and sort image files - let mut image_files: Vec<_> = WalkDir::new(&tmp_dir) - .into_iter() - .filter_map(|e| e.ok()) - .filter(|e| { - let name = e.file_name().to_string_lossy().to_lowercase(); - is_image_name(&name) - }) - .collect(); - image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy())); - - let image_count = image_files.len(); - if image_count == 0 { - let _ = std::fs::remove_dir_all(&tmp_dir); + if images.is_empty() { return Err(anyhow::anyhow!( "no images found in CBR: {}", cbr_path.display() )); } + images.sort_by(|(a, _), (b, _)| natord::compare(a, b)); + let image_count = images.len(); + // Pack images into the .cbz.tmp file let pack_result = (|| -> Result<()> { let cbz_file = std::fs::File::create(&tmp_path) @@ -545,21 +431,16 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result { let options = zip::write::SimpleFileOptions::default() .compression_method(zip::CompressionMethod::Deflated); - for entry in &image_files { - let file_name = entry.file_name().to_string_lossy().to_string(); - zip.start_file(&file_name, options) + for (file_name, data) in &images { + zip.start_file(file_name, options) .with_context(|| 
format!("cannot add file {} to zip", file_name))?; - let data = std::fs::read(entry.path()) - .with_context(|| format!("cannot read {}", entry.path().display()))?; - zip.write_all(&data) + zip.write_all(data) .with_context(|| format!("cannot write {} to zip", file_name))?; } zip.finish().context("cannot finalize zip")?; Ok(()) })(); - let _ = std::fs::remove_dir_all(&tmp_dir); - if let Err(err) = pack_result { let _ = std::fs::remove_file(&tmp_path); return Err(err); @@ -593,7 +474,6 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result { return Err(err); } - // Atomic rename .cbz.tmp → .cbz std::fs::rename(&tmp_path, &cbz_path) .with_context(|| format!("cannot rename {} to {}", tmp_path.display(), cbz_path.display()))?;