perf(parsers): remplacer tous les subprocesses par des libs in-process
CBR: remplace unrar/unar CLI par le crate `unrar` (bindings libunrar vendorisé, zéro dépendance système). Supprime XADRegexException, les forks de processus et les dossiers temporaires. PDF: remplace pdfinfo + pdftoppm par pdfium-render. Le PDF est ouvert une seule fois pour obtenir le nombre de pages ET rasteriser la première page. lopdf reste pour parse_metadata (page count seul). convert_cbr_to_cbz: réécrit sans subprocess ni dossier temporaire — les images sont lues en mémoire via unrar puis packées directement en ZIP. Dockerfile indexer: retire unrar-free, unar, poppler-utils. Télécharge libpdfium.so depuis bblanchon/pdfium-binaries au build. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
151
Cargo.lock
generated
151
Cargo.lock
generated
@@ -369,6 +369,26 @@ dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console_error_panic_hook"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console_log"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be8aed40e4edbf4d3b4431ab260b63fdc40f5780a4766824329ea0f1eefe3c0f"
|
||||
dependencies = [
|
||||
"log",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const-oid"
|
||||
version = "0.9.6"
|
||||
@@ -1224,6 +1244,15 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.17"
|
||||
@@ -1291,6 +1320,16 @@ version = "0.2.182"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
version = "0.2.16"
|
||||
@@ -1404,6 +1443,12 @@ version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
||||
|
||||
[[package]]
|
||||
name = "maybe-owned"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4"
|
||||
|
||||
[[package]]
|
||||
name = "md-5"
|
||||
version = "0.10.6"
|
||||
@@ -1632,11 +1677,12 @@ name = "parsers"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"image",
|
||||
"lopdf",
|
||||
"natord",
|
||||
"pdfium-render",
|
||||
"regex",
|
||||
"uuid",
|
||||
"walkdir",
|
||||
"unrar",
|
||||
"zip 2.4.2",
|
||||
]
|
||||
|
||||
@@ -1651,6 +1697,32 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pdfium-render"
|
||||
version = "0.8.37"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6553f6604a52b3203db7b4e9d51eb4dd193cf455af9e56d40cab6575b547b679"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"bytemuck",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"console_error_panic_hook",
|
||||
"console_log",
|
||||
"image",
|
||||
"itertools",
|
||||
"js-sys",
|
||||
"libloading",
|
||||
"log",
|
||||
"maybe-owned",
|
||||
"once_cell",
|
||||
"utf16string",
|
||||
"vecmath",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pem-rfc7468"
|
||||
version = "0.7.0"
|
||||
@@ -1678,6 +1750,12 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "piston-float"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590"
|
||||
|
||||
[[package]]
|
||||
name = "pkcs1"
|
||||
version = "0.7.5"
|
||||
@@ -2940,6 +3018,29 @@ version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
||||
|
||||
[[package]]
|
||||
name = "unrar"
|
||||
version = "0.5.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92ec61343a630d2b50d13216dea5125e157d3fc180a7d3f447d22fe146b648fc"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"regex",
|
||||
"unrar_sys",
|
||||
"widestring",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unrar_sys"
|
||||
version = "0.5.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b77675b883cfbe6bf41e6b7a5cd6008e0a83ba497de3d96e41a064bbeead765"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.9.0"
|
||||
@@ -2958,6 +3059,15 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf16string"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b62a1e85e12d5d712bf47a85f426b73d303e2d00a90de5f3004df3596e9d216"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf8_iter"
|
||||
version = "1.0.4"
|
||||
@@ -3028,6 +3138,15 @@ version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "vecmath"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "956ae1e0d85bca567dee1dcf87fb1ca2e792792f66f87dced8381f99cd91156a"
|
||||
dependencies = [
|
||||
"piston-float",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.5"
|
||||
@@ -3240,6 +3359,28 @@ dependencies = [
|
||||
"wasite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "widestring"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.11"
|
||||
@@ -3249,6 +3390,12 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.62.2"
|
||||
|
||||
@@ -33,6 +33,8 @@ tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||
uuid = { version = "1.12", features = ["serde", "v4"] }
|
||||
natord = "1.0"
|
||||
pdfium-render = { version = "0.8", default-features = false, features = ["pdfium_latest", "image_latest", "thread_safe"] }
|
||||
unrar = "0.5"
|
||||
walkdir = "2.5"
|
||||
webp = "0.3"
|
||||
utoipa = "4.0"
|
||||
|
||||
@@ -21,11 +21,24 @@ RUN --mount=type=cache,target=/sccache \
|
||||
cargo build --release -p indexer
|
||||
|
||||
FROM debian:bookworm-slim
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ca-certificates wget \
|
||||
unrar-free unar \
|
||||
poppler-utils \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Download pdfium shared library (replaces pdftoppm + pdfinfo subprocesses)
|
||||
RUN ARCH=$(dpkg --print-architecture) && \
|
||||
case "$ARCH" in \
|
||||
amd64) PDFIUM_ARCH="linux-x64" ;; \
|
||||
arm64) PDFIUM_ARCH="linux-arm64" ;; \
|
||||
*) echo "Unsupported arch: $ARCH" && exit 1 ;; \
|
||||
esac && \
|
||||
wget -q "https://github.com/bblanchon/pdfium-binaries/releases/latest/download/pdfium-${PDFIUM_ARCH}.tgz" -O /tmp/pdfium.tgz && \
|
||||
tar -xzf /tmp/pdfium.tgz -C /tmp && \
|
||||
cp /tmp/lib/libpdfium.so /usr/local/lib/ && \
|
||||
rm -rf /tmp/pdfium.tgz /tmp/lib /tmp/include && \
|
||||
ldconfig
|
||||
|
||||
COPY --from=builder /app/target/release/indexer /usr/local/bin/indexer
|
||||
EXPOSE 7081
|
||||
CMD ["/usr/local/bin/indexer"]
|
||||
|
||||
@@ -6,9 +6,10 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
natord.workspace = true
|
||||
image.workspace = true
|
||||
lopdf = "0.35"
|
||||
natord.workspace = true
|
||||
pdfium-render.workspace = true
|
||||
regex = "1"
|
||||
uuid.workspace = true
|
||||
walkdir.workspace = true
|
||||
unrar.workspace = true
|
||||
zip = { version = "2.2", default-features = false, features = ["deflate"] }
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
use anyhow::{Context, Result};
|
||||
use std::io::{Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::sync::OnceLock;
|
||||
use uuid::Uuid;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum BookFormat {
|
||||
@@ -152,8 +149,7 @@ pub fn parse_metadata(
|
||||
}
|
||||
|
||||
/// Open an archive once and return (page_count, first_page_bytes).
|
||||
/// This is more efficient than calling parse_metadata + extract_first_page separately.
|
||||
/// `pdf_render_scale`: max dimension (width or height) used by pdftoppm; 0 means use default (400).
|
||||
/// `pdf_render_scale`: max dimension used for PDF rasterization; 0 means use default (400).
|
||||
pub fn analyze_book(path: &Path, format: BookFormat, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
|
||||
match format {
|
||||
BookFormat::Cbz => analyze_cbz(path),
|
||||
@@ -189,105 +185,98 @@ fn analyze_cbz(path: &Path) -> Result<(i32, Vec<u8>)> {
|
||||
Ok((count, buf))
|
||||
}
|
||||
|
||||
fn list_cbr_images(path: &Path) -> Result<Vec<String>> {
|
||||
// Try unrar lb first (fast)
|
||||
let output = std::process::Command::new("unrar")
|
||||
.arg("lb")
|
||||
.arg(path)
|
||||
.output()
|
||||
.with_context(|| format!("failed to execute unrar lb for {}", path.display()))?;
|
||||
|
||||
if output.status.success() {
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let mut images: Vec<String> = stdout
|
||||
.lines()
|
||||
.map(|l| l.trim().to_string())
|
||||
.filter(|line| is_image_name(&line.to_ascii_lowercase()))
|
||||
.collect();
|
||||
if !images.is_empty() {
|
||||
images.sort_by(|a, b| natord::compare(a, b));
|
||||
return Ok(images);
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: lsar (from unar package) handles UTF-16BE encoded filenames
|
||||
let lsar_output = std::process::Command::new("lsar")
|
||||
.arg(path)
|
||||
.output()
|
||||
.with_context(|| format!("failed to execute lsar for {}", path.display()))?;
|
||||
|
||||
if !lsar_output.status.success() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"both unrar lb and lsar failed for {}",
|
||||
path.display()
|
||||
));
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&lsar_output.stdout);
|
||||
// lsar output: first line is archive info, then one file per line (indented)
|
||||
let mut images: Vec<String> = stdout
|
||||
.lines()
|
||||
.skip(1) // skip the archive header line
|
||||
.map(|l| l.trim().to_string())
|
||||
.filter(|line| is_image_name(&line.to_ascii_lowercase()))
|
||||
.collect();
|
||||
images.sort_by(|a, b| natord::compare(a, b));
|
||||
|
||||
Ok(images)
|
||||
}
|
||||
|
||||
fn analyze_cbr(path: &Path) -> Result<(i32, Vec<u8>)> {
|
||||
let mut image_names = list_cbr_images(path)?;
|
||||
image_names.sort();
|
||||
// Pass 1: list all image names via unrar (in-process, no subprocess)
|
||||
let mut image_names: Vec<String> = {
|
||||
let archive = unrar::Archive::new(path)
|
||||
.open_for_listing()
|
||||
.map_err(|e| anyhow::anyhow!("unrar listing failed for {}: {}", path.display(), e))?;
|
||||
let mut names = Vec::new();
|
||||
for entry in archive {
|
||||
let entry = entry.map_err(|e| anyhow::anyhow!("unrar entry error: {}", e))?;
|
||||
let name = entry.filename.to_string_lossy().to_string();
|
||||
if is_image_name(&name.to_ascii_lowercase()) {
|
||||
names.push(name);
|
||||
}
|
||||
}
|
||||
names
|
||||
};
|
||||
|
||||
let count = image_names.len() as i32;
|
||||
if count == 0 {
|
||||
if image_names.is_empty() {
|
||||
return Err(anyhow::anyhow!("no images found in cbr: {}", path.display()));
|
||||
}
|
||||
|
||||
let first_name = &image_names[0];
|
||||
image_names.sort_by(|a, b| natord::compare(a, b));
|
||||
let count = image_names.len() as i32;
|
||||
let first_name = image_names[0].clone();
|
||||
|
||||
// Try unrar p to extract first image to stdout (faster — no temp dir)
|
||||
let p_output = std::process::Command::new("unrar")
|
||||
.args(["p", "-inul"])
|
||||
.arg(path)
|
||||
.arg(first_name)
|
||||
.output();
|
||||
// Pass 2: extract first image to memory
|
||||
let mut archive = unrar::Archive::new(path)
|
||||
.open_for_processing()
|
||||
.map_err(|e| anyhow::anyhow!("unrar open for processing failed for {}: {}", path.display(), e))?;
|
||||
|
||||
match p_output {
|
||||
Ok(out) if out.status.success() && looks_like_image(&out.stdout) => Ok((count, out.stdout)),
|
||||
_ => {
|
||||
// Fallback: targeted extraction with unar (handles special chars, encoding issues)
|
||||
let image_bytes = extract_cbr_first_page(path, first_name)?;
|
||||
Ok((count, image_bytes))
|
||||
}
|
||||
while let Some(header) = archive
|
||||
.read_header()
|
||||
.map_err(|e| anyhow::anyhow!("unrar read header: {}", e))?
|
||||
{
|
||||
let entry_name = header.entry().filename.to_string_lossy().to_string();
|
||||
if entry_name == first_name {
|
||||
let (data, _) = header
|
||||
.read()
|
||||
.map_err(|e| anyhow::anyhow!("unrar read data: {}", e))?;
|
||||
return Ok((count, data));
|
||||
}
|
||||
archive = header
|
||||
.skip()
|
||||
.map_err(|e| anyhow::anyhow!("unrar skip: {}", e))?;
|
||||
}
|
||||
|
||||
/// Check image magic bytes to validate that bytes are a real image before decoding.
|
||||
fn looks_like_image(bytes: &[u8]) -> bool {
|
||||
if bytes.len() < 12 {
|
||||
return false;
|
||||
}
|
||||
// JPEG: FF D8 FF
|
||||
if bytes.starts_with(&[0xFF, 0xD8, 0xFF]) {
|
||||
return true;
|
||||
}
|
||||
// PNG: 89 50 4E 47 0D 0A 1A 0A
|
||||
if bytes.starts_with(&[0x89, 0x50, 0x4E, 0x47]) {
|
||||
return true;
|
||||
}
|
||||
// WebP: RIFF....WEBP
|
||||
if &bytes[0..4] == b"RIFF" && &bytes[8..12] == b"WEBP" {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
Err(anyhow::anyhow!(
|
||||
"could not find '{}' in {}",
|
||||
first_name,
|
||||
path.display()
|
||||
))
|
||||
}
|
||||
|
||||
fn analyze_pdf(path: &Path, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
|
||||
let count = parse_pdf_page_count(path)?;
|
||||
let image_bytes = extract_pdf_first_page(path, pdf_render_scale)?;
|
||||
Ok((count, image_bytes))
|
||||
use pdfium_render::prelude::*;
|
||||
|
||||
// Open PDF once — get page count and render first page in a single pass
|
||||
let pdfium = Pdfium::new(
|
||||
Pdfium::bind_to_system_library()
|
||||
.map_err(|e| anyhow::anyhow!("pdfium library not available: {:?}", e))?,
|
||||
);
|
||||
|
||||
let document = pdfium
|
||||
.load_pdf_from_file(path, None)
|
||||
.map_err(|e| anyhow::anyhow!("pdfium load failed for {}: {:?}", path.display(), e))?;
|
||||
|
||||
let count = document.pages().len() as i32;
|
||||
if count == 0 {
|
||||
return Err(anyhow::anyhow!("PDF has no pages: {}", path.display()));
|
||||
}
|
||||
|
||||
let scale = if pdf_render_scale == 0 { 400 } else { pdf_render_scale } as i32;
|
||||
let config = PdfRenderConfig::new()
|
||||
.set_target_width(scale)
|
||||
.set_maximum_height(scale);
|
||||
|
||||
let page = document
|
||||
.pages()
|
||||
.get(0)
|
||||
.map_err(|e| anyhow::anyhow!("cannot get first page of {}: {:?}", path.display(), e))?;
|
||||
|
||||
let bitmap = page
|
||||
.render_with_config(&config)
|
||||
.map_err(|e| anyhow::anyhow!("pdfium render failed for {}: {:?}", path.display(), e))?;
|
||||
|
||||
let image = bitmap.as_image();
|
||||
let mut buf = std::io::Cursor::new(Vec::new());
|
||||
image
|
||||
.write_to(&mut buf, image::ImageFormat::Png)
|
||||
.context("failed to encode rendered PDF page as PNG")?;
|
||||
|
||||
Ok((count, buf.into_inner()))
|
||||
}
|
||||
|
||||
fn parse_cbz_page_count(path: &Path) -> Result<i32> {
|
||||
@@ -306,34 +295,23 @@ fn parse_cbz_page_count(path: &Path) -> Result<i32> {
|
||||
}
|
||||
|
||||
fn parse_cbr_page_count(path: &Path) -> Result<i32> {
|
||||
let images = list_cbr_images(path)?;
|
||||
Ok(images.len() as i32)
|
||||
let archive = unrar::Archive::new(path)
|
||||
.open_for_listing()
|
||||
.map_err(|e| anyhow::anyhow!("unrar listing failed for {}: {}", path.display(), e))?;
|
||||
let count = archive
|
||||
.filter(|r| {
|
||||
r.as_ref()
|
||||
.map(|e| is_image_name(&e.filename.to_string_lossy().to_ascii_lowercase()))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.count() as i32;
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
fn parse_pdf_page_count(path: &Path) -> Result<i32> {
|
||||
let output = std::process::Command::new("pdfinfo")
|
||||
.arg(path)
|
||||
.output()
|
||||
.with_context(|| format!("failed to execute pdfinfo for {}", path.display()))?;
|
||||
|
||||
if !output.status.success() {
|
||||
return Err(anyhow::anyhow!("pdfinfo failed for {}", path.display()));
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
for line in stdout.lines() {
|
||||
if line.starts_with("Pages:") {
|
||||
if let Some(pages_str) = line.split_whitespace().nth(1) {
|
||||
return pages_str
|
||||
.parse::<i32>()
|
||||
.with_context(|| format!("cannot parse page count: {}", pages_str));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow::anyhow!(
|
||||
"could not find page count in pdfinfo output"
|
||||
))
|
||||
let doc = lopdf::Document::load(path)
|
||||
.with_context(|| format!("cannot open pdf: {}", path.display()))?;
|
||||
Ok(doc.get_pages().len() as i32)
|
||||
}
|
||||
|
||||
fn is_image_name(name: &str) -> bool {
|
||||
@@ -351,13 +329,8 @@ fn is_image_name(name: &str) -> bool {
|
||||
pub fn extract_first_page(path: &Path, format: BookFormat) -> Result<Vec<u8>> {
|
||||
match format {
|
||||
BookFormat::Cbz => extract_cbz_first_page(path),
|
||||
BookFormat::Cbr => {
|
||||
let mut image_names = list_cbr_images(path)?;
|
||||
image_names.sort();
|
||||
let first_name = image_names.into_iter().next().context("no images found in cbr")?;
|
||||
extract_cbr_first_page(path, &first_name)
|
||||
}
|
||||
BookFormat::Pdf => extract_pdf_first_page(path, 0),
|
||||
BookFormat::Cbr => analyze_cbr(path).map(|(_, bytes)| bytes),
|
||||
BookFormat::Pdf => analyze_pdf(path, 0).map(|(_, bytes)| bytes),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -386,98 +359,13 @@ fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> {
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
fn extract_cbr_first_page(path: &Path, _first_name: &str) -> Result<Vec<u8>> {
|
||||
let work_dir = std::env::temp_dir().join(format!("stripstream-cbr-thumb-{}", Uuid::new_v4()));
|
||||
let extract_dir = work_dir.join("out");
|
||||
std::fs::create_dir_all(&extract_dir).context("cannot create temp dir")?;
|
||||
|
||||
// unar constructs internal regexes from (archive_path + "/" + internal_path).
|
||||
// Archive filenames containing regex special chars like `[`, `]`, `(`, `)` cause
|
||||
// XADRegexException. Work around by giving unar a safe symlink name.
|
||||
let safe_path = work_dir.join("archive.cbr");
|
||||
if std::os::unix::fs::symlink(path, &safe_path).is_err() {
|
||||
// Cross-filesystem fallback: copy (slower but safe)
|
||||
std::fs::copy(path, &safe_path).context("cannot copy cbr to temp dir")?;
|
||||
}
|
||||
|
||||
let output = std::process::Command::new("env")
|
||||
.args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
|
||||
.arg(&extract_dir)
|
||||
.arg(&safe_path)
|
||||
.output()
|
||||
.context("unar failed")?;
|
||||
|
||||
if !output.status.success() {
|
||||
let _ = std::fs::remove_dir_all(&work_dir);
|
||||
return Err(anyhow::anyhow!(
|
||||
"unar extract failed: {:?}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
));
|
||||
}
|
||||
|
||||
let mut image_files: Vec<_> = WalkDir::new(&extract_dir)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| {
|
||||
let name = e.file_name().to_string_lossy().to_lowercase();
|
||||
is_image_name(&name)
|
||||
})
|
||||
.collect();
|
||||
|
||||
image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));
|
||||
|
||||
let first_image = image_files.first().context("no images found in cbr")?;
|
||||
|
||||
let data = std::fs::read(first_image.path())?;
|
||||
let _ = std::fs::remove_dir_all(&work_dir);
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
fn extract_pdf_first_page(path: &Path, pdf_render_scale: u32) -> Result<Vec<u8>> {
|
||||
let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-thumb-{}", Uuid::new_v4()));
|
||||
std::fs::create_dir_all(&tmp_dir)?;
|
||||
let output_prefix = tmp_dir.join("page");
|
||||
let scale = if pdf_render_scale == 0 { 400 } else { pdf_render_scale };
|
||||
let scale_str = scale.to_string();
|
||||
|
||||
let output = Command::new("pdftoppm")
|
||||
.args([
|
||||
"-f",
|
||||
"1",
|
||||
"-singlefile",
|
||||
"-png",
|
||||
"-scale-to",
|
||||
&scale_str,
|
||||
path.to_str().unwrap(),
|
||||
output_prefix.to_str().unwrap(),
|
||||
])
|
||||
.output()
|
||||
.context("pdftoppm failed")?;
|
||||
|
||||
if !output.status.success() {
|
||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
||||
return Err(anyhow::anyhow!("pdftoppm failed"));
|
||||
}
|
||||
|
||||
let image_path = output_prefix.with_extension("png");
|
||||
let data = std::fs::read(&image_path)?;
|
||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
/// Convert a CBR file to CBZ in-place (same directory, same stem).
|
||||
///
|
||||
/// The conversion is safe: a `.cbz.tmp` file is written first, verified, then
|
||||
/// atomically renamed to `.cbz`. The original CBR is **not** deleted by this
|
||||
/// function — the caller is responsible for removing it after a successful DB
|
||||
/// update.
|
||||
/// function — the caller is responsible for removing it after a successful DB update.
|
||||
///
|
||||
/// Returns the path of the newly created `.cbz` file.
|
||||
///
|
||||
/// # Errors
|
||||
/// - Returns an error if a `.cbz` file with the same stem already exists.
|
||||
/// - Returns an error if extraction, packing, or verification fails.
|
||||
/// - Returns an error if `cbr_path` has no parent directory or no file stem.
|
||||
pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
||||
let parent = cbr_path
|
||||
.parent()
|
||||
@@ -489,7 +377,6 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
||||
let cbz_path = parent.join(format!("{}.cbz", stem.to_string_lossy()));
|
||||
let tmp_path = parent.join(format!("{}.cbz.tmp", stem.to_string_lossy()));
|
||||
|
||||
// Refuse if target CBZ already exists
|
||||
if cbz_path.exists() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"CBZ file already exists: {}",
|
||||
@@ -497,46 +384,45 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
||||
));
|
||||
}
|
||||
|
||||
// Extract CBR to a temp dir
|
||||
let tmp_dir =
|
||||
std::env::temp_dir().join(format!("stripstream-cbr-convert-{}", Uuid::new_v4()));
|
||||
std::fs::create_dir_all(&tmp_dir).context("cannot create temp dir")?;
|
||||
// Extract all images from CBR into memory using unrar crate (no subprocess)
|
||||
let mut images: Vec<(String, Vec<u8>)> = Vec::new();
|
||||
let mut archive = unrar::Archive::new(cbr_path)
|
||||
.open_for_processing()
|
||||
.map_err(|e| anyhow::anyhow!("unrar open failed for {}: {}", cbr_path.display(), e))?;
|
||||
|
||||
let output = std::process::Command::new("env")
|
||||
.args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
|
||||
.arg(&tmp_dir)
|
||||
.arg(cbr_path)
|
||||
.output()
|
||||
.context("unar failed to start")?;
|
||||
while let Some(header) = archive
|
||||
.read_header()
|
||||
.map_err(|e| anyhow::anyhow!("unrar read header: {}", e))?
|
||||
{
|
||||
let entry_name = header.entry().filename.to_string_lossy().to_string();
|
||||
let file_name = Path::new(&entry_name)
|
||||
.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| entry_name.clone());
|
||||
|
||||
if !output.status.success() {
|
||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
||||
return Err(anyhow::anyhow!(
|
||||
"unar extraction failed: {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
));
|
||||
if is_image_name(&entry_name.to_ascii_lowercase()) {
|
||||
let (data, next) = header
|
||||
.read()
|
||||
.map_err(|e| anyhow::anyhow!("unrar read: {}", e))?;
|
||||
images.push((file_name, data));
|
||||
archive = next;
|
||||
} else {
|
||||
archive = header
|
||||
.skip()
|
||||
.map_err(|e| anyhow::anyhow!("unrar skip: {}", e))?;
|
||||
}
|
||||
}
|
||||
|
||||
// Collect and sort image files
|
||||
let mut image_files: Vec<_> = WalkDir::new(&tmp_dir)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| {
|
||||
let name = e.file_name().to_string_lossy().to_lowercase();
|
||||
is_image_name(&name)
|
||||
})
|
||||
.collect();
|
||||
image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));
|
||||
|
||||
let image_count = image_files.len();
|
||||
if image_count == 0 {
|
||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
||||
if images.is_empty() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"no images found in CBR: {}",
|
||||
cbr_path.display()
|
||||
));
|
||||
}
|
||||
|
||||
images.sort_by(|(a, _), (b, _)| natord::compare(a, b));
|
||||
let image_count = images.len();
|
||||
|
||||
// Pack images into the .cbz.tmp file
|
||||
let pack_result = (|| -> Result<()> {
|
||||
let cbz_file = std::fs::File::create(&tmp_path)
|
||||
@@ -545,21 +431,16 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
||||
let options = zip::write::SimpleFileOptions::default()
|
||||
.compression_method(zip::CompressionMethod::Deflated);
|
||||
|
||||
for entry in &image_files {
|
||||
let file_name = entry.file_name().to_string_lossy().to_string();
|
||||
zip.start_file(&file_name, options)
|
||||
for (file_name, data) in &images {
|
||||
zip.start_file(file_name, options)
|
||||
.with_context(|| format!("cannot add file {} to zip", file_name))?;
|
||||
let data = std::fs::read(entry.path())
|
||||
.with_context(|| format!("cannot read {}", entry.path().display()))?;
|
||||
zip.write_all(&data)
|
||||
zip.write_all(data)
|
||||
.with_context(|| format!("cannot write {} to zip", file_name))?;
|
||||
}
|
||||
zip.finish().context("cannot finalize zip")?;
|
||||
Ok(())
|
||||
})();
|
||||
|
||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
||||
|
||||
if let Err(err) = pack_result {
|
||||
let _ = std::fs::remove_file(&tmp_path);
|
||||
return Err(err);
|
||||
@@ -593,7 +474,6 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
||||
return Err(err);
|
||||
}
|
||||
|
||||
// Atomic rename .cbz.tmp → .cbz
|
||||
std::fs::rename(&tmp_path, &cbz_path)
|
||||
.with_context(|| format!("cannot rename {} to {}", tmp_path.display(), cbz_path.display()))?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user