fix(parsers): corriger la génération de thumbnails CBR/CBZ/PDF

- CBR: contourner le bug XADRegexException de unar en appelant unar
  avec un symlink à nom neutre (archive.cbr) au lieu du chemin réel,
  qui peut contenir des caractères regex spéciaux comme [ ] ( )
- CBR/CBZ: remplacer le tri lexicographique par natord (tri naturel)
  pour que page2.jpg soit trié avant page10.jpg
- PDF: brancher pdftoppm -scale-to sur config.width.max(config.height)
  au lieu d'une valeur codée en dur (800px auparavant, 400px par défaut désormais)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-11 16:17:20 +01:00
parent 1c106a4ff2
commit f2d9bedcc7
5 changed files with 47 additions and 22 deletions

7
Cargo.lock generated
View File

@@ -1485,6 +1485,12 @@ dependencies = [
"pxfm", "pxfm",
] ]
[[package]]
name = "natord"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c"
[[package]] [[package]]
name = "nom" name = "nom"
version = "7.1.3" version = "7.1.3"
@@ -1627,6 +1633,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"lopdf", "lopdf",
"natord",
"regex", "regex",
"uuid", "uuid",
"walkdir", "walkdir",

View File

@@ -32,6 +32,7 @@ tower = { version = "0.5", features = ["limit"] }
tracing = "0.1" tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
uuid = { version = "1.12", features = ["serde", "v4"] } uuid = { version = "1.12", features = ["serde", "v4"] }
natord = "1.0"
walkdir = "2.5" walkdir = "2.5"
webp = "0.3" webp = "0.3"
utoipa = "4.0" utoipa = "4.0"

View File

@@ -247,8 +247,9 @@ pub async fn analyze_library_books(
// Run blocking archive I/O on a thread pool // Run blocking archive I/O on a thread pool
let book_id = task.book_id; let book_id = task.book_id;
let path_owned = path.to_path_buf(); let path_owned = path.to_path_buf();
let pdf_scale = config.width.max(config.height);
let analyze_result = tokio::task::spawn_blocking(move || { let analyze_result = tokio::task::spawn_blocking(move || {
analyze_book(&path_owned, format) analyze_book(&path_owned, format, pdf_scale)
}) })
.await; .await;

View File

@@ -6,6 +6,7 @@ license.workspace = true
[dependencies] [dependencies]
anyhow.workspace = true anyhow.workspace = true
natord.workspace = true
lopdf = "0.35" lopdf = "0.35"
regex = "1" regex = "1"
uuid.workspace = true uuid.workspace = true

View File

@@ -153,11 +153,12 @@ pub fn parse_metadata(
/// Open an archive once and return (page_count, first_page_bytes). /// Open an archive once and return (page_count, first_page_bytes).
/// This is more efficient than calling parse_metadata + extract_first_page separately. /// This is more efficient than calling parse_metadata + extract_first_page separately.
pub fn analyze_book(path: &Path, format: BookFormat) -> Result<(i32, Vec<u8>)> { /// `pdf_render_scale`: max dimension (width or height) used by pdftoppm; 0 means use default (400).
pub fn analyze_book(path: &Path, format: BookFormat, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
match format { match format {
BookFormat::Cbz => analyze_cbz(path), BookFormat::Cbz => analyze_cbz(path),
BookFormat::Cbr => analyze_cbr(path), BookFormat::Cbr => analyze_cbr(path),
BookFormat::Pdf => analyze_pdf(path), BookFormat::Pdf => analyze_pdf(path, pdf_render_scale),
} }
} }
@@ -174,7 +175,7 @@ fn analyze_cbz(path: &Path) -> Result<(i32, Vec<u8>)> {
image_names.push(entry.name().to_string()); image_names.push(entry.name().to_string());
} }
} }
image_names.sort(); image_names.sort_by(|a, b| natord::compare(a, b));
let count = image_names.len() as i32; let count = image_names.len() as i32;
let first_image = image_names.first().context("no images found in cbz")?; let first_image = image_names.first().context("no images found in cbz")?;
@@ -198,12 +199,13 @@ fn list_cbr_images(path: &Path) -> Result<Vec<String>> {
if output.status.success() { if output.status.success() {
let stdout = String::from_utf8_lossy(&output.stdout); let stdout = String::from_utf8_lossy(&output.stdout);
let images: Vec<String> = stdout let mut images: Vec<String> = stdout
.lines() .lines()
.map(|l| l.trim().to_string()) .map(|l| l.trim().to_string())
.filter(|line| is_image_name(&line.to_ascii_lowercase())) .filter(|line| is_image_name(&line.to_ascii_lowercase()))
.collect(); .collect();
if !images.is_empty() { if !images.is_empty() {
images.sort_by(|a, b| natord::compare(a, b));
return Ok(images); return Ok(images);
} }
} }
@@ -223,12 +225,13 @@ fn list_cbr_images(path: &Path) -> Result<Vec<String>> {
let stdout = String::from_utf8_lossy(&lsar_output.stdout); let stdout = String::from_utf8_lossy(&lsar_output.stdout);
// lsar output: first line is archive info, then one file per line (indented) // lsar output: first line is archive info, then one file per line (indented)
let images: Vec<String> = stdout let mut images: Vec<String> = stdout
.lines() .lines()
.skip(1) // skip the archive header line .skip(1) // skip the archive header line
.map(|l| l.trim().to_string()) .map(|l| l.trim().to_string())
.filter(|line| is_image_name(&line.to_ascii_lowercase())) .filter(|line| is_image_name(&line.to_ascii_lowercase()))
.collect(); .collect();
images.sort_by(|a, b| natord::compare(a, b));
Ok(images) Ok(images)
} }
@@ -281,9 +284,9 @@ fn looks_like_image(bytes: &[u8]) -> bool {
false false
} }
fn analyze_pdf(path: &Path) -> Result<(i32, Vec<u8>)> { fn analyze_pdf(path: &Path, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
let count = parse_pdf_page_count(path)?; let count = parse_pdf_page_count(path)?;
let image_bytes = extract_pdf_first_page(path)?; let image_bytes = extract_pdf_first_page(path, pdf_render_scale)?;
Ok((count, image_bytes)) Ok((count, image_bytes))
} }
@@ -354,7 +357,7 @@ pub fn extract_first_page(path: &Path, format: BookFormat) -> Result<Vec<u8>> {
let first_name = image_names.into_iter().next().context("no images found in cbr")?; let first_name = image_names.into_iter().next().context("no images found in cbr")?;
extract_cbr_first_page(path, &first_name) extract_cbr_first_page(path, &first_name)
} }
BookFormat::Pdf => extract_pdf_first_page(path), BookFormat::Pdf => extract_pdf_first_page(path, 0),
} }
} }
@@ -371,7 +374,7 @@ fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> {
image_names.push(entry.name().to_string()); image_names.push(entry.name().to_string());
} }
} }
image_names.sort(); image_names.sort_by(|a, b| natord::compare(a, b));
let first_image = image_names.first().context("no images found in cbz")?; let first_image = image_names.first().context("no images found in cbz")?;
@@ -383,26 +386,36 @@ fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> {
Ok(buf) Ok(buf)
} }
fn extract_cbr_first_page(path: &Path, first_name: &str) -> Result<Vec<u8>> { fn extract_cbr_first_page(path: &Path, _first_name: &str) -> Result<Vec<u8>> {
let tmp_dir = std::env::temp_dir().join(format!("stripstream-cbr-thumb-{}", Uuid::new_v4())); let work_dir = std::env::temp_dir().join(format!("stripstream-cbr-thumb-{}", Uuid::new_v4()));
std::fs::create_dir_all(&tmp_dir).context("cannot create temp dir")?; let extract_dir = work_dir.join("out");
std::fs::create_dir_all(&extract_dir).context("cannot create temp dir")?;
// unar constructs internal regexes from (archive_path + "/" + internal_path).
// Archive filenames containing regex special chars like `[`, `]`, `(`, `)` cause
// XADRegexException. Work around by giving unar a safe symlink name.
let safe_path = work_dir.join("archive.cbr");
if std::os::unix::fs::symlink(path, &safe_path).is_err() {
// Cross-filesystem fallback: copy (slower but safe)
std::fs::copy(path, &safe_path).context("cannot copy cbr to temp dir")?;
}
let output = std::process::Command::new("env") let output = std::process::Command::new("env")
.args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"]) .args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
.arg(&tmp_dir) .arg(&extract_dir)
.arg(path) .arg(&safe_path)
.output() .output()
.context("unar failed")?; .context("unar failed")?;
if !output.status.success() { if !output.status.success() {
let _ = std::fs::remove_dir_all(&tmp_dir); let _ = std::fs::remove_dir_all(&work_dir);
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"unar extract failed: {:?}", "unar extract failed: {:?}",
String::from_utf8_lossy(&output.stderr) String::from_utf8_lossy(&output.stderr)
)); ));
} }
let mut image_files: Vec<_> = WalkDir::new(&tmp_dir) let mut image_files: Vec<_> = WalkDir::new(&extract_dir)
.into_iter() .into_iter()
.filter_map(|e| e.ok()) .filter_map(|e| e.ok())
.filter(|e| { .filter(|e| {
@@ -411,19 +424,21 @@ fn extract_cbr_first_page(path: &Path, first_name: &str) -> Result<Vec<u8>> {
}) })
.collect(); .collect();
image_files.sort_by_key(|e| e.path().to_string_lossy().to_lowercase()); image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));
let first_image = image_files.first().context("no images found in cbr")?; let first_image = image_files.first().context("no images found in cbr")?;
let data = std::fs::read(first_image.path())?; let data = std::fs::read(first_image.path())?;
let _ = std::fs::remove_dir_all(&tmp_dir); let _ = std::fs::remove_dir_all(&work_dir);
Ok(data) Ok(data)
} }
fn extract_pdf_first_page(path: &Path) -> Result<Vec<u8>> { fn extract_pdf_first_page(path: &Path, pdf_render_scale: u32) -> Result<Vec<u8>> {
let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-thumb-{}", Uuid::new_v4())); let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-thumb-{}", Uuid::new_v4()));
std::fs::create_dir_all(&tmp_dir)?; std::fs::create_dir_all(&tmp_dir)?;
let output_prefix = tmp_dir.join("page"); let output_prefix = tmp_dir.join("page");
let scale = if pdf_render_scale == 0 { 400 } else { pdf_render_scale };
let scale_str = scale.to_string();
let output = Command::new("pdftoppm") let output = Command::new("pdftoppm")
.args([ .args([
@@ -432,7 +447,7 @@ fn extract_pdf_first_page(path: &Path) -> Result<Vec<u8>> {
"-singlefile", "-singlefile",
"-png", "-png",
"-scale-to", "-scale-to",
"800", &scale_str,
path.to_str().unwrap(), path.to_str().unwrap(),
output_prefix.to_str().unwrap(), output_prefix.to_str().unwrap(),
]) ])
@@ -511,7 +526,7 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
is_image_name(&name) is_image_name(&name)
}) })
.collect(); .collect();
image_files.sort_by_key(|e| e.path().to_string_lossy().to_lowercase()); image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));
let image_count = image_files.len(); let image_count = image_files.len();
if image_count == 0 { if image_count == 0 {