fix(parsers): corriger la génération de thumbnails CBR/CBZ/PDF
- CBR: contourner le bug XADRegexException de unar en appelant unar avec un symlink à nom neutre (archive.cbr) au lieu du chemin réel, qui peut contenir des caractères regex spéciaux comme [ ] ( )
- CBR/CBZ: remplacer le tri lexicographique par natord (tri naturel) pour que page2.jpg soit trié avant page10.jpg
- PDF: brancher pdftoppm -scale-to sur config.width.max(config.height) au lieu d'une valeur hardcodée (800px → 400px par défaut)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
7
Cargo.lock
generated
7
Cargo.lock
generated
@@ -1485,6 +1485,12 @@ dependencies = [
|
|||||||
"pxfm",
|
"pxfm",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "natord"
|
||||||
|
version = "1.0.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nom"
|
name = "nom"
|
||||||
version = "7.1.3"
|
version = "7.1.3"
|
||||||
@@ -1627,6 +1633,7 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"lopdf",
|
"lopdf",
|
||||||
|
"natord",
|
||||||
"regex",
|
"regex",
|
||||||
"uuid",
|
"uuid",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ tower = { version = "0.5", features = ["limit"] }
|
|||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||||
uuid = { version = "1.12", features = ["serde", "v4"] }
|
uuid = { version = "1.12", features = ["serde", "v4"] }
|
||||||
|
natord = "1.0"
|
||||||
walkdir = "2.5"
|
walkdir = "2.5"
|
||||||
webp = "0.3"
|
webp = "0.3"
|
||||||
utoipa = "4.0"
|
utoipa = "4.0"
|
||||||
|
|||||||
@@ -247,8 +247,9 @@ pub async fn analyze_library_books(
|
|||||||
// Run blocking archive I/O on a thread pool
|
// Run blocking archive I/O on a thread pool
|
||||||
let book_id = task.book_id;
|
let book_id = task.book_id;
|
||||||
let path_owned = path.to_path_buf();
|
let path_owned = path.to_path_buf();
|
||||||
|
let pdf_scale = config.width.max(config.height);
|
||||||
let analyze_result = tokio::task::spawn_blocking(move || {
|
let analyze_result = tokio::task::spawn_blocking(move || {
|
||||||
analyze_book(&path_owned, format)
|
analyze_book(&path_owned, format, pdf_scale)
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ license.workspace = true
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
|
natord.workspace = true
|
||||||
lopdf = "0.35"
|
lopdf = "0.35"
|
||||||
regex = "1"
|
regex = "1"
|
||||||
uuid.workspace = true
|
uuid.workspace = true
|
||||||
|
|||||||
@@ -153,11 +153,12 @@ pub fn parse_metadata(
|
|||||||
|
|
||||||
/// Open an archive once and return (page_count, first_page_bytes).
|
/// Open an archive once and return (page_count, first_page_bytes).
|
||||||
/// This is more efficient than calling parse_metadata + extract_first_page separately.
|
/// This is more efficient than calling parse_metadata + extract_first_page separately.
|
||||||
pub fn analyze_book(path: &Path, format: BookFormat) -> Result<(i32, Vec<u8>)> {
|
/// `pdf_render_scale`: max dimension (width or height) used by pdftoppm; 0 means use default (400).
|
||||||
|
pub fn analyze_book(path: &Path, format: BookFormat, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
|
||||||
match format {
|
match format {
|
||||||
BookFormat::Cbz => analyze_cbz(path),
|
BookFormat::Cbz => analyze_cbz(path),
|
||||||
BookFormat::Cbr => analyze_cbr(path),
|
BookFormat::Cbr => analyze_cbr(path),
|
||||||
BookFormat::Pdf => analyze_pdf(path),
|
BookFormat::Pdf => analyze_pdf(path, pdf_render_scale),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -174,7 +175,7 @@ fn analyze_cbz(path: &Path) -> Result<(i32, Vec<u8>)> {
|
|||||||
image_names.push(entry.name().to_string());
|
image_names.push(entry.name().to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
image_names.sort();
|
image_names.sort_by(|a, b| natord::compare(a, b));
|
||||||
|
|
||||||
let count = image_names.len() as i32;
|
let count = image_names.len() as i32;
|
||||||
let first_image = image_names.first().context("no images found in cbz")?;
|
let first_image = image_names.first().context("no images found in cbz")?;
|
||||||
@@ -198,12 +199,13 @@ fn list_cbr_images(path: &Path) -> Result<Vec<String>> {
|
|||||||
|
|
||||||
if output.status.success() {
|
if output.status.success() {
|
||||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
let images: Vec<String> = stdout
|
let mut images: Vec<String> = stdout
|
||||||
.lines()
|
.lines()
|
||||||
.map(|l| l.trim().to_string())
|
.map(|l| l.trim().to_string())
|
||||||
.filter(|line| is_image_name(&line.to_ascii_lowercase()))
|
.filter(|line| is_image_name(&line.to_ascii_lowercase()))
|
||||||
.collect();
|
.collect();
|
||||||
if !images.is_empty() {
|
if !images.is_empty() {
|
||||||
|
images.sort_by(|a, b| natord::compare(a, b));
|
||||||
return Ok(images);
|
return Ok(images);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -223,12 +225,13 @@ fn list_cbr_images(path: &Path) -> Result<Vec<String>> {
|
|||||||
|
|
||||||
let stdout = String::from_utf8_lossy(&lsar_output.stdout);
|
let stdout = String::from_utf8_lossy(&lsar_output.stdout);
|
||||||
// lsar output: first line is archive info, then one file per line (indented)
|
// lsar output: first line is archive info, then one file per line (indented)
|
||||||
let images: Vec<String> = stdout
|
let mut images: Vec<String> = stdout
|
||||||
.lines()
|
.lines()
|
||||||
.skip(1) // skip the archive header line
|
.skip(1) // skip the archive header line
|
||||||
.map(|l| l.trim().to_string())
|
.map(|l| l.trim().to_string())
|
||||||
.filter(|line| is_image_name(&line.to_ascii_lowercase()))
|
.filter(|line| is_image_name(&line.to_ascii_lowercase()))
|
||||||
.collect();
|
.collect();
|
||||||
|
images.sort_by(|a, b| natord::compare(a, b));
|
||||||
|
|
||||||
Ok(images)
|
Ok(images)
|
||||||
}
|
}
|
||||||
@@ -281,9 +284,9 @@ fn looks_like_image(bytes: &[u8]) -> bool {
|
|||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
fn analyze_pdf(path: &Path) -> Result<(i32, Vec<u8>)> {
|
fn analyze_pdf(path: &Path, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
|
||||||
let count = parse_pdf_page_count(path)?;
|
let count = parse_pdf_page_count(path)?;
|
||||||
let image_bytes = extract_pdf_first_page(path)?;
|
let image_bytes = extract_pdf_first_page(path, pdf_render_scale)?;
|
||||||
Ok((count, image_bytes))
|
Ok((count, image_bytes))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -354,7 +357,7 @@ pub fn extract_first_page(path: &Path, format: BookFormat) -> Result<Vec<u8>> {
|
|||||||
let first_name = image_names.into_iter().next().context("no images found in cbr")?;
|
let first_name = image_names.into_iter().next().context("no images found in cbr")?;
|
||||||
extract_cbr_first_page(path, &first_name)
|
extract_cbr_first_page(path, &first_name)
|
||||||
}
|
}
|
||||||
BookFormat::Pdf => extract_pdf_first_page(path),
|
BookFormat::Pdf => extract_pdf_first_page(path, 0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -371,7 +374,7 @@ fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> {
|
|||||||
image_names.push(entry.name().to_string());
|
image_names.push(entry.name().to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
image_names.sort();
|
image_names.sort_by(|a, b| natord::compare(a, b));
|
||||||
|
|
||||||
let first_image = image_names.first().context("no images found in cbz")?;
|
let first_image = image_names.first().context("no images found in cbz")?;
|
||||||
|
|
||||||
@@ -383,26 +386,36 @@ fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> {
|
|||||||
Ok(buf)
|
Ok(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_cbr_first_page(path: &Path, first_name: &str) -> Result<Vec<u8>> {
|
fn extract_cbr_first_page(path: &Path, _first_name: &str) -> Result<Vec<u8>> {
|
||||||
let tmp_dir = std::env::temp_dir().join(format!("stripstream-cbr-thumb-{}", Uuid::new_v4()));
|
let work_dir = std::env::temp_dir().join(format!("stripstream-cbr-thumb-{}", Uuid::new_v4()));
|
||||||
std::fs::create_dir_all(&tmp_dir).context("cannot create temp dir")?;
|
let extract_dir = work_dir.join("out");
|
||||||
|
std::fs::create_dir_all(&extract_dir).context("cannot create temp dir")?;
|
||||||
|
|
||||||
|
// unar constructs internal regexes from (archive_path + "/" + internal_path).
|
||||||
|
// Archive filenames containing regex special chars like `[`, `]`, `(`, `)` cause
|
||||||
|
// XADRegexException. Work around by giving unar a safe symlink name.
|
||||||
|
let safe_path = work_dir.join("archive.cbr");
|
||||||
|
if std::os::unix::fs::symlink(path, &safe_path).is_err() {
|
||||||
|
// Cross-filesystem fallback: copy (slower but safe)
|
||||||
|
std::fs::copy(path, &safe_path).context("cannot copy cbr to temp dir")?;
|
||||||
|
}
|
||||||
|
|
||||||
let output = std::process::Command::new("env")
|
let output = std::process::Command::new("env")
|
||||||
.args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
|
.args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
|
||||||
.arg(&tmp_dir)
|
.arg(&extract_dir)
|
||||||
.arg(path)
|
.arg(&safe_path)
|
||||||
.output()
|
.output()
|
||||||
.context("unar failed")?;
|
.context("unar failed")?;
|
||||||
|
|
||||||
if !output.status.success() {
|
if !output.status.success() {
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
let _ = std::fs::remove_dir_all(&work_dir);
|
||||||
return Err(anyhow::anyhow!(
|
return Err(anyhow::anyhow!(
|
||||||
"unar extract failed: {:?}",
|
"unar extract failed: {:?}",
|
||||||
String::from_utf8_lossy(&output.stderr)
|
String::from_utf8_lossy(&output.stderr)
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut image_files: Vec<_> = WalkDir::new(&tmp_dir)
|
let mut image_files: Vec<_> = WalkDir::new(&extract_dir)
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|e| e.ok())
|
.filter_map(|e| e.ok())
|
||||||
.filter(|e| {
|
.filter(|e| {
|
||||||
@@ -411,19 +424,21 @@ fn extract_cbr_first_page(path: &Path, first_name: &str) -> Result<Vec<u8>> {
|
|||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
image_files.sort_by_key(|e| e.path().to_string_lossy().to_lowercase());
|
image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));
|
||||||
|
|
||||||
let first_image = image_files.first().context("no images found in cbr")?;
|
let first_image = image_files.first().context("no images found in cbr")?;
|
||||||
|
|
||||||
let data = std::fs::read(first_image.path())?;
|
let data = std::fs::read(first_image.path())?;
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
let _ = std::fs::remove_dir_all(&work_dir);
|
||||||
Ok(data)
|
Ok(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_pdf_first_page(path: &Path) -> Result<Vec<u8>> {
|
fn extract_pdf_first_page(path: &Path, pdf_render_scale: u32) -> Result<Vec<u8>> {
|
||||||
let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-thumb-{}", Uuid::new_v4()));
|
let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-thumb-{}", Uuid::new_v4()));
|
||||||
std::fs::create_dir_all(&tmp_dir)?;
|
std::fs::create_dir_all(&tmp_dir)?;
|
||||||
let output_prefix = tmp_dir.join("page");
|
let output_prefix = tmp_dir.join("page");
|
||||||
|
let scale = if pdf_render_scale == 0 { 400 } else { pdf_render_scale };
|
||||||
|
let scale_str = scale.to_string();
|
||||||
|
|
||||||
let output = Command::new("pdftoppm")
|
let output = Command::new("pdftoppm")
|
||||||
.args([
|
.args([
|
||||||
@@ -432,7 +447,7 @@ fn extract_pdf_first_page(path: &Path) -> Result<Vec<u8>> {
|
|||||||
"-singlefile",
|
"-singlefile",
|
||||||
"-png",
|
"-png",
|
||||||
"-scale-to",
|
"-scale-to",
|
||||||
"800",
|
&scale_str,
|
||||||
path.to_str().unwrap(),
|
path.to_str().unwrap(),
|
||||||
output_prefix.to_str().unwrap(),
|
output_prefix.to_str().unwrap(),
|
||||||
])
|
])
|
||||||
@@ -511,7 +526,7 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
|||||||
is_image_name(&name)
|
is_image_name(&name)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
image_files.sort_by_key(|e| e.path().to_string_lossy().to_lowercase());
|
image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));
|
||||||
|
|
||||||
let image_count = image_files.len();
|
let image_count = image_files.len();
|
||||||
if image_count == 0 {
|
if image_count == 0 {
|
||||||
|
|||||||
Reference in New Issue
Block a user