Compare commits

..

2 Commits

Author SHA1 Message Date
f71ca92e85 chore: corriger whitespace et paths dans .env.example
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 13:26:42 +01:00
7cca7e40c2 fix(parsers,api,indexer,backoffice): corriger CBZ Unicode extra fields, centraliser extraction, nettoyer Meili, fixer header
- Parsers: raw ZIP reader (flate2) contournant la validation CRC32 des
  Unicode extra fields (0x7075) qui bloquait certains CBZ
- Parsers: nouvelle API publique extract_page() pour extraire une page
  par index depuis CBZ/CBR/PDF avec fallbacks automatiques
- API: suppression du code d'extraction dupliqué, délégation à parsers::extract_page()
- API: retrait des dépendances directes zip/unrar/pdfium-render/natord
- Indexer: nettoyage Meili systématique à chaque sync (au lieu de ~10%)
  avec pagination pour supporter les grosses collections — corrige les
  doublons dans la recherche
- Indexer: retrait de la dépendance rand (plus utilisée)
- Backoffice: popin jobs rendue via createPortal avec positionnement
  dynamique — corrige le débordement desktop et le header cassé en mobile

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 13:26:14 +01:00
9 changed files with 695 additions and 608 deletions

View File

@@ -46,11 +46,11 @@ LIBRARIES_ROOT_PATH=/libraries
# Path to libraries directory on host machine (for Docker volume mount) # Path to libraries directory on host machine (for Docker volume mount)
# Default: ../libraries (relative to infra/docker-compose.yml) # Default: ../libraries (relative to infra/docker-compose.yml)
# You can change this to an absolute path on your machine # You can change this to an absolute path on your machine
LIBRARIES_HOST_PATH=../libraries LIBRARIES_HOST_PATH=./libraries
# Path to thumbnails directory on host machine (for Docker volume mount) # Path to thumbnails directory on host machine (for Docker volume mount)
# Default: ../data/thumbnails (relative to infra/docker-compose.yml) # Default: ../data/thumbnails (relative to infra/docker-compose.yml)
THUMBNAILS_HOST_PATH=../data/thumbnails THUMBNAILS_HOST_PATH=./data/thumbnails
# ============================================================================= # =============================================================================
# Port Configuration # Port Configuration

210
Cargo.lock generated
View File

@@ -62,8 +62,7 @@ dependencies = [
"futures", "futures",
"image", "image",
"lru", "lru",
"natord", "parsers",
"pdfium-render",
"rand 0.8.5", "rand 0.8.5",
"reqwest", "reqwest",
"serde", "serde",
@@ -77,21 +76,10 @@ dependencies = [
"tower-http", "tower-http",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"unrar",
"utoipa", "utoipa",
"utoipa-swagger-ui", "utoipa-swagger-ui",
"uuid", "uuid",
"webp", "webp",
"zip 2.4.2",
]
[[package]]
name = "arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
dependencies = [
"derive_arbitrary",
] ]
[[package]] [[package]]
@@ -436,15 +424,6 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "crossbeam-channel"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"crossbeam-utils",
]
[[package]] [[package]]
name = "crossbeam-deque" name = "crossbeam-deque"
version = "0.8.6" version = "0.8.6"
@@ -509,17 +488,6 @@ dependencies = [
"powerfmt", "powerfmt",
] ]
[[package]]
name = "derive_arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]] [[package]]
name = "digest" name = "digest"
version = "0.10.7" version = "0.10.7"
@@ -549,6 +517,15 @@ version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
[[package]]
name = "ecb"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a8bfa975b1aec2145850fcaa1c6fe269a16578c44705a532ae3edc92b8881c7"
dependencies = [
"cipher",
]
[[package]] [[package]]
name = "either" name = "either"
version = "1.15.0" version = "1.15.0"
@@ -614,17 +591,6 @@ dependencies = [
"simd-adler32", "simd-adler32",
] ]
[[package]]
name = "filetime"
version = "0.2.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db"
dependencies = [
"cfg-if",
"libc",
"libredox",
]
[[package]] [[package]]
name = "find-msvc-tools" name = "find-msvc-tools"
version = "0.1.9" version = "0.1.9"
@@ -639,6 +605,7 @@ checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
dependencies = [ dependencies = [
"crc32fast", "crc32fast",
"miniz_oxide", "miniz_oxide",
"zlib-rs",
] ]
[[package]] [[package]]
@@ -1179,7 +1146,6 @@ dependencies = [
"notify", "notify",
"num_cpus", "num_cpus",
"parsers", "parsers",
"rand 0.8.5",
"rayon", "rayon",
"reqwest", "reqwest",
"serde", "serde",
@@ -1209,11 +1175,11 @@ dependencies = [
[[package]] [[package]]
name = "inotify" name = "inotify"
version = "0.9.6" version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" checksum = "bd5b3eaf1a28b758ac0faa5a4254e8ab2705605496f1b1f3fbbc3988ad73d199"
dependencies = [ dependencies = [
"bitflags 1.3.2", "bitflags 2.11.0",
"inotify-sys", "inotify-sys",
"libc", "libc",
] ]
@@ -1268,6 +1234,47 @@ version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "jiff"
version = "0.2.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359"
dependencies = [
"jiff-static",
"jiff-tzdb-platform",
"log",
"portable-atomic",
"portable-atomic-util",
"serde_core",
"windows-sys 0.61.2",
]
[[package]]
name = "jiff-static"
version = "0.2.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "jiff-tzdb"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076"
[[package]]
name = "jiff-tzdb-platform"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8"
dependencies = [
"jiff-tzdb",
]
[[package]] [[package]]
name = "jobserver" name = "jobserver"
version = "0.1.34" version = "0.1.34"
@@ -1400,25 +1407,33 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]] [[package]]
name = "lopdf" name = "lopdf"
version = "0.35.0" version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7c1d3350d071cb86987a6bcb205c7019a0eb70dcad92b454fec722cca8d68b" checksum = "f560f57dfb9142a02d673e137622fd515d4231e51feb8b4af28d92647d83f35b"
dependencies = [ dependencies = [
"aes", "aes",
"bitflags 2.11.0",
"cbc", "cbc",
"chrono", "chrono",
"ecb",
"encoding_rs", "encoding_rs",
"flate2", "flate2",
"getrandom 0.3.4",
"indexmap", "indexmap",
"itoa", "itoa",
"jiff",
"log", "log",
"md-5", "md-5",
"nom", "nom",
"nom_locate", "nom_locate",
"rand 0.9.2",
"rangemap", "rangemap",
"rayon", "rayon",
"sha2",
"stringprep",
"thiserror", "thiserror",
"time", "time",
"ttf-parser",
"weezl", "weezl",
] ]
@@ -1490,12 +1505,6 @@ dependencies = [
"unicase", "unicase",
] ]
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]] [[package]]
name = "miniz_oxide" name = "miniz_oxide"
version = "0.8.9" version = "0.8.9"
@@ -1506,18 +1515,6 @@ dependencies = [
"simd-adler32", "simd-adler32",
] ]
[[package]]
name = "mio"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"log",
"wasi",
"windows-sys 0.48.0",
]
[[package]] [[package]]
name = "mio" name = "mio"
version = "1.1.1" version = "1.1.1"
@@ -1525,6 +1522,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
dependencies = [ dependencies = [
"libc", "libc",
"log",
"wasi", "wasi",
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
@@ -1547,19 +1545,18 @@ checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c"
[[package]] [[package]]
name = "nom" name = "nom"
version = "7.1.3" version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
dependencies = [ dependencies = [
"memchr", "memchr",
"minimal-lexical",
] ]
[[package]] [[package]]
name = "nom_locate" name = "nom_locate"
version = "4.2.0" version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3" checksum = "0b577e2d69827c4740cba2b52efaad1c4cc7c73042860b199710b3575c68438d"
dependencies = [ dependencies = [
"bytecount", "bytecount",
"memchr", "memchr",
@@ -1568,21 +1565,29 @@ dependencies = [
[[package]] [[package]]
name = "notify" name = "notify"
version = "6.1.1" version = "8.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d" checksum = "4d3d07927151ff8575b7087f245456e549fea62edf0ec4e565a5ee50c8402bc3"
dependencies = [ dependencies = [
"bitflags 2.11.0", "bitflags 2.11.0",
"crossbeam-channel",
"filetime",
"fsevent-sys", "fsevent-sys",
"inotify", "inotify",
"kqueue", "kqueue",
"libc", "libc",
"log", "log",
"mio 0.8.11", "mio",
"notify-types",
"walkdir", "walkdir",
"windows-sys 0.48.0", "windows-sys 0.60.2",
]
[[package]]
name = "notify-types"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42b8cfee0e339a0337359f3c88165702ac6e600dc01c0cc9579a92d62b08477a"
dependencies = [
"bitflags 2.11.0",
] ]
[[package]] [[package]]
@@ -1696,13 +1701,14 @@ name = "parsers"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"flate2",
"image", "image",
"lopdf", "lopdf",
"natord", "natord",
"pdfium-render", "pdfium-render",
"regex", "regex",
"unrar", "unrar",
"zip 2.4.2", "zip 8.2.0",
] ]
[[package]] [[package]]
@@ -1821,6 +1827,21 @@ dependencies = [
"miniz_oxide", "miniz_oxide",
] ]
[[package]]
name = "portable-atomic"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
[[package]]
name = "portable-atomic-util"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5"
dependencies = [
"portable-atomic",
]
[[package]] [[package]]
name = "potential_utf" name = "potential_utf"
version = "0.1.4" version = "0.1.4"
@@ -2824,7 +2845,7 @@ checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d"
dependencies = [ dependencies = [
"bytes", "bytes",
"libc", "libc",
"mio 1.1.1", "mio",
"pin-project-lite", "pin-project-lite",
"signal-hook-registry", "signal-hook-registry",
"socket2", "socket2",
@@ -2992,6 +3013,18 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "ttf-parser"
version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2df906b07856748fa3f6e0ad0cbaa047052d4a7dd609e231c4f72cee8c36f31"
[[package]]
name = "typed-path"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e"
[[package]] [[package]]
name = "typenum" name = "typenum"
version = "1.19.0" version = "1.19.0"
@@ -3907,21 +3940,24 @@ dependencies = [
[[package]] [[package]]
name = "zip" name = "zip"
version = "2.4.2" version = "8.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50" checksum = "b680f2a0cd479b4cff6e1233c483fdead418106eae419dc60200ae9850f6d004"
dependencies = [ dependencies = [
"arbitrary",
"crc32fast", "crc32fast",
"crossbeam-utils",
"displaydoc",
"flate2", "flate2",
"indexmap", "indexmap",
"memchr", "memchr",
"thiserror", "typed-path",
"zopfli", "zopfli",
] ]
[[package]]
name = "zlib-rs"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513"
[[package]] [[package]]
name = "zmij" name = "zmij"
version = "1.0.21" version = "1.0.21"

View File

@@ -15,6 +15,7 @@ futures = "0.3"
image.workspace = true image.workspace = true
lru.workspace = true lru.workspace = true
stripstream-core = { path = "../../crates/core" } stripstream-core = { path = "../../crates/core" }
parsers = { path = "../../crates/parsers" }
rand.workspace = true rand.workspace = true
tokio-stream = "0.1" tokio-stream = "0.1"
reqwest.workspace = true reqwest.workspace = true
@@ -28,10 +29,6 @@ tower-http = { version = "0.6", features = ["cors"] }
tracing.workspace = true tracing.workspace = true
tracing-subscriber.workspace = true tracing-subscriber.workspace = true
uuid.workspace = true uuid.workspace = true
natord.workspace = true
pdfium-render.workspace = true
unrar.workspace = true
zip = { version = "8", default-features = false, features = ["deflate"] }
utoipa.workspace = true utoipa.workspace = true
utoipa-swagger-ui = { workspace = true, features = ["axum"] } utoipa-swagger-ui = { workspace = true, features = ["axum"] }
webp.workspace = true webp.workspace = true

View File

@@ -1,5 +1,5 @@
use std::{ use std::{
io::{Read, Write}, io::Write,
path::{Path, PathBuf}, path::{Path, PathBuf},
sync::{atomic::Ordering, Arc}, sync::{atomic::Ordering, Arc},
time::Duration, time::Duration,
@@ -351,241 +351,28 @@ fn render_page(
width: u32, width: u32,
filter: image::imageops::FilterType, filter: image::imageops::FilterType,
) -> Result<Vec<u8>, ApiError> { ) -> Result<Vec<u8>, ApiError> {
let page_bytes = match input_format { let format = match input_format {
"cbz" => extract_cbz_page(abs_path, page_number, true)?, "cbz" => parsers::BookFormat::Cbz,
"cbr" => extract_cbr_page(abs_path, page_number, true)?, "cbr" => parsers::BookFormat::Cbr,
"pdf" => render_pdf_page(abs_path, page_number, width)?, "pdf" => parsers::BookFormat::Pdf,
_ => return Err(ApiError::bad_request("unsupported source format")), _ => return Err(ApiError::bad_request("unsupported source format")),
}; };
let pdf_render_width = if width > 0 { width } else { 1200 };
let page_bytes = parsers::extract_page(
std::path::Path::new(abs_path),
format,
page_number,
pdf_render_width,
)
.map_err(|e| {
error!("Failed to extract page {} from {}: {}", page_number, abs_path, e);
ApiError::internal(format!("page extraction failed: {e}"))
})?;
transcode_image(&page_bytes, out_format, quality, width, filter) transcode_image(&page_bytes, out_format, quality, width, filter)
} }
/// Extracts the raw bytes of one page from a CBZ (ZIP) archive.
///
/// `page_number` is 1-based. Image entries are collected from the archive,
/// sorted in natural order, and the `page_number`-th one is read into memory.
///
/// When the archive cannot be opened as a ZIP and `allow_fallback` is `true`,
/// the file is first retried as a RAR archive (it may be a RAR carrying a
/// `.cbz` extension) and then through the streaming reader. With
/// `allow_fallback` set to `false` the ZIP error is returned directly, which
/// keeps the CBZ and CBR extractors from calling each other indefinitely.
///
/// # Errors
///
/// Returns `ApiError::not_found` when the file does not exist or the page is
/// out of range (including `page_number == 0`), and `ApiError::internal` for
/// any other I/O or archive failure.
fn extract_cbz_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>, ApiError> {
    debug!("Opening CBZ archive: {}", abs_path);
    let file = std::fs::File::open(abs_path).map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            ApiError::not_found("book file not accessible")
        } else {
            error!("Cannot open CBZ file {}: {}", abs_path, e);
            ApiError::internal(format!("cannot open cbz: {e}"))
        }
    })?;

    let mut archive = match zip::ZipArchive::new(file) {
        Ok(a) => a,
        Err(zip_err) => {
            if allow_fallback {
                // Try RAR fallback (file might be a RAR with .cbz extension)
                if let Ok(data) = extract_cbr_page(abs_path, page_number, false) {
                    return Ok(data);
                }
                // Streaming fallback: read local file headers without central directory
                warn!("CBZ central dir failed for {}, trying streaming: {}", abs_path, zip_err);
                return extract_cbz_page_streaming(abs_path, page_number);
            }
            error!("Invalid CBZ archive {}: {}", abs_path, zip_err);
            return Err(ApiError::internal(format!("invalid cbz: {zip_err}")));
        }
    };

    // Collect image entry names; unreadable entries are skipped rather than
    // failing the whole request.
    let mut image_names: Vec<String> = Vec::new();
    for i in 0..archive.len() {
        let entry = match archive.by_index(i) {
            Ok(e) => e,
            Err(e) => {
                warn!("Skipping corrupted CBZ entry {} in {}: {}", i, abs_path, e);
                continue;
            }
        };
        let name = entry.name().to_ascii_lowercase();
        if is_image_name(&name) {
            image_names.push(entry.name().to_string());
        }
    }
    image_names.sort_by(|a, b| natord::compare(a, b));
    debug!("Found {} images in CBZ {}", image_names.len(), abs_path);

    // `checked_sub` instead of `- 1`: page 0 must yield "out of range" rather
    // than an arithmetic-overflow panic in debug builds.
    let selected = (page_number as usize)
        .checked_sub(1)
        .and_then(|index| image_names.get(index))
        .ok_or_else(|| {
            error!("Page {} out of range in {} (total: {})", page_number, abs_path, image_names.len());
            ApiError::not_found("page out of range")
        })?;

    debug!("Extracting page {} ({}) from {}", page_number, selected, abs_path);
    let mut entry = archive.by_name(selected).map_err(|e| {
        error!("Failed to read CBZ page {} from {}: {}", selected, abs_path, e);
        ApiError::internal(format!("cbz page read failed: {e}"))
    })?;
    let mut buf = Vec::new();
    entry.read_to_end(&mut buf).map_err(|e| {
        error!("Failed to load CBZ page {} from {}: {}", selected, abs_path, e);
        ApiError::internal(format!("cbz page load failed: {e}"))
    })?;
    Ok(buf)
}
/// Extracts one page (1-based `page_number`) from a CBZ by streaming over the
/// file's entries instead of opening it as a `ZipArchive`.
///
/// The file is read twice: the first pass collects the names of all image
/// entries so they can be sorted in natural order, the second pass re-reads
/// the file from the start and returns the bytes of the selected entry.
///
/// # Errors
///
/// Returns `ApiError::not_found` when the file does not exist, the page is out
/// of range (including `page_number == 0`) or the selected entry cannot be
/// located on the second pass; `ApiError::internal` for other failures.
fn extract_cbz_page_streaming(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiError> {
    let file = std::fs::File::open(abs_path).map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            ApiError::not_found("book file not accessible")
        } else {
            ApiError::internal(format!("cannot open cbz: {e}"))
        }
    })?;
    let mut reader = std::io::BufReader::new(file);

    // First pass: list image entries, draining each entry's data to advance.
    let mut image_names: Vec<String> = Vec::new();
    loop {
        match zip::read::read_zipfile_from_stream(&mut reader) {
            Ok(Some(mut entry)) => {
                let name = entry.name().to_string();
                if is_image_name(&name.to_ascii_lowercase()) {
                    image_names.push(name);
                }
                std::io::copy(&mut entry, &mut std::io::sink())
                    .map_err(|e| ApiError::internal(format!("cbz stream skip: {e}")))?;
            }
            Ok(None) => break,
            Err(_) => {
                // Best effort: keep whatever was listed before the read error.
                if !image_names.is_empty() {
                    break;
                }
                return Err(ApiError::internal("cbz streaming read failed".to_string()));
            }
        }
    }
    image_names.sort_by(|a, b| natord::compare(a, b));

    // `checked_sub` instead of `- 1`: page 0 must yield "out of range" rather
    // than an arithmetic-overflow panic in debug builds.
    let target = (page_number as usize)
        .checked_sub(1)
        .and_then(|index| image_names.get(index))
        .ok_or_else(|| ApiError::not_found("page out of range"))?
        .clone();

    // Second pass: extract the target page
    let file2 = std::fs::File::open(abs_path)
        .map_err(|e| ApiError::internal(format!("cannot reopen cbz: {e}")))?;
    let mut reader2 = std::io::BufReader::new(file2);
    loop {
        match zip::read::read_zipfile_from_stream(&mut reader2) {
            Ok(Some(mut entry)) => {
                if entry.name() == target {
                    let mut buf = Vec::new();
                    entry
                        .read_to_end(&mut buf)
                        .map_err(|e| ApiError::internal(format!("cbz stream read: {e}")))?;
                    return Ok(buf);
                }
                std::io::copy(&mut entry, &mut std::io::sink())
                    .map_err(|e| ApiError::internal(format!("cbz stream skip: {e}")))?;
            }
            Ok(None) => break,
            Err(_) => break,
        }
    }
    Err(ApiError::not_found("page not found in archive"))
}
/// Extracts the raw bytes of one page (1-based `page_number`) from a CBR (RAR)
/// archive.
///
/// The archive is opened twice: once in listing mode to gather and naturally
/// sort the image entry names, then in processing mode to read only the
/// selected entry into memory while skipping the others.
///
/// When the archive cannot be opened for listing and `allow_fallback` is
/// `true`, the file is retried as a ZIP via `extract_cbz_page` with its own
/// fallback disabled, so the two extractors cannot recurse into each other.
///
/// # Errors
///
/// Returns `ApiError::not_found` when the page is out of range (including
/// `page_number == 0`) or the selected entry is not encountered during
/// processing; `ApiError::internal` for any unrar failure.
fn extract_cbr_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>, ApiError> {
    info!("Opening CBR archive: {}", abs_path);

    // Pass 1: list all image names (in-process, no subprocess)
    let mut image_names: Vec<String> = {
        let archive = match unrar::Archive::new(abs_path).open_for_listing() {
            Ok(a) => a,
            Err(e) => {
                if allow_fallback {
                    warn!("CBR open failed for {}, trying ZIP fallback: {}", abs_path, e);
                    return extract_cbz_page(abs_path, page_number, false);
                }
                return Err(ApiError::internal(format!("unrar listing failed: {}", e)));
            }
        };
        let mut names = Vec::new();
        for entry in archive {
            let entry = entry.map_err(|e| ApiError::internal(format!("unrar entry error: {}", e)))?;
            let name = entry.filename.to_string_lossy().to_string();
            if is_image_name(&name.to_ascii_lowercase()) {
                names.push(name);
            }
        }
        names
    };
    image_names.sort_by(|a, b| natord::compare(a, b));

    // `checked_sub` instead of `- 1`: page 0 must yield "out of range" rather
    // than an arithmetic-overflow panic in debug builds.
    let target = (page_number as usize)
        .checked_sub(1)
        .and_then(|index| image_names.get(index))
        .ok_or_else(|| {
            error!("Page {} out of range (total: {})", page_number, image_names.len());
            ApiError::not_found("page out of range")
        })?
        .clone();

    // Pass 2: extract only the target page to memory
    let mut archive = unrar::Archive::new(abs_path)
        .open_for_processing()
        .map_err(|e| ApiError::internal(format!("unrar processing failed: {}", e)))?;
    while let Some(header) = archive
        .read_header()
        .map_err(|e| ApiError::internal(format!("unrar read header: {}", e)))?
    {
        let entry_name = header.entry().filename.to_string_lossy().to_string();
        if entry_name == target {
            let (data, _) = header
                .read()
                .map_err(|e| ApiError::internal(format!("unrar read: {}", e)))?;
            info!("Extracted CBR page {} ({} bytes)", page_number, data.len());
            return Ok(data);
        }
        // Not the requested entry: skip its data and move to the next header.
        archive = header
            .skip()
            .map_err(|e| ApiError::internal(format!("unrar skip: {}", e)))?;
    }
    Err(ApiError::not_found("page not found in archive"))
}
/// Renders one page (1-based `page_number`) of a PDF to PNG-encoded bytes.
///
/// The page is rasterised with pdfium at a target width of `width` pixels; a
/// `width` of `0` selects the default of 1200.
///
/// # Errors
///
/// Returns `ApiError::not_found` when the page is out of range — including
/// `page_number == 0` and page numbers whose zero-based index does not fit
/// the `u16` index used here — and `ApiError::internal` when pdfium is
/// unavailable or loading, rendering or PNG encoding fails.
fn render_pdf_page(abs_path: &str, page_number: u32, width: u32) -> Result<Vec<u8>, ApiError> {
    use pdfium_render::prelude::*;

    debug!("Rendering PDF page {} of {} (width: {})", page_number, abs_path, width);
    let pdfium = Pdfium::new(
        Pdfium::bind_to_system_library()
            .map_err(|e| ApiError::internal(format!("pdfium not available: {:?}", e)))?,
    );
    let document = pdfium
        .load_pdf_from_file(abs_path, None)
        .map_err(|e| ApiError::internal(format!("pdf load failed: {:?}", e)))?;

    // Checked conversion: a plain `(page_number - 1) as u16` panics on page 0
    // in debug builds and silently wraps large page numbers onto valid
    // indices (e.g. page 65537 would render page 1).
    let page_index = page_number
        .checked_sub(1)
        .and_then(|index| u16::try_from(index).ok())
        .ok_or_else(|| ApiError::not_found("page out of range"))?;
    let page = document
        .pages()
        .get(page_index)
        .map_err(|_| ApiError::not_found("page out of range"))?;

    // NOTE(review): `width as i32` wraps for widths above i32::MAX; callers
    // presumably bound the requested width — confirm.
    let render_width = if width > 0 { width as i32 } else { 1200 };
    let config = PdfRenderConfig::new().set_target_width(render_width);
    let bitmap = page
        .render_with_config(&config)
        .map_err(|e| ApiError::internal(format!("pdf render failed: {:?}", e)))?;

    let image = bitmap.as_image();
    let mut buf = std::io::Cursor::new(Vec::new());
    image
        .write_to(&mut buf, image::ImageFormat::Png)
        .map_err(|e| ApiError::internal(format!("png encode failed: {}", e)))?;
    debug!("Rendered PDF page {} ({} bytes)", page_number, buf.get_ref().len());
    Ok(buf.into_inner())
}
fn transcode_image(input: &[u8], out_format: &OutputFormat, quality: u8, width: u32, filter: image::imageops::FilterType) -> Result<Vec<u8>, ApiError> { fn transcode_image(input: &[u8], out_format: &OutputFormat, quality: u8, width: u32, filter: image::imageops::FilterType) -> Result<Vec<u8>, ApiError> {
debug!("Transcoding image: {} bytes, format: {:?}, quality: {}, width: {}", input.len(), out_format, quality, width); debug!("Transcoding image: {} bytes, format: {:?}, quality: {}, width: {}", input.len(), out_format, quality, width);
@@ -650,20 +437,3 @@ fn format_matches(source: &ImageFormat, target: &OutputFormat) -> bool {
) )
} }
/// Returns `true` when `name` ends with one of the recognised image file
/// extensions. The comparison is case-insensitive: the name is lowercased
/// before the suffixes are checked.
fn is_image_name(name: &str) -> bool {
    const IMAGE_SUFFIXES: [&str; 9] = [
        ".jpg", ".jpeg", ".png", ".webp", ".avif", ".gif", ".tif", ".tiff", ".bmp",
    ];

    let normalized = name.to_lowercase();
    IMAGE_SUFFIXES
        .iter()
        .any(|suffix| normalized.ends_with(*suffix))
}
/// Reports whether `value`, interpreted as a filesystem path, is absolute.
// The lint exemption is carried over unchanged; presumably the helper has no
// callers in this module — confirm before removing it.
#[allow(dead_code)]
fn _is_absolute_path(value: &str) -> bool {
    let candidate: &Path = value.as_ref();
    candidate.is_absolute()
}

View File

@@ -1,8 +1,8 @@
"use client"; "use client";
import { useEffect, useState, useRef } from "react"; import { useEffect, useState, useRef, useCallback } from "react";
import { createPortal } from "react-dom";
import Link from "next/link"; import Link from "next/link";
import { Button } from "./ui/Button";
import { Badge } from "./ui/Badge"; import { Badge } from "./ui/Badge";
import { ProgressBar } from "./ui/ProgressBar"; import { ProgressBar } from "./ui/ProgressBar";
@@ -46,7 +46,9 @@ const ChevronIcon = ({ className }: { className?: string }) => (
export function JobsIndicator() { export function JobsIndicator() {
const [activeJobs, setActiveJobs] = useState<Job[]>([]); const [activeJobs, setActiveJobs] = useState<Job[]>([]);
const [isOpen, setIsOpen] = useState(false); const [isOpen, setIsOpen] = useState(false);
const dropdownRef = useRef<HTMLDivElement>(null); const buttonRef = useRef<HTMLButtonElement>(null);
const popinRef = useRef<HTMLDivElement>(null);
const [popinStyle, setPopinStyle] = useState<React.CSSProperties>({});
useEffect(() => { useEffect(() => {
const fetchActiveJobs = async () => { const fetchActiveJobs = async () => {
@@ -66,23 +68,72 @@ export function JobsIndicator() {
return () => clearInterval(interval); return () => clearInterval(interval);
}, []); }, []);
// Close dropdown when clicking outside // Position the popin relative to the button
const updatePosition = useCallback(() => {
if (!buttonRef.current) return;
const rect = buttonRef.current.getBoundingClientRect();
const isMobile = window.innerWidth < 640;
if (isMobile) {
setPopinStyle({
position: "fixed",
top: `${rect.bottom + 8}px`,
left: "12px",
right: "12px",
});
} else {
// Align right edge of popin with right edge of button
const rightEdge = window.innerWidth - rect.right;
setPopinStyle({
position: "fixed",
top: `${rect.bottom + 8}px`,
right: `${Math.max(rightEdge, 12)}px`,
width: "384px", // w-96
});
}
}, []);
useEffect(() => { useEffect(() => {
if (!isOpen) return;
updatePosition();
window.addEventListener("resize", updatePosition);
window.addEventListener("scroll", updatePosition, true);
return () => {
window.removeEventListener("resize", updatePosition);
window.removeEventListener("scroll", updatePosition, true);
};
}, [isOpen, updatePosition]);
// Close when clicking outside
useEffect(() => {
if (!isOpen) return;
const handleClickOutside = (event: MouseEvent) => { const handleClickOutside = (event: MouseEvent) => {
if (dropdownRef.current && !dropdownRef.current.contains(event.target as Node)) { const target = event.target as Node;
if (
buttonRef.current && !buttonRef.current.contains(target) &&
popinRef.current && !popinRef.current.contains(target)
) {
setIsOpen(false); setIsOpen(false);
} }
}; };
document.addEventListener("mousedown", handleClickOutside); document.addEventListener("mousedown", handleClickOutside);
return () => document.removeEventListener("mousedown", handleClickOutside); return () => document.removeEventListener("mousedown", handleClickOutside);
}, []); }, [isOpen]);
// Close on Escape
useEffect(() => {
if (!isOpen) return;
const handleEsc = (e: KeyboardEvent) => {
if (e.key === "Escape") setIsOpen(false);
};
document.addEventListener("keydown", handleEsc);
return () => document.removeEventListener("keydown", handleEsc);
}, [isOpen]);
const runningJobs = activeJobs.filter(j => j.status === "running" || j.status === "extracting_pages" || j.status === "generating_thumbnails"); const runningJobs = activeJobs.filter(j => j.status === "running" || j.status === "extracting_pages" || j.status === "generating_thumbnails");
const pendingJobs = activeJobs.filter(j => j.status === "pending"); const pendingJobs = activeJobs.filter(j => j.status === "pending");
const totalCount = activeJobs.length; const totalCount = activeJobs.length;
// Calculate overall progress
const totalProgress = runningJobs.reduce((acc, job) => { const totalProgress = runningJobs.reduce((acc, job) => {
return acc + (job.progress_percent || 0); return acc + (job.progress_percent || 0);
}, 0) / (runningJobs.length || 1); }, 0) / (runningJobs.length || 1);
@@ -107,69 +158,29 @@ export function JobsIndicator() {
); );
} }
return ( const popin = isOpen && (
<div className="relative" ref={dropdownRef}> <>
<button {/* Mobile backdrop */}
className={`
flex items-center gap-2
px-3 py-2
rounded-md
font-medium text-sm
transition-all duration-200
${runningJobs.length > 0
? 'bg-success/10 text-success hover:bg-success/20'
: 'bg-warning/10 text-warning hover:bg-warning/20'
}
${isOpen ? 'ring-2 ring-ring ring-offset-2 ring-offset-background' : ''}
`}
onClick={() => setIsOpen(!isOpen)}
title={`${totalCount} active job${totalCount !== 1 ? 's' : ''}`}
>
{/* Animated spinner for running jobs */}
{runningJobs.length > 0 && (
<div className="w-4 h-4 animate-spin">
<SpinnerIcon className="w-4 h-4" />
</div>
)}
{/* Icon */}
<JobsIcon className="w-4 h-4" />
{/* Badge with count */}
<span className="flex items-center justify-center min-w-5 h-5 px-1.5 text-xs font-bold bg-current rounded-full">
<span className="text-background">{totalCount > 99 ? "99+" : totalCount}</span>
</span>
{/* Chevron */}
<ChevronIcon
className={`w-4 h-4 transition-transform duration-200 ${isOpen ? 'rotate-180' : ''}`}
/>
</button>
{/* Backdrop mobile */}
{isOpen && (
<div <div
className="fixed inset-0 z-40 sm:hidden bg-background/60 backdrop-blur-sm" className="fixed inset-0 z-[80] sm:hidden bg-background/60 backdrop-blur-sm"
onClick={() => setIsOpen(false)} onClick={() => setIsOpen(false)}
aria-hidden="true" aria-hidden="true"
/> />
)}
{/* Popin/Dropdown with glassmorphism */} {/* Popin */}
{isOpen && ( <div
<div className=" ref={popinRef}
fixed sm:absolute style={popinStyle}
inset-x-3 sm:inset-x-auto className="
top-[4.5rem] sm:top-full sm:mt-2 z-[90]
sm:w-96
bg-popover/95 backdrop-blur-md bg-popover/95 backdrop-blur-md
rounded-xl rounded-xl
shadow-elevation-2 shadow-elevation-2
border border-border/60 border border-border/60
overflow-hidden overflow-hidden
z-50 animate-fade-in
animate-scale-in "
"> >
{/* Header */} {/* Header */}
<div className="flex items-center justify-between px-4 py-3 border-b border-border/60 bg-muted/50"> <div className="flex items-center justify-between px-4 py-3 border-b border-border/60 bg-muted/50">
<div className="flex items-center gap-3"> <div className="flex items-center gap-3">
@@ -269,8 +280,51 @@ export function JobsIndicator() {
<p className="text-xs text-muted-foreground text-center">Auto-refreshing every 2s</p> <p className="text-xs text-muted-foreground text-center">Auto-refreshing every 2s</p>
</div> </div>
</div> </div>
)} </>
);
return (
<>
<button
ref={buttonRef}
className={`
flex items-center gap-1.5
p-2 sm:px-3 sm:py-2
rounded-md
font-medium text-sm
transition-all duration-200
${runningJobs.length > 0
? 'bg-success/10 text-success hover:bg-success/20'
: 'bg-warning/10 text-warning hover:bg-warning/20'
}
${isOpen ? 'ring-2 ring-ring ring-offset-2 ring-offset-background' : ''}
`}
onClick={() => setIsOpen(!isOpen)}
title={`${totalCount} active job${totalCount !== 1 ? 's' : ''}`}
>
{/* Animated spinner for running jobs */}
{runningJobs.length > 0 && (
<div className="w-4 h-4 animate-spin">
<SpinnerIcon className="w-4 h-4" />
</div> </div>
)}
{/* Icon */}
<JobsIcon className="w-4 h-4" />
{/* Badge with count */}
<span className="flex items-center justify-center min-w-5 h-5 px-1.5 text-xs font-bold bg-current rounded-full">
<span className="text-background">{totalCount > 99 ? "99+" : totalCount}</span>
</span>
{/* Chevron - hidden on small screens */}
<ChevronIcon
className={`w-4 h-4 hidden sm:block transition-transform duration-200 ${isOpen ? 'rotate-180' : ''}`}
/>
</button>
{typeof document !== "undefined" && createPortal(popin, document.body)}
</>
); );
} }

View File

@@ -15,7 +15,6 @@ image.workspace = true
notify = "8" notify = "8"
num_cpus.workspace = true num_cpus.workspace = true
parsers = { path = "../../crates/parsers" } parsers = { path = "../../crates/parsers" }
rand.workspace = true
rayon.workspace = true rayon.workspace = true
reqwest.workspace = true reqwest.workspace = true
serde.workspace = true serde.workspace = true

View File

@@ -117,44 +117,57 @@ pub async fn sync_meili(pool: &PgPool, meili_url: &str, meili_master_key: &str)
} }
} }
// Handle deletions: get all book IDs from DB and remove from MeiliSearch any that don't exist // Clean up stale documents: remove from Meilisearch any IDs that no longer exist in DB.
// This is expensive, so we only do it periodically (every 10 syncs) or on full syncs // Runs on every sync — the cost is minimal (single fetch of IDs only).
if is_full_sync || rand::random::<u8>() < 26 { // ~10% chance {
info!("[MEILI] Checking for documents to delete");
// Get all book IDs from database
let db_ids: Vec<String> = sqlx::query_scalar("SELECT id::text FROM books") let db_ids: Vec<String> = sqlx::query_scalar("SELECT id::text FROM books")
.fetch_all(pool) .fetch_all(pool)
.await?; .await?;
// Get all document IDs from MeiliSearch (this requires fetching all documents) // Fetch all document IDs from Meilisearch (paginated to handle large collections)
// For efficiency, we'll just delete by query for documents that might be stale let mut meili_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
// A better approach would be to track deletions in a separate table let mut offset: usize = 0;
const PAGE_SIZE: usize = 10000;
// For now, we'll do a simple approach: fetch all Meili docs and compare loop {
// Note: This could be slow for large collections let response = client
let meili_response = client
.post(format!("{base}/indexes/books/documents/fetch")) .post(format!("{base}/indexes/books/documents/fetch"))
.header("Authorization", format!("Bearer {meili_master_key}")) .header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!({ .json(&serde_json::json!({
"fields": ["id"], "fields": ["id"],
"limit": 100000 "limit": PAGE_SIZE,
"offset": offset
})) }))
.send() .send()
.await; .await;
if let Ok(response) = meili_response { let response = match response {
if response.status().is_success() { Ok(r) if r.status().is_success() => r,
// Meilisearch returns { "results": [...], "offset": ..., "total": ... } _ => break,
if let Ok(payload) = response.json::<serde_json::Value>().await { };
let docs = payload.get("results")
let payload: serde_json::Value = match response.json().await {
Ok(v) => v,
Err(_) => break,
};
let results = payload.get("results")
.and_then(|v| v.as_array()) .and_then(|v| v.as_array())
.cloned() .cloned()
.unwrap_or_default(); .unwrap_or_default();
let meili_ids: std::collections::HashSet<String> = docs
.into_iter() let page_count = results.len();
.filter_map(|doc| doc.get("id").and_then(|id| id.as_str()).map(|s| s.to_string())) for doc in results {
.collect(); if let Some(id) = doc.get("id").and_then(|v| v.as_str()) {
meili_ids.insert(id.to_string());
}
}
if page_count < PAGE_SIZE {
break; // Last page
}
offset += PAGE_SIZE;
}
let db_ids_set: std::collections::HashSet<String> = db_ids.into_iter().collect(); let db_ids_set: std::collections::HashSet<String> = db_ids.into_iter().collect();
let to_delete: Vec<String> = meili_ids.difference(&db_ids_set).cloned().collect(); let to_delete: Vec<String> = meili_ids.difference(&db_ids_set).cloned().collect();
@@ -169,9 +182,6 @@ pub async fn sync_meili(pool: &PgPool, meili_url: &str, meili_master_key: &str)
.await; .await;
} }
} }
}
}
}
// Update last sync timestamp // Update last sync timestamp
sqlx::query( sqlx::query(

View File

@@ -13,3 +13,4 @@ pdfium-render.workspace = true
regex = "1" regex = "1"
unrar.workspace = true unrar.workspace = true
zip = { version = "8", default-features = false, features = ["deflate"] } zip = { version = "8", default-features = false, features = ["deflate"] }
flate2 = "1"

View File

@@ -210,44 +210,114 @@ fn analyze_cbz(path: &Path, allow_fallback: bool) -> Result<(i32, Vec<u8>)> {
Err(anyhow::anyhow!("all entries unreadable in cbz: {}", path.display())) Err(anyhow::anyhow!("all entries unreadable in cbz: {}", path.display()))
} }
/// Fallback for ZIP files whose central directory can't be parsed (e.g. NTFS extra fields). // ---------------------------------------------------------------------------
/// Reads local file headers sequentially without relying on the central directory. // Raw ZIP reader — bypasses extra field validation (CRC32 on Unicode path, NTFS, etc.)
fn analyze_cbz_streaming(path: &Path) -> Result<(i32, Vec<u8>)> { // ---------------------------------------------------------------------------
let file = std::fs::File::open(path)
.with_context(|| format!("cannot open cbz for streaming: {}", path.display()))?;
let mut reader = std::io::BufReader::new(file);
let mut all_images: Vec<(String, Vec<u8>)> = Vec::new(); /// Information about a ZIP local file entry (parsed from raw headers).
struct RawZipEntry {
/// Entry name from the local file header, decoded lossily as UTF-8.
name: String,
/// Compression method id from the local file header (the reader below
/// handles 0 = stored and 8 = deflate).
compression: u16,
/// Compressed payload size in bytes, widened from the header's 32-bit field.
compressed_size: u64,
/// Declared uncompressed size in bytes, widened from the header's 32-bit field.
uncompressed_size: u64,
/// File offset of the compressed data (right after name + extra field).
data_offset: u64,
}
/// Scan local file headers and return metadata for all entries.
/// Does NOT read file data — only collects names and offsets.
fn raw_zip_list_entries(path: &Path) -> Result<Vec<RawZipEntry>> {
use std::io::{BufReader, Seek, SeekFrom};
let file = std::fs::File::open(path)
.with_context(|| format!("cannot open zip: {}", path.display()))?;
let mut reader = BufReader::new(file);
let mut entries = Vec::new();
loop { loop {
match zip::read::read_zipfile_from_stream(&mut reader) { let mut sig = [0u8; 4];
Ok(Some(mut entry)) => { if reader.read_exact(&mut sig).is_err() {
let name = entry.name().to_string(); break;
if is_image_name(&name.to_ascii_lowercase()) {
let mut buf = Vec::new();
entry.read_to_end(&mut buf)?;
all_images.push((name, buf));
} else {
std::io::copy(&mut entry, &mut std::io::sink())?;
} }
if u32::from_le_bytes(sig) != 0x04034b50 {
break;
} }
Ok(None) => break,
Err(_) => { let mut hdr = [0u8; 26];
if !all_images.is_empty() { reader.read_exact(&mut hdr).context("truncated local file header")?;
break; // Partial read — use what we have
} let compression = u16::from_le_bytes([hdr[4], hdr[5]]);
return Err(anyhow::anyhow!("streaming ZIP read failed for {}", path.display())); let compressed_size = u32::from_le_bytes([hdr[14], hdr[15], hdr[16], hdr[17]]) as u64;
let uncompressed_size = u32::from_le_bytes([hdr[18], hdr[19], hdr[20], hdr[21]]) as u64;
let name_len = u16::from_le_bytes([hdr[22], hdr[23]]) as u64;
let extra_len = u16::from_le_bytes([hdr[24], hdr[25]]) as u64;
let mut name_buf = vec![0u8; name_len as usize];
reader.read_exact(&mut name_buf)?;
let name = String::from_utf8_lossy(&name_buf).to_string();
// Skip extra field entirely
if extra_len > 0 {
reader.seek(SeekFrom::Current(extra_len as i64))?;
} }
let data_offset = reader.stream_position()?;
entries.push(RawZipEntry {
name,
compression,
compressed_size,
uncompressed_size,
data_offset,
});
// Skip file data
if compressed_size > 0 {
reader.seek(SeekFrom::Current(compressed_size as i64))?;
} }
} }
if all_images.is_empty() { Ok(entries)
}
/// Read and decompress the data for a single entry.
///
/// `entry` must describe an entry of the archive at `path`: `data_offset` and
/// `compressed_size` locate the payload, and `compression` selects the decoder
/// (0 = stored, 8 = deflate).
///
/// # Errors
///
/// Fails if the file cannot be opened or read, if the entry's declared extent
/// runs past the end of the file, if the deflate stream cannot be decoded, or
/// if the compression method is neither 0 nor 8.
fn raw_zip_read_entry(path: &Path, entry: &RawZipEntry) -> Result<Vec<u8>> {
    use std::io::{Seek, SeekFrom};

    // Cap on the up-front allocation for decompressed output. The declared
    // uncompressed size is only a hint taken from an untrusted header; the
    // vector still grows past this if the real data is larger.
    const MAX_PREALLOC: u64 = 16 * 1024 * 1024;

    let mut file = std::fs::File::open(path)?;

    // The sizes come straight from a (possibly corrupt) local header. Check
    // them against the real file length before allocating the read buffer so
    // a bogus header cannot trigger a multi-gigabyte allocation.
    let file_len = file.metadata()?.len();
    let data_end = entry.data_offset.checked_add(entry.compressed_size);
    if data_end.map_or(true, |end| end > file_len) {
        return Err(anyhow::anyhow!(
            "zip entry '{}' extends past end of file: {}",
            entry.name,
            path.display()
        ));
    }

    file.seek(SeekFrom::Start(entry.data_offset))?;
    // Bounded by `file_len` thanks to the check above.
    let mut compressed = vec![0u8; entry.compressed_size as usize];
    file.read_exact(&mut compressed)?;

    match entry.compression {
        // Stored: the payload is the file content.
        0 => Ok(compressed),
        // Deflate: raw stream without zlib/gzip framing.
        8 => {
            let capacity = entry.uncompressed_size.min(MAX_PREALLOC) as usize;
            let mut decompressed = Vec::with_capacity(capacity);
            let mut decoder = flate2::read::DeflateDecoder::new(&compressed[..]);
            decoder.read_to_end(&mut decompressed)?;
            Ok(decompressed)
        }
        other => Err(anyhow::anyhow!("unsupported zip compression method: {}", other)),
    }
}
/// Fallback: list image names + extract all images (for analyze_book which needs first page + count).
fn analyze_cbz_streaming(path: &Path) -> Result<(i32, Vec<u8>)> {
let entries = raw_zip_list_entries(path)?;
let mut image_entries: Vec<&RawZipEntry> = entries
.iter()
.filter(|e| is_image_name(&e.name.to_ascii_lowercase()))
.collect();
if image_entries.is_empty() {
return Err(anyhow::anyhow!("no images found in streaming cbz: {}", path.display())); return Err(anyhow::anyhow!("no images found in streaming cbz: {}", path.display()));
} }
all_images.sort_by(|(a, _), (b, _)| natord::compare(a, b)); image_entries.sort_by(|a, b| natord::compare(&a.name, &b.name));
let count = all_images.len() as i32; let count = image_entries.len() as i32;
let (_, first_bytes) = all_images.remove(0); let first_bytes = raw_zip_read_entry(path, image_entries[0])?;
Ok((count, first_bytes)) Ok((count, first_bytes))
} }
@@ -366,7 +436,8 @@ fn analyze_pdf(path: &Path, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
fn parse_cbz_page_count(path: &Path) -> Result<i32> { fn parse_cbz_page_count(path: &Path) -> Result<i32> {
let file = std::fs::File::open(path) let file = std::fs::File::open(path)
.with_context(|| format!("cannot open cbz: {}", path.display()))?; .with_context(|| format!("cannot open cbz: {}", path.display()))?;
let mut archive = zip::ZipArchive::new(file).context("invalid cbz archive")?; match zip::ZipArchive::new(file) {
Ok(mut archive) => {
let mut count: i32 = 0; let mut count: i32 = 0;
for i in 0..archive.len() { for i in 0..archive.len() {
let entry = archive.by_index(i).context("cannot read cbz entry")?; let entry = archive.by_index(i).context("cannot read cbz entry")?;
@@ -376,6 +447,21 @@ fn parse_cbz_page_count(path: &Path) -> Result<i32> {
} }
} }
Ok(count) Ok(count)
}
Err(_) => {
// Fallback: streaming count (bypasses extra field validation)
parse_cbz_page_count_streaming(path)
}
}
}
fn parse_cbz_page_count_streaming(path: &Path) -> Result<i32> {
let entries = raw_zip_list_entries(path)?;
let count = entries
.iter()
.filter(|e| is_image_name(&e.name.to_ascii_lowercase()))
.count() as i32;
Ok(count)
} }
fn parse_cbr_page_count(path: &Path) -> Result<i32> { fn parse_cbr_page_count(path: &Path) -> Result<i32> {
@@ -422,21 +508,38 @@ fn is_image_name(name: &str) -> bool {
} }
pub fn extract_first_page(path: &Path, format: BookFormat) -> Result<Vec<u8>> { pub fn extract_first_page(path: &Path, format: BookFormat) -> Result<Vec<u8>> {
extract_page(path, format, 1, 0)
}
/// Extract a specific page (1-based index) from a book archive.
/// `pdf_render_width`: max width for PDF rasterization; 0 means use default (1200).
pub fn extract_page(path: &Path, format: BookFormat, page_number: u32, pdf_render_width: u32) -> Result<Vec<u8>> {
if page_number == 0 {
return Err(anyhow::anyhow!("page index starts at 1"));
}
match format { match format {
BookFormat::Cbz => extract_cbz_first_page(path), BookFormat::Cbz => extract_cbz_page(path, page_number, true),
BookFormat::Cbr => analyze_cbr(path, true).map(|(_, bytes)| bytes), BookFormat::Cbr => extract_cbr_page(path, page_number, true),
BookFormat::Pdf => analyze_pdf(path, 0).map(|(_, bytes)| bytes), BookFormat::Pdf => {
let width = if pdf_render_width == 0 { 1200 } else { pdf_render_width };
render_pdf_page_n(path, page_number, width)
}
} }
} }
fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> { fn extract_cbz_page(path: &Path, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>> {
let file = std::fs::File::open(path) let file = std::fs::File::open(path)
.with_context(|| format!("cannot open cbz: {}", path.display()))?; .with_context(|| format!("cannot open cbz: {}", path.display()))?;
let mut archive = zip::ZipArchive::new(file).context("invalid cbz archive")?; let index = page_number as usize - 1;
match zip::ZipArchive::new(file) {
Ok(mut archive) => {
let mut image_names: Vec<String> = Vec::new(); let mut image_names: Vec<String> = Vec::new();
for i in 0..archive.len() { for i in 0..archive.len() {
let entry = archive.by_index(i).context("cannot read cbz entry")?; let entry = match archive.by_index(i) {
Ok(e) => e,
Err(_) => continue,
};
let name = entry.name().to_ascii_lowercase(); let name = entry.name().to_ascii_lowercase();
if is_image_name(&name) { if is_image_name(&name) {
image_names.push(entry.name().to_string()); image_names.push(entry.name().to_string());
@@ -444,16 +547,133 @@ fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> {
} }
image_names.sort_by(|a, b| natord::compare(a, b)); image_names.sort_by(|a, b| natord::compare(a, b));
let first_image = image_names.first().context("no images found in cbz")?; let selected = image_names
.get(index)
.with_context(|| format!("page {} out of range (total: {})", page_number, image_names.len()))?;
let mut entry = archive let mut entry = archive.by_name(selected)
.by_name(first_image) .with_context(|| format!("cannot read page {}", selected))?;
.context("cannot read first image")?;
let mut buf = Vec::new(); let mut buf = Vec::new();
entry.read_to_end(&mut buf)?; entry.read_to_end(&mut buf)?;
Ok(buf) Ok(buf)
}
Err(zip_err) => {
if allow_fallback {
// Try RAR fallback (file might be a RAR with .cbz extension)
if let Ok(data) = extract_cbr_page(path, page_number, false) {
return Ok(data);
}
// Raw ZIP fallback (bypasses extra field validation)
return extract_cbz_page_raw(path, page_number);
}
Err(anyhow::anyhow!("invalid cbz archive for {}: {}", path.display(), zip_err))
}
}
} }
fn extract_cbz_page_raw(path: &Path, page_number: u32) -> Result<Vec<u8>> {
let entries = raw_zip_list_entries(path)?;
let mut image_entries: Vec<&RawZipEntry> = entries
.iter()
.filter(|e| is_image_name(&e.name.to_ascii_lowercase()))
.collect();
image_entries.sort_by(|a, b| natord::compare(&a.name, &b.name));
let index = page_number as usize - 1;
let entry = image_entries
.get(index)
.with_context(|| format!("page {} out of range (total: {})", page_number, image_entries.len()))?;
raw_zip_read_entry(path, entry)
}
fn extract_cbr_page(path: &Path, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>> {
let index = page_number as usize - 1;
let mut image_names: Vec<String> = {
let archive = match unrar::Archive::new(path).open_for_listing() {
Ok(a) => a,
Err(e) => {
if allow_fallback {
return extract_cbz_page(path, page_number, false);
}
return Err(anyhow::anyhow!("unrar listing failed for {}: {}", path.display(), e));
}
};
let mut names = Vec::new();
for entry in archive {
let entry = entry.map_err(|e| anyhow::anyhow!("unrar entry error: {}", e))?;
let name = entry.filename.to_string_lossy().to_string();
if is_image_name(&name.to_ascii_lowercase()) {
names.push(name);
}
}
names
};
image_names.sort_by(|a, b| natord::compare(a, b));
let target = image_names
.get(index)
.with_context(|| format!("page {} out of range (total: {})", page_number, image_names.len()))?
.clone();
let mut archive = unrar::Archive::new(path)
.open_for_processing()
.map_err(|e| anyhow::anyhow!("unrar open for processing failed: {}", e))?;
while let Some(header) = archive
.read_header()
.map_err(|e| anyhow::anyhow!("unrar read header: {}", e))?
{
let entry_name = header.entry().filename.to_string_lossy().to_string();
if entry_name == target {
let (data, _) = header
.read()
.map_err(|e| anyhow::anyhow!("unrar read data: {}", e))?;
return Ok(data);
}
archive = header
.skip()
.map_err(|e| anyhow::anyhow!("unrar skip: {}", e))?;
}
Err(anyhow::anyhow!("page '{}' not found in {}", target, path.display()))
}
fn render_pdf_page_n(path: &Path, page_number: u32, width: u32) -> Result<Vec<u8>> {
use pdfium_render::prelude::*;
let pdfium = Pdfium::new(
Pdfium::bind_to_system_library()
.map_err(|e| anyhow::anyhow!("pdfium library not available: {:?}", e))?,
);
let document = pdfium
.load_pdf_from_file(path, None)
.map_err(|e| anyhow::anyhow!("pdfium load failed for {}: {:?}", path.display(), e))?;
let page_index = (page_number - 1) as u16;
let page = document
.pages()
.get(page_index)
.map_err(|_| anyhow::anyhow!("page {} out of range in {}", page_number, path.display()))?;
let config = PdfRenderConfig::new().set_target_width(width as i32);
let bitmap = page
.render_with_config(&config)
.map_err(|e| anyhow::anyhow!("pdfium render failed for {}: {:?}", path.display(), e))?;
let image = bitmap.as_image();
let mut buf = std::io::Cursor::new(Vec::new());
image
.write_to(&mut buf, image::ImageFormat::Png)
.context("failed to encode rendered PDF page as PNG")?;
Ok(buf.into_inner())
}
/// Convert a CBR file to CBZ in-place (same directory, same stem). /// Convert a CBR file to CBZ in-place (same directory, same stem).
/// ///
/// The conversion is safe: a `.cbz.tmp` file is written first, verified, then /// The conversion is safe: a `.cbz.tmp` file is written first, verified, then