diff --git a/apps/api/src/pages.rs b/apps/api/src/pages.rs
index 7b33ceb..53e8059 100644
--- a/apps/api/src/pages.rs
+++ b/apps/api/src/pages.rs
@@ -376,8 +376,13 @@ fn extract_cbz_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> R
         Ok(a) => a,
         Err(zip_err) => {
             if allow_fallback {
-                warn!("CBZ open failed for {}, trying RAR fallback: {}", abs_path, zip_err);
-                return extract_cbr_page(abs_path, page_number, false);
+                // Try RAR fallback (file might be a RAR with .cbz extension)
+                if let Ok(data) = extract_cbr_page(abs_path, page_number, false) {
+                    return Ok(data);
+                }
+                // Streaming fallback: read local file headers without central directory
+                warn!("CBZ central dir failed for {}, trying streaming: {}", abs_path, zip_err);
+                return extract_cbz_page_streaming(abs_path, page_number);
             }
             error!("Invalid CBZ archive {}: {}", abs_path, zip_err);
             return Err(ApiError::internal(format!("invalid cbz: {zip_err}")));
@@ -417,6 +422,83 @@ fn extract_cbz_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> R
     Ok(buf)
 }
 
+/// Last-resort CBZ page reader for archives whose central directory cannot be parsed.
+///
+/// Streams the local file headers twice: the first pass collects the image entry names and
+/// sorts them in natural order, the second pass re-reads the file and returns the bytes of
+/// the entry at 1-based position `page_number`.
+///
+/// # Errors
+/// Returns a not-found error when the file is missing or the page is out of range, and an
+/// internal error when the file cannot be opened or an entry cannot be read.
+fn extract_cbz_page_streaming(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiError> {
+    let file = std::fs::File::open(abs_path).map_err(|e| {
+        if e.kind() == std::io::ErrorKind::NotFound {
+            ApiError::not_found("book file not accessible")
+        } else {
+            ApiError::internal(format!("cannot open cbz: {e}"))
+        }
+    })?;
+    let mut reader = std::io::BufReader::new(file);
+    let mut image_names: Vec<String> = Vec::new();
+
+    // First pass: collect image names, draining each entry to reach the next header
+    loop {
+        match zip::read::read_zipfile_from_stream(&mut reader) {
+            Ok(Some(mut entry)) => {
+                let name = entry.name().to_string();
+                if is_image_name(&name.to_ascii_lowercase()) {
+                    image_names.push(name);
+                }
+                std::io::copy(&mut entry, &mut std::io::sink())
+                    .map_err(|e| ApiError::internal(format!("cbz stream skip: {e}")))?;
+            }
+            Ok(None) => break,
+            Err(e) => {
+                if image_names.is_empty() {
+                    return Err(ApiError::internal("cbz streaming read failed".to_string()));
+                }
+                // Best effort: serve the pages read so far, but leave a trace in the log
+                warn!("CBZ streaming stopped early for {}: {}", abs_path, e);
+                break;
+            }
+        }
+    }
+
+    image_names.sort_by(|a, b| natord::compare(a, b));
+    // `checked_sub` maps page 0 to "out of range" instead of underflowing the index
+    let target = (page_number as usize)
+        .checked_sub(1)
+        .and_then(|index| image_names.get(index))
+        .ok_or_else(|| ApiError::not_found("page out of range"))?
+        .clone();
+
+    // Second pass: extract the target page
+    let file2 = std::fs::File::open(abs_path)
+        .map_err(|e| ApiError::internal(format!("cannot reopen cbz: {e}")))?;
+    let mut reader2 = std::io::BufReader::new(file2);
+
+    loop {
+        match zip::read::read_zipfile_from_stream(&mut reader2) {
+            Ok(Some(mut entry)) => {
+                if entry.name() == target {
+                    let mut buf = Vec::new();
+                    entry
+                        .read_to_end(&mut buf)
+                        .map_err(|e| ApiError::internal(format!("cbz stream read: {e}")))?;
+                    return Ok(buf);
+                }
+                std::io::copy(&mut entry, &mut std::io::sink())
+                    .map_err(|e| ApiError::internal(format!("cbz stream skip: {e}")))?;
+            }
+            Ok(None) => break,
+            Err(_) => break,
+        }
+    }
+
+    Err(ApiError::not_found("page not found in archive"))
+}
+
 fn extract_cbr_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>, ApiError> {
     info!("Opening CBR archive: {}", abs_path);
     let index = page_number as usize - 1;
diff --git a/crates/parsers/src/lib.rs b/crates/parsers/src/lib.rs
index 6fc00d8..b88480a 100644
--- a/crates/parsers/src/lib.rs
+++ b/crates/parsers/src/lib.rs
@@ -163,19 +163,29 @@ fn analyze_cbz(path: &Path, allow_fallback: bool) -> Result<(i32, Vec<u8>)> {
         .with_context(|| format!("cannot open cbz: {}", path.display()))?;
     let mut archive = match zip::ZipArchive::new(file) {
         Ok(a) => a,
-        Err(e) => {
+        Err(zip_err) => {
             if allow_fallback {
-                // Some .cbz files are actually RAR archives with the wrong extension — fallback to CBR parser
-                return analyze_cbr(path, false).map_err(|rar_err| {
-                    anyhow::anyhow!(
-                        "invalid cbz archive and RAR fallback also failed for {}: ZIP={}, RAR={}",
-                        path.display(),
-                        e,
-                        rar_err
-                    )
-                });
+                // Try RAR fallback first (file might be a RAR with .cbz extension)
+                let rar_err = match analyze_cbr(path, false) {
+                    Ok(result) => return Ok(result),
+                    Err(e) => e,
+                };
+                // Try streaming fallback: read local file headers without central directory
+                // (handles ZIP files with NTFS extra fields that confuse the central dir parser)
+                let stream_err = match analyze_cbz_streaming(path) {
+                    Ok(result) => return Ok(result),
+                    Err(e) => e,
+                };
+                // Keep every failure in the message so a broken file can be diagnosed from logs
+                return Err(anyhow::anyhow!(
+                    "invalid cbz archive and all fallbacks failed for {}: ZIP={}, RAR={}, streaming={}",
+                    path.display(),
+                    zip_err,
+                    rar_err,
+                    stream_err
+                ));
             }
-            return Err(anyhow::anyhow!("invalid cbz archive for {}: {}", path.display(), e));
+            return Err(anyhow::anyhow!("invalid cbz archive for {}: {}", path.display(), zip_err));
         }
     };
 
@@ -201,6 +211,61 @@ fn analyze_cbz(path: &Path, allow_fallback: bool) -> Result<(i32, Vec<u8>)> {
     Ok((count, buf))
 }
 
+/// Fallback for ZIP files whose central directory can't be parsed (e.g. NTFS extra fields).
+/// Reads local file headers sequentially without relying on the central directory.
+///
+/// Returns the number of image entries seen and the bytes of the image that sorts first in
+/// natural order. Only the current best candidate is buffered, never the whole archive.
+///
+/// # Errors
+/// Fails when the file cannot be opened, an entry cannot be read, the stream breaks before
+/// any image was seen, or the archive holds no image at all.
+fn analyze_cbz_streaming(path: &Path) -> Result<(i32, Vec<u8>)> {
+    let file = std::fs::File::open(path)
+        .with_context(|| format!("cannot open cbz for streaming: {}", path.display()))?;
+    let mut reader = std::io::BufReader::new(file);
+
+    let mut count: i32 = 0;
+    // Name and bytes of the image that currently sorts first
+    let mut first: Option<(String, Vec<u8>)> = None;
+
+    loop {
+        match zip::read::read_zipfile_from_stream(&mut reader) {
+            Ok(Some(mut entry)) => {
+                let name = entry.name().to_string();
+                if !is_image_name(&name.to_ascii_lowercase()) {
+                    std::io::copy(&mut entry, &mut std::io::sink())?;
+                    continue;
+                }
+                count += 1;
+                // Strict `Less` keeps the earliest entry when two names compare equal
+                let sorts_first = first.as_ref().map_or(true, |(best, _)| {
+                    natord::compare(&name, best) == std::cmp::Ordering::Less
+                });
+                if sorts_first {
+                    let mut buf = Vec::new();
+                    entry.read_to_end(&mut buf)?;
+                    first = Some((name, buf));
+                } else {
+                    // Drain the entry so the reader lands on the next local file header
+                    std::io::copy(&mut entry, &mut std::io::sink())?;
+                }
+            }
+            Ok(None) => break,
+            Err(_) => {
+                if first.is_some() {
+                    break; // Partial read — use what we have
+                }
+                return Err(anyhow::anyhow!("streaming ZIP read failed for {}", path.display()));
+            }
+        }
+    }
+
+    let (_, first_bytes) = first
+        .ok_or_else(|| anyhow::anyhow!("no images found in streaming cbz: {}", path.display()))?;
+    Ok((count, first_bytes))
+}
+
 fn analyze_cbr(path: &Path, allow_fallback: bool) -> Result<(i32, Vec<u8>)> {
     // Pass 1: list all image names via unrar (in-process, no subprocess)
     let mut image_names: Vec<String> = {