fix(parsers,api): fallback streaming ZIP pour archives avec extra fields NTFS
Les ZIP créés par des outils Windows (version 6.3) contiennent des extra fields NTFS (tag 0x000A) qui font échouer ZipArchive::new() avec "Could not find EOCD". Ajout d'un fallback via read_zipfile_from_stream qui lit les local file headers sans dépendre du central directory. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -376,8 +376,13 @@ fn extract_cbz_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> R
|
|||||||
Ok(a) => a,
|
Ok(a) => a,
|
||||||
Err(zip_err) => {
|
Err(zip_err) => {
|
||||||
if allow_fallback {
|
if allow_fallback {
|
||||||
warn!("CBZ open failed for {}, trying RAR fallback: {}", abs_path, zip_err);
|
// Try RAR fallback (file might be a RAR with .cbz extension)
|
||||||
return extract_cbr_page(abs_path, page_number, false);
|
if let Ok(data) = extract_cbr_page(abs_path, page_number, false) {
|
||||||
|
return Ok(data);
|
||||||
|
}
|
||||||
|
// Streaming fallback: read local file headers without central directory
|
||||||
|
warn!("CBZ central dir failed for {}, trying streaming: {}", abs_path, zip_err);
|
||||||
|
return extract_cbz_page_streaming(abs_path, page_number);
|
||||||
}
|
}
|
||||||
error!("Invalid CBZ archive {}: {}", abs_path, zip_err);
|
error!("Invalid CBZ archive {}: {}", abs_path, zip_err);
|
||||||
return Err(ApiError::internal(format!("invalid cbz: {zip_err}")));
|
return Err(ApiError::internal(format!("invalid cbz: {zip_err}")));
|
||||||
@@ -417,6 +422,69 @@ fn extract_cbz_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> R
|
|||||||
Ok(buf)
|
Ok(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn extract_cbz_page_streaming(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiError> {
|
||||||
|
let file = std::fs::File::open(abs_path).map_err(|e| {
|
||||||
|
if e.kind() == std::io::ErrorKind::NotFound {
|
||||||
|
ApiError::not_found("book file not accessible")
|
||||||
|
} else {
|
||||||
|
ApiError::internal(format!("cannot open cbz: {e}"))
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
let mut reader = std::io::BufReader::new(file);
|
||||||
|
let mut image_names: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match zip::read::read_zipfile_from_stream(&mut reader) {
|
||||||
|
Ok(Some(mut entry)) => {
|
||||||
|
let name = entry.name().to_string();
|
||||||
|
if is_image_name(&name.to_ascii_lowercase()) {
|
||||||
|
image_names.push(name);
|
||||||
|
}
|
||||||
|
std::io::copy(&mut entry, &mut std::io::sink())
|
||||||
|
.map_err(|e| ApiError::internal(format!("cbz stream skip: {e}")))?;
|
||||||
|
}
|
||||||
|
Ok(None) => break,
|
||||||
|
Err(_) => {
|
||||||
|
if !image_names.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return Err(ApiError::internal("cbz streaming read failed".to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
image_names.sort_by(|a, b| natord::compare(a, b));
|
||||||
|
let target = image_names
|
||||||
|
.get(page_number as usize - 1)
|
||||||
|
.ok_or_else(|| ApiError::not_found("page out of range"))?
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
// Second pass: extract the target page
|
||||||
|
let file2 = std::fs::File::open(abs_path)
|
||||||
|
.map_err(|e| ApiError::internal(format!("cannot reopen cbz: {e}")))?;
|
||||||
|
let mut reader2 = std::io::BufReader::new(file2);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match zip::read::read_zipfile_from_stream(&mut reader2) {
|
||||||
|
Ok(Some(mut entry)) => {
|
||||||
|
if entry.name() == target {
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
entry
|
||||||
|
.read_to_end(&mut buf)
|
||||||
|
.map_err(|e| ApiError::internal(format!("cbz stream read: {e}")))?;
|
||||||
|
return Ok(buf);
|
||||||
|
}
|
||||||
|
std::io::copy(&mut entry, &mut std::io::sink())
|
||||||
|
.map_err(|e| ApiError::internal(format!("cbz stream skip: {e}")))?;
|
||||||
|
}
|
||||||
|
Ok(None) => break,
|
||||||
|
Err(_) => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(ApiError::not_found("page not found in archive"))
|
||||||
|
}
|
||||||
|
|
||||||
fn extract_cbr_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>, ApiError> {
|
fn extract_cbr_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>, ApiError> {
|
||||||
info!("Opening CBR archive: {}", abs_path);
|
info!("Opening CBR archive: {}", abs_path);
|
||||||
let index = page_number as usize - 1;
|
let index = page_number as usize - 1;
|
||||||
|
|||||||
@@ -163,19 +163,19 @@ fn analyze_cbz(path: &Path, allow_fallback: bool) -> Result<(i32, Vec<u8>)> {
|
|||||||
.with_context(|| format!("cannot open cbz: {}", path.display()))?;
|
.with_context(|| format!("cannot open cbz: {}", path.display()))?;
|
||||||
let mut archive = match zip::ZipArchive::new(file) {
|
let mut archive = match zip::ZipArchive::new(file) {
|
||||||
Ok(a) => a,
|
Ok(a) => a,
|
||||||
Err(e) => {
|
Err(zip_err) => {
|
||||||
if allow_fallback {
|
if allow_fallback {
|
||||||
// Some .cbz files are actually RAR archives with the wrong extension — fallback to CBR parser
|
// Try RAR fallback first (file might be a RAR with .cbz extension)
|
||||||
return analyze_cbr(path, false).map_err(|rar_err| {
|
if let Ok(result) = analyze_cbr(path, false) {
|
||||||
anyhow::anyhow!(
|
return Ok(result);
|
||||||
"invalid cbz archive and RAR fallback also failed for {}: ZIP={}, RAR={}",
|
}
|
||||||
path.display(),
|
// Try streaming fallback: read local file headers without central directory
|
||||||
e,
|
// (handles ZIP files with NTFS extra fields that confuse the central dir parser)
|
||||||
rar_err
|
if let Ok(result) = analyze_cbz_streaming(path) {
|
||||||
)
|
return Ok(result);
|
||||||
});
|
}
|
||||||
}
|
}
|
||||||
return Err(anyhow::anyhow!("invalid cbz archive for {}: {}", path.display(), e));
|
return Err(anyhow::anyhow!("invalid cbz archive for {}: {}", path.display(), zip_err));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -201,6 +201,47 @@ fn analyze_cbz(path: &Path, allow_fallback: bool) -> Result<(i32, Vec<u8>)> {
|
|||||||
Ok((count, buf))
|
Ok((count, buf))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fallback for ZIP files whose central directory can't be parsed (e.g. NTFS extra fields).
|
||||||
|
/// Reads local file headers sequentially without relying on the central directory.
|
||||||
|
fn analyze_cbz_streaming(path: &Path) -> Result<(i32, Vec<u8>)> {
|
||||||
|
let file = std::fs::File::open(path)
|
||||||
|
.with_context(|| format!("cannot open cbz for streaming: {}", path.display()))?;
|
||||||
|
let mut reader = std::io::BufReader::new(file);
|
||||||
|
|
||||||
|
let mut all_images: Vec<(String, Vec<u8>)> = Vec::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match zip::read::read_zipfile_from_stream(&mut reader) {
|
||||||
|
Ok(Some(mut entry)) => {
|
||||||
|
let name = entry.name().to_string();
|
||||||
|
if is_image_name(&name.to_ascii_lowercase()) {
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
entry.read_to_end(&mut buf)?;
|
||||||
|
all_images.push((name, buf));
|
||||||
|
} else {
|
||||||
|
std::io::copy(&mut entry, &mut std::io::sink())?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(None) => break,
|
||||||
|
Err(_) => {
|
||||||
|
if !all_images.is_empty() {
|
||||||
|
break; // Partial read — use what we have
|
||||||
|
}
|
||||||
|
return Err(anyhow::anyhow!("streaming ZIP read failed for {}", path.display()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if all_images.is_empty() {
|
||||||
|
return Err(anyhow::anyhow!("no images found in streaming cbz: {}", path.display()));
|
||||||
|
}
|
||||||
|
|
||||||
|
all_images.sort_by(|(a, _), (b, _)| natord::compare(a, b));
|
||||||
|
let count = all_images.len() as i32;
|
||||||
|
let (_, first_bytes) = all_images.remove(0);
|
||||||
|
Ok((count, first_bytes))
|
||||||
|
}
|
||||||
|
|
||||||
fn analyze_cbr(path: &Path, allow_fallback: bool) -> Result<(i32, Vec<u8>)> {
|
fn analyze_cbr(path: &Path, allow_fallback: bool) -> Result<(i32, Vec<u8>)> {
|
||||||
// Pass 1: list all image names via unrar (in-process, no subprocess)
|
// Pass 1: list all image names via unrar (in-process, no subprocess)
|
||||||
let mut image_names: Vec<String> = {
|
let mut image_names: Vec<String> = {
|
||||||
|
|||||||
Reference in New Issue
Block a user