Compare commits
3 Commits
5fb24188e1
...
ed7665248e
| Author | SHA1 | Date | |
|---|---|---|---|
| ed7665248e | |||
| 736b8aedc0 | |||
| 3daa49ae6c |
8
Cargo.lock
generated
8
Cargo.lock
generated
@@ -64,7 +64,7 @@ checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "api"
|
name = "api"
|
||||||
version = "1.15.0"
|
version = "1.16.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"argon2",
|
"argon2",
|
||||||
@@ -1232,7 +1232,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexer"
|
name = "indexer"
|
||||||
version = "1.15.0"
|
version = "1.16.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"axum",
|
"axum",
|
||||||
@@ -1771,7 +1771,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "parsers"
|
name = "parsers"
|
||||||
version = "1.15.0"
|
version = "1.16.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"flate2",
|
"flate2",
|
||||||
@@ -2906,7 +2906,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stripstream-core"
|
name = "stripstream-core"
|
||||||
version = "1.15.0"
|
version = "1.16.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ resolver = "2"
|
|||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
version = "1.15.0"
|
version = "1.16.0"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ pub struct BookDetails {
|
|||||||
tag = "books",
|
tag = "books",
|
||||||
params(
|
params(
|
||||||
("library_id" = Option<String>, Query, description = "Filter by library ID"),
|
("library_id" = Option<String>, Query, description = "Filter by library ID"),
|
||||||
("kind" = Option<String>, Query, description = "Filter by book kind (cbz, cbr, pdf)"),
|
("kind" = Option<String>, Query, description = "Filter by book kind (cbz, cbr, pdf, epub)"),
|
||||||
("series" = Option<String>, Query, description = "Filter by series name (use 'unclassified' for books without series)"),
|
("series" = Option<String>, Query, description = "Filter by series name (use 'unclassified' for books without series)"),
|
||||||
("reading_status" = Option<String>, Query, description = "Filter by reading status, comma-separated (e.g. 'unread,reading')"),
|
("reading_status" = Option<String>, Query, description = "Filter by reading status, comma-separated (e.g. 'unread,reading')"),
|
||||||
("page" = Option<i64>, Query, description = "Page number (1-indexed, default 1)"),
|
("page" = Option<i64>, Query, description = "Page number (1-indexed, default 1)"),
|
||||||
|
|||||||
@@ -351,6 +351,7 @@ async fn prefetch_page(state: AppState, params: &PrefetchParams<'_>) {
|
|||||||
Some(ref e) if e == "cbz" => "cbz",
|
Some(ref e) if e == "cbz" => "cbz",
|
||||||
Some(ref e) if e == "cbr" => "cbr",
|
Some(ref e) if e == "cbr" => "cbr",
|
||||||
Some(ref e) if e == "pdf" => "pdf",
|
Some(ref e) if e == "pdf" => "pdf",
|
||||||
|
Some(ref e) if e == "epub" => "epub",
|
||||||
_ => return,
|
_ => return,
|
||||||
}
|
}
|
||||||
.to_string();
|
.to_string();
|
||||||
@@ -479,6 +480,7 @@ fn render_page(
|
|||||||
"cbz" => parsers::BookFormat::Cbz,
|
"cbz" => parsers::BookFormat::Cbz,
|
||||||
"cbr" => parsers::BookFormat::Cbr,
|
"cbr" => parsers::BookFormat::Cbr,
|
||||||
"pdf" => parsers::BookFormat::Pdf,
|
"pdf" => parsers::BookFormat::Pdf,
|
||||||
|
"epub" => parsers::BookFormat::Epub,
|
||||||
_ => return Err(ApiError::bad_request("unsupported source format")),
|
_ => return Err(ApiError::bad_request("unsupported source format")),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ pub struct SearchResponse {
|
|||||||
params(
|
params(
|
||||||
("q" = String, Query, description = "Search query (books + series via PostgreSQL full-text)"),
|
("q" = String, Query, description = "Search query (books + series via PostgreSQL full-text)"),
|
||||||
("library_id" = Option<String>, Query, description = "Filter by library ID"),
|
("library_id" = Option<String>, Query, description = "Filter by library ID"),
|
||||||
("type" = Option<String>, Query, description = "Filter by type (cbz, cbr, pdf)"),
|
("type" = Option<String>, Query, description = "Filter by type (cbz, cbr, pdf, epub)"),
|
||||||
("kind" = Option<String>, Query, description = "Filter by kind (alias for type)"),
|
("kind" = Option<String>, Query, description = "Filter by kind (alias for type)"),
|
||||||
("limit" = Option<usize>, Query, description = "Max results per type (max 100)"),
|
("limit" = Option<usize>, Query, description = "Max results per type (max 100)"),
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -115,6 +115,7 @@ export function BookCard({ book, readingStatus }: BookCardProps) {
|
|||||||
${(book.format ?? book.kind) === 'cbz' ? 'bg-success/10 text-success' : ''}
|
${(book.format ?? book.kind) === 'cbz' ? 'bg-success/10 text-success' : ''}
|
||||||
${(book.format ?? book.kind) === 'cbr' ? 'bg-warning/10 text-warning' : ''}
|
${(book.format ?? book.kind) === 'cbr' ? 'bg-warning/10 text-warning' : ''}
|
||||||
${(book.format ?? book.kind) === 'pdf' ? 'bg-destructive/10 text-destructive' : ''}
|
${(book.format ?? book.kind) === 'pdf' ? 'bg-destructive/10 text-destructive' : ''}
|
||||||
|
${(book.format ?? book.kind) === 'epub' ? 'bg-info/10 text-info' : ''}
|
||||||
`}>
|
`}>
|
||||||
{book.format ?? book.kind}
|
{book.format ?? book.kind}
|
||||||
</span>
|
</span>
|
||||||
|
|||||||
44
apps/backoffice/app/components/JobDetailLive.tsx
Normal file
44
apps/backoffice/app/components/JobDetailLive.tsx
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useEffect, useRef } from "react";
|
||||||
|
import { useRouter } from "next/navigation";
|
||||||
|
|
||||||
|
interface JobDetailLiveProps {
|
||||||
|
jobId: string;
|
||||||
|
isTerminal: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Invisible client component that keeps a job detail page fresh.
 *
 * While the job is not in a terminal state at the time the effect runs, it
 * subscribes to `/api/jobs/{jobId}/stream` and calls `router.refresh()` for
 * every message received. The stream is closed when a message reports a
 * terminal status, when the stream errors, or when the effect is cleaned up.
 *
 * Renders nothing.
 */
export function JobDetailLive({ jobId, isTerminal }: JobDetailLiveProps) {
  const router = useRouter();

  // Mirror the latest prop in a ref so the effect can read it without
  // listing `isTerminal` among its dependencies.
  const isTerminalRef = useRef(isTerminal);
  isTerminalRef.current = isTerminal;

  useEffect(() => {
    if (isTerminalRef.current) {
      return;
    }

    const stream = new EventSource(`/api/jobs/${jobId}/stream`);
    const closeStream = () => {
      stream.close();
    };

    stream.onmessage = (message) => {
      try {
        const payload = JSON.parse(message.data);
        router.refresh();

        switch (payload.status) {
          case "success":
          case "failed":
          case "cancelled":
            // Terminal status: nothing more will change, stop listening.
            closeStream();
            break;
          default:
            break;
        }
      } catch {
        // ignore parse errors
      }
    };

    // Closing on error also disables EventSource's automatic reconnection.
    stream.onerror = closeStream;

    return closeStream;
  }, [jobId, router]);

  return null;
}
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
|
export const dynamic = "force-dynamic";
|
||||||
|
|
||||||
import { notFound } from "next/navigation";
|
import { notFound } from "next/navigation";
|
||||||
import Link from "next/link";
|
import Link from "next/link";
|
||||||
import { apiFetch, getMetadataBatchReport, getMetadataBatchResults, getMetadataRefreshReport, MetadataBatchReportDto, MetadataBatchResultDto, MetadataRefreshReportDto } from "../../../lib/api";
|
import { apiFetch, getMetadataBatchReport, getMetadataBatchResults, getMetadataRefreshReport, MetadataBatchReportDto, MetadataBatchResultDto, MetadataRefreshReportDto } from "../../../lib/api";
|
||||||
@@ -5,6 +7,7 @@ import {
|
|||||||
Card, CardHeader, CardTitle, CardDescription, CardContent,
|
Card, CardHeader, CardTitle, CardDescription, CardContent,
|
||||||
StatusBadge, JobTypeBadge, StatBox, ProgressBar
|
StatusBadge, JobTypeBadge, StatBox, ProgressBar
|
||||||
} from "../../components/ui";
|
} from "../../components/ui";
|
||||||
|
import { JobDetailLive } from "../../components/JobDetailLive";
|
||||||
import { getServerTranslations } from "../../../lib/i18n/server";
|
import { getServerTranslations } from "../../../lib/i18n/server";
|
||||||
|
|
||||||
interface JobDetailPageProps {
|
interface JobDetailPageProps {
|
||||||
@@ -158,6 +161,7 @@ export default async function JobDetailPage({ params }: JobDetailPageProps) {
|
|||||||
const isCompleted = job.status === "success";
|
const isCompleted = job.status === "success";
|
||||||
const isFailed = job.status === "failed";
|
const isFailed = job.status === "failed";
|
||||||
const isCancelled = job.status === "cancelled";
|
const isCancelled = job.status === "cancelled";
|
||||||
|
const isTerminal = isCompleted || isFailed || isCancelled;
|
||||||
const isExtractingPages = job.status === "extracting_pages";
|
const isExtractingPages = job.status === "extracting_pages";
|
||||||
const isThumbnailPhase = job.status === "generating_thumbnails";
|
const isThumbnailPhase = job.status === "generating_thumbnails";
|
||||||
const isPhase2 = isExtractingPages || isThumbnailPhase;
|
const isPhase2 = isExtractingPages || isThumbnailPhase;
|
||||||
@@ -199,6 +203,7 @@ export default async function JobDetailPage({ params }: JobDetailPageProps) {
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
|
<JobDetailLive jobId={id} isTerminal={isTerminal} />
|
||||||
<div className="mb-6">
|
<div className="mb-6">
|
||||||
<Link
|
<Link
|
||||||
href="/jobs"
|
href="/jobs"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "stripstream-backoffice",
|
"name": "stripstream-backoffice",
|
||||||
"version": "1.15.0",
|
"version": "1.16.0",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"dev": "next dev -p 7082",
|
"dev": "next dev -p 7082",
|
||||||
|
|||||||
@@ -290,6 +290,7 @@ fn book_format_from_str(s: &str) -> Option<BookFormat> {
|
|||||||
"cbz" => Some(BookFormat::Cbz),
|
"cbz" => Some(BookFormat::Cbz),
|
||||||
"cbr" => Some(BookFormat::Cbr),
|
"cbr" => Some(BookFormat::Cbr),
|
||||||
"pdf" => Some(BookFormat::Pdf),
|
"pdf" => Some(BookFormat::Pdf),
|
||||||
|
"epub" => Some(BookFormat::Epub),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ pub fn compute_fingerprint(path: &Path, size: u64, mtime: &DateTime<Utc>) -> Res
|
|||||||
|
|
||||||
pub fn kind_from_format(format: BookFormat) -> &'static str {
|
pub fn kind_from_format(format: BookFormat) -> &'static str {
|
||||||
match format {
|
match format {
|
||||||
BookFormat::Pdf => "ebook",
|
BookFormat::Pdf | BookFormat::Epub => "ebook",
|
||||||
BookFormat::Cbz | BookFormat::Cbr => "comic",
|
BookFormat::Cbz | BookFormat::Cbr => "comic",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ pub enum BookFormat {
|
|||||||
Cbz,
|
Cbz,
|
||||||
Cbr,
|
Cbr,
|
||||||
Pdf,
|
Pdf,
|
||||||
|
Epub,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BookFormat {
|
impl BookFormat {
|
||||||
@@ -17,6 +18,7 @@ impl BookFormat {
|
|||||||
Self::Cbz => "cbz",
|
Self::Cbz => "cbz",
|
||||||
Self::Cbr => "cbr",
|
Self::Cbr => "cbr",
|
||||||
Self::Pdf => "pdf",
|
Self::Pdf => "pdf",
|
||||||
|
Self::Epub => "epub",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -35,6 +37,7 @@ pub fn detect_format(path: &Path) -> Option<BookFormat> {
|
|||||||
"cbz" => Some(BookFormat::Cbz),
|
"cbz" => Some(BookFormat::Cbz),
|
||||||
"cbr" => Some(BookFormat::Cbr),
|
"cbr" => Some(BookFormat::Cbr),
|
||||||
"pdf" => Some(BookFormat::Pdf),
|
"pdf" => Some(BookFormat::Pdf),
|
||||||
|
"epub" => Some(BookFormat::Epub),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -144,6 +147,7 @@ pub fn parse_metadata(
|
|||||||
BookFormat::Cbz => parse_cbz_page_count(path).ok(),
|
BookFormat::Cbz => parse_cbz_page_count(path).ok(),
|
||||||
BookFormat::Cbr => parse_cbr_page_count(path).ok(),
|
BookFormat::Cbr => parse_cbr_page_count(path).ok(),
|
||||||
BookFormat::Pdf => parse_pdf_page_count(path).ok(),
|
BookFormat::Pdf => parse_pdf_page_count(path).ok(),
|
||||||
|
BookFormat::Epub => parse_epub_page_count(path).ok(),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(meta)
|
Ok(meta)
|
||||||
@@ -156,6 +160,7 @@ pub fn analyze_book(path: &Path, format: BookFormat, pdf_render_scale: u32) -> R
|
|||||||
BookFormat::Cbz => analyze_cbz(path, true),
|
BookFormat::Cbz => analyze_cbz(path, true),
|
||||||
BookFormat::Cbr => analyze_cbr(path, true),
|
BookFormat::Cbr => analyze_cbr(path, true),
|
||||||
BookFormat::Pdf => analyze_pdf(path, pdf_render_scale),
|
BookFormat::Pdf => analyze_pdf(path, pdf_render_scale),
|
||||||
|
BookFormat::Epub => analyze_epub(path),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -530,6 +535,7 @@ pub fn list_archive_images(path: &Path, format: BookFormat) -> Result<Vec<String
|
|||||||
BookFormat::Cbz => list_cbz_images(path),
|
BookFormat::Cbz => list_cbz_images(path),
|
||||||
BookFormat::Cbr => list_cbr_images(path),
|
BookFormat::Cbr => list_cbr_images(path),
|
||||||
BookFormat::Pdf => Err(anyhow::anyhow!("list_archive_images not applicable for PDF")),
|
BookFormat::Pdf => Err(anyhow::anyhow!("list_archive_images not applicable for PDF")),
|
||||||
|
BookFormat::Epub => get_epub_image_index(path),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -629,6 +635,7 @@ pub fn extract_image_by_name(path: &Path, format: BookFormat, image_name: &str)
|
|||||||
BookFormat::Cbz => extract_cbz_by_name(path, image_name),
|
BookFormat::Cbz => extract_cbz_by_name(path, image_name),
|
||||||
BookFormat::Cbr => extract_cbr_by_name(path, image_name),
|
BookFormat::Cbr => extract_cbr_by_name(path, image_name),
|
||||||
BookFormat::Pdf => Err(anyhow::anyhow!("use extract_page for PDF")),
|
BookFormat::Pdf => Err(anyhow::anyhow!("use extract_page for PDF")),
|
||||||
|
BookFormat::Epub => extract_cbz_by_name(path, image_name),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -721,6 +728,7 @@ pub fn extract_page(path: &Path, format: BookFormat, page_number: u32, pdf_rende
|
|||||||
let width = if pdf_render_width == 0 { 1200 } else { pdf_render_width };
|
let width = if pdf_render_width == 0 { 1200 } else { pdf_render_width };
|
||||||
render_pdf_page_n(path, page_number, width)
|
render_pdf_page_n(path, page_number, width)
|
||||||
}
|
}
|
||||||
|
BookFormat::Epub => extract_epub_page(path, page_number),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -894,6 +902,340 @@ fn render_pdf_page_n(path: &Path, page_number: u32, width: u32) -> Result<Vec<u8
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// EPUB support — spine-aware image index with cache
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/// Cache of ordered image paths per EPUB file. Avoids re-parsing OPF/XHTML on every page request.
|
||||||
|
static EPUB_INDEX_CACHE: OnceLock<Mutex<HashMap<PathBuf, Vec<String>>>> = OnceLock::new();
|
||||||
|
|
||||||
|
fn epub_index_cache() -> &'static Mutex<HashMap<PathBuf, Vec<String>>> {
|
||||||
|
EPUB_INDEX_CACHE.get_or_init(|| Mutex::new(HashMap::new()))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pre-compiled regex patterns for EPUB XML parsing (compiled once on first use)
|
||||||
|
static RE_EPUB_ROOTFILE: OnceLock<regex::Regex> = OnceLock::new();
|
||||||
|
static RE_EPUB_ITEM: OnceLock<regex::Regex> = OnceLock::new();
|
||||||
|
static RE_EPUB_ITEMREF: OnceLock<regex::Regex> = OnceLock::new();
|
||||||
|
static RE_EPUB_IMG_SRC: OnceLock<regex::Regex> = OnceLock::new();
|
||||||
|
static RE_EPUB_SVG_HREF: OnceLock<regex::Regex> = OnceLock::new();
|
||||||
|
static RE_EPUB_ATTR_ID: OnceLock<regex::Regex> = OnceLock::new();
|
||||||
|
static RE_EPUB_ATTR_HREF: OnceLock<regex::Regex> = OnceLock::new();
|
||||||
|
static RE_EPUB_ATTR_MEDIA: OnceLock<regex::Regex> = OnceLock::new();
|
||||||
|
|
||||||
|
struct EpubManifestItem {
|
||||||
|
href: String,
|
||||||
|
media_type: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the ordered list of image paths for an EPUB file.
|
||||||
|
/// Walks the OPF spine to determine reading order, parses XHTML/SVG pages
|
||||||
|
/// for image references, and falls back to CBZ-style listing if no
|
||||||
|
/// images are found through the spine.
|
||||||
|
fn build_epub_image_index(path: &Path) -> Result<Vec<String>> {
|
||||||
|
let file = std::fs::File::open(path)
|
||||||
|
.with_context(|| format!("cannot open epub: {}", path.display()))?;
|
||||||
|
let mut archive = zip::ZipArchive::new(file)
|
||||||
|
.with_context(|| format!("invalid epub zip: {}", path.display()))?;
|
||||||
|
|
||||||
|
// 1. Find OPF path from META-INF/container.xml
|
||||||
|
let opf_path = {
|
||||||
|
let mut entry = archive
|
||||||
|
.by_name("META-INF/container.xml")
|
||||||
|
.context("missing META-INF/container.xml — not a valid EPUB")?;
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
entry.read_to_end(&mut buf)?;
|
||||||
|
let xml = String::from_utf8_lossy(&buf);
|
||||||
|
let re = RE_EPUB_ROOTFILE.get_or_init(|| {
|
||||||
|
regex::Regex::new(r#"<(?:\w+:)?rootfile[^>]+full-path="([^"]+)""#).unwrap()
|
||||||
|
});
|
||||||
|
re.captures(&xml)
|
||||||
|
.and_then(|c| c.get(1))
|
||||||
|
.map(|m| decode_xml_entities(m.as_str()))
|
||||||
|
.context("no rootfile found in container.xml")?
|
||||||
|
};
|
||||||
|
|
||||||
|
let opf_dir = std::path::Path::new(&opf_path)
|
||||||
|
.parent()
|
||||||
|
.map(|p| p.to_string_lossy().to_string())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// 2. Parse OPF manifest + spine
|
||||||
|
let (manifest, spine_idrefs) = {
|
||||||
|
let mut entry = archive
|
||||||
|
.by_name(&opf_path)
|
||||||
|
.with_context(|| format!("missing OPF file: {}", opf_path))?;
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
entry.read_to_end(&mut buf)?;
|
||||||
|
let xml = String::from_utf8_lossy(&buf);
|
||||||
|
parse_epub_opf(&xml, &opf_dir)?
|
||||||
|
};
|
||||||
|
|
||||||
|
// 3. Walk spine entries to build ordered image list
|
||||||
|
let re_img = RE_EPUB_IMG_SRC.get_or_init(|| {
|
||||||
|
regex::Regex::new(r#"(?i)<img\s[^>]*src=["']([^"']+)["']"#).unwrap()
|
||||||
|
});
|
||||||
|
let re_svg = RE_EPUB_SVG_HREF.get_or_init(|| {
|
||||||
|
regex::Regex::new(r#"(?i)<image\s[^>]*(?:xlink:)?href=["']([^"']+)["']"#).unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
let mut images: Vec<String> = Vec::new();
|
||||||
|
let mut seen = std::collections::HashSet::new();
|
||||||
|
|
||||||
|
for idref in &spine_idrefs {
|
||||||
|
let item = match manifest.get(idref.as_str()) {
|
||||||
|
Some(item) => item,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Direct raster image in spine (rare but possible)
|
||||||
|
if item.media_type.starts_with("image/") && !item.media_type.contains("svg") {
|
||||||
|
if seen.insert(item.href.clone()) {
|
||||||
|
images.push(item.href.clone());
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read XHTML/SVG content — entry is dropped at end of match arm, releasing archive borrow
|
||||||
|
let content = match archive.by_name(&item.href) {
|
||||||
|
Ok(mut entry) => {
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
match entry.read_to_end(&mut buf) {
|
||||||
|
Ok(_) => String::from_utf8_lossy(&buf).to_string(),
|
||||||
|
Err(_) => continue,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
let content_dir = std::path::Path::new(&item.href)
|
||||||
|
.parent()
|
||||||
|
.map(|p| p.to_string_lossy().to_string())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// Extract <img src="..."> and <image [xlink:]href="...">
|
||||||
|
for re in [re_img, re_svg] {
|
||||||
|
for cap in re.captures_iter(&content) {
|
||||||
|
if let Some(src) = cap.get(1) {
|
||||||
|
let src_str = src.as_str();
|
||||||
|
if src_str.starts_with("data:") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let decoded = decode_xml_entities(&percent_decode_epub(src_str));
|
||||||
|
let resolved = resolve_epub_path(&content_dir, &decoded);
|
||||||
|
if seen.insert(resolved.clone()) {
|
||||||
|
images.push(resolved);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Fallback: no images from spine → list all images in ZIP (CBZ-style)
|
||||||
|
if images.is_empty() {
|
||||||
|
for i in 0..archive.len() {
|
||||||
|
if let Ok(entry) = archive.by_index(i) {
|
||||||
|
let name = entry.name().to_string();
|
||||||
|
if is_image_name(&name.to_ascii_lowercase()) && seen.insert(name.clone()) {
|
||||||
|
images.push(name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
images.sort_by(|a, b| natord::compare(a, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
if images.is_empty() {
|
||||||
|
return Err(anyhow::anyhow!("no images found in epub: {}", path.display()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(images)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_epub_opf(
|
||||||
|
xml: &str,
|
||||||
|
opf_dir: &str,
|
||||||
|
) -> Result<(HashMap<String, EpubManifestItem>, Vec<String>)> {
|
||||||
|
let re_item = RE_EPUB_ITEM.get_or_init(|| {
|
||||||
|
regex::Regex::new(r#"(?s)<(?:\w+:)?item\s([^>]+?)/?>"#).unwrap()
|
||||||
|
});
|
||||||
|
let re_itemref = RE_EPUB_ITEMREF.get_or_init(|| {
|
||||||
|
regex::Regex::new(r#"<(?:\w+:)?itemref\s[^>]*idref="([^"]+)""#).unwrap()
|
||||||
|
});
|
||||||
|
let re_id = RE_EPUB_ATTR_ID.get_or_init(|| {
|
||||||
|
regex::Regex::new(r#"(?:^|\s)id="([^"]+)""#).unwrap()
|
||||||
|
});
|
||||||
|
let re_href = RE_EPUB_ATTR_HREF.get_or_init(|| {
|
||||||
|
regex::Regex::new(r#"(?:^|\s)href="([^"]+)""#).unwrap()
|
||||||
|
});
|
||||||
|
let re_media = RE_EPUB_ATTR_MEDIA.get_or_init(|| {
|
||||||
|
regex::Regex::new(r#"media-type="([^"]+)""#).unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
let mut manifest: HashMap<String, EpubManifestItem> = HashMap::new();
|
||||||
|
for cap in re_item.captures_iter(xml) {
|
||||||
|
if let Some(attrs) = cap.get(1) {
|
||||||
|
let a = attrs.as_str();
|
||||||
|
let id = re_id.captures(a).and_then(|c| c.get(1));
|
||||||
|
let href = re_href.captures(a).and_then(|c| c.get(1));
|
||||||
|
let media = re_media.captures(a).and_then(|c| c.get(1));
|
||||||
|
|
||||||
|
if let (Some(id), Some(href), Some(media)) = (id, href, media) {
|
||||||
|
let decoded_href = decode_xml_entities(&percent_decode_epub(href.as_str()));
|
||||||
|
let resolved = resolve_epub_path(opf_dir, &decoded_href);
|
||||||
|
manifest.insert(
|
||||||
|
id.as_str().to_string(),
|
||||||
|
EpubManifestItem {
|
||||||
|
href: resolved,
|
||||||
|
media_type: media.as_str().to_string(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let spine_idrefs: Vec<String> = re_itemref
|
||||||
|
.captures_iter(xml)
|
||||||
|
.filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok((manifest, spine_idrefs))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the cached image index for an EPUB, building it on first access.
|
||||||
|
fn get_epub_image_index(path: &Path) -> Result<Vec<String>> {
|
||||||
|
{
|
||||||
|
let cache = epub_index_cache().lock().unwrap();
|
||||||
|
if let Some(names) = cache.get(path) {
|
||||||
|
return Ok(names.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let images = build_epub_image_index(path)?;
|
||||||
|
{
|
||||||
|
let mut cache = epub_index_cache().lock().unwrap();
|
||||||
|
cache.insert(path.to_path_buf(), images.clone());
|
||||||
|
}
|
||||||
|
Ok(images)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_epub_page_count(path: &Path) -> Result<i32> {
|
||||||
|
let images = build_epub_image_index(path)?;
|
||||||
|
Ok(images.len() as i32)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn analyze_epub(path: &Path) -> Result<(i32, Vec<u8>)> {
|
||||||
|
let images = get_epub_image_index(path)?;
|
||||||
|
let count = images.len() as i32;
|
||||||
|
|
||||||
|
let file = std::fs::File::open(path)
|
||||||
|
.with_context(|| format!("cannot open epub: {}", path.display()))?;
|
||||||
|
let mut archive = zip::ZipArchive::new(file)?;
|
||||||
|
|
||||||
|
for img_path in &images {
|
||||||
|
if let Ok(mut entry) = archive.by_name(img_path) {
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
if entry.read_to_end(&mut buf).is_ok() && !buf.is_empty() {
|
||||||
|
return Ok((count, buf));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(anyhow::anyhow!(
|
||||||
|
"no readable images in epub: {}",
|
||||||
|
path.display()
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_epub_page(path: &Path, page_number: u32) -> Result<Vec<u8>> {
|
||||||
|
let images = get_epub_image_index(path)?;
|
||||||
|
let index = page_number as usize - 1;
|
||||||
|
let img_path = images
|
||||||
|
.get(index)
|
||||||
|
.with_context(|| {
|
||||||
|
format!(
|
||||||
|
"page {} out of range (total: {})",
|
||||||
|
page_number,
|
||||||
|
images.len()
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let file = std::fs::File::open(path)
|
||||||
|
.with_context(|| format!("cannot open epub: {}", path.display()))?;
|
||||||
|
let mut archive = zip::ZipArchive::new(file)?;
|
||||||
|
let mut entry = archive
|
||||||
|
.by_name(img_path)
|
||||||
|
.with_context(|| format!("image '{}' not found in epub", img_path))?;
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
entry.read_to_end(&mut buf)?;
|
||||||
|
Ok(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- EPUB path/encoding helpers ---
|
||||||
|
|
||||||
|
fn resolve_epub_path(base_dir: &str, href: &str) -> String {
|
||||||
|
if let Some(stripped) = href.strip_prefix('/') {
|
||||||
|
return normalize_epub_path(stripped);
|
||||||
|
}
|
||||||
|
if base_dir.is_empty() {
|
||||||
|
return normalize_epub_path(href);
|
||||||
|
}
|
||||||
|
normalize_epub_path(&format!("{}/{}", base_dir, href))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collapse `.`, `..` and empty segments of a `/`-separated path.
///
/// `..` removes the previous kept segment; at the root it is dropped, so the
/// result never climbs above the root. The output has no leading or trailing
/// slash.
fn normalize_epub_path(path: &str) -> String {
    let kept = path
        .split('/')
        .fold(Vec::<&str>::new(), |mut stack, segment| {
            match segment {
                "" | "." => {}
                ".." => {
                    stack.pop();
                }
                name => stack.push(name),
            }
            stack
        });
    kept.join("/")
}
|
||||||
|
|
||||||
|
fn percent_decode_epub(s: &str) -> String {
|
||||||
|
if !s.contains('%') {
|
||||||
|
return s.to_string();
|
||||||
|
}
|
||||||
|
let bytes = s.as_bytes();
|
||||||
|
let mut result = Vec::with_capacity(bytes.len());
|
||||||
|
let mut i = 0;
|
||||||
|
while i < bytes.len() {
|
||||||
|
if bytes[i] == b'%' && i + 2 < bytes.len() {
|
||||||
|
if let (Some(h), Some(l)) = (epub_hex_val(bytes[i + 1]), epub_hex_val(bytes[i + 2])) {
|
||||||
|
result.push(h * 16 + l);
|
||||||
|
i += 3;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.push(bytes[i]);
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
String::from_utf8_lossy(&result).to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Value (0–15) of an ASCII hex digit, accepting both cases; `None` for any other byte.
fn epub_hex_val(b: u8) -> Option<u8> {
    // A u8 maps to U+0000..=U+00FF, and `to_digit(16)` only accepts the ASCII
    // digits and letters a-f / A-F within that range.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
|
||||||
|
|
||||||
|
/// Decode the five predefined XML entities (`&lt;`, `&gt;`, `&quot;`,
/// `&apos;`, `&amp;`) in `s`.
///
/// Numeric character references (`&#38;`, `&#x26;`) are not handled and pass
/// through unchanged. Input without any `&` is returned as-is.
fn decode_xml_entities(s: &str) -> String {
    if !s.contains('&') {
        return s.to_string();
    }
    s.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        // `&amp;` is decoded last: doing it first would turn an escaped
        // `&amp;lt;` into `&lt;` and then wrongly into `<`.
        .replace("&amp;", "&")
}
|
||||||
|
|
||||||
/// Convert a CBR file to CBZ in-place (same directory, same stem).
|
/// Convert a CBR file to CBZ in-place (same directory, same stem).
|
||||||
///
|
///
|
||||||
/// The conversion is safe: a `.cbz.tmp` file is written first, verified, then
|
/// The conversion is safe: a `.cbz.tmp` file is written first, verified, then
|
||||||
|
|||||||
10
infra/migrations/0046_add_epub_format.sql
Normal file
10
infra/migrations/0046_add_epub_format.sql
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
-- Allow 'epub' as a format value in the book_files and books tables.
-- A CHECK constraint cannot be altered in place in PostgreSQL, so each one
-- is dropped and then re-created with the extended value list.

-- book_files.format
ALTER TABLE book_files
    DROP CONSTRAINT IF EXISTS book_files_format_check;
ALTER TABLE book_files
    ADD CONSTRAINT book_files_format_check
    CHECK (format IN ('pdf', 'cbz', 'cbr', 'epub'));

-- books.format (denormalized column added in 0020)
ALTER TABLE books
    DROP CONSTRAINT IF EXISTS books_format_check;
ALTER TABLE books
    ADD CONSTRAINT books_format_check
    CHECK (format IN ('pdf', 'cbz', 'cbr', 'epub'));
|
||||||
Reference in New Issue
Block a user