Compare commits

...

3 Commits

Author SHA1 Message Date
ed7665248e chore: bump version to 1.16.0
All checks were successful
Deploy with Docker Compose / deploy (push) Successful in 1m5s
2026-03-21 07:06:28 +01:00
736b8aedc0 feat: add EPUB format support with spine-aware image extraction
Parse EPUB structure (container.xml → OPF → spine → XHTML) to extract
images in reading order. Zero new dependencies — reuses zip + regex
crates with pre-compiled regexes and per-file index cache for
performance. Falls back to CBZ-style image listing when spine contains
no images. Includes DB migration, API/indexer/backoffice updates.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 07:05:47 +01:00
3daa49ae6c feat: add live refresh to job detail page via SSE
The job detail page was only server-rendered with no live updates,
unlike the jobs list page. Add a lightweight JobDetailLive client
component that subscribes to the existing SSE endpoint and calls
router.refresh() on each update, keeping the page in sync while
a job is running.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 06:52:57 +01:00
13 changed files with 414 additions and 9 deletions

8
Cargo.lock generated
View File

@@ -64,7 +64,7 @@ checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "api"
version = "1.15.0"
version = "1.16.0"
dependencies = [
"anyhow",
"argon2",
@@ -1232,7 +1232,7 @@ dependencies = [
[[package]]
name = "indexer"
version = "1.15.0"
version = "1.16.0"
dependencies = [
"anyhow",
"axum",
@@ -1771,7 +1771,7 @@ dependencies = [
[[package]]
name = "parsers"
version = "1.15.0"
version = "1.16.0"
dependencies = [
"anyhow",
"flate2",
@@ -2906,7 +2906,7 @@ dependencies = [
[[package]]
name = "stripstream-core"
version = "1.15.0"
version = "1.16.0"
dependencies = [
"anyhow",
"serde",

View File

@@ -9,7 +9,7 @@ resolver = "2"
[workspace.package]
edition = "2021"
version = "1.15.0"
version = "1.16.0"
license = "MIT"
[workspace.dependencies]

View File

@@ -102,7 +102,7 @@ pub struct BookDetails {
tag = "books",
params(
("library_id" = Option<String>, Query, description = "Filter by library ID"),
("kind" = Option<String>, Query, description = "Filter by book kind (cbz, cbr, pdf)"),
("kind" = Option<String>, Query, description = "Filter by book kind (cbz, cbr, pdf, epub)"),
("series" = Option<String>, Query, description = "Filter by series name (use 'unclassified' for books without series)"),
("reading_status" = Option<String>, Query, description = "Filter by reading status, comma-separated (e.g. 'unread,reading')"),
("page" = Option<i64>, Query, description = "Page number (1-indexed, default 1)"),

View File

@@ -351,6 +351,7 @@ async fn prefetch_page(state: AppState, params: &PrefetchParams<'_>) {
Some(ref e) if e == "cbz" => "cbz",
Some(ref e) if e == "cbr" => "cbr",
Some(ref e) if e == "pdf" => "pdf",
Some(ref e) if e == "epub" => "epub",
_ => return,
}
.to_string();
@@ -479,6 +480,7 @@ fn render_page(
"cbz" => parsers::BookFormat::Cbz,
"cbr" => parsers::BookFormat::Cbr,
"pdf" => parsers::BookFormat::Pdf,
"epub" => parsers::BookFormat::Epub,
_ => return Err(ApiError::bad_request("unsupported source format")),
};

View File

@@ -47,7 +47,7 @@ pub struct SearchResponse {
params(
("q" = String, Query, description = "Search query (books + series via PostgreSQL full-text)"),
("library_id" = Option<String>, Query, description = "Filter by library ID"),
("type" = Option<String>, Query, description = "Filter by type (cbz, cbr, pdf)"),
("type" = Option<String>, Query, description = "Filter by type (cbz, cbr, pdf, epub)"),
("kind" = Option<String>, Query, description = "Filter by kind (alias for type)"),
("limit" = Option<usize>, Query, description = "Max results per type (max 100)"),
),

View File

@@ -115,6 +115,7 @@ export function BookCard({ book, readingStatus }: BookCardProps) {
${(book.format ?? book.kind) === 'cbz' ? 'bg-success/10 text-success' : ''}
${(book.format ?? book.kind) === 'cbr' ? 'bg-warning/10 text-warning' : ''}
${(book.format ?? book.kind) === 'pdf' ? 'bg-destructive/10 text-destructive' : ''}
${(book.format ?? book.kind) === 'epub' ? 'bg-info/10 text-info' : ''}
`}>
{book.format ?? book.kind}
</span>

View File

@@ -0,0 +1,44 @@
"use client";
import { useEffect, useRef } from "react";
import { useRouter } from "next/navigation";
interface JobDetailLiveProps {
jobId: string;
isTerminal: boolean;
}
/**
 * Invisible client component that keeps the server-rendered job detail page
 * live: subscribes to the job's SSE stream and triggers router.refresh() on
 * every event. Closes the stream once a terminal status arrives, on stream
 * error, and on unmount. Renders nothing.
 */
export function JobDetailLive({ jobId, isTerminal }: JobDetailLiveProps) {
  const router = useRouter();
  // Keep the latest terminal flag readable inside the effect without
  // re-subscribing when it flips.
  const isTerminalRef = useRef(isTerminal);
  isTerminalRef.current = isTerminal;

  useEffect(() => {
    // Already finished when mounted — nothing to watch.
    if (isTerminalRef.current) return;

    const source = new EventSource(`/api/jobs/${jobId}/stream`);

    source.onmessage = (event) => {
      try {
        const payload = JSON.parse(event.data);
        router.refresh();
        const done =
          payload.status === "success" ||
          payload.status === "failed" ||
          payload.status === "cancelled";
        if (done) source.close();
      } catch {
        // ignore parse errors
      }
    };

    source.onerror = () => source.close();

    return () => source.close();
  }, [jobId, router]);

  return null;
}

View File

@@ -1,3 +1,5 @@
export const dynamic = "force-dynamic";
import { notFound } from "next/navigation";
import Link from "next/link";
import { apiFetch, getMetadataBatchReport, getMetadataBatchResults, getMetadataRefreshReport, MetadataBatchReportDto, MetadataBatchResultDto, MetadataRefreshReportDto } from "../../../lib/api";
@@ -5,6 +7,7 @@ import {
Card, CardHeader, CardTitle, CardDescription, CardContent,
StatusBadge, JobTypeBadge, StatBox, ProgressBar
} from "../../components/ui";
import { JobDetailLive } from "../../components/JobDetailLive";
import { getServerTranslations } from "../../../lib/i18n/server";
interface JobDetailPageProps {
@@ -158,6 +161,7 @@ export default async function JobDetailPage({ params }: JobDetailPageProps) {
const isCompleted = job.status === "success";
const isFailed = job.status === "failed";
const isCancelled = job.status === "cancelled";
const isTerminal = isCompleted || isFailed || isCancelled;
const isExtractingPages = job.status === "extracting_pages";
const isThumbnailPhase = job.status === "generating_thumbnails";
const isPhase2 = isExtractingPages || isThumbnailPhase;
@@ -199,6 +203,7 @@ export default async function JobDetailPage({ params }: JobDetailPageProps) {
return (
<>
<JobDetailLive jobId={id} isTerminal={isTerminal} />
<div className="mb-6">
<Link
href="/jobs"

View File

@@ -1,6 +1,6 @@
{
"name": "stripstream-backoffice",
"version": "1.15.0",
"version": "1.16.0",
"private": true,
"scripts": {
"dev": "next dev -p 7082",

View File

@@ -290,6 +290,7 @@ fn book_format_from_str(s: &str) -> Option<BookFormat> {
"cbz" => Some(BookFormat::Cbz),
"cbr" => Some(BookFormat::Cbr),
"pdf" => Some(BookFormat::Pdf),
"epub" => Some(BookFormat::Epub),
_ => None,
}
}

View File

@@ -40,7 +40,7 @@ pub fn compute_fingerprint(path: &Path, size: u64, mtime: &DateTime<Utc>) -> Res
pub fn kind_from_format(format: BookFormat) -> &'static str {
match format {
BookFormat::Pdf => "ebook",
BookFormat::Pdf | BookFormat::Epub => "ebook",
BookFormat::Cbz | BookFormat::Cbr => "comic",
}
}

View File

@@ -9,6 +9,7 @@ pub enum BookFormat {
Cbz,
Cbr,
Pdf,
Epub,
}
impl BookFormat {
@@ -17,6 +18,7 @@ impl BookFormat {
Self::Cbz => "cbz",
Self::Cbr => "cbr",
Self::Pdf => "pdf",
Self::Epub => "epub",
}
}
}
@@ -35,6 +37,7 @@ pub fn detect_format(path: &Path) -> Option<BookFormat> {
"cbz" => Some(BookFormat::Cbz),
"cbr" => Some(BookFormat::Cbr),
"pdf" => Some(BookFormat::Pdf),
"epub" => Some(BookFormat::Epub),
_ => None,
}
}
@@ -144,6 +147,7 @@ pub fn parse_metadata(
BookFormat::Cbz => parse_cbz_page_count(path).ok(),
BookFormat::Cbr => parse_cbr_page_count(path).ok(),
BookFormat::Pdf => parse_pdf_page_count(path).ok(),
BookFormat::Epub => parse_epub_page_count(path).ok(),
};
Ok(meta)
@@ -156,6 +160,7 @@ pub fn analyze_book(path: &Path, format: BookFormat, pdf_render_scale: u32) -> R
BookFormat::Cbz => analyze_cbz(path, true),
BookFormat::Cbr => analyze_cbr(path, true),
BookFormat::Pdf => analyze_pdf(path, pdf_render_scale),
BookFormat::Epub => analyze_epub(path),
}
}
@@ -530,6 +535,7 @@ pub fn list_archive_images(path: &Path, format: BookFormat) -> Result<Vec<String
BookFormat::Cbz => list_cbz_images(path),
BookFormat::Cbr => list_cbr_images(path),
BookFormat::Pdf => Err(anyhow::anyhow!("list_archive_images not applicable for PDF")),
BookFormat::Epub => get_epub_image_index(path),
}
}
@@ -629,6 +635,7 @@ pub fn extract_image_by_name(path: &Path, format: BookFormat, image_name: &str)
BookFormat::Cbz => extract_cbz_by_name(path, image_name),
BookFormat::Cbr => extract_cbr_by_name(path, image_name),
BookFormat::Pdf => Err(anyhow::anyhow!("use extract_page for PDF")),
BookFormat::Epub => extract_cbz_by_name(path, image_name),
}
}
@@ -721,6 +728,7 @@ pub fn extract_page(path: &Path, format: BookFormat, page_number: u32, pdf_rende
let width = if pdf_render_width == 0 { 1200 } else { pdf_render_width };
render_pdf_page_n(path, page_number, width)
}
BookFormat::Epub => extract_epub_page(path, page_number),
}
}
@@ -894,6 +902,340 @@ fn render_pdf_page_n(path: &Path, page_number: u32, width: u32) -> Result<Vec<u8
}
// ============================================================
// EPUB support — spine-aware image index with cache
// ============================================================
/// Cache of ordered image paths per EPUB file. Avoids re-parsing OPF/XHTML on every page request.
// NOTE(review): keyed by path only — entries are never evicted or invalidated
// when the file changes on disk; confirm that is acceptable for the indexer.
static EPUB_INDEX_CACHE: OnceLock<Mutex<HashMap<PathBuf, Vec<String>>>> = OnceLock::new();

/// Lazily-initialized accessor for the process-wide EPUB index cache.
fn epub_index_cache() -> &'static Mutex<HashMap<PathBuf, Vec<String>>> {
    EPUB_INDEX_CACHE.get_or_init(|| Mutex::new(HashMap::new()))
}
// Pre-compiled regex patterns for EPUB XML parsing (compiled once on first use)
static RE_EPUB_ROOTFILE: OnceLock<regex::Regex> = OnceLock::new();
static RE_EPUB_ITEM: OnceLock<regex::Regex> = OnceLock::new();
static RE_EPUB_ITEMREF: OnceLock<regex::Regex> = OnceLock::new();
static RE_EPUB_IMG_SRC: OnceLock<regex::Regex> = OnceLock::new();
static RE_EPUB_SVG_HREF: OnceLock<regex::Regex> = OnceLock::new();
static RE_EPUB_ATTR_ID: OnceLock<regex::Regex> = OnceLock::new();
static RE_EPUB_ATTR_HREF: OnceLock<regex::Regex> = OnceLock::new();
static RE_EPUB_ATTR_MEDIA: OnceLock<regex::Regex> = OnceLock::new();
/// One `<item>` entry from the OPF manifest. The `href` is stored already
/// entity/percent-decoded and resolved relative to the OPF directory, so it
/// is directly usable as a ZIP entry name.
struct EpubManifestItem {
    // Archive path of the resource (ZIP entry name).
    href: String,
    // MIME type from the item's `media-type` attribute (e.g. "image/jpeg").
    media_type: String,
}
/// Build the ordered list of image paths for an EPUB file.
/// Walks the OPF spine to determine reading order, parses XHTML/SVG pages
/// for image references, and falls back to CBZ-style listing if no
/// images are found through the spine.
///
/// Returns an error if the file is not a readable EPUB/ZIP, if the
/// container/OPF cannot be located, or if no images are found at all.
fn build_epub_image_index(path: &Path) -> Result<Vec<String>> {
    let file = std::fs::File::open(path)
        .with_context(|| format!("cannot open epub: {}", path.display()))?;
    let mut archive = zip::ZipArchive::new(file)
        .with_context(|| format!("invalid epub zip: {}", path.display()))?;
    // 1. Find OPF path from META-INF/container.xml
    // (the `rootfile` element's `full-path` attribute points at the package
    // document; scoped block so the ZIP entry borrow is released afterwards)
    let opf_path = {
        let mut entry = archive
            .by_name("META-INF/container.xml")
            .context("missing META-INF/container.xml — not a valid EPUB")?;
        let mut buf = Vec::new();
        entry.read_to_end(&mut buf)?;
        // lossy UTF-8: tolerate stray bytes rather than failing the whole book
        let xml = String::from_utf8_lossy(&buf);
        let re = RE_EPUB_ROOTFILE.get_or_init(|| {
            regex::Regex::new(r#"<(?:\w+:)?rootfile[^>]+full-path="([^"]+)""#).unwrap()
        });
        re.captures(&xml)
            .and_then(|c| c.get(1))
            .map(|m| decode_xml_entities(m.as_str()))
            .context("no rootfile found in container.xml")?
    };
    // Manifest hrefs are relative to the OPF's own directory.
    let opf_dir = std::path::Path::new(&opf_path)
        .parent()
        .map(|p| p.to_string_lossy().to_string())
        .unwrap_or_default();
    // 2. Parse OPF manifest + spine
    let (manifest, spine_idrefs) = {
        let mut entry = archive
            .by_name(&opf_path)
            .with_context(|| format!("missing OPF file: {}", opf_path))?;
        let mut buf = Vec::new();
        entry.read_to_end(&mut buf)?;
        let xml = String::from_utf8_lossy(&buf);
        parse_epub_opf(&xml, &opf_dir)?
    };
    // 3. Walk spine entries to build ordered image list
    let re_img = RE_EPUB_IMG_SRC.get_or_init(|| {
        regex::Regex::new(r#"(?i)<img\s[^>]*src=["']([^"']+)["']"#).unwrap()
    });
    let re_svg = RE_EPUB_SVG_HREF.get_or_init(|| {
        regex::Regex::new(r#"(?i)<image\s[^>]*(?:xlink:)?href=["']([^"']+)["']"#).unwrap()
    });
    let mut images: Vec<String> = Vec::new();
    // `seen` deduplicates while `images` preserves first-seen (reading) order.
    let mut seen = std::collections::HashSet::new();
    for idref in &spine_idrefs {
        // Spine entries whose idref has no manifest item are silently skipped.
        let item = match manifest.get(idref.as_str()) {
            Some(item) => item,
            None => continue,
        };
        // Direct raster image in spine (rare but possible)
        if item.media_type.starts_with("image/") && !item.media_type.contains("svg") {
            if seen.insert(item.href.clone()) {
                images.push(item.href.clone());
            }
            continue;
        }
        // Read XHTML/SVG content — entry is dropped at end of match arm, releasing archive borrow
        // Unreadable/missing entries are skipped rather than failing the book.
        let content = match archive.by_name(&item.href) {
            Ok(mut entry) => {
                let mut buf = Vec::new();
                match entry.read_to_end(&mut buf) {
                    Ok(_) => String::from_utf8_lossy(&buf).to_string(),
                    Err(_) => continue,
                }
            }
            Err(_) => continue,
        };
        // Image srcs inside a page are relative to that page's directory.
        let content_dir = std::path::Path::new(&item.href)
            .parent()
            .map(|p| p.to_string_lossy().to_string())
            .unwrap_or_default();
        // Extract <img src="..."> and <image [xlink:]href="...">
        for re in [re_img, re_svg] {
            for cap in re.captures_iter(&content) {
                if let Some(src) = cap.get(1) {
                    let src_str = src.as_str();
                    // Inline data: URIs are not archive members — skip.
                    if src_str.starts_with("data:") {
                        continue;
                    }
                    // Decode %XX then XML entities, then resolve ./.. segments
                    // so the result matches the ZIP entry name exactly.
                    let decoded = decode_xml_entities(&percent_decode_epub(src_str));
                    let resolved = resolve_epub_path(&content_dir, &decoded);
                    if seen.insert(resolved.clone()) {
                        images.push(resolved);
                    }
                }
            }
        }
    }
    // 4. Fallback: no images from spine → list all images in ZIP (CBZ-style)
    if images.is_empty() {
        for i in 0..archive.len() {
            if let Ok(entry) = archive.by_index(i) {
                let name = entry.name().to_string();
                if is_image_name(&name.to_ascii_lowercase()) && seen.insert(name.clone()) {
                    images.push(name);
                }
            }
        }
        // Natural sort (file2 < file10) to approximate reading order.
        images.sort_by(|a, b| natord::compare(a, b));
    }
    if images.is_empty() {
        return Err(anyhow::anyhow!("no images found in epub: {}", path.display()));
    }
    Ok(images)
}
/// Parse an OPF package document.
///
/// Returns `(manifest, spine_idrefs)` where `manifest` maps item id →
/// `EpubManifestItem` (href resolved against `opf_dir`) and `spine_idrefs`
/// is the ordered list of `<itemref idref="…">` values (reading order).
///
/// NOTE(review): the regexes only match double-quoted attributes; an OPF
/// using single quotes would yield an empty manifest/spine — confirm this
/// is acceptable for real-world files.
fn parse_epub_opf(
    xml: &str,
    opf_dir: &str,
) -> Result<(HashMap<String, EpubManifestItem>, Vec<String>)> {
    // `(?s)` lets the attribute blob span newlines; `(?:\w+:)?` tolerates a
    // namespace prefix (e.g. <opf:item>). Note "item" followed by `\s` cannot
    // accidentally match "itemref".
    let re_item = RE_EPUB_ITEM.get_or_init(|| {
        regex::Regex::new(r#"(?s)<(?:\w+:)?item\s([^>]+?)/?>"#).unwrap()
    });
    let re_itemref = RE_EPUB_ITEMREF.get_or_init(|| {
        regex::Regex::new(r#"<(?:\w+:)?itemref\s[^>]*idref="([^"]+)""#).unwrap()
    });
    // Attribute extractors run against the captured attribute blob, so id/
    // href/media-type may appear in any order within the tag.
    let re_id = RE_EPUB_ATTR_ID.get_or_init(|| {
        regex::Regex::new(r#"(?:^|\s)id="([^"]+)""#).unwrap()
    });
    let re_href = RE_EPUB_ATTR_HREF.get_or_init(|| {
        regex::Regex::new(r#"(?:^|\s)href="([^"]+)""#).unwrap()
    });
    let re_media = RE_EPUB_ATTR_MEDIA.get_or_init(|| {
        regex::Regex::new(r#"media-type="([^"]+)""#).unwrap()
    });
    let mut manifest: HashMap<String, EpubManifestItem> = HashMap::new();
    for cap in re_item.captures_iter(xml) {
        if let Some(attrs) = cap.get(1) {
            let a = attrs.as_str();
            let id = re_id.captures(a).and_then(|c| c.get(1));
            let href = re_href.captures(a).and_then(|c| c.get(1));
            let media = re_media.captures(a).and_then(|c| c.get(1));
            // Items missing any of the three attributes are skipped entirely.
            if let (Some(id), Some(href), Some(media)) = (id, href, media) {
                // Decode %XX and XML entities, then resolve relative to the
                // OPF directory so the href matches a ZIP entry name.
                let decoded_href = decode_xml_entities(&percent_decode_epub(href.as_str()));
                let resolved = resolve_epub_path(opf_dir, &decoded_href);
                manifest.insert(
                    id.as_str().to_string(),
                    EpubManifestItem {
                        href: resolved,
                        media_type: media.as_str().to_string(),
                    },
                );
            }
        }
    }
    // Spine order is document order of <itemref> elements.
    let spine_idrefs: Vec<String> = re_itemref
        .captures_iter(xml)
        .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
        .collect();
    Ok((manifest, spine_idrefs))
}
/// Get the cached image index for an EPUB, building it on first access.
///
/// The lock is never held across the (potentially slow) build: we take it
/// once for the lookup, release it, build, then take it again to insert.
/// Two threads may race and both build; the second insert simply overwrites
/// with an identical value.
fn get_epub_image_index(path: &Path) -> Result<Vec<String>> {
    // Fast path: cache hit. The guard (a temporary) is dropped as soon as
    // this `if let` ends, before any build work starts.
    if let Some(cached) = epub_index_cache().lock().unwrap().get(path) {
        return Ok(cached.clone());
    }
    let images = build_epub_image_index(path)?;
    epub_index_cache()
        .lock()
        .unwrap()
        .insert(path.to_path_buf(), images.clone());
    Ok(images)
}
/// Page count of an EPUB = number of images discovered in reading order.
// NOTE(review): calls build_epub_image_index directly rather than the cached
// accessor — presumably so a metadata re-scan always reflects the file on
// disk (the cache is never invalidated); confirm this is intentional.
fn parse_epub_page_count(path: &Path) -> Result<i32> {
    build_epub_image_index(path).map(|imgs| imgs.len() as i32)
}
/// Analyze an EPUB: returns `(page_count, bytes_of_first_readable_image)`.
/// The first non-empty, readable image in reading order is used (e.g. as a
/// cover/thumbnail source); unreadable entries are skipped.
fn analyze_epub(path: &Path) -> Result<(i32, Vec<u8>)> {
    let images = get_epub_image_index(path)?;
    let page_count = images.len() as i32;
    let file = std::fs::File::open(path)
        .with_context(|| format!("cannot open epub: {}", path.display()))?;
    let mut archive = zip::ZipArchive::new(file)?;
    for candidate in &images {
        let mut entry = match archive.by_name(candidate) {
            Ok(e) => e,
            Err(_) => continue,
        };
        let mut data = Vec::new();
        if entry.read_to_end(&mut data).is_ok() && !data.is_empty() {
            return Ok((page_count, data));
        }
    }
    Err(anyhow::anyhow!(
        "no readable images in epub: {}",
        path.display()
    ))
}
fn extract_epub_page(path: &Path, page_number: u32) -> Result<Vec<u8>> {
let images = get_epub_image_index(path)?;
let index = page_number as usize - 1;
let img_path = images
.get(index)
.with_context(|| {
format!(
"page {} out of range (total: {})",
page_number,
images.len()
)
})?;
let file = std::fs::File::open(path)
.with_context(|| format!("cannot open epub: {}", path.display()))?;
let mut archive = zip::ZipArchive::new(file)?;
let mut entry = archive
.by_name(img_path)
.with_context(|| format!("image '{}' not found in epub", img_path))?;
let mut buf = Vec::new();
entry.read_to_end(&mut buf)?;
Ok(buf)
}
// --- EPUB path/encoding helpers ---

/// Resolve an href against `base_dir` inside the EPUB archive.
/// A leading '/' means "relative to the archive root"; otherwise the href
/// is joined onto `base_dir` (if any) and normalized.
fn resolve_epub_path(base_dir: &str, href: &str) -> String {
    match href.strip_prefix('/') {
        Some(rooted) => normalize_epub_path(rooted),
        None if base_dir.is_empty() => normalize_epub_path(href),
        None => normalize_epub_path(&format!("{}/{}", base_dir, href)),
    }
}

/// Collapse ".", ".." and empty segments into a canonical, slash-separated
/// path with no leading slash. A ".." at the root is silently dropped.
fn normalize_epub_path(path: &str) -> String {
    let mut stack: Vec<&str> = Vec::new();
    for segment in path.split('/') {
        if segment == ".." {
            // Step up one level; pop() on an empty stack is a no-op.
            stack.pop();
        } else if !segment.is_empty() && segment != "." {
            stack.push(segment);
        }
    }
    stack.join("/")
}
/// Decode %XX percent-escapes in an EPUB href (e.g. "image%201.png").
/// Malformed or truncated escapes are passed through unchanged; the result
/// is re-validated as UTF-8 lossily.
fn percent_decode_epub(s: &str) -> String {
    // Fast path: nothing to decode.
    if !s.contains('%') {
        return s.to_string();
    }
    let bytes = s.as_bytes();
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut pos = 0;
    while pos < bytes.len() {
        // A valid escape is '%' followed by two hex digits, all in-bounds.
        let escape = if bytes[pos] == b'%' && pos + 2 < bytes.len() {
            epub_hex_val(bytes[pos + 1]).zip(epub_hex_val(bytes[pos + 2]))
        } else {
            None
        };
        match escape {
            Some((hi, lo)) => {
                decoded.push(hi * 16 + lo);
                pos += 3;
            }
            None => {
                // Not an escape: copy the byte verbatim.
                decoded.push(bytes[pos]);
                pos += 1;
            }
        }
    }
    String::from_utf8_lossy(&decoded).to_string()
}

/// Value of a single ASCII hex digit (either case), or None.
fn epub_hex_val(b: u8) -> Option<u8> {
    (b as char).to_digit(16).map(|d| d as u8)
}
/// Decode the five predefined XML entities in an attribute value.
///
/// Bug fix: `&amp;` must be decoded LAST. The previous order decoded it
/// first, so an input like `&amp;lt;` (a literal "&lt;") was double-decoded
/// into `<`. With `&amp;` last, `&amp;lt;` correctly becomes `&lt;`.
fn decode_xml_entities(s: &str) -> String {
    // Fast path: no entities present.
    if !s.contains('&') {
        return s.to_string();
    }
    s.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
/// Convert a CBR file to CBZ in-place (same directory, same stem).
///
/// The conversion is safe: a `.cbz.tmp` file is written first, verified, then

View File

@@ -0,0 +1,10 @@
-- Add EPUB to allowed format values in book_files and books tables.
-- PostgreSQL CHECK constraints are dropped+recreated (no ALTER CONSTRAINT).
-- book_files.format
ALTER TABLE book_files DROP CONSTRAINT IF EXISTS book_files_format_check;
ALTER TABLE book_files ADD CONSTRAINT book_files_format_check CHECK (format IN ('pdf', 'cbz', 'cbr', 'epub'));
-- books.format (denormalized column added in 0020)
ALTER TABLE books DROP CONSTRAINT IF EXISTS books_format_check;
ALTER TABLE books ADD CONSTRAINT books_format_check CHECK (format IN ('pdf', 'cbz', 'cbr', 'epub'));