Compare commits
3 Commits
f2d9bedcc7
...
3bd2fb7c1f
| Author | SHA1 | Date | |
|---|---|---|---|
| 3bd2fb7c1f | |||
| 3b6cc2903d | |||
| 6abaa96fba |
155
Cargo.lock
generated
155
Cargo.lock
generated
@@ -62,6 +62,8 @@ dependencies = [
|
|||||||
"futures",
|
"futures",
|
||||||
"image",
|
"image",
|
||||||
"lru",
|
"lru",
|
||||||
|
"natord",
|
||||||
|
"pdfium-render",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
@@ -75,10 +77,10 @@ dependencies = [
|
|||||||
"tower-http",
|
"tower-http",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
|
"unrar",
|
||||||
"utoipa",
|
"utoipa",
|
||||||
"utoipa-swagger-ui",
|
"utoipa-swagger-ui",
|
||||||
"uuid",
|
"uuid",
|
||||||
"walkdir",
|
|
||||||
"webp",
|
"webp",
|
||||||
"zip 2.4.2",
|
"zip 2.4.2",
|
||||||
]
|
]
|
||||||
@@ -369,6 +371,26 @@ dependencies = [
|
|||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "console_error_panic_hook"
|
||||||
|
version = "0.1.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "console_log"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "be8aed40e4edbf4d3b4431ab260b63fdc40f5780a4766824329ea0f1eefe3c0f"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"web-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "const-oid"
|
name = "const-oid"
|
||||||
version = "0.9.6"
|
version = "0.9.6"
|
||||||
@@ -1224,6 +1246,15 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.14.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itoa"
|
name = "itoa"
|
||||||
version = "1.0.17"
|
version = "1.0.17"
|
||||||
@@ -1291,6 +1322,16 @@ version = "0.2.182"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libloading"
|
||||||
|
version = "0.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libm"
|
name = "libm"
|
||||||
version = "0.2.16"
|
version = "0.2.16"
|
||||||
@@ -1404,6 +1445,12 @@ version = "0.7.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "maybe-owned"
|
||||||
|
version = "0.3.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "md-5"
|
name = "md-5"
|
||||||
version = "0.10.6"
|
version = "0.10.6"
|
||||||
@@ -1632,11 +1679,12 @@ name = "parsers"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"image",
|
||||||
"lopdf",
|
"lopdf",
|
||||||
"natord",
|
"natord",
|
||||||
|
"pdfium-render",
|
||||||
"regex",
|
"regex",
|
||||||
"uuid",
|
"unrar",
|
||||||
"walkdir",
|
|
||||||
"zip 2.4.2",
|
"zip 2.4.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -1651,6 +1699,32 @@ dependencies = [
|
|||||||
"subtle",
|
"subtle",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pdfium-render"
|
||||||
|
version = "0.8.37"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6553f6604a52b3203db7b4e9d51eb4dd193cf455af9e56d40cab6575b547b679"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.11.0",
|
||||||
|
"bytemuck",
|
||||||
|
"bytes",
|
||||||
|
"chrono",
|
||||||
|
"console_error_panic_hook",
|
||||||
|
"console_log",
|
||||||
|
"image",
|
||||||
|
"itertools",
|
||||||
|
"js-sys",
|
||||||
|
"libloading",
|
||||||
|
"log",
|
||||||
|
"maybe-owned",
|
||||||
|
"once_cell",
|
||||||
|
"utf16string",
|
||||||
|
"vecmath",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"wasm-bindgen-futures",
|
||||||
|
"web-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pem-rfc7468"
|
name = "pem-rfc7468"
|
||||||
version = "0.7.0"
|
version = "0.7.0"
|
||||||
@@ -1678,6 +1752,12 @@ version = "0.1.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "piston-float"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pkcs1"
|
name = "pkcs1"
|
||||||
version = "0.7.5"
|
version = "0.7.5"
|
||||||
@@ -2940,6 +3020,29 @@ version = "0.2.6"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unrar"
|
||||||
|
version = "0.5.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "92ec61343a630d2b50d13216dea5125e157d3fc180a7d3f447d22fe146b648fc"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.11.0",
|
||||||
|
"regex",
|
||||||
|
"unrar_sys",
|
||||||
|
"widestring",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unrar_sys"
|
||||||
|
version = "0.5.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8b77675b883cfbe6bf41e6b7a5cd6008e0a83ba497de3d96e41a064bbeead765"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "untrusted"
|
name = "untrusted"
|
||||||
version = "0.9.0"
|
version = "0.9.0"
|
||||||
@@ -2958,6 +3061,15 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf16string"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b62a1e85e12d5d712bf47a85f426b73d303e2d00a90de5f3004df3596e9d216"
|
||||||
|
dependencies = [
|
||||||
|
"byteorder",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf8_iter"
|
name = "utf8_iter"
|
||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
@@ -3028,6 +3140,15 @@ version = "0.2.15"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "vecmath"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "956ae1e0d85bca567dee1dcf87fb1ca2e792792f66f87dced8381f99cd91156a"
|
||||||
|
dependencies = [
|
||||||
|
"piston-float",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "version_check"
|
name = "version_check"
|
||||||
version = "0.9.5"
|
version = "0.9.5"
|
||||||
@@ -3240,6 +3361,28 @@ dependencies = [
|
|||||||
"wasite",
|
"wasite",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "widestring"
|
||||||
|
version = "1.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi-util"
|
name = "winapi-util"
|
||||||
version = "0.1.11"
|
version = "0.1.11"
|
||||||
@@ -3249,6 +3392,12 @@ dependencies = [
|
|||||||
"windows-sys 0.61.2",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-core"
|
name = "windows-core"
|
||||||
version = "0.62.2"
|
version = "0.62.2"
|
||||||
|
|||||||
@@ -33,6 +33,8 @@ tracing = "0.1"
|
|||||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||||
uuid = { version = "1.12", features = ["serde", "v4"] }
|
uuid = { version = "1.12", features = ["serde", "v4"] }
|
||||||
natord = "1.0"
|
natord = "1.0"
|
||||||
|
pdfium-render = { version = "0.8", default-features = false, features = ["pdfium_latest", "image_latest", "thread_safe"] }
|
||||||
|
unrar = "0.5"
|
||||||
walkdir = "2.5"
|
walkdir = "2.5"
|
||||||
webp = "0.3"
|
webp = "0.3"
|
||||||
utoipa = "4.0"
|
utoipa = "4.0"
|
||||||
|
|||||||
@@ -28,8 +28,10 @@ tower-http = { version = "0.6", features = ["cors"] }
|
|||||||
tracing.workspace = true
|
tracing.workspace = true
|
||||||
tracing-subscriber.workspace = true
|
tracing-subscriber.workspace = true
|
||||||
uuid.workspace = true
|
uuid.workspace = true
|
||||||
|
natord.workspace = true
|
||||||
|
pdfium-render.workspace = true
|
||||||
|
unrar.workspace = true
|
||||||
zip = { version = "2.2", default-features = false, features = ["deflate"] }
|
zip = { version = "2.2", default-features = false, features = ["deflate"] }
|
||||||
utoipa.workspace = true
|
utoipa.workspace = true
|
||||||
utoipa-swagger-ui = { workspace = true, features = ["axum"] }
|
utoipa-swagger-ui = { workspace = true, features = ["axum"] }
|
||||||
webp.workspace = true
|
webp.workspace = true
|
||||||
walkdir = "2"
|
|
||||||
|
|||||||
@@ -22,12 +22,26 @@ RUN --mount=type=cache,target=/sccache \
|
|||||||
cargo install sqlx-cli --no-default-features --features postgres --locked
|
cargo install sqlx-cli --no-default-features --features postgres --locked
|
||||||
|
|
||||||
FROM debian:bookworm-slim
|
FROM debian:bookworm-slim
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
ca-certificates wget unar poppler-utils locales postgresql-client \
|
ca-certificates wget locales postgresql-client \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
|
RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
|
||||||
ENV LANG=en_US.UTF-8
|
ENV LANG=en_US.UTF-8
|
||||||
ENV LC_ALL=en_US.UTF-8
|
ENV LC_ALL=en_US.UTF-8
|
||||||
|
|
||||||
|
# Download pdfium shared library (replaces pdftoppm subprocess)
|
||||||
|
RUN ARCH=$(dpkg --print-architecture) && \
|
||||||
|
case "$ARCH" in \
|
||||||
|
amd64) PDFIUM_ARCH="linux-x64" ;; \
|
||||||
|
arm64) PDFIUM_ARCH="linux-arm64" ;; \
|
||||||
|
*) echo "Unsupported arch: $ARCH" && exit 1 ;; \
|
||||||
|
esac && \
|
||||||
|
wget -q "https://github.com/bblanchon/pdfium-binaries/releases/latest/download/pdfium-${PDFIUM_ARCH}.tgz" -O /tmp/pdfium.tgz && \
|
||||||
|
tar -xzf /tmp/pdfium.tgz -C /tmp && \
|
||||||
|
cp /tmp/lib/libpdfium.so /usr/local/lib/ && \
|
||||||
|
rm -rf /tmp/pdfium.tgz /tmp/lib /tmp/include && \
|
||||||
|
ldconfig
|
||||||
COPY --from=builder /app/target/release/api /usr/local/bin/api
|
COPY --from=builder /app/target/release/api /usr/local/bin/api
|
||||||
COPY --from=builder /usr/local/cargo/bin/sqlx /usr/local/bin/sqlx
|
COPY --from=builder /usr/local/cargo/bin/sqlx /usr/local/bin/sqlx
|
||||||
COPY infra/migrations /app/migrations
|
COPY infra/migrations /app/migrations
|
||||||
|
|||||||
@@ -65,6 +65,8 @@ pub struct IndexJobDetailResponse {
|
|||||||
pub finished_at: Option<DateTime<Utc>>,
|
pub finished_at: Option<DateTime<Utc>>,
|
||||||
#[schema(value_type = Option<String>)]
|
#[schema(value_type = Option<String>)]
|
||||||
pub phase2_started_at: Option<DateTime<Utc>>,
|
pub phase2_started_at: Option<DateTime<Utc>>,
|
||||||
|
#[schema(value_type = Option<String>)]
|
||||||
|
pub generating_thumbnails_started_at: Option<DateTime<Utc>>,
|
||||||
pub stats_json: Option<serde_json::Value>,
|
pub stats_json: Option<serde_json::Value>,
|
||||||
pub error_opt: Option<String>,
|
pub error_opt: Option<String>,
|
||||||
#[schema(value_type = String)]
|
#[schema(value_type = String)]
|
||||||
@@ -324,6 +326,7 @@ fn map_row_detail(row: sqlx::postgres::PgRow) -> IndexJobDetailResponse {
|
|||||||
started_at: row.get("started_at"),
|
started_at: row.get("started_at"),
|
||||||
finished_at: row.get("finished_at"),
|
finished_at: row.get("finished_at"),
|
||||||
phase2_started_at: row.try_get("phase2_started_at").ok().flatten(),
|
phase2_started_at: row.try_get("phase2_started_at").ok().flatten(),
|
||||||
|
generating_thumbnails_started_at: row.try_get("generating_thumbnails_started_at").ok().flatten(),
|
||||||
stats_json: row.get("stats_json"),
|
stats_json: row.get("stats_json"),
|
||||||
error_opt: row.get("error_opt"),
|
error_opt: row.get("error_opt"),
|
||||||
created_at: row.get("created_at"),
|
created_at: row.get("created_at"),
|
||||||
@@ -350,7 +353,7 @@ pub async fn get_active_jobs(State(state): State<AppState>) -> Result<Json<Vec<I
|
|||||||
let rows = sqlx::query(
|
let rows = sqlx::query(
|
||||||
"SELECT id, library_id, book_id, type, status, started_at, finished_at, stats_json, error_opt, created_at, progress_percent, processed_files, total_files
|
"SELECT id, library_id, book_id, type, status, started_at, finished_at, stats_json, error_opt, created_at, progress_percent, processed_files, total_files
|
||||||
FROM index_jobs
|
FROM index_jobs
|
||||||
WHERE status IN ('pending', 'running', 'generating_thumbnails')
|
WHERE status IN ('pending', 'running', 'extracting_pages', 'generating_thumbnails')
|
||||||
ORDER BY created_at ASC"
|
ORDER BY created_at ASC"
|
||||||
)
|
)
|
||||||
.fetch_all(&state.pool)
|
.fetch_all(&state.pool)
|
||||||
@@ -380,7 +383,7 @@ pub async fn get_job_details(
|
|||||||
id: axum::extract::Path<Uuid>,
|
id: axum::extract::Path<Uuid>,
|
||||||
) -> Result<Json<IndexJobDetailResponse>, ApiError> {
|
) -> Result<Json<IndexJobDetailResponse>, ApiError> {
|
||||||
let row = sqlx::query(
|
let row = sqlx::query(
|
||||||
"SELECT id, library_id, book_id, type, status, started_at, finished_at, phase2_started_at,
|
"SELECT id, library_id, book_id, type, status, started_at, finished_at, phase2_started_at, generating_thumbnails_started_at,
|
||||||
stats_json, error_opt, created_at, current_file, progress_percent, total_files, processed_files
|
stats_json, error_opt, created_at, current_file, progress_percent, total_files, processed_files
|
||||||
FROM index_jobs WHERE id = $1"
|
FROM index_jobs WHERE id = $1"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ use sha2::{Digest, Sha256};
|
|||||||
use sqlx::Row;
|
use sqlx::Row;
|
||||||
use tracing::{debug, error, info, instrument, warn};
|
use tracing::{debug, error, info, instrument, warn};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
use walkdir::WalkDir;
|
|
||||||
|
|
||||||
use crate::{error::ApiError, state::AppState};
|
use crate::{error::ApiError, state::AppState};
|
||||||
|
|
||||||
@@ -389,7 +388,7 @@ fn extract_cbz_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiErro
|
|||||||
image_names.push(entry.name().to_string());
|
image_names.push(entry.name().to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
image_names.sort();
|
image_names.sort_by(|a, b| natord::compare(a, b));
|
||||||
debug!("Found {} images in CBZ {}", image_names.len(), abs_path);
|
debug!("Found {} images in CBZ {}", image_names.len(), abs_path);
|
||||||
|
|
||||||
let index = page_number as usize - 1;
|
let index = page_number as usize - 1;
|
||||||
@@ -413,107 +412,94 @@ fn extract_cbz_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiErro
|
|||||||
|
|
||||||
fn extract_cbr_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiError> {
|
fn extract_cbr_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiError> {
|
||||||
info!("Opening CBR archive: {}", abs_path);
|
info!("Opening CBR archive: {}", abs_path);
|
||||||
|
|
||||||
let index = page_number as usize - 1;
|
let index = page_number as usize - 1;
|
||||||
let tmp_dir = std::env::temp_dir().join(format!("stripstream-cbr-{}", Uuid::new_v4()));
|
|
||||||
debug!("Creating temp dir for CBR extraction: {}", tmp_dir.display());
|
|
||||||
|
|
||||||
std::fs::create_dir_all(&tmp_dir).map_err(|e| {
|
// Pass 1: list all image names (in-process, no subprocess)
|
||||||
error!("Cannot create temp dir: {}", e);
|
let mut image_names: Vec<String> = {
|
||||||
ApiError::internal(format!("temp dir error: {}", e))
|
let archive = unrar::Archive::new(abs_path)
|
||||||
})?;
|
.open_for_listing()
|
||||||
|
.map_err(|e| ApiError::internal(format!("unrar listing failed: {}", e)))?;
|
||||||
|
let mut names = Vec::new();
|
||||||
|
for entry in archive {
|
||||||
|
let entry = entry.map_err(|e| ApiError::internal(format!("unrar entry error: {}", e)))?;
|
||||||
|
let name = entry.filename.to_string_lossy().to_string();
|
||||||
|
if is_image_name(&name.to_ascii_lowercase()) {
|
||||||
|
names.push(name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
names
|
||||||
|
};
|
||||||
|
|
||||||
// Extract directly - skip listing which fails on UTF-16 encoded filenames
|
image_names.sort_by(|a, b| natord::compare(a, b));
|
||||||
let extract_output = std::process::Command::new("env")
|
|
||||||
.args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
|
|
||||||
.arg(&tmp_dir)
|
|
||||||
.arg(abs_path)
|
|
||||||
.output()
|
|
||||||
.map_err(|e| {
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
error!("unar extract failed: {}", e);
|
|
||||||
ApiError::internal(format!("unar extract failed: {e}"))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
if !extract_output.status.success() {
|
let target = image_names
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
.get(index)
|
||||||
let stderr = String::from_utf8_lossy(&extract_output.stderr);
|
.ok_or_else(|| {
|
||||||
error!("unar extract failed {}: {}", abs_path, stderr);
|
error!("Page {} out of range (total: {})", page_number, image_names.len());
|
||||||
return Err(ApiError::internal("unar extract failed"));
|
ApiError::not_found("page out of range")
|
||||||
|
})?
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
// Pass 2: extract only the target page to memory
|
||||||
|
let mut archive = unrar::Archive::new(abs_path)
|
||||||
|
.open_for_processing()
|
||||||
|
.map_err(|e| ApiError::internal(format!("unrar processing failed: {}", e)))?;
|
||||||
|
|
||||||
|
while let Some(header) = archive
|
||||||
|
.read_header()
|
||||||
|
.map_err(|e| ApiError::internal(format!("unrar read header: {}", e)))?
|
||||||
|
{
|
||||||
|
let entry_name = header.entry().filename.to_string_lossy().to_string();
|
||||||
|
if entry_name == target {
|
||||||
|
let (data, _) = header
|
||||||
|
.read()
|
||||||
|
.map_err(|e| ApiError::internal(format!("unrar read: {}", e)))?;
|
||||||
|
info!("Extracted CBR page {} ({} bytes)", page_number, data.len());
|
||||||
|
return Ok(data);
|
||||||
|
}
|
||||||
|
archive = header
|
||||||
|
.skip()
|
||||||
|
.map_err(|e| ApiError::internal(format!("unrar skip: {}", e)))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find and read the requested image (recursive search for CBR files with subdirectories)
|
Err(ApiError::not_found("page not found in archive"))
|
||||||
let mut image_files: Vec<_> = WalkDir::new(&tmp_dir)
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|e| e.ok())
|
|
||||||
.filter(|e| {
|
|
||||||
let name = e.file_name().to_string_lossy().to_lowercase();
|
|
||||||
is_image_name(&name)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
image_files.sort_by_key(|e| e.path().to_string_lossy().to_lowercase());
|
|
||||||
|
|
||||||
let selected = image_files.get(index).ok_or_else(|| {
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
error!("Page {} not found (total: {})", page_number, image_files.len());
|
|
||||||
ApiError::not_found("page out of range")
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let data = std::fs::read(selected.path()).map_err(|e| {
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
error!("read failed: {}", e);
|
|
||||||
ApiError::internal(format!("read error: {}", e))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
|
|
||||||
info!("Successfully extracted CBR page {} ({} bytes)", page_number, data.len());
|
|
||||||
Ok(data)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_pdf_page(abs_path: &str, page_number: u32, width: u32) -> Result<Vec<u8>, ApiError> {
|
fn render_pdf_page(abs_path: &str, page_number: u32, width: u32) -> Result<Vec<u8>, ApiError> {
|
||||||
let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-{}", Uuid::new_v4()));
|
use pdfium_render::prelude::*;
|
||||||
debug!("Creating temp dir for PDF rendering: {}", tmp_dir.display());
|
|
||||||
std::fs::create_dir_all(&tmp_dir).map_err(|e| {
|
|
||||||
error!("Cannot create temp dir {}: {}", tmp_dir.display(), e);
|
|
||||||
ApiError::internal(format!("cannot create temp dir: {e}"))
|
|
||||||
})?;
|
|
||||||
let output_prefix = tmp_dir.join("page");
|
|
||||||
|
|
||||||
let mut cmd = std::process::Command::new("pdftoppm");
|
debug!("Rendering PDF page {} of {} (width: {})", page_number, abs_path, width);
|
||||||
cmd.arg("-f")
|
|
||||||
.arg(page_number.to_string())
|
|
||||||
.arg("-singlefile")
|
|
||||||
.arg("-png");
|
|
||||||
if width > 0 {
|
|
||||||
cmd.arg("-scale-to-x").arg(width.to_string()).arg("-scale-to-y").arg("-1");
|
|
||||||
}
|
|
||||||
cmd.arg(abs_path).arg(&output_prefix);
|
|
||||||
|
|
||||||
debug!("Running pdftoppm for page {} of {} (width: {})", page_number, abs_path, width);
|
let pdfium = Pdfium::new(
|
||||||
let output = cmd
|
Pdfium::bind_to_system_library()
|
||||||
.output()
|
.map_err(|e| ApiError::internal(format!("pdfium not available: {:?}", e)))?,
|
||||||
.map_err(|e| {
|
);
|
||||||
error!("pdftoppm command failed for {} page {}: {}", abs_path, page_number, e);
|
|
||||||
ApiError::internal(format!("pdf render failed: {e}"))
|
|
||||||
})?;
|
|
||||||
if !output.status.success() {
|
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
error!("pdftoppm failed for {} page {}: {}", abs_path, page_number, stderr);
|
|
||||||
return Err(ApiError::internal("pdf render command failed"));
|
|
||||||
}
|
|
||||||
|
|
||||||
let image_path = output_prefix.with_extension("png");
|
let document = pdfium
|
||||||
debug!("Reading rendered PDF page from: {}", image_path.display());
|
.load_pdf_from_file(abs_path, None)
|
||||||
let bytes = std::fs::read(&image_path).map_err(|e| {
|
.map_err(|e| ApiError::internal(format!("pdf load failed: {:?}", e)))?;
|
||||||
error!("Failed to read rendered PDF output {}: {}", image_path.display(), e);
|
|
||||||
ApiError::internal(format!("render output missing: {e}"))
|
let page_index = (page_number - 1) as u16;
|
||||||
})?;
|
let page = document
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
.pages()
|
||||||
debug!("Successfully rendered PDF page {} to {} bytes", page_number, bytes.len());
|
.get(page_index)
|
||||||
Ok(bytes)
|
.map_err(|_| ApiError::not_found("page out of range"))?;
|
||||||
|
|
||||||
|
let render_width = if width > 0 { width as i32 } else { 1200 };
|
||||||
|
let config = PdfRenderConfig::new().set_target_width(render_width);
|
||||||
|
|
||||||
|
let bitmap = page
|
||||||
|
.render_with_config(&config)
|
||||||
|
.map_err(|e| ApiError::internal(format!("pdf render failed: {:?}", e)))?;
|
||||||
|
|
||||||
|
let image = bitmap.as_image();
|
||||||
|
let mut buf = std::io::Cursor::new(Vec::new());
|
||||||
|
image
|
||||||
|
.write_to(&mut buf, image::ImageFormat::Png)
|
||||||
|
.map_err(|e| ApiError::internal(format!("png encode failed: {}", e)))?;
|
||||||
|
|
||||||
|
debug!("Rendered PDF page {} ({} bytes)", page_number, buf.get_ref().len());
|
||||||
|
Ok(buf.into_inner())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn transcode_image(input: &[u8], out_format: &OutputFormat, quality: u8, width: u32, filter: image::imageops::FilterType) -> Result<Vec<u8>, ApiError> {
|
fn transcode_image(input: &[u8], out_format: &OutputFormat, quality: u8, width: u32, filter: image::imageops::FilterType) -> Result<Vec<u8>, ApiError> {
|
||||||
|
|||||||
@@ -87,8 +87,8 @@ export function JobProgress({ jobId, onComplete }: JobProgressProps) {
|
|||||||
const percent = progress.progress_percent ?? 0;
|
const percent = progress.progress_percent ?? 0;
|
||||||
const processed = progress.processed_files ?? 0;
|
const processed = progress.processed_files ?? 0;
|
||||||
const total = progress.total_files ?? 0;
|
const total = progress.total_files ?? 0;
|
||||||
const isThumbnailsPhase = progress.status === "generating_thumbnails";
|
const isPhase2 = progress.status === "extracting_pages" || progress.status === "generating_thumbnails";
|
||||||
const unitLabel = isThumbnailsPhase ? "thumbnails" : "files";
|
const unitLabel = progress.status === "extracting_pages" ? "pages" : progress.status === "generating_thumbnails" ? "thumbnails" : "files";
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="p-4 bg-card rounded-lg border border-border">
|
<div className="p-4 bg-card rounded-lg border border-border">
|
||||||
@@ -112,7 +112,7 @@ export function JobProgress({ jobId, onComplete }: JobProgressProps) {
|
|||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{progress.stats_json && !isThumbnailsPhase && (
|
{progress.stats_json && !isPhase2 && (
|
||||||
<div className="flex flex-wrap gap-3 text-xs">
|
<div className="flex flex-wrap gap-3 text-xs">
|
||||||
<Badge variant="primary">Scanned: {progress.stats_json.scanned_files}</Badge>
|
<Badge variant="primary">Scanned: {progress.stats_json.scanned_files}</Badge>
|
||||||
<Badge variant="success">Indexed: {progress.stats_json.indexed_files}</Badge>
|
<Badge variant="success">Indexed: {progress.stats_json.indexed_files}</Badge>
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ interface JobRowProps {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function JobRow({ job, libraryName, highlighted, onCancel, formatDate, formatDuration }: JobRowProps) {
|
export function JobRow({ job, libraryName, highlighted, onCancel, formatDate, formatDuration }: JobRowProps) {
|
||||||
const isActive = job.status === "running" || job.status === "pending" || job.status === "generating_thumbnails";
|
const isActive = job.status === "running" || job.status === "pending" || job.status === "extracting_pages" || job.status === "generating_thumbnails";
|
||||||
const [showProgress, setShowProgress] = useState(highlighted || isActive);
|
const [showProgress, setShowProgress] = useState(highlighted || isActive);
|
||||||
|
|
||||||
const handleComplete = () => {
|
const handleComplete = () => {
|
||||||
@@ -52,13 +52,14 @@ export function JobRow({ job, libraryName, highlighted, onCancel, formatDate, fo
|
|||||||
const removed = job.stats_json?.removed_files ?? 0;
|
const removed = job.stats_json?.removed_files ?? 0;
|
||||||
const errors = job.stats_json?.errors ?? 0;
|
const errors = job.stats_json?.errors ?? 0;
|
||||||
|
|
||||||
|
const isPhase2 = job.status === "extracting_pages" || job.status === "generating_thumbnails";
|
||||||
const isThumbnailPhase = job.status === "generating_thumbnails";
|
const isThumbnailPhase = job.status === "generating_thumbnails";
|
||||||
const isThumbnailJob = job.type === "thumbnail_rebuild" || job.type === "thumbnail_regenerate";
|
const isThumbnailJob = job.type === "thumbnail_rebuild" || job.type === "thumbnail_regenerate";
|
||||||
const hasThumbnailPhase = isThumbnailPhase || isThumbnailJob;
|
const hasThumbnailPhase = isPhase2 || isThumbnailJob;
|
||||||
|
|
||||||
// Files column: index-phase stats only
|
// Files column: index-phase stats only (Phase 1 discovery)
|
||||||
const filesDisplay =
|
const filesDisplay =
|
||||||
job.status === "running" && !isThumbnailPhase
|
job.status === "running" && !isPhase2
|
||||||
? job.total_files != null
|
? job.total_files != null
|
||||||
? `${job.processed_files ?? 0}/${job.total_files}`
|
? `${job.processed_files ?? 0}/${job.total_files}`
|
||||||
: scanned > 0
|
: scanned > 0
|
||||||
@@ -70,8 +71,8 @@ export function JobRow({ job, libraryName, highlighted, onCancel, formatDate, fo
|
|||||||
? `${scanned} scanned`
|
? `${scanned} scanned`
|
||||||
: "—";
|
: "—";
|
||||||
|
|
||||||
// Thumbnails column
|
// Thumbnails column (Phase 2: extracting_pages + generating_thumbnails)
|
||||||
const thumbInProgress = hasThumbnailPhase && (job.status === "running" || isThumbnailPhase);
|
const thumbInProgress = hasThumbnailPhase && (job.status === "running" || isPhase2);
|
||||||
const thumbDisplay =
|
const thumbDisplay =
|
||||||
thumbInProgress && job.total_files != null
|
thumbInProgress && job.total_files != null
|
||||||
? `${job.processed_files ?? 0}/${job.total_files}`
|
? `${job.processed_files ?? 0}/${job.total_files}`
|
||||||
@@ -128,7 +129,7 @@ export function JobRow({ job, libraryName, highlighted, onCancel, formatDate, fo
|
|||||||
{errors > 0 && <span className="text-error">⚠ {errors}</span>}
|
{errors > 0 && <span className="text-error">⚠ {errors}</span>}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
{job.status === "running" && !isThumbnailPhase && job.total_files != null && (
|
{job.status === "running" && !isPhase2 && job.total_files != null && (
|
||||||
<MiniProgressBar value={job.processed_files ?? 0} max={job.total_files} className="w-24" />
|
<MiniProgressBar value={job.processed_files ?? 0} max={job.total_files} className="w-24" />
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
@@ -155,7 +156,7 @@ export function JobRow({ job, libraryName, highlighted, onCancel, formatDate, fo
|
|||||||
>
|
>
|
||||||
View
|
View
|
||||||
</Link>
|
</Link>
|
||||||
{(job.status === "pending" || job.status === "running" || job.status === "generating_thumbnails") && (
|
{(job.status === "pending" || job.status === "running" || job.status === "extracting_pages" || job.status === "generating_thumbnails") && (
|
||||||
<Button
|
<Button
|
||||||
variant="danger"
|
variant="danger"
|
||||||
size="sm"
|
size="sm"
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ export function JobsIndicator() {
|
|||||||
return () => document.removeEventListener("mousedown", handleClickOutside);
|
return () => document.removeEventListener("mousedown", handleClickOutside);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const runningJobs = activeJobs.filter(j => j.status === "running" || j.status === "generating_thumbnails");
|
const runningJobs = activeJobs.filter(j => j.status === "running" || j.status === "extracting_pages" || j.status === "generating_thumbnails");
|
||||||
const pendingJobs = activeJobs.filter(j => j.status === "pending");
|
const pendingJobs = activeJobs.filter(j => j.status === "pending");
|
||||||
const totalCount = activeJobs.length;
|
const totalCount = activeJobs.length;
|
||||||
|
|
||||||
@@ -222,7 +222,7 @@ export function JobsIndicator() {
|
|||||||
>
|
>
|
||||||
<div className="flex items-start gap-3">
|
<div className="flex items-start gap-3">
|
||||||
<div className="mt-0.5">
|
<div className="mt-0.5">
|
||||||
{(job.status === "running" || job.status === "generating_thumbnails") && <span className="animate-spin inline-block">⏳</span>}
|
{(job.status === "running" || job.status === "extracting_pages" || job.status === "generating_thumbnails") && <span className="animate-spin inline-block">⏳</span>}
|
||||||
{job.status === "pending" && <span>⏸</span>}
|
{job.status === "pending" && <span>⏸</span>}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -234,7 +234,7 @@ export function JobsIndicator() {
|
|||||||
</Badge>
|
</Badge>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{(job.status === "running" || job.status === "generating_thumbnails") && job.progress_percent != null && (
|
{(job.status === "running" || job.status === "extracting_pages" || job.status === "generating_thumbnails") && job.progress_percent != null && (
|
||||||
<div className="flex items-center gap-2 mt-2">
|
<div className="flex items-center gap-2 mt-2">
|
||||||
<MiniProgressBar value={job.progress_percent} />
|
<MiniProgressBar value={job.progress_percent} />
|
||||||
<span className="text-xs font-medium text-muted-foreground">{job.progress_percent}%</span>
|
<span className="text-xs font-medium text-muted-foreground">{job.progress_percent}%</span>
|
||||||
|
|||||||
@@ -60,6 +60,7 @@ export function Badge({ children, variant = "default", className = "" }: BadgePr
|
|||||||
// Status badge for jobs/tasks
|
// Status badge for jobs/tasks
|
||||||
const statusVariants: Record<string, BadgeVariant> = {
|
const statusVariants: Record<string, BadgeVariant> = {
|
||||||
running: "in-progress",
|
running: "in-progress",
|
||||||
|
extracting_pages: "in-progress",
|
||||||
generating_thumbnails: "in-progress",
|
generating_thumbnails: "in-progress",
|
||||||
success: "completed",
|
success: "completed",
|
||||||
completed: "completed",
|
completed: "completed",
|
||||||
@@ -70,6 +71,7 @@ const statusVariants: Record<string, BadgeVariant> = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const statusLabels: Record<string, string> = {
|
const statusLabels: Record<string, string> = {
|
||||||
|
extracting_pages: "Extracting pages",
|
||||||
generating_thumbnails: "Thumbnails",
|
generating_thumbnails: "Thumbnails",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ interface JobDetails {
|
|||||||
started_at: string | null;
|
started_at: string | null;
|
||||||
finished_at: string | null;
|
finished_at: string | null;
|
||||||
phase2_started_at: string | null;
|
phase2_started_at: string | null;
|
||||||
|
generating_thumbnails_started_at: string | null;
|
||||||
current_file: string | null;
|
current_file: string | null;
|
||||||
progress_percent: number | null;
|
progress_percent: number | null;
|
||||||
processed_files: number | null;
|
processed_files: number | null;
|
||||||
@@ -123,21 +124,27 @@ export default async function JobDetailPage({ params }: JobDetailPageProps) {
|
|||||||
const isCompleted = job.status === "success";
|
const isCompleted = job.status === "success";
|
||||||
const isFailed = job.status === "failed";
|
const isFailed = job.status === "failed";
|
||||||
const isCancelled = job.status === "cancelled";
|
const isCancelled = job.status === "cancelled";
|
||||||
|
const isExtractingPages = job.status === "extracting_pages";
|
||||||
const isThumbnailPhase = job.status === "generating_thumbnails";
|
const isThumbnailPhase = job.status === "generating_thumbnails";
|
||||||
|
const isPhase2 = isExtractingPages || isThumbnailPhase;
|
||||||
const { isThumbnailOnly } = typeInfo;
|
const { isThumbnailOnly } = typeInfo;
|
||||||
|
|
||||||
// Which label to use for the progress card
|
// Which label to use for the progress card
|
||||||
const progressTitle = isThumbnailOnly
|
const progressTitle = isThumbnailOnly
|
||||||
? "Thumbnails"
|
? "Thumbnails"
|
||||||
: isThumbnailPhase
|
: isExtractingPages
|
||||||
? "Phase 2 — Thumbnails"
|
? "Phase 2 — Extracting pages"
|
||||||
: "Phase 1 — Discovery";
|
: isThumbnailPhase
|
||||||
|
? "Phase 2 — Thumbnails"
|
||||||
|
: "Phase 1 — Discovery";
|
||||||
|
|
||||||
const progressDescription = isThumbnailOnly
|
const progressDescription = isThumbnailOnly
|
||||||
? undefined
|
? undefined
|
||||||
: isThumbnailPhase
|
: isExtractingPages
|
||||||
? "Generating thumbnails for the analyzed books"
|
? "Extracting first page from each archive (page count + raw image)"
|
||||||
: "Scanning and indexing files in the library";
|
: isThumbnailPhase
|
||||||
|
? "Generating thumbnails for the analyzed books"
|
||||||
|
: "Scanning and indexing files in the library";
|
||||||
|
|
||||||
// Speed metric: thumbnail count for thumbnail jobs, scanned files for index jobs
|
// Speed metric: thumbnail count for thumbnail jobs, scanned files for index jobs
|
||||||
const speedCount = isThumbnailOnly
|
const speedCount = isThumbnailOnly
|
||||||
@@ -145,7 +152,7 @@ export default async function JobDetailPage({ params }: JobDetailPageProps) {
|
|||||||
: (job.stats_json?.scanned_files ?? 0);
|
: (job.stats_json?.scanned_files ?? 0);
|
||||||
|
|
||||||
const showProgressCard =
|
const showProgressCard =
|
||||||
(isCompleted || isFailed || job.status === "running" || isThumbnailPhase) &&
|
(isCompleted || isFailed || job.status === "running" || isPhase2) &&
|
||||||
(job.total_files != null || !!job.current_file);
|
(job.total_files != null || !!job.current_file);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -312,20 +319,44 @@ export default async function JobDetailPage({ params }: JobDetailPageProps) {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Phase 2 start — for index jobs that have two phases */}
|
{/* Phase 2a — Extracting pages (index jobs with phase2) */}
|
||||||
{job.phase2_started_at && (
|
{job.phase2_started_at && !isThumbnailOnly && (
|
||||||
|
<div className="flex items-start gap-4">
|
||||||
|
<div className={`w-3.5 h-3.5 rounded-full mt-0.5 shrink-0 z-10 ${
|
||||||
|
job.generating_thumbnails_started_at || job.finished_at ? "bg-primary" : "bg-primary animate-pulse"
|
||||||
|
}`} />
|
||||||
|
<div className="flex-1 min-w-0">
|
||||||
|
<span className="text-sm font-medium text-foreground">Phase 2a — Extracting pages</span>
|
||||||
|
<p className="text-xs text-muted-foreground">{new Date(job.phase2_started_at).toLocaleString()}</p>
|
||||||
|
<p className="text-xs text-primary/80 font-medium mt-0.5">
|
||||||
|
Duration: {formatDuration(job.phase2_started_at, job.generating_thumbnails_started_at ?? job.finished_at ?? null)}
|
||||||
|
{!job.generating_thumbnails_started_at && !job.finished_at && isExtractingPages && (
|
||||||
|
<span className="text-muted-foreground font-normal ml-1">· in progress</span>
|
||||||
|
)}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Phase 2b — Generating thumbnails */}
|
||||||
|
{(job.generating_thumbnails_started_at || (job.phase2_started_at && isThumbnailOnly)) && (
|
||||||
<div className="flex items-start gap-4">
|
<div className="flex items-start gap-4">
|
||||||
<div className={`w-3.5 h-3.5 rounded-full mt-0.5 shrink-0 z-10 ${
|
<div className={`w-3.5 h-3.5 rounded-full mt-0.5 shrink-0 z-10 ${
|
||||||
job.finished_at ? "bg-primary" : "bg-primary animate-pulse"
|
job.finished_at ? "bg-primary" : "bg-primary animate-pulse"
|
||||||
}`} />
|
}`} />
|
||||||
<div className="flex-1 min-w-0">
|
<div className="flex-1 min-w-0">
|
||||||
<span className="text-sm font-medium text-foreground">
|
<span className="text-sm font-medium text-foreground">
|
||||||
{isThumbnailOnly ? "Thumbnails" : "Phase 2 — Thumbnails"}
|
{isThumbnailOnly ? "Thumbnails" : "Phase 2b — Generating thumbnails"}
|
||||||
</span>
|
</span>
|
||||||
<p className="text-xs text-muted-foreground">{new Date(job.phase2_started_at).toLocaleString()}</p>
|
<p className="text-xs text-muted-foreground">
|
||||||
{job.finished_at && (
|
{(job.generating_thumbnails_started_at ? new Date(job.generating_thumbnails_started_at) : job.phase2_started_at ? new Date(job.phase2_started_at) : null)?.toLocaleString()}
|
||||||
|
</p>
|
||||||
|
{(job.generating_thumbnails_started_at || job.finished_at) && (
|
||||||
<p className="text-xs text-primary/80 font-medium mt-0.5">
|
<p className="text-xs text-primary/80 font-medium mt-0.5">
|
||||||
Duration: {formatDuration(job.phase2_started_at, job.finished_at)}
|
Duration: {formatDuration(
|
||||||
|
job.generating_thumbnails_started_at ?? job.phase2_started_at!,
|
||||||
|
job.finished_at ?? null
|
||||||
|
)}
|
||||||
{job.total_files != null && job.total_files > 0 && (
|
{job.total_files != null && job.total_files > 0 && (
|
||||||
<span className="text-muted-foreground font-normal ml-1">
|
<span className="text-muted-foreground font-normal ml-1">
|
||||||
· {job.processed_files ?? job.total_files} thumbnails
|
· {job.processed_files ?? job.total_files} thumbnails
|
||||||
@@ -333,6 +364,9 @@ export default async function JobDetailPage({ params }: JobDetailPageProps) {
|
|||||||
)}
|
)}
|
||||||
</p>
|
</p>
|
||||||
)}
|
)}
|
||||||
|
{!job.finished_at && isThumbnailPhase && (
|
||||||
|
<span className="text-xs text-muted-foreground">in progress</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
@@ -393,7 +427,7 @@ export default async function JobDetailPage({ params }: JobDetailPageProps) {
|
|||||||
<div className="grid grid-cols-3 gap-4">
|
<div className="grid grid-cols-3 gap-4">
|
||||||
<StatBox
|
<StatBox
|
||||||
value={job.processed_files ?? 0}
|
value={job.processed_files ?? 0}
|
||||||
label={isThumbnailOnly || isThumbnailPhase ? "Generated" : "Processed"}
|
label={isThumbnailOnly || isPhase2 ? "Generated" : "Processed"}
|
||||||
variant="primary"
|
variant="primary"
|
||||||
/>
|
/>
|
||||||
<StatBox value={job.total_files} label="Total" />
|
<StatBox value={job.total_files} label="Total" />
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -21,11 +21,24 @@ RUN --mount=type=cache,target=/sccache \
|
|||||||
cargo build --release -p indexer
|
cargo build --release -p indexer
|
||||||
|
|
||||||
FROM debian:bookworm-slim
|
FROM debian:bookworm-slim
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
ca-certificates wget \
|
ca-certificates wget \
|
||||||
unrar-free unar \
|
|
||||||
poppler-utils \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Download pdfium shared library (replaces pdftoppm + pdfinfo subprocesses)
|
||||||
|
RUN ARCH=$(dpkg --print-architecture) && \
|
||||||
|
case "$ARCH" in \
|
||||||
|
amd64) PDFIUM_ARCH="linux-x64" ;; \
|
||||||
|
arm64) PDFIUM_ARCH="linux-arm64" ;; \
|
||||||
|
*) echo "Unsupported arch: $ARCH" && exit 1 ;; \
|
||||||
|
esac && \
|
||||||
|
wget -q "https://github.com/bblanchon/pdfium-binaries/releases/latest/download/pdfium-${PDFIUM_ARCH}.tgz" -O /tmp/pdfium.tgz && \
|
||||||
|
tar -xzf /tmp/pdfium.tgz -C /tmp && \
|
||||||
|
cp /tmp/lib/libpdfium.so /usr/local/lib/ && \
|
||||||
|
rm -rf /tmp/pdfium.tgz /tmp/lib /tmp/include && \
|
||||||
|
ldconfig
|
||||||
|
|
||||||
COPY --from=builder /app/target/release/indexer /usr/local/bin/indexer
|
COPY --from=builder /app/target/release/indexer /usr/local/bin/indexer
|
||||||
EXPOSE 7081
|
EXPOSE 7081
|
||||||
CMD ["/usr/local/bin/indexer"]
|
CMD ["/usr/local/bin/indexer"]
|
||||||
|
|||||||
@@ -103,17 +103,32 @@ fn generate_thumbnail(image_bytes: &[u8], config: &ThumbnailConfig) -> anyhow::R
|
|||||||
Ok(webp_data.to_vec())
|
Ok(webp_data.to_vec())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn save_thumbnail(
|
/// Save raw image bytes (as extracted from the archive) without any processing.
|
||||||
|
fn save_raw_image(book_id: Uuid, raw_bytes: &[u8], directory: &str) -> anyhow::Result<String> {
|
||||||
|
let dir = Path::new(directory);
|
||||||
|
std::fs::create_dir_all(dir)?;
|
||||||
|
let path = dir.join(format!("{}.raw", book_id));
|
||||||
|
std::fs::write(&path, raw_bytes)?;
|
||||||
|
Ok(path.to_string_lossy().to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resize the raw image and save it as a WebP thumbnail, overwriting the raw file.
|
||||||
|
fn resize_raw_to_webp(
|
||||||
book_id: Uuid,
|
book_id: Uuid,
|
||||||
thumbnail_bytes: &[u8],
|
raw_path: &str,
|
||||||
config: &ThumbnailConfig,
|
config: &ThumbnailConfig,
|
||||||
) -> anyhow::Result<String> {
|
) -> anyhow::Result<String> {
|
||||||
let dir = Path::new(&config.directory);
|
let raw_bytes = std::fs::read(raw_path)
|
||||||
std::fs::create_dir_all(dir)?;
|
.map_err(|e| anyhow::anyhow!("failed to read raw image {}: {}", raw_path, e))?;
|
||||||
let filename = format!("{}.webp", book_id);
|
let webp_bytes = generate_thumbnail(&raw_bytes, config)?;
|
||||||
let path = dir.join(&filename);
|
|
||||||
std::fs::write(&path, thumbnail_bytes)?;
|
let webp_path = Path::new(&config.directory).join(format!("{}.webp", book_id));
|
||||||
Ok(path.to_string_lossy().to_string())
|
std::fs::write(&webp_path, &webp_bytes)?;
|
||||||
|
|
||||||
|
// Delete the raw file now that the WebP is written
|
||||||
|
let _ = std::fs::remove_file(raw_path);
|
||||||
|
|
||||||
|
Ok(webp_path.to_string_lossy().to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn book_format_from_str(s: &str) -> Option<BookFormat> {
|
fn book_format_from_str(s: &str) -> Option<BookFormat> {
|
||||||
@@ -125,7 +140,14 @@ fn book_format_from_str(s: &str) -> Option<BookFormat> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Phase 2 — Analysis: open each unanalyzed archive once, extract page_count + thumbnail.
|
/// Phase 2 — Two-sub-phase analysis:
|
||||||
|
///
|
||||||
|
/// **Sub-phase A (extracting_pages)**: open each archive once, extract (page_count, raw_image_bytes),
|
||||||
|
/// save the raw bytes to `{directory}/{book_id}.raw`. I/O bound — runs at `concurrent_renders`.
|
||||||
|
///
|
||||||
|
/// **Sub-phase B (generating_thumbnails)**: load each `.raw` file, resize and encode as WebP,
|
||||||
|
/// overwrite as `{directory}/{book_id}.webp`. CPU bound — runs at `concurrent_renders`.
|
||||||
|
///
|
||||||
/// `thumbnail_only` = true: only process books missing thumbnail (page_count may already be set).
|
/// `thumbnail_only` = true: only process books missing thumbnail (page_count may already be set).
|
||||||
/// `thumbnail_only` = false: process books missing page_count.
|
/// `thumbnail_only` = false: process books missing page_count.
|
||||||
pub async fn analyze_library_books(
|
pub async fn analyze_library_books(
|
||||||
@@ -143,7 +165,6 @@ pub async fn analyze_library_books(
|
|||||||
|
|
||||||
let concurrency = load_thumbnail_concurrency(&state.pool).await;
|
let concurrency = load_thumbnail_concurrency(&state.pool).await;
|
||||||
|
|
||||||
// Query books that need analysis
|
|
||||||
let query_filter = if thumbnail_only {
|
let query_filter = if thumbnail_only {
|
||||||
"b.thumbnail_path IS NULL"
|
"b.thumbnail_path IS NULL"
|
||||||
} else {
|
} else {
|
||||||
@@ -177,19 +198,7 @@ pub async fn analyze_library_books(
|
|||||||
total, thumbnail_only, concurrency
|
total, thumbnail_only, concurrency
|
||||||
);
|
);
|
||||||
|
|
||||||
// Update job status
|
|
||||||
let _ = sqlx::query(
|
|
||||||
"UPDATE index_jobs SET status = 'generating_thumbnails', total_files = $2, processed_files = 0, current_file = NULL WHERE id = $1",
|
|
||||||
)
|
|
||||||
.bind(job_id)
|
|
||||||
.bind(total)
|
|
||||||
.execute(&state.pool)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
let processed_count = Arc::new(AtomicI32::new(0));
|
|
||||||
let cancelled_flag = Arc::new(AtomicBool::new(false));
|
let cancelled_flag = Arc::new(AtomicBool::new(false));
|
||||||
|
|
||||||
// Background task: poll DB every 2s to detect cancellation
|
|
||||||
let cancel_pool = state.pool.clone();
|
let cancel_pool = state.pool.clone();
|
||||||
let cancel_flag_for_poller = cancelled_flag.clone();
|
let cancel_flag_for_poller = cancelled_flag.clone();
|
||||||
let cancel_handle = tokio::spawn(async move {
|
let cancel_handle = tokio::spawn(async move {
|
||||||
@@ -221,43 +230,56 @@ pub async fn analyze_library_books(
|
|||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
stream::iter(tasks)
|
// -------------------------------------------------------------------------
|
||||||
.for_each_concurrent(concurrency, |task| {
|
// Sub-phase A: extract first page from each archive and store raw image
|
||||||
let processed_count = processed_count.clone();
|
// I/O bound — limited by HDD throughput, runs at `concurrency`
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
let phase_a_start = std::time::Instant::now();
|
||||||
|
let _ = sqlx::query(
|
||||||
|
"UPDATE index_jobs SET status = 'extracting_pages', total_files = $2, processed_files = 0, current_file = NULL WHERE id = $1",
|
||||||
|
)
|
||||||
|
.bind(job_id)
|
||||||
|
.bind(total)
|
||||||
|
.execute(&state.pool)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let extracted_count = Arc::new(AtomicI32::new(0));
|
||||||
|
|
||||||
|
// Collected results: (book_id, raw_path, page_count)
|
||||||
|
let extracted: Vec<(Uuid, String, i32)> = stream::iter(tasks)
|
||||||
|
.map(|task| {
|
||||||
let pool = state.pool.clone();
|
let pool = state.pool.clone();
|
||||||
let config = config.clone();
|
let config = config.clone();
|
||||||
let cancelled = cancelled_flag.clone();
|
let cancelled = cancelled_flag.clone();
|
||||||
|
let extracted_count = extracted_count.clone();
|
||||||
|
|
||||||
async move {
|
async move {
|
||||||
if cancelled.load(Ordering::Relaxed) {
|
if cancelled.load(Ordering::Relaxed) {
|
||||||
return;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
let local_path = utils::remap_libraries_path(&task.abs_path);
|
let local_path = utils::remap_libraries_path(&task.abs_path);
|
||||||
let path = Path::new(&local_path);
|
let path = std::path::Path::new(&local_path);
|
||||||
|
let book_id = task.book_id;
|
||||||
|
|
||||||
let format = match book_format_from_str(&task.format) {
|
let format = match book_format_from_str(&task.format) {
|
||||||
Some(f) => f,
|
Some(f) => f,
|
||||||
None => {
|
None => {
|
||||||
warn!("[ANALYZER] Unknown format '{}' for book {}", task.format, task.book_id);
|
warn!("[ANALYZER] Unknown format '{}' for book {}", task.format, book_id);
|
||||||
return;
|
return None;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Run blocking archive I/O on a thread pool
|
|
||||||
let book_id = task.book_id;
|
|
||||||
let path_owned = path.to_path_buf();
|
|
||||||
let pdf_scale = config.width.max(config.height);
|
let pdf_scale = config.width.max(config.height);
|
||||||
let analyze_result = tokio::task::spawn_blocking(move || {
|
let path_owned = path.to_path_buf();
|
||||||
analyze_book(&path_owned, format, pdf_scale)
|
let analyze_result =
|
||||||
})
|
tokio::task::spawn_blocking(move || analyze_book(&path_owned, format, pdf_scale))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let (page_count, image_bytes) = match analyze_result {
|
let (page_count, raw_bytes) = match analyze_result {
|
||||||
Ok(Ok(result)) => result,
|
Ok(Ok(result)) => result,
|
||||||
Ok(Err(e)) => {
|
Ok(Err(e)) => {
|
||||||
warn!("[ANALYZER] analyze_book failed for book {}: {}", book_id, e);
|
warn!("[ANALYZER] analyze_book failed for book {}: {}", book_id, e);
|
||||||
// Mark parse_status = error in book_files
|
|
||||||
let _ = sqlx::query(
|
let _ = sqlx::query(
|
||||||
"UPDATE book_files SET parse_status = 'error', parse_error_opt = $2 WHERE book_id = $1",
|
"UPDATE book_files SET parse_status = 'error', parse_error_opt = $2 WHERE book_id = $1",
|
||||||
)
|
)
|
||||||
@@ -265,66 +287,125 @@ pub async fn analyze_library_books(
|
|||||||
.bind(e.to_string())
|
.bind(e.to_string())
|
||||||
.execute(&pool)
|
.execute(&pool)
|
||||||
.await;
|
.await;
|
||||||
return;
|
return None;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!("[ANALYZER] spawn_blocking error for book {}: {}", book_id, e);
|
warn!("[ANALYZER] spawn_blocking error for book {}: {}", book_id, e);
|
||||||
return;
|
return None;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Generate thumbnail
|
// Save raw bytes to disk (no resize, no encode)
|
||||||
let thumb_result = tokio::task::spawn_blocking({
|
let raw_path = match tokio::task::spawn_blocking({
|
||||||
let config = config.clone();
|
let dir = config.directory.clone();
|
||||||
move || generate_thumbnail(&image_bytes, &config)
|
let bytes = raw_bytes.clone();
|
||||||
|
move || save_raw_image(book_id, &bytes, &dir)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(Ok(p)) => p,
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
warn!("[ANALYZER] save_raw_image failed for book {}: {}", book_id, e);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!("[ANALYZER] spawn_blocking save_raw error for book {}: {}", book_id, e);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Update page_count in DB
|
||||||
|
if let Err(e) = sqlx::query("UPDATE books SET page_count = $1 WHERE id = $2")
|
||||||
|
.bind(page_count)
|
||||||
|
.bind(book_id)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
warn!("[ANALYZER] DB page_count update failed for book {}: {}", book_id, e);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let processed = extracted_count.fetch_add(1, Ordering::Relaxed) + 1;
|
||||||
|
let percent = (processed as f64 / total as f64 * 50.0) as i32; // first 50%
|
||||||
|
let _ = sqlx::query(
|
||||||
|
"UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
|
||||||
|
)
|
||||||
|
.bind(job_id)
|
||||||
|
.bind(processed)
|
||||||
|
.bind(percent)
|
||||||
|
.execute(&pool)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
Some((book_id, raw_path, page_count))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.buffer_unordered(concurrency)
|
||||||
|
.filter_map(|x| async move { x })
|
||||||
|
.collect()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if cancelled_flag.load(Ordering::Relaxed) {
|
||||||
|
cancel_handle.abort();
|
||||||
|
info!("[ANALYZER] Job {} cancelled during extraction phase", job_id);
|
||||||
|
return Err(anyhow::anyhow!("Job cancelled by user"));
|
||||||
|
}
|
||||||
|
|
||||||
|
let extracted_total = extracted.len() as i32;
|
||||||
|
let phase_a_elapsed = phase_a_start.elapsed();
|
||||||
|
info!(
|
||||||
|
"[ANALYZER] Sub-phase A complete: {}/{} books extracted in {:.1}s ({:.0} ms/book)",
|
||||||
|
extracted_total,
|
||||||
|
total,
|
||||||
|
phase_a_elapsed.as_secs_f64(),
|
||||||
|
if extracted_total > 0 { phase_a_elapsed.as_millis() as f64 / extracted_total as f64 } else { 0.0 }
|
||||||
|
);
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Sub-phase B: resize raw images and encode as WebP
|
||||||
|
// CPU bound — can run at higher concurrency than I/O phase
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
let phase_b_start = std::time::Instant::now();
|
||||||
|
let _ = sqlx::query(
|
||||||
|
"UPDATE index_jobs SET status = 'generating_thumbnails', generating_thumbnails_started_at = NOW(), total_files = $2, processed_files = 0, current_file = NULL WHERE id = $1",
|
||||||
|
)
|
||||||
|
.bind(job_id)
|
||||||
|
.bind(extracted_total)
|
||||||
|
.execute(&state.pool)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let resize_count = Arc::new(AtomicI32::new(0));
|
||||||
|
|
||||||
|
stream::iter(extracted)
|
||||||
|
.for_each_concurrent(concurrency, |(book_id, raw_path, page_count)| {
|
||||||
|
let pool = state.pool.clone();
|
||||||
|
let config = config.clone();
|
||||||
|
let cancelled = cancelled_flag.clone();
|
||||||
|
let resize_count = resize_count.clone();
|
||||||
|
|
||||||
|
async move {
|
||||||
|
if cancelled.load(Ordering::Relaxed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let raw_path_clone = raw_path.clone();
|
||||||
|
let thumb_result = tokio::task::spawn_blocking(move || {
|
||||||
|
resize_raw_to_webp(book_id, &raw_path_clone, &config)
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let thumb_bytes = match thumb_result {
|
let thumb_path = match thumb_result {
|
||||||
Ok(Ok(b)) => b,
|
|
||||||
Ok(Err(e)) => {
|
|
||||||
warn!("[ANALYZER] thumbnail generation failed for book {}: {}", book_id, e);
|
|
||||||
// Still update page_count even if thumbnail fails
|
|
||||||
let _ = sqlx::query(
|
|
||||||
"UPDATE books SET page_count = $1 WHERE id = $2",
|
|
||||||
)
|
|
||||||
.bind(page_count)
|
|
||||||
.bind(book_id)
|
|
||||||
.execute(&pool)
|
|
||||||
.await;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
warn!("[ANALYZER] spawn_blocking thumbnail error for book {}: {}", book_id, e);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Save thumbnail file
|
|
||||||
let save_result = {
|
|
||||||
let config = config.clone();
|
|
||||||
tokio::task::spawn_blocking(move || save_thumbnail(book_id, &thumb_bytes, &config))
|
|
||||||
.await
|
|
||||||
};
|
|
||||||
|
|
||||||
let thumb_path = match save_result {
|
|
||||||
Ok(Ok(p)) => p,
|
Ok(Ok(p)) => p,
|
||||||
Ok(Err(e)) => {
|
Ok(Err(e)) => {
|
||||||
warn!("[ANALYZER] save_thumbnail failed for book {}: {}", book_id, e);
|
warn!("[ANALYZER] resize_raw_to_webp failed for book {}: {}", book_id, e);
|
||||||
let _ = sqlx::query("UPDATE books SET page_count = $1 WHERE id = $2")
|
// page_count is already set; thumbnail stays NULL
|
||||||
.bind(page_count)
|
|
||||||
.bind(book_id)
|
|
||||||
.execute(&pool)
|
|
||||||
.await;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!("[ANALYZER] spawn_blocking save error for book {}: {}", book_id, e);
|
warn!("[ANALYZER] spawn_blocking resize error for book {}: {}", book_id, e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Update DB
|
|
||||||
if let Err(e) = sqlx::query(
|
if let Err(e) = sqlx::query(
|
||||||
"UPDATE books SET page_count = $1, thumbnail_path = $2 WHERE id = $3",
|
"UPDATE books SET page_count = $1, thumbnail_path = $2 WHERE id = $3",
|
||||||
)
|
)
|
||||||
@@ -334,12 +415,13 @@ pub async fn analyze_library_books(
|
|||||||
.execute(&pool)
|
.execute(&pool)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
warn!("[ANALYZER] DB update failed for book {}: {}", book_id, e);
|
warn!("[ANALYZER] DB thumbnail update failed for book {}: {}", book_id, e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
let processed = processed_count.fetch_add(1, Ordering::Relaxed) + 1;
|
let processed = resize_count.fetch_add(1, Ordering::Relaxed) + 1;
|
||||||
let percent = (processed as f64 / total as f64 * 100.0) as i32;
|
let percent =
|
||||||
|
50 + (processed as f64 / extracted_total as f64 * 50.0) as i32; // last 50%
|
||||||
let _ = sqlx::query(
|
let _ = sqlx::query(
|
||||||
"UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
|
"UPDATE index_jobs SET processed_files = $2, progress_percent = $3 WHERE id = $1",
|
||||||
)
|
)
|
||||||
@@ -355,14 +437,24 @@ pub async fn analyze_library_books(
|
|||||||
cancel_handle.abort();
|
cancel_handle.abort();
|
||||||
|
|
||||||
if cancelled_flag.load(Ordering::Relaxed) {
|
if cancelled_flag.load(Ordering::Relaxed) {
|
||||||
info!("[ANALYZER] Job {} cancelled by user, stopping analysis", job_id);
|
info!("[ANALYZER] Job {} cancelled during resize phase", job_id);
|
||||||
return Err(anyhow::anyhow!("Job cancelled by user"));
|
return Err(anyhow::anyhow!("Job cancelled by user"));
|
||||||
}
|
}
|
||||||
|
|
||||||
let final_count = processed_count.load(Ordering::Relaxed);
|
let final_count = resize_count.load(Ordering::Relaxed);
|
||||||
|
let phase_b_elapsed = phase_b_start.elapsed();
|
||||||
info!(
|
info!(
|
||||||
"[ANALYZER] Analysis complete: {}/{} books processed",
|
"[ANALYZER] Sub-phase B complete: {}/{} thumbnails generated in {:.1}s ({:.0} ms/book)",
|
||||||
final_count, total
|
final_count,
|
||||||
|
extracted_total,
|
||||||
|
phase_b_elapsed.as_secs_f64(),
|
||||||
|
if final_count > 0 { phase_b_elapsed.as_millis() as f64 / final_count as f64 } else { 0.0 }
|
||||||
|
);
|
||||||
|
info!(
|
||||||
|
"[ANALYZER] Total: {:.1}s (extraction {:.1}s + resize {:.1}s)",
|
||||||
|
(phase_a_elapsed + phase_b_elapsed).as_secs_f64(),
|
||||||
|
phase_a_elapsed.as_secs_f64(),
|
||||||
|
phase_b_elapsed.as_secs_f64(),
|
||||||
);
|
);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -376,7 +468,6 @@ pub async fn regenerate_thumbnails(
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let config = load_thumbnail_config(&state.pool).await;
|
let config = load_thumbnail_config(&state.pool).await;
|
||||||
|
|
||||||
// Delete thumbnail files for all books in scope
|
|
||||||
let book_ids_to_clear: Vec<Uuid> = sqlx::query_scalar(
|
let book_ids_to_clear: Vec<Uuid> = sqlx::query_scalar(
|
||||||
r#"SELECT id FROM books WHERE (library_id = $1 OR $1 IS NULL) AND thumbnail_path IS NOT NULL"#,
|
r#"SELECT id FROM books WHERE (library_id = $1 OR $1 IS NULL) AND thumbnail_path IS NOT NULL"#,
|
||||||
)
|
)
|
||||||
@@ -387,34 +478,26 @@ pub async fn regenerate_thumbnails(
|
|||||||
|
|
||||||
let mut deleted_count = 0usize;
|
let mut deleted_count = 0usize;
|
||||||
for book_id in &book_ids_to_clear {
|
for book_id in &book_ids_to_clear {
|
||||||
let filename = format!("{}.webp", book_id);
|
// Delete WebP thumbnail
|
||||||
let thumbnail_path = Path::new(&config.directory).join(&filename);
|
let webp_path = Path::new(&config.directory).join(format!("{}.webp", book_id));
|
||||||
if thumbnail_path.exists() {
|
if webp_path.exists() {
|
||||||
if let Err(e) = std::fs::remove_file(&thumbnail_path) {
|
if let Err(e) = std::fs::remove_file(&webp_path) {
|
||||||
warn!(
|
warn!("[ANALYZER] Failed to delete thumbnail {}: {}", webp_path.display(), e);
|
||||||
"[ANALYZER] Failed to delete thumbnail {}: {}",
|
|
||||||
thumbnail_path.display(),
|
|
||||||
e
|
|
||||||
);
|
|
||||||
} else {
|
} else {
|
||||||
deleted_count += 1;
|
deleted_count += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Delete raw file if it exists (interrupted previous run)
|
||||||
|
let raw_path = Path::new(&config.directory).join(format!("{}.raw", book_id));
|
||||||
|
let _ = std::fs::remove_file(&raw_path);
|
||||||
}
|
}
|
||||||
info!(
|
info!("[ANALYZER] Deleted {} thumbnail files for regeneration", deleted_count);
|
||||||
"[ANALYZER] Deleted {} thumbnail files for regeneration",
|
|
||||||
deleted_count
|
|
||||||
);
|
|
||||||
|
|
||||||
// Clear thumbnail_path in DB
|
sqlx::query(r#"UPDATE books SET thumbnail_path = NULL WHERE (library_id = $1 OR $1 IS NULL)"#)
|
||||||
sqlx::query(
|
.bind(library_id)
|
||||||
r#"UPDATE books SET thumbnail_path = NULL WHERE (library_id = $1 OR $1 IS NULL)"#,
|
.execute(&state.pool)
|
||||||
)
|
.await?;
|
||||||
.bind(library_id)
|
|
||||||
.execute(&state.pool)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
// Re-analyze all books (now thumbnail_path IS NULL for all)
|
|
||||||
analyze_library_books(state, job_id, library_id, true).await
|
analyze_library_books(state, job_id, library_id, true).await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -422,16 +505,13 @@ pub async fn regenerate_thumbnails(
|
|||||||
pub async fn cleanup_orphaned_thumbnails(state: &AppState) -> Result<()> {
|
pub async fn cleanup_orphaned_thumbnails(state: &AppState) -> Result<()> {
|
||||||
let config = load_thumbnail_config(&state.pool).await;
|
let config = load_thumbnail_config(&state.pool).await;
|
||||||
|
|
||||||
// Load ALL book IDs across all libraries — we need the complete set to avoid
|
let existing_book_ids: std::collections::HashSet<Uuid> =
|
||||||
// deleting thumbnails that belong to other libraries during a per-library rebuild.
|
sqlx::query_scalar(r#"SELECT id FROM books"#)
|
||||||
let existing_book_ids: std::collections::HashSet<Uuid> = sqlx::query_scalar(
|
.fetch_all(&state.pool)
|
||||||
r#"SELECT id FROM books"#,
|
.await
|
||||||
)
|
.unwrap_or_default()
|
||||||
.fetch_all(&state.pool)
|
.into_iter()
|
||||||
.await
|
.collect();
|
||||||
.unwrap_or_default()
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let thumbnail_dir = Path::new(&config.directory);
|
let thumbnail_dir = Path::new(&config.directory);
|
||||||
if !thumbnail_dir.exists() {
|
if !thumbnail_dir.exists() {
|
||||||
@@ -441,21 +521,23 @@ pub async fn cleanup_orphaned_thumbnails(state: &AppState) -> Result<()> {
|
|||||||
let mut deleted_count = 0usize;
|
let mut deleted_count = 0usize;
|
||||||
if let Ok(entries) = std::fs::read_dir(thumbnail_dir) {
|
if let Ok(entries) = std::fs::read_dir(thumbnail_dir) {
|
||||||
for entry in entries.flatten() {
|
for entry in entries.flatten() {
|
||||||
if let Some(file_name) = entry.file_name().to_str() {
|
let file_name = entry.file_name();
|
||||||
if file_name.ends_with(".webp") {
|
let file_name = file_name.to_string_lossy();
|
||||||
if let Some(book_id_str) = file_name.strip_suffix(".webp") {
|
// Clean up both .webp and orphaned .raw files
|
||||||
if let Ok(book_id) = Uuid::parse_str(book_id_str) {
|
let stem = if let Some(s) = file_name.strip_suffix(".webp") {
|
||||||
if !existing_book_ids.contains(&book_id) {
|
Some(s.to_string())
|
||||||
if let Err(e) = std::fs::remove_file(entry.path()) {
|
} else if let Some(s) = file_name.strip_suffix(".raw") {
|
||||||
warn!(
|
Some(s.to_string())
|
||||||
"Failed to delete orphaned thumbnail {}: {}",
|
} else {
|
||||||
entry.path().display(),
|
None
|
||||||
e
|
};
|
||||||
);
|
if let Some(book_id_str) = stem {
|
||||||
} else {
|
if let Ok(book_id) = Uuid::parse_str(&book_id_str) {
|
||||||
deleted_count += 1;
|
if !existing_book_ids.contains(&book_id) {
|
||||||
}
|
if let Err(e) = std::fs::remove_file(entry.path()) {
|
||||||
}
|
warn!("Failed to delete orphaned file {}: {}", entry.path().display(), e);
|
||||||
|
} else {
|
||||||
|
deleted_count += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -463,9 +545,6 @@ pub async fn cleanup_orphaned_thumbnails(state: &AppState) -> Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info!(
|
info!("[ANALYZER] Deleted {} orphaned thumbnail files", deleted_count);
|
||||||
"[ANALYZER] Deleted {} orphaned thumbnail files",
|
|
||||||
deleted_count
|
|
||||||
);
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,9 +6,10 @@ license.workspace = true
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
natord.workspace = true
|
image.workspace = true
|
||||||
lopdf = "0.35"
|
lopdf = "0.35"
|
||||||
|
natord.workspace = true
|
||||||
|
pdfium-render.workspace = true
|
||||||
regex = "1"
|
regex = "1"
|
||||||
uuid.workspace = true
|
unrar.workspace = true
|
||||||
walkdir.workspace = true
|
|
||||||
zip = { version = "2.2", default-features = false, features = ["deflate"] }
|
zip = { version = "2.2", default-features = false, features = ["deflate"] }
|
||||||
|
|||||||
@@ -1,10 +1,7 @@
|
|||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use std::io::{Read, Write};
|
use std::io::{Read, Write};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::process::Command;
|
|
||||||
use std::sync::OnceLock;
|
use std::sync::OnceLock;
|
||||||
use uuid::Uuid;
|
|
||||||
use walkdir::WalkDir;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub enum BookFormat {
|
pub enum BookFormat {
|
||||||
@@ -152,8 +149,7 @@ pub fn parse_metadata(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Open an archive once and return (page_count, first_page_bytes).
|
/// Open an archive once and return (page_count, first_page_bytes).
|
||||||
/// This is more efficient than calling parse_metadata + extract_first_page separately.
|
/// `pdf_render_scale`: max dimension used for PDF rasterization; 0 means use default (400).
|
||||||
/// `pdf_render_scale`: max dimension (width or height) used by pdftoppm; 0 means use default (400).
|
|
||||||
pub fn analyze_book(path: &Path, format: BookFormat, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
|
pub fn analyze_book(path: &Path, format: BookFormat, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
|
||||||
match format {
|
match format {
|
||||||
BookFormat::Cbz => analyze_cbz(path),
|
BookFormat::Cbz => analyze_cbz(path),
|
||||||
@@ -189,105 +185,98 @@ fn analyze_cbz(path: &Path) -> Result<(i32, Vec<u8>)> {
|
|||||||
Ok((count, buf))
|
Ok((count, buf))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn list_cbr_images(path: &Path) -> Result<Vec<String>> {
|
|
||||||
// Try unrar lb first (fast)
|
|
||||||
let output = std::process::Command::new("unrar")
|
|
||||||
.arg("lb")
|
|
||||||
.arg(path)
|
|
||||||
.output()
|
|
||||||
.with_context(|| format!("failed to execute unrar lb for {}", path.display()))?;
|
|
||||||
|
|
||||||
if output.status.success() {
|
|
||||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
||||||
let mut images: Vec<String> = stdout
|
|
||||||
.lines()
|
|
||||||
.map(|l| l.trim().to_string())
|
|
||||||
.filter(|line| is_image_name(&line.to_ascii_lowercase()))
|
|
||||||
.collect();
|
|
||||||
if !images.is_empty() {
|
|
||||||
images.sort_by(|a, b| natord::compare(a, b));
|
|
||||||
return Ok(images);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: lsar (from unar package) handles UTF-16BE encoded filenames
|
|
||||||
let lsar_output = std::process::Command::new("lsar")
|
|
||||||
.arg(path)
|
|
||||||
.output()
|
|
||||||
.with_context(|| format!("failed to execute lsar for {}", path.display()))?;
|
|
||||||
|
|
||||||
if !lsar_output.status.success() {
|
|
||||||
return Err(anyhow::anyhow!(
|
|
||||||
"both unrar lb and lsar failed for {}",
|
|
||||||
path.display()
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let stdout = String::from_utf8_lossy(&lsar_output.stdout);
|
|
||||||
// lsar output: first line is archive info, then one file per line (indented)
|
|
||||||
let mut images: Vec<String> = stdout
|
|
||||||
.lines()
|
|
||||||
.skip(1) // skip the archive header line
|
|
||||||
.map(|l| l.trim().to_string())
|
|
||||||
.filter(|line| is_image_name(&line.to_ascii_lowercase()))
|
|
||||||
.collect();
|
|
||||||
images.sort_by(|a, b| natord::compare(a, b));
|
|
||||||
|
|
||||||
Ok(images)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn analyze_cbr(path: &Path) -> Result<(i32, Vec<u8>)> {
|
fn analyze_cbr(path: &Path) -> Result<(i32, Vec<u8>)> {
|
||||||
let mut image_names = list_cbr_images(path)?;
|
// Pass 1: list all image names via unrar (in-process, no subprocess)
|
||||||
image_names.sort();
|
let mut image_names: Vec<String> = {
|
||||||
|
let archive = unrar::Archive::new(path)
|
||||||
|
.open_for_listing()
|
||||||
|
.map_err(|e| anyhow::anyhow!("unrar listing failed for {}: {}", path.display(), e))?;
|
||||||
|
let mut names = Vec::new();
|
||||||
|
for entry in archive {
|
||||||
|
let entry = entry.map_err(|e| anyhow::anyhow!("unrar entry error: {}", e))?;
|
||||||
|
let name = entry.filename.to_string_lossy().to_string();
|
||||||
|
if is_image_name(&name.to_ascii_lowercase()) {
|
||||||
|
names.push(name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
names
|
||||||
|
};
|
||||||
|
|
||||||
let count = image_names.len() as i32;
|
if image_names.is_empty() {
|
||||||
if count == 0 {
|
|
||||||
return Err(anyhow::anyhow!("no images found in cbr: {}", path.display()));
|
return Err(anyhow::anyhow!("no images found in cbr: {}", path.display()));
|
||||||
}
|
}
|
||||||
|
|
||||||
let first_name = &image_names[0];
|
image_names.sort_by(|a, b| natord::compare(a, b));
|
||||||
|
let count = image_names.len() as i32;
|
||||||
|
let first_name = image_names[0].clone();
|
||||||
|
|
||||||
// Try unrar p to extract first image to stdout (faster — no temp dir)
|
// Pass 2: extract first image to memory
|
||||||
let p_output = std::process::Command::new("unrar")
|
let mut archive = unrar::Archive::new(path)
|
||||||
.args(["p", "-inul"])
|
.open_for_processing()
|
||||||
.arg(path)
|
.map_err(|e| anyhow::anyhow!("unrar open for processing failed for {}: {}", path.display(), e))?;
|
||||||
.arg(first_name)
|
|
||||||
.output();
|
|
||||||
|
|
||||||
match p_output {
|
while let Some(header) = archive
|
||||||
Ok(out) if out.status.success() && looks_like_image(&out.stdout) => Ok((count, out.stdout)),
|
.read_header()
|
||||||
_ => {
|
.map_err(|e| anyhow::anyhow!("unrar read header: {}", e))?
|
||||||
// Fallback: targeted extraction with unar (handles special chars, encoding issues)
|
{
|
||||||
let image_bytes = extract_cbr_first_page(path, first_name)?;
|
let entry_name = header.entry().filename.to_string_lossy().to_string();
|
||||||
Ok((count, image_bytes))
|
if entry_name == first_name {
|
||||||
|
let (data, _) = header
|
||||||
|
.read()
|
||||||
|
.map_err(|e| anyhow::anyhow!("unrar read data: {}", e))?;
|
||||||
|
return Ok((count, data));
|
||||||
}
|
}
|
||||||
|
archive = header
|
||||||
|
.skip()
|
||||||
|
.map_err(|e| anyhow::anyhow!("unrar skip: {}", e))?;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/// Check image magic bytes to validate that bytes are a real image before decoding.
|
Err(anyhow::anyhow!(
|
||||||
fn looks_like_image(bytes: &[u8]) -> bool {
|
"could not find '{}' in {}",
|
||||||
if bytes.len() < 12 {
|
first_name,
|
||||||
return false;
|
path.display()
|
||||||
}
|
))
|
||||||
// JPEG: FF D8 FF
|
|
||||||
if bytes.starts_with(&[0xFF, 0xD8, 0xFF]) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// PNG: 89 50 4E 47 0D 0A 1A 0A
|
|
||||||
if bytes.starts_with(&[0x89, 0x50, 0x4E, 0x47]) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// WebP: RIFF....WEBP
|
|
||||||
if &bytes[0..4] == b"RIFF" && &bytes[8..12] == b"WEBP" {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn analyze_pdf(path: &Path, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
|
fn analyze_pdf(path: &Path, pdf_render_scale: u32) -> Result<(i32, Vec<u8>)> {
|
||||||
let count = parse_pdf_page_count(path)?;
|
use pdfium_render::prelude::*;
|
||||||
let image_bytes = extract_pdf_first_page(path, pdf_render_scale)?;
|
|
||||||
Ok((count, image_bytes))
|
// Open PDF once — get page count and render first page in a single pass
|
||||||
|
let pdfium = Pdfium::new(
|
||||||
|
Pdfium::bind_to_system_library()
|
||||||
|
.map_err(|e| anyhow::anyhow!("pdfium library not available: {:?}", e))?,
|
||||||
|
);
|
||||||
|
|
||||||
|
let document = pdfium
|
||||||
|
.load_pdf_from_file(path, None)
|
||||||
|
.map_err(|e| anyhow::anyhow!("pdfium load failed for {}: {:?}", path.display(), e))?;
|
||||||
|
|
||||||
|
let count = document.pages().len() as i32;
|
||||||
|
if count == 0 {
|
||||||
|
return Err(anyhow::anyhow!("PDF has no pages: {}", path.display()));
|
||||||
|
}
|
||||||
|
|
||||||
|
let scale = if pdf_render_scale == 0 { 400 } else { pdf_render_scale } as i32;
|
||||||
|
let config = PdfRenderConfig::new()
|
||||||
|
.set_target_width(scale)
|
||||||
|
.set_maximum_height(scale);
|
||||||
|
|
||||||
|
let page = document
|
||||||
|
.pages()
|
||||||
|
.get(0)
|
||||||
|
.map_err(|e| anyhow::anyhow!("cannot get first page of {}: {:?}", path.display(), e))?;
|
||||||
|
|
||||||
|
let bitmap = page
|
||||||
|
.render_with_config(&config)
|
||||||
|
.map_err(|e| anyhow::anyhow!("pdfium render failed for {}: {:?}", path.display(), e))?;
|
||||||
|
|
||||||
|
let image = bitmap.as_image();
|
||||||
|
let mut buf = std::io::Cursor::new(Vec::new());
|
||||||
|
image
|
||||||
|
.write_to(&mut buf, image::ImageFormat::Png)
|
||||||
|
.context("failed to encode rendered PDF page as PNG")?;
|
||||||
|
|
||||||
|
Ok((count, buf.into_inner()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_cbz_page_count(path: &Path) -> Result<i32> {
|
fn parse_cbz_page_count(path: &Path) -> Result<i32> {
|
||||||
@@ -306,34 +295,23 @@ fn parse_cbz_page_count(path: &Path) -> Result<i32> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn parse_cbr_page_count(path: &Path) -> Result<i32> {
|
fn parse_cbr_page_count(path: &Path) -> Result<i32> {
|
||||||
let images = list_cbr_images(path)?;
|
let archive = unrar::Archive::new(path)
|
||||||
Ok(images.len() as i32)
|
.open_for_listing()
|
||||||
|
.map_err(|e| anyhow::anyhow!("unrar listing failed for {}: {}", path.display(), e))?;
|
||||||
|
let count = archive
|
||||||
|
.filter(|r| {
|
||||||
|
r.as_ref()
|
||||||
|
.map(|e| is_image_name(&e.filename.to_string_lossy().to_ascii_lowercase()))
|
||||||
|
.unwrap_or(false)
|
||||||
|
})
|
||||||
|
.count() as i32;
|
||||||
|
Ok(count)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_pdf_page_count(path: &Path) -> Result<i32> {
|
fn parse_pdf_page_count(path: &Path) -> Result<i32> {
|
||||||
let output = std::process::Command::new("pdfinfo")
|
let doc = lopdf::Document::load(path)
|
||||||
.arg(path)
|
.with_context(|| format!("cannot open pdf: {}", path.display()))?;
|
||||||
.output()
|
Ok(doc.get_pages().len() as i32)
|
||||||
.with_context(|| format!("failed to execute pdfinfo for {}", path.display()))?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
return Err(anyhow::anyhow!("pdfinfo failed for {}", path.display()));
|
|
||||||
}
|
|
||||||
|
|
||||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
||||||
for line in stdout.lines() {
|
|
||||||
if line.starts_with("Pages:") {
|
|
||||||
if let Some(pages_str) = line.split_whitespace().nth(1) {
|
|
||||||
return pages_str
|
|
||||||
.parse::<i32>()
|
|
||||||
.with_context(|| format!("cannot parse page count: {}", pages_str));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Err(anyhow::anyhow!(
|
|
||||||
"could not find page count in pdfinfo output"
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_image_name(name: &str) -> bool {
|
fn is_image_name(name: &str) -> bool {
|
||||||
@@ -351,13 +329,8 @@ fn is_image_name(name: &str) -> bool {
|
|||||||
pub fn extract_first_page(path: &Path, format: BookFormat) -> Result<Vec<u8>> {
|
pub fn extract_first_page(path: &Path, format: BookFormat) -> Result<Vec<u8>> {
|
||||||
match format {
|
match format {
|
||||||
BookFormat::Cbz => extract_cbz_first_page(path),
|
BookFormat::Cbz => extract_cbz_first_page(path),
|
||||||
BookFormat::Cbr => {
|
BookFormat::Cbr => analyze_cbr(path).map(|(_, bytes)| bytes),
|
||||||
let mut image_names = list_cbr_images(path)?;
|
BookFormat::Pdf => analyze_pdf(path, 0).map(|(_, bytes)| bytes),
|
||||||
image_names.sort();
|
|
||||||
let first_name = image_names.into_iter().next().context("no images found in cbr")?;
|
|
||||||
extract_cbr_first_page(path, &first_name)
|
|
||||||
}
|
|
||||||
BookFormat::Pdf => extract_pdf_first_page(path, 0),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -386,98 +359,13 @@ fn extract_cbz_first_page(path: &Path) -> Result<Vec<u8>> {
|
|||||||
Ok(buf)
|
Ok(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_cbr_first_page(path: &Path, _first_name: &str) -> Result<Vec<u8>> {
|
|
||||||
let work_dir = std::env::temp_dir().join(format!("stripstream-cbr-thumb-{}", Uuid::new_v4()));
|
|
||||||
let extract_dir = work_dir.join("out");
|
|
||||||
std::fs::create_dir_all(&extract_dir).context("cannot create temp dir")?;
|
|
||||||
|
|
||||||
// unar constructs internal regexes from (archive_path + "/" + internal_path).
|
|
||||||
// Archive filenames containing regex special chars like `[`, `]`, `(`, `)` cause
|
|
||||||
// XADRegexException. Work around by giving unar a safe symlink name.
|
|
||||||
let safe_path = work_dir.join("archive.cbr");
|
|
||||||
if std::os::unix::fs::symlink(path, &safe_path).is_err() {
|
|
||||||
// Cross-filesystem fallback: copy (slower but safe)
|
|
||||||
std::fs::copy(path, &safe_path).context("cannot copy cbr to temp dir")?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let output = std::process::Command::new("env")
|
|
||||||
.args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
|
|
||||||
.arg(&extract_dir)
|
|
||||||
.arg(&safe_path)
|
|
||||||
.output()
|
|
||||||
.context("unar failed")?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
let _ = std::fs::remove_dir_all(&work_dir);
|
|
||||||
return Err(anyhow::anyhow!(
|
|
||||||
"unar extract failed: {:?}",
|
|
||||||
String::from_utf8_lossy(&output.stderr)
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut image_files: Vec<_> = WalkDir::new(&extract_dir)
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|e| e.ok())
|
|
||||||
.filter(|e| {
|
|
||||||
let name = e.file_name().to_string_lossy().to_lowercase();
|
|
||||||
is_image_name(&name)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));
|
|
||||||
|
|
||||||
let first_image = image_files.first().context("no images found in cbr")?;
|
|
||||||
|
|
||||||
let data = std::fs::read(first_image.path())?;
|
|
||||||
let _ = std::fs::remove_dir_all(&work_dir);
|
|
||||||
Ok(data)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn extract_pdf_first_page(path: &Path, pdf_render_scale: u32) -> Result<Vec<u8>> {
|
|
||||||
let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-thumb-{}", Uuid::new_v4()));
|
|
||||||
std::fs::create_dir_all(&tmp_dir)?;
|
|
||||||
let output_prefix = tmp_dir.join("page");
|
|
||||||
let scale = if pdf_render_scale == 0 { 400 } else { pdf_render_scale };
|
|
||||||
let scale_str = scale.to_string();
|
|
||||||
|
|
||||||
let output = Command::new("pdftoppm")
|
|
||||||
.args([
|
|
||||||
"-f",
|
|
||||||
"1",
|
|
||||||
"-singlefile",
|
|
||||||
"-png",
|
|
||||||
"-scale-to",
|
|
||||||
&scale_str,
|
|
||||||
path.to_str().unwrap(),
|
|
||||||
output_prefix.to_str().unwrap(),
|
|
||||||
])
|
|
||||||
.output()
|
|
||||||
.context("pdftoppm failed")?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
return Err(anyhow::anyhow!("pdftoppm failed"));
|
|
||||||
}
|
|
||||||
|
|
||||||
let image_path = output_prefix.with_extension("png");
|
|
||||||
let data = std::fs::read(&image_path)?;
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
Ok(data)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convert a CBR file to CBZ in-place (same directory, same stem).
|
/// Convert a CBR file to CBZ in-place (same directory, same stem).
|
||||||
///
|
///
|
||||||
/// The conversion is safe: a `.cbz.tmp` file is written first, verified, then
|
/// The conversion is safe: a `.cbz.tmp` file is written first, verified, then
|
||||||
/// atomically renamed to `.cbz`. The original CBR is **not** deleted by this
|
/// atomically renamed to `.cbz`. The original CBR is **not** deleted by this
|
||||||
/// function — the caller is responsible for removing it after a successful DB
|
/// function — the caller is responsible for removing it after a successful DB update.
|
||||||
/// update.
|
|
||||||
///
|
///
|
||||||
/// Returns the path of the newly created `.cbz` file.
|
/// Returns the path of the newly created `.cbz` file.
|
||||||
///
|
|
||||||
/// # Errors
|
|
||||||
/// - Returns an error if a `.cbz` file with the same stem already exists.
|
|
||||||
/// - Returns an error if extraction, packing, or verification fails.
|
|
||||||
/// - Returns an error if `cbr_path` has no parent directory or no file stem.
|
|
||||||
pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
||||||
let parent = cbr_path
|
let parent = cbr_path
|
||||||
.parent()
|
.parent()
|
||||||
@@ -489,7 +377,6 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
|||||||
let cbz_path = parent.join(format!("{}.cbz", stem.to_string_lossy()));
|
let cbz_path = parent.join(format!("{}.cbz", stem.to_string_lossy()));
|
||||||
let tmp_path = parent.join(format!("{}.cbz.tmp", stem.to_string_lossy()));
|
let tmp_path = parent.join(format!("{}.cbz.tmp", stem.to_string_lossy()));
|
||||||
|
|
||||||
// Refuse if target CBZ already exists
|
|
||||||
if cbz_path.exists() {
|
if cbz_path.exists() {
|
||||||
return Err(anyhow::anyhow!(
|
return Err(anyhow::anyhow!(
|
||||||
"CBZ file already exists: {}",
|
"CBZ file already exists: {}",
|
||||||
@@ -497,46 +384,45 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract CBR to a temp dir
|
// Extract all images from CBR into memory using unrar crate (no subprocess)
|
||||||
let tmp_dir =
|
let mut images: Vec<(String, Vec<u8>)> = Vec::new();
|
||||||
std::env::temp_dir().join(format!("stripstream-cbr-convert-{}", Uuid::new_v4()));
|
let mut archive = unrar::Archive::new(cbr_path)
|
||||||
std::fs::create_dir_all(&tmp_dir).context("cannot create temp dir")?;
|
.open_for_processing()
|
||||||
|
.map_err(|e| anyhow::anyhow!("unrar open failed for {}: {}", cbr_path.display(), e))?;
|
||||||
|
|
||||||
let output = std::process::Command::new("env")
|
while let Some(header) = archive
|
||||||
.args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
|
.read_header()
|
||||||
.arg(&tmp_dir)
|
.map_err(|e| anyhow::anyhow!("unrar read header: {}", e))?
|
||||||
.arg(cbr_path)
|
{
|
||||||
.output()
|
let entry_name = header.entry().filename.to_string_lossy().to_string();
|
||||||
.context("unar failed to start")?;
|
let file_name = Path::new(&entry_name)
|
||||||
|
.file_name()
|
||||||
|
.map(|n| n.to_string_lossy().to_string())
|
||||||
|
.unwrap_or_else(|| entry_name.clone());
|
||||||
|
|
||||||
if !output.status.success() {
|
if is_image_name(&entry_name.to_ascii_lowercase()) {
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
let (data, next) = header
|
||||||
return Err(anyhow::anyhow!(
|
.read()
|
||||||
"unar extraction failed: {}",
|
.map_err(|e| anyhow::anyhow!("unrar read: {}", e))?;
|
||||||
String::from_utf8_lossy(&output.stderr)
|
images.push((file_name, data));
|
||||||
));
|
archive = next;
|
||||||
|
} else {
|
||||||
|
archive = header
|
||||||
|
.skip()
|
||||||
|
.map_err(|e| anyhow::anyhow!("unrar skip: {}", e))?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect and sort image files
|
if images.is_empty() {
|
||||||
let mut image_files: Vec<_> = WalkDir::new(&tmp_dir)
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|e| e.ok())
|
|
||||||
.filter(|e| {
|
|
||||||
let name = e.file_name().to_string_lossy().to_lowercase();
|
|
||||||
is_image_name(&name)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
image_files.sort_by(|a, b| natord::compare(&a.path().to_string_lossy(), &b.path().to_string_lossy()));
|
|
||||||
|
|
||||||
let image_count = image_files.len();
|
|
||||||
if image_count == 0 {
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
return Err(anyhow::anyhow!(
|
return Err(anyhow::anyhow!(
|
||||||
"no images found in CBR: {}",
|
"no images found in CBR: {}",
|
||||||
cbr_path.display()
|
cbr_path.display()
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
images.sort_by(|(a, _), (b, _)| natord::compare(a, b));
|
||||||
|
let image_count = images.len();
|
||||||
|
|
||||||
// Pack images into the .cbz.tmp file
|
// Pack images into the .cbz.tmp file
|
||||||
let pack_result = (|| -> Result<()> {
|
let pack_result = (|| -> Result<()> {
|
||||||
let cbz_file = std::fs::File::create(&tmp_path)
|
let cbz_file = std::fs::File::create(&tmp_path)
|
||||||
@@ -545,21 +431,16 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
|||||||
let options = zip::write::SimpleFileOptions::default()
|
let options = zip::write::SimpleFileOptions::default()
|
||||||
.compression_method(zip::CompressionMethod::Deflated);
|
.compression_method(zip::CompressionMethod::Deflated);
|
||||||
|
|
||||||
for entry in &image_files {
|
for (file_name, data) in &images {
|
||||||
let file_name = entry.file_name().to_string_lossy().to_string();
|
zip.start_file(file_name, options)
|
||||||
zip.start_file(&file_name, options)
|
|
||||||
.with_context(|| format!("cannot add file {} to zip", file_name))?;
|
.with_context(|| format!("cannot add file {} to zip", file_name))?;
|
||||||
let data = std::fs::read(entry.path())
|
zip.write_all(data)
|
||||||
.with_context(|| format!("cannot read {}", entry.path().display()))?;
|
|
||||||
zip.write_all(&data)
|
|
||||||
.with_context(|| format!("cannot write {} to zip", file_name))?;
|
.with_context(|| format!("cannot write {} to zip", file_name))?;
|
||||||
}
|
}
|
||||||
zip.finish().context("cannot finalize zip")?;
|
zip.finish().context("cannot finalize zip")?;
|
||||||
Ok(())
|
Ok(())
|
||||||
})();
|
})();
|
||||||
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
|
|
||||||
if let Err(err) = pack_result {
|
if let Err(err) = pack_result {
|
||||||
let _ = std::fs::remove_file(&tmp_path);
|
let _ = std::fs::remove_file(&tmp_path);
|
||||||
return Err(err);
|
return Err(err);
|
||||||
@@ -593,7 +474,6 @@ pub fn convert_cbr_to_cbz(cbr_path: &Path) -> Result<PathBuf> {
|
|||||||
return Err(err);
|
return Err(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Atomic rename .cbz.tmp → .cbz
|
|
||||||
std::fs::rename(&tmp_path, &cbz_path)
|
std::fs::rename(&tmp_path, &cbz_path)
|
||||||
.with_context(|| format!("cannot rename {} to {}", tmp_path.display(), cbz_path.display()))?;
|
.with_context(|| format!("cannot rename {} to {}", tmp_path.display(), cbz_path.display()))?;
|
||||||
|
|
||||||
|
|||||||
7
infra/migrations/0018_add_extracting_pages_status.sql
Normal file
7
infra/migrations/0018_add_extracting_pages_status.sql
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
-- Migration: Add status 'extracting_pages' for the first sub-phase of thumbnail generation
|
||||||
|
-- Phase 1 (extracting_pages): extract raw first-page image from archive, store as-is
|
||||||
|
-- Phase 2 (generating_thumbnails): resize and encode as WebP
|
||||||
|
ALTER TABLE index_jobs
|
||||||
|
DROP CONSTRAINT IF EXISTS index_jobs_status_check,
|
||||||
|
ADD CONSTRAINT index_jobs_status_check
|
||||||
|
CHECK (status IN ('pending', 'running', 'extracting_pages', 'generating_thumbnails', 'success', 'failed', 'cancelled'));
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
-- Add timestamp for Phase 2b (generating_thumbnails) so we can show separate durations:
|
||||||
|
-- Phase 2a: phase2_started_at → generating_thumbnails_started_at (extracting_pages)
|
||||||
|
-- Phase 2b: generating_thumbnails_started_at → finished_at
|
||||||
|
ALTER TABLE index_jobs
|
||||||
|
ADD COLUMN IF NOT EXISTS generating_thumbnails_started_at TIMESTAMPTZ;
|
||||||
Reference in New Issue
Block a user