perf(api): remplacer unar/pdftoppm par unrar crate et pdfium-render
CBR: extract_cbr_page extrayait TOUT le CBR sur disque pour lire une seule page. Réécrit avec le crate unrar : listing en mémoire + extraction ciblée de la page demandée uniquement. Zéro subprocess, zéro temp dir. PDF: render_pdf_page utilisait pdftoppm subprocess + temp dir. Réécrit avec pdfium-render in-process. Zéro subprocess, zéro temp dir. CBZ: sort naturel (natord) pour l'ordre des pages. Dockerfile API: retire unar et poppler-utils, ajoute libpdfium.so. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -62,6 +62,8 @@ dependencies = [
|
|||||||
"futures",
|
"futures",
|
||||||
"image",
|
"image",
|
||||||
"lru",
|
"lru",
|
||||||
|
"natord",
|
||||||
|
"pdfium-render",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
@@ -75,10 +77,10 @@ dependencies = [
|
|||||||
"tower-http",
|
"tower-http",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
|
"unrar",
|
||||||
"utoipa",
|
"utoipa",
|
||||||
"utoipa-swagger-ui",
|
"utoipa-swagger-ui",
|
||||||
"uuid",
|
"uuid",
|
||||||
"walkdir",
|
|
||||||
"webp",
|
"webp",
|
||||||
"zip 2.4.2",
|
"zip 2.4.2",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -28,8 +28,10 @@ tower-http = { version = "0.6", features = ["cors"] }
|
|||||||
tracing.workspace = true
|
tracing.workspace = true
|
||||||
tracing-subscriber.workspace = true
|
tracing-subscriber.workspace = true
|
||||||
uuid.workspace = true
|
uuid.workspace = true
|
||||||
|
natord.workspace = true
|
||||||
|
pdfium-render.workspace = true
|
||||||
|
unrar.workspace = true
|
||||||
zip = { version = "2.2", default-features = false, features = ["deflate"] }
|
zip = { version = "2.2", default-features = false, features = ["deflate"] }
|
||||||
utoipa.workspace = true
|
utoipa.workspace = true
|
||||||
utoipa-swagger-ui = { workspace = true, features = ["axum"] }
|
utoipa-swagger-ui = { workspace = true, features = ["axum"] }
|
||||||
webp.workspace = true
|
webp.workspace = true
|
||||||
walkdir = "2"
|
|
||||||
|
|||||||
@@ -22,12 +22,26 @@ RUN --mount=type=cache,target=/sccache \
|
|||||||
cargo install sqlx-cli --no-default-features --features postgres --locked
|
cargo install sqlx-cli --no-default-features --features postgres --locked
|
||||||
|
|
||||||
FROM debian:bookworm-slim
|
FROM debian:bookworm-slim
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
ca-certificates wget unar poppler-utils locales postgresql-client \
|
ca-certificates wget locales postgresql-client \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
|
RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
|
||||||
ENV LANG=en_US.UTF-8
|
ENV LANG=en_US.UTF-8
|
||||||
ENV LC_ALL=en_US.UTF-8
|
ENV LC_ALL=en_US.UTF-8
|
||||||
|
|
||||||
|
# Download pdfium shared library (replaces pdftoppm subprocess)
|
||||||
|
RUN ARCH=$(dpkg --print-architecture) && \
|
||||||
|
case "$ARCH" in \
|
||||||
|
amd64) PDFIUM_ARCH="linux-x64" ;; \
|
||||||
|
arm64) PDFIUM_ARCH="linux-arm64" ;; \
|
||||||
|
*) echo "Unsupported arch: $ARCH" && exit 1 ;; \
|
||||||
|
esac && \
|
||||||
|
wget -q "https://github.com/bblanchon/pdfium-binaries/releases/latest/download/pdfium-${PDFIUM_ARCH}.tgz" -O /tmp/pdfium.tgz && \
|
||||||
|
tar -xzf /tmp/pdfium.tgz -C /tmp && \
|
||||||
|
cp /tmp/lib/libpdfium.so /usr/local/lib/ && \
|
||||||
|
rm -rf /tmp/pdfium.tgz /tmp/lib /tmp/include && \
|
||||||
|
ldconfig
|
||||||
COPY --from=builder /app/target/release/api /usr/local/bin/api
|
COPY --from=builder /app/target/release/api /usr/local/bin/api
|
||||||
COPY --from=builder /usr/local/cargo/bin/sqlx /usr/local/bin/sqlx
|
COPY --from=builder /usr/local/cargo/bin/sqlx /usr/local/bin/sqlx
|
||||||
COPY infra/migrations /app/migrations
|
COPY infra/migrations /app/migrations
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ use sha2::{Digest, Sha256};
|
|||||||
use sqlx::Row;
|
use sqlx::Row;
|
||||||
use tracing::{debug, error, info, instrument, warn};
|
use tracing::{debug, error, info, instrument, warn};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
use walkdir::WalkDir;
|
|
||||||
|
|
||||||
use crate::{error::ApiError, state::AppState};
|
use crate::{error::ApiError, state::AppState};
|
||||||
|
|
||||||
@@ -389,7 +388,7 @@ fn extract_cbz_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiErro
|
|||||||
image_names.push(entry.name().to_string());
|
image_names.push(entry.name().to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
image_names.sort();
|
image_names.sort_by(|a, b| natord::compare(a, b));
|
||||||
debug!("Found {} images in CBZ {}", image_names.len(), abs_path);
|
debug!("Found {} images in CBZ {}", image_names.len(), abs_path);
|
||||||
|
|
||||||
let index = page_number as usize - 1;
|
let index = page_number as usize - 1;
|
||||||
@@ -413,107 +412,94 @@ fn extract_cbz_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiErro
|
|||||||
|
|
||||||
fn extract_cbr_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiError> {
|
fn extract_cbr_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiError> {
|
||||||
info!("Opening CBR archive: {}", abs_path);
|
info!("Opening CBR archive: {}", abs_path);
|
||||||
|
|
||||||
let index = page_number as usize - 1;
|
let index = page_number as usize - 1;
|
||||||
let tmp_dir = std::env::temp_dir().join(format!("stripstream-cbr-{}", Uuid::new_v4()));
|
|
||||||
debug!("Creating temp dir for CBR extraction: {}", tmp_dir.display());
|
|
||||||
|
|
||||||
std::fs::create_dir_all(&tmp_dir).map_err(|e| {
|
|
||||||
error!("Cannot create temp dir: {}", e);
|
|
||||||
ApiError::internal(format!("temp dir error: {}", e))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// Extract directly - skip listing which fails on UTF-16 encoded filenames
|
// Pass 1: list all image names (in-process, no subprocess)
|
||||||
let extract_output = std::process::Command::new("env")
|
let mut image_names: Vec<String> = {
|
||||||
.args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"])
|
let archive = unrar::Archive::new(abs_path)
|
||||||
.arg(&tmp_dir)
|
.open_for_listing()
|
||||||
.arg(abs_path)
|
.map_err(|e| ApiError::internal(format!("unrar listing failed: {}", e)))?;
|
||||||
.output()
|
let mut names = Vec::new();
|
||||||
.map_err(|e| {
|
for entry in archive {
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
let entry = entry.map_err(|e| ApiError::internal(format!("unrar entry error: {}", e)))?;
|
||||||
error!("unar extract failed: {}", e);
|
let name = entry.filename.to_string_lossy().to_string();
|
||||||
ApiError::internal(format!("unar extract failed: {e}"))
|
if is_image_name(&name.to_ascii_lowercase()) {
|
||||||
})?;
|
names.push(name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
names
|
||||||
|
};
|
||||||
|
|
||||||
if !extract_output.status.success() {
|
image_names.sort_by(|a, b| natord::compare(a, b));
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
let stderr = String::from_utf8_lossy(&extract_output.stderr);
|
let target = image_names
|
||||||
error!("unar extract failed {}: {}", abs_path, stderr);
|
.get(index)
|
||||||
return Err(ApiError::internal("unar extract failed"));
|
.ok_or_else(|| {
|
||||||
|
error!("Page {} out of range (total: {})", page_number, image_names.len());
|
||||||
|
ApiError::not_found("page out of range")
|
||||||
|
})?
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
// Pass 2: extract only the target page to memory
|
||||||
|
let mut archive = unrar::Archive::new(abs_path)
|
||||||
|
.open_for_processing()
|
||||||
|
.map_err(|e| ApiError::internal(format!("unrar processing failed: {}", e)))?;
|
||||||
|
|
||||||
|
while let Some(header) = archive
|
||||||
|
.read_header()
|
||||||
|
.map_err(|e| ApiError::internal(format!("unrar read header: {}", e)))?
|
||||||
|
{
|
||||||
|
let entry_name = header.entry().filename.to_string_lossy().to_string();
|
||||||
|
if entry_name == target {
|
||||||
|
let (data, _) = header
|
||||||
|
.read()
|
||||||
|
.map_err(|e| ApiError::internal(format!("unrar read: {}", e)))?;
|
||||||
|
info!("Extracted CBR page {} ({} bytes)", page_number, data.len());
|
||||||
|
return Ok(data);
|
||||||
|
}
|
||||||
|
archive = header
|
||||||
|
.skip()
|
||||||
|
.map_err(|e| ApiError::internal(format!("unrar skip: {}", e)))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find and read the requested image (recursive search for CBR files with subdirectories)
|
Err(ApiError::not_found("page not found in archive"))
|
||||||
let mut image_files: Vec<_> = WalkDir::new(&tmp_dir)
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|e| e.ok())
|
|
||||||
.filter(|e| {
|
|
||||||
let name = e.file_name().to_string_lossy().to_lowercase();
|
|
||||||
is_image_name(&name)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
image_files.sort_by_key(|e| e.path().to_string_lossy().to_lowercase());
|
|
||||||
|
|
||||||
let selected = image_files.get(index).ok_or_else(|| {
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
error!("Page {} not found (total: {})", page_number, image_files.len());
|
|
||||||
ApiError::not_found("page out of range")
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let data = std::fs::read(selected.path()).map_err(|e| {
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
error!("read failed: {}", e);
|
|
||||||
ApiError::internal(format!("read error: {}", e))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
|
|
||||||
info!("Successfully extracted CBR page {} ({} bytes)", page_number, data.len());
|
|
||||||
Ok(data)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_pdf_page(abs_path: &str, page_number: u32, width: u32) -> Result<Vec<u8>, ApiError> {
|
fn render_pdf_page(abs_path: &str, page_number: u32, width: u32) -> Result<Vec<u8>, ApiError> {
|
||||||
let tmp_dir = std::env::temp_dir().join(format!("stripstream-pdf-{}", Uuid::new_v4()));
|
use pdfium_render::prelude::*;
|
||||||
debug!("Creating temp dir for PDF rendering: {}", tmp_dir.display());
|
|
||||||
std::fs::create_dir_all(&tmp_dir).map_err(|e| {
|
|
||||||
error!("Cannot create temp dir {}: {}", tmp_dir.display(), e);
|
|
||||||
ApiError::internal(format!("cannot create temp dir: {e}"))
|
|
||||||
})?;
|
|
||||||
let output_prefix = tmp_dir.join("page");
|
|
||||||
|
|
||||||
let mut cmd = std::process::Command::new("pdftoppm");
|
debug!("Rendering PDF page {} of {} (width: {})", page_number, abs_path, width);
|
||||||
cmd.arg("-f")
|
|
||||||
.arg(page_number.to_string())
|
|
||||||
.arg("-singlefile")
|
|
||||||
.arg("-png");
|
|
||||||
if width > 0 {
|
|
||||||
cmd.arg("-scale-to-x").arg(width.to_string()).arg("-scale-to-y").arg("-1");
|
|
||||||
}
|
|
||||||
cmd.arg(abs_path).arg(&output_prefix);
|
|
||||||
|
|
||||||
debug!("Running pdftoppm for page {} of {} (width: {})", page_number, abs_path, width);
|
let pdfium = Pdfium::new(
|
||||||
let output = cmd
|
Pdfium::bind_to_system_library()
|
||||||
.output()
|
.map_err(|e| ApiError::internal(format!("pdfium not available: {:?}", e)))?,
|
||||||
.map_err(|e| {
|
);
|
||||||
error!("pdftoppm command failed for {} page {}: {}", abs_path, page_number, e);
|
|
||||||
ApiError::internal(format!("pdf render failed: {e}"))
|
|
||||||
})?;
|
|
||||||
if !output.status.success() {
|
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
|
||||||
error!("pdftoppm failed for {} page {}: {}", abs_path, page_number, stderr);
|
|
||||||
return Err(ApiError::internal("pdf render command failed"));
|
|
||||||
}
|
|
||||||
|
|
||||||
let image_path = output_prefix.with_extension("png");
|
let document = pdfium
|
||||||
debug!("Reading rendered PDF page from: {}", image_path.display());
|
.load_pdf_from_file(abs_path, None)
|
||||||
let bytes = std::fs::read(&image_path).map_err(|e| {
|
.map_err(|e| ApiError::internal(format!("pdf load failed: {:?}", e)))?;
|
||||||
error!("Failed to read rendered PDF output {}: {}", image_path.display(), e);
|
|
||||||
ApiError::internal(format!("render output missing: {e}"))
|
let page_index = (page_number - 1) as u16;
|
||||||
})?;
|
let page = document
|
||||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
.pages()
|
||||||
debug!("Successfully rendered PDF page {} to {} bytes", page_number, bytes.len());
|
.get(page_index)
|
||||||
Ok(bytes)
|
.map_err(|_| ApiError::not_found("page out of range"))?;
|
||||||
|
|
||||||
|
let render_width = if width > 0 { width as i32 } else { 1200 };
|
||||||
|
let config = PdfRenderConfig::new().set_target_width(render_width);
|
||||||
|
|
||||||
|
let bitmap = page
|
||||||
|
.render_with_config(&config)
|
||||||
|
.map_err(|e| ApiError::internal(format!("pdf render failed: {:?}", e)))?;
|
||||||
|
|
||||||
|
let image = bitmap.as_image();
|
||||||
|
let mut buf = std::io::Cursor::new(Vec::new());
|
||||||
|
image
|
||||||
|
.write_to(&mut buf, image::ImageFormat::Png)
|
||||||
|
.map_err(|e| ApiError::internal(format!("png encode failed: {}", e)))?;
|
||||||
|
|
||||||
|
debug!("Rendered PDF page {} ({} bytes)", page_number, buf.get_ref().len());
|
||||||
|
Ok(buf.into_inner())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn transcode_image(input: &[u8], out_format: &OutputFormat, quality: u8, width: u32, filter: image::imageops::FilterType) -> Result<Vec<u8>, ApiError> {
|
fn transcode_image(input: &[u8], out_format: &OutputFormat, quality: u8, width: u32, filter: image::imageops::FilterType) -> Result<Vec<u8>, ApiError> {
|
||||||
|
|||||||
Reference in New Issue
Block a user