diff --git a/Cargo.lock b/Cargo.lock index 309023f..6d17fb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,6 +78,7 @@ dependencies = [ "utoipa", "utoipa-swagger-ui", "uuid", + "walkdir", "webp", "zip 2.4.2", ] diff --git a/apps/api/Cargo.toml b/apps/api/Cargo.toml index 60bd46a..c49f1e6 100644 --- a/apps/api/Cargo.toml +++ b/apps/api/Cargo.toml @@ -32,3 +32,4 @@ zip = { version = "2.2", default-features = false, features = ["deflate"] } utoipa.workspace = true utoipa-swagger-ui = { workspace = true, features = ["axum"] } webp = "0.3" +walkdir = "2" diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile index a3dfcbf..7ff27c6 100644 --- a/apps/api/Dockerfile +++ b/apps/api/Dockerfile @@ -21,7 +21,10 @@ RUN --mount=type=cache,target=/sccache \ cargo build --release -p api FROM debian:bookworm-slim -RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates wget unrar-free poppler-utils && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates wget unar poppler-utils locales && rm -rf /var/lib/apt/lists/* +RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen +ENV LANG=en_US.UTF-8 +ENV LC_ALL=en_US.UTF-8 COPY --from=builder /app/target/release/api /usr/local/bin/api EXPOSE 8080 CMD ["/usr/local/bin/api"] diff --git a/apps/api/src/error.rs b/apps/api/src/error.rs index 6b3f072..b60204d 100644 --- a/apps/api/src/error.rs +++ b/apps/api/src/error.rs @@ -1,4 +1,8 @@ -use axum::{http::StatusCode, response::{IntoResponse, Response}, Json}; +use axum::{ + http::StatusCode, + response::{IntoResponse, Response}, + Json, +}; use serde::Serialize; #[derive(Debug)] @@ -51,7 +55,13 @@ impl ApiError { impl IntoResponse for ApiError { fn into_response(self) -> Response { - (self.status, Json(ErrorBody { error: &self.message })).into_response() + ( + self.status, + Json(ErrorBody { + error: &self.message, + }), + ) + .into_response() } } @@ -60,3 +70,9 @@ impl From<sqlx::Error> for ApiError {
Self::internal(format!("database error: {err}")) } } + +impl From<std::io::Error> for ApiError { + fn from(err: std::io::Error) -> Self { + Self::internal(format!("IO error: {err}")) + } +} diff --git a/apps/api/src/pages.rs b/apps/api/src/pages.rs index 9edab2e..f50a935 100644 --- a/apps/api/src/pages.rs +++ b/apps/api/src/pages.rs @@ -18,6 +18,7 @@ use sha2::{Digest, Sha256}; use sqlx::Row; use tracing::{debug, error, info, instrument, warn}; use uuid::Uuid; +use walkdir::WalkDir; use crate::{error::ApiError, AppState}; @@ -220,7 +221,7 @@ pub async fn get_page( let start_time = std::time::Instant::now(); let bytes = tokio::time::timeout( - Duration::from_secs(30), + Duration::from_secs(60), tokio::task::spawn_blocking(move || { render_page(&abs_path_clone, &input_format, n, &format_clone, quality, width) }), @@ -342,93 +343,64 @@ } fn extract_cbz_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiErro } fn extract_cbr_page(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiError> { - debug!("Listing CBR archive: {}", abs_path); - let list_output = std::process::Command::new("unrar") - .arg("lb") + info!("Opening CBR archive: {}", abs_path); + + let index = page_number as usize - 1; + let tmp_dir = std::env::temp_dir().join(format!("stripstream-cbr-{}", Uuid::new_v4())); + debug!("Creating temp dir for CBR extraction: {}", tmp_dir.display()); + + std::fs::create_dir_all(&tmp_dir).map_err(|e| { + error!("Cannot create temp dir: {}", e); + ApiError::internal(format!("temp dir error: {}", e)) + })?; + + // Extract directly - skip listing which fails on UTF-16 encoded filenames + let extract_output = std::process::Command::new("env") + .args(["LC_ALL=en_US.UTF-8", "LANG=en_US.UTF-8", "unar", "-o"]) + .arg(&tmp_dir) + .arg(abs_path) + .output() + .map_err(|e| { - error!("unrar list command failed for {}: {}", abs_path, e); - ApiError::internal(format!("unrar list failed: {e}")) + let _ = std::fs::remove_dir_all(&tmp_dir); + error!("unar extract failed: {}", e); + 
ApiError::internal(format!("unar extract failed: {e}")) })?; - if !list_output.status.success() { - let stderr = String::from_utf8_lossy(&list_output.stderr); - error!("unrar could not list archive {}: {}", abs_path, stderr); - return Err(ApiError::internal("unrar could not list archive")); + + if !extract_output.status.success() { + let _ = std::fs::remove_dir_all(&tmp_dir); + let stderr = String::from_utf8_lossy(&extract_output.stderr); + error!("unar extract failed {}: {}", abs_path, stderr); + return Err(ApiError::internal("unar extract failed")); } - let mut entries: Vec<String> = String::from_utf8_lossy(&list_output.stdout) - .lines() - .filter(|line| is_image_name(&line.to_ascii_lowercase())) - .map(|s| s.to_string()) + // Find and read the requested image (recursive search for CBR files with subdirectories) + let mut image_files: Vec<_> = WalkDir::new(&tmp_dir) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| { + let name = e.file_name().to_string_lossy().to_lowercase(); + is_image_name(&name) + }) .collect(); - entries.sort(); - debug!("First 10 entries in CBR {}: {:?}", abs_path, &entries[..entries.len().min(10)]); - debug!("Found {} images in CBR {}", entries.len(), abs_path); + image_files.sort_by_key(|e| e.path().to_string_lossy().to_lowercase()); - // Try multiple entries starting from the requested page number - // Some archives have non-image files (readme.txt, etc.) 
at the start - let mut tried_entries: Vec<String> = Vec::new(); - let start_index = page_number.saturating_sub(1) as usize; + let selected = image_files.get(index).ok_or_else(|| { + let _ = std::fs::remove_dir_all(&tmp_dir); + error!("Page {} not found (total: {})", page_number, image_files.len()); + ApiError::not_found("page out of range") + })?; + + let data = std::fs::read(selected.path()).map_err(|e| { + let _ = std::fs::remove_dir_all(&tmp_dir); + error!("read failed: {}", e); + ApiError::internal(format!("read error: {}", e)) + })?; + + let _ = std::fs::remove_dir_all(&tmp_dir); - for offset in 0..10 { - let idx = start_index + offset; - if idx >= entries.len() { - break; - } - - let selected = &entries[idx]; - tried_entries.push(selected.clone()); - - debug!("Trying to extract entry {} ({}) from CBR {}", idx, selected, abs_path); - - let page_output = std::process::Command::new("unrar") - .arg("p") - .arg("-inul") - .arg("-y") - .arg(abs_path) - .arg(selected) - .output() - .map_err(|e| { - error!("unrar extract command failed for {} page {}: {}", abs_path, selected, e); - ApiError::internal(format!("unrar extract failed: {e}")) - })?; - - if !page_output.status.success() { - let stderr = String::from_utf8_lossy(&page_output.stderr); - warn!("unrar could not extract page {} from {}: {}", selected, abs_path, stderr); - continue; - } - - let extracted_data = &page_output.stdout; - - // Validate it's actually an image - if extracted_data.len() < 1000 { - warn!("Entry {} ({}) too small ({} bytes), trying next", idx, selected, extracted_data.len()); - continue; - } - - let is_valid_image = extracted_data.len() > 4 && ( - extracted_data.starts_with(&[0xFF, 0xD8, 0xFF]) || // JPEG - extracted_data.starts_with(&[0x89, 0x50, 0x4E, 0x47]) || // PNG - extracted_data.starts_with(b"GIF87a") || extracted_data.starts_with(b"GIF89a") || // GIF - (extracted_data.starts_with(b"RIFF") && extracted_data.len() > 12 && &extracted_data[8..12] == b"WEBP") || // WebP - 
extracted_data.starts_with(&[0x49, 0x49, 0x2A, 0x00]) || extracted_data.starts_with(&[0x4D, 0x4D, 0x00, 0x2A]) // TIFF - ); - - if !is_valid_image { - warn!("Entry {} ({}) is not a valid image, trying next", idx, selected); - continue; - } - - // Found valid image! - info!("Successfully extracted valid image from CBR entry {} ({}) at offset {}", idx, selected, offset); - return Ok(extracted_data.to_vec()); - } - - error!("Could not find valid image in CBR {} after trying: {:?}", abs_path, tried_entries); - Err(ApiError::not_found("no valid image found in archive after trying multiple entries")) + info!("Successfully extracted CBR page {} ({} bytes)", page_number, data.len()); + Ok(data) } fn render_pdf_page(abs_path: &str, page_number: u32, width: u32) -> Result<Vec<u8>, ApiError> { @@ -539,11 +511,16 @@ fn format_matches(source: &ImageFormat, target: &OutputFormat) -> bool { } fn is_image_name(name: &str) -> bool { - name.ends_with(".jpg") - || name.ends_with(".jpeg") - || name.ends_with(".png") - || name.ends_with(".webp") - || name.ends_with(".avif") + let lower = name.to_lowercase(); + lower.ends_with(".jpg") + || lower.ends_with(".jpeg") + || lower.ends_with(".png") + || lower.ends_with(".webp") + || lower.ends_with(".avif") + || lower.ends_with(".gif") + || lower.ends_with(".tif") + || lower.ends_with(".tiff") + || lower.ends_with(".bmp") } #[allow(dead_code)] diff --git a/apps/indexer/src/main.rs b/apps/indexer/src/main.rs index 31fdd37..3bcfca8 100644 --- a/apps/indexer/src/main.rs +++ b/apps/indexer/src/main.rs @@ -186,7 +186,6 @@ async fn run_file_watcher(state: AppState) -> anyhow::Result<()> { let pool = state.pool.clone(); tokio::spawn(async move { - let mut watcher: Option = None; let mut watched_libraries: HashMap = HashMap::new(); loop { @@ -217,17 +216,12 @@ async fn run_file_watcher(state: AppState) -> anyhow::Result<()> { if needs_restart { info!("[WATCHER] Restarting watcher for {} libraries", current_libraries.len()); - // Drop old watcher - watcher = 
None; - watched_libraries.clear(); - if !current_libraries.is_empty() { let tx_clone = tx.clone(); let libraries_clone = current_libraries.clone(); match setup_watcher(libraries_clone, tx_clone) { - Ok(new_watcher) => { - watcher = Some(new_watcher); + Ok(_new_watcher) => { watched_libraries = current_libraries; info!("[WATCHER] Watching {} libraries", watched_libraries.len()); }