refactor: update AppState references to use state module

- Change all instances of AppState to reference the new state module across multiple files for consistency.
- Clean up imports in the auth, books, index_jobs, libraries, pages, search, settings, thumbnails, and tokens modules.
- Simplify main.rs by removing unused code and organizing middleware and route handlers under the new handlers module.
2026-03-08 21:19:22 +01:00
parent 539dc77d57
commit 6d4c400017
25 changed files with 1590 additions and 1460 deletions
--- a/apps/indexer/src/scanner.rs
+++ b/apps/indexer/src/scanner.rs
@@ -0,0 +1,360 @@
+use anyhow::{Context, Result};
+use chrono::{DateTime, Utc};
+use parsers::{detect_format, parse_metadata, BookFormat, ParsedMetadata};
+use rayon::prelude::*;
+use serde::Serialize;
+use sqlx::Row;
+use std::{collections::HashMap, path::Path, time::Duration};
+use tracing::{error, info, trace, warn};
+use uuid::Uuid;
+use walkdir::WalkDir;
+
+use crate::{
+    batch::{flush_all_batches, BookInsert, BookUpdate, ErrorInsert, FileInsert, FileUpdate},
+    job::is_job_cancelled,
+    utils,
+    AppState,
+};
+
+/// Counters accumulated while scanning libraries for an index job.
+///
+/// `scan_library` receives this by `&mut` and only ever increments the fields.
+#[derive(Debug, Serialize)]
+pub struct JobStats {
+    /// Files found on disk whose format was recognised as a book.
+    pub scanned_files: usize,
+    /// Files queued for insert or update after their metadata parsed successfully.
+    pub indexed_files: usize,
+    /// Database file rows deleted because the file was not seen during the scan.
+    pub removed_files: usize,
+    /// Files whose metadata could not be parsed.
+    pub errors: usize,
+}
+
+const BATCH_SIZE: usize = 100; // initial capacity of the pending insert/update buffers and the length at which scan_library flushes them
+
+/// Scans one library directory and synchronises its `books` / `book_files` rows.
+///
+/// The scan runs in four phases:
+/// 1. Load the files already recorded for `library_id`, keyed by remapped path
+///    (skipped entirely on a full rebuild, where every file is treated as new).
+/// 2. Walk `root`, keep every file whose format `detect_format` recognises, and
+///    stat + fingerprint it.
+/// 3. Parse metadata for all collected files in parallel, then queue inserts and
+///    updates sequentially, flushing the buffers whenever one reaches `BATCH_SIZE`.
+/// 4. Delete rows of known files that were not seen during the walk. This phase is
+///    skipped when the walk reported errors, because an unreadable directory would
+///    otherwise make its (still existing) files look deleted.
+///
+/// While processing, `index_jobs` is updated with the current file, processed
+/// count and percentage every 10 files or once more than a second has elapsed,
+/// and the job's cancellation flag is checked at the same cadence.
+///
+/// # Arguments
+/// * `state` - provides the database pool used for every query.
+/// * `job_id` - index job whose progress row and error rows are written.
+/// * `library_id` - library the scanned books belong to.
+/// * `root` - directory to walk.
+/// * `stats` - counters incremented as files are scanned, indexed, removed or fail.
+/// * `total_processed_count` - running file counter owned by the caller; incremented
+///   once per collected file and used with `total_files` to compute the percentage.
+/// * `total_files` - denominator for the progress percentage (0 yields 0%).
+/// * `is_full_rebuild` - when true, no existing rows are loaded and nothing is
+///   skipped or deleted by this function.
+///
+/// # Errors
+/// Returns an error if a database query fails, if a book file cannot be stat'ed or
+/// fingerprinted, or with the message "Job cancelled by user" once the job is
+/// cancelled (pending batches are flushed first).
+///
+/// NOTE(review): the directory walk, the stat calls and the rayon parsing are
+/// synchronous and execute inside this async fn; confirm the caller runs it where
+/// blocking the executor thread is acceptable.
+#[allow(clippy::too_many_arguments)] // signature is part of the existing caller contract
+pub async fn scan_library(
+    state: &AppState,
+    job_id: Uuid,
+    library_id: Uuid,
+    root: &Path,
+    stats: &mut JobStats,
+    total_processed_count: &mut i32,
+    total_files: usize,
+    is_full_rebuild: bool,
+) -> Result<()> {
+    info!("[SCAN] Starting scan of library {} at path: {} (full_rebuild={})", library_id, root.display(), is_full_rebuild);
+
+    // Known files keyed by remapped path -> (file_id, book_id, fingerprint).
+    let mut existing: HashMap<String, (Uuid, Uuid, String)> = HashMap::new();
+    if is_full_rebuild {
+        info!("[SCAN] Full rebuild: skipping existing files lookup (all will be treated as new)");
+    } else {
+        let existing_rows = sqlx::query(
+            r#"
+        SELECT bf.id AS file_id, bf.book_id, bf.abs_path, bf.fingerprint
+        FROM book_files bf
+        JOIN books b ON b.id = bf.book_id
+        WHERE b.library_id = $1
+        "#,
+        )
+        .bind(library_id)
+        .fetch_all(&state.pool)
+        .await?;
+
+        existing.reserve(existing_rows.len());
+        for row in existing_rows {
+            let abs_path: String = row.get("abs_path");
+            existing.insert(
+                utils::remap_libraries_path(&abs_path),
+                (row.get("file_id"), row.get("book_id"), row.get("fingerprint")),
+            );
+        }
+        info!("[SCAN] Found {} existing files in database for library {}", existing.len(), library_id);
+    }
+
+    // Remapped paths of every book file encountered on disk during this scan.
+    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
+    let mut library_processed_count = 0i32;
+    let mut last_progress_update = std::time::Instant::now();
+
+    // Batching buffers
+    let mut books_to_update: Vec<BookUpdate> = Vec::with_capacity(BATCH_SIZE);
+    let mut files_to_update: Vec<FileUpdate> = Vec::with_capacity(BATCH_SIZE);
+    let mut books_to_insert: Vec<BookInsert> = Vec::with_capacity(BATCH_SIZE);
+    let mut files_to_insert: Vec<FileInsert> = Vec::with_capacity(BATCH_SIZE);
+    let mut errors_to_insert: Vec<ErrorInsert> = Vec::with_capacity(BATCH_SIZE);
+
+    // Step 1: Collect all book files first
+    #[derive(Clone)]
+    struct FileInfo {
+        path: std::path::PathBuf,
+        format: BookFormat,
+        abs_path: String,
+        file_name: String,
+        metadata: std::fs::Metadata,
+        mtime: DateTime<Utc>,
+        fingerprint: String,
+        lookup_path: String,
+    }
+
+    let mut file_infos: Vec<FileInfo> = Vec::new();
+    // Entries the walker could not read; a non-zero count means the walk is incomplete.
+    let mut walk_errors = 0usize;
+    for entry in WalkDir::new(root) {
+        let entry = match entry {
+            Ok(entry) => entry,
+            Err(err) => {
+                walk_errors += 1;
+                warn!("[SCAN] Skipping unreadable entry while walking {}: {}", root.display(), err);
+                continue;
+            }
+        };
+
+        if !entry.file_type().is_file() {
+            continue;
+        }
+
+        let path = entry.path().to_path_buf();
+        let Some(format) = detect_format(&path) else {
+            trace!("[SCAN] Skipping non-book file: {}", path.display());
+            continue;
+        };
+
+        info!("[SCAN] Found book file: {} (format: {:?})", path.display(), format);
+        stats.scanned_files += 1;
+
+        let abs_path_local = path.to_string_lossy().to_string();
+        let abs_path = utils::unmap_libraries_path(&abs_path_local);
+        let file_name = path
+            .file_name()
+            .map(|s| s.to_string_lossy().to_string())
+            .unwrap_or_else(|| abs_path.clone());
+
+        let metadata = std::fs::metadata(&path)
+            .with_context(|| format!("cannot stat {}", path.display()))?;
+        // Fall back to "now" when the platform cannot report a modification time.
+        let mtime: DateTime<Utc> = metadata
+            .modified()
+            .map(DateTime::<Utc>::from)
+            .unwrap_or_else(|_| Utc::now());
+        let fingerprint = utils::compute_fingerprint(&path, metadata.len(), &mtime)?;
+        // Same key form as the `existing` map, so lookups and the seen-set agree.
+        let lookup_path = utils::remap_libraries_path(&abs_path);
+
+        file_infos.push(FileInfo {
+            path,
+            format,
+            abs_path,
+            file_name,
+            metadata,
+            mtime,
+            fingerprint,
+            lookup_path,
+        });
+    }
+
+    info!("[SCAN] Collected {} files, starting parallel parsing", file_infos.len());
+
+    // Step 2: Parse metadata in parallel
+    let parsed_results: Vec<(FileInfo, Result<ParsedMetadata>)> = file_infos
+        .into_par_iter()
+        .map(|file_info| {
+            let parse_result = parse_metadata(&file_info.path, file_info.format, root);
+            (file_info, parse_result)
+        })
+        .collect();
+
+    info!("[SCAN] Completed parallel parsing, processing {} results", parsed_results.len());
+
+    // Step 3: Process results sequentially for batch inserts
+    for (file_info, parse_result) in parsed_results {
+        library_processed_count += 1;
+        *total_processed_count += 1;
+
+        // Update progress in DB every 1 second or every 10 files
+        let should_update_progress = last_progress_update.elapsed() > Duration::from_secs(1)
+            || library_processed_count % 10 == 0;
+        if should_update_progress {
+            let progress_percent = if total_files > 0 {
+                ((*total_processed_count as f64 / total_files as f64) * 100.0) as i32
+            } else {
+                0
+            };
+
+            sqlx::query(
+                "UPDATE index_jobs SET current_file = $2, processed_files = $3, progress_percent = $4 WHERE id = $1"
+            )
+            .bind(job_id)
+            .bind(&file_info.file_name)
+            .bind(*total_processed_count)
+            .bind(progress_percent)
+            .execute(&state.pool)
+            .await
+            .map_err(|e| {
+                error!("[BDD] Failed to update progress for job {}: {}", job_id, e);
+                e
+            })?;
+
+            last_progress_update = std::time::Instant::now();
+
+            // Check if job has been cancelled
+            if is_job_cancelled(&state.pool, job_id).await? {
+                info!("[JOB] Job {} cancelled by user, stopping...", job_id);
+                // Flush any pending batches before exiting
+                flush_all_batches(
+                    &state.pool,
+                    &mut books_to_update,
+                    &mut files_to_update,
+                    &mut books_to_insert,
+                    &mut files_to_insert,
+                    &mut errors_to_insert,
+                )
+                .await?;
+                return Err(anyhow::anyhow!("Job cancelled by user"));
+            }
+        }
+
+        seen.insert(file_info.lookup_path.clone());
+
+        if let Some((file_id, book_id, old_fingerprint)) = existing.get(&file_info.lookup_path).cloned() {
+            let fingerprint_match = old_fingerprint == file_info.fingerprint;
+            if !is_full_rebuild && fingerprint_match {
+                trace!("[PROCESS] Skipping unchanged file: {}", file_info.file_name);
+                continue;
+            }
+
+            info!("[PROCESS] Updating existing file: {} (full_rebuild={}, fingerprint_match={})", file_info.file_name, is_full_rebuild, fingerprint_match);
+
+            match parse_result {
+                Ok(parsed) => {
+                    books_to_update.push(BookUpdate {
+                        book_id,
+                        title: parsed.title,
+                        kind: utils::kind_from_format(file_info.format).to_string(),
+                        series: parsed.series,
+                        volume: parsed.volume,
+                        page_count: parsed.page_count,
+                    });
+
+                    files_to_update.push(FileUpdate {
+                        file_id,
+                        format: file_info.format.as_str().to_string(),
+                        size_bytes: file_info.metadata.len() as i64,
+                        mtime: file_info.mtime,
+                        fingerprint: file_info.fingerprint,
+                    });
+
+                    stats.indexed_files += 1;
+                }
+                Err(err) => {
+                    warn!("[PARSER] Failed to parse {}: {}", file_info.file_name, err);
+                    stats.errors += 1;
+
+                    // Still record the new size/mtime/fingerprint so the file is not
+                    // re-parsed on every scan while it stays unchanged.
+                    files_to_update.push(FileUpdate {
+                        file_id,
+                        format: file_info.format.as_str().to_string(),
+                        size_bytes: file_info.metadata.len() as i64,
+                        mtime: file_info.mtime,
+                        fingerprint: file_info.fingerprint.clone(),
+                    });
+
+                    errors_to_insert.push(ErrorInsert {
+                        job_id,
+                        file_path: file_info.abs_path.clone(),
+                        error_message: err.to_string(),
+                    });
+
+                    // Mark the file as failed immediately; FileUpdate carries no status.
+                    // NOTE(review): the FileUpdate queued above is flushed after this
+                    // statement runs; confirm the batched update does not reset
+                    // parse_status.
+                    sqlx::query(
+                        "UPDATE book_files SET parse_status = 'error', parse_error_opt = $2 WHERE id = $1"
+                    )
+                    .bind(file_id)
+                    .bind(err.to_string())
+                    .execute(&state.pool)
+                    .await?;
+                }
+            }
+
+            // Flush if batch is full
+            if books_to_update.len() >= BATCH_SIZE || files_to_update.len() >= BATCH_SIZE {
+                flush_all_batches(
+                    &state.pool,
+                    &mut books_to_update,
+                    &mut files_to_update,
+                    &mut books_to_insert,
+                    &mut files_to_insert,
+                    &mut errors_to_insert,
+                )
+                .await?;
+            }
+
+            continue;
+        }
+
+        // New file (thumbnails generated by API after job handoff)
+        info!("[PROCESS] Inserting new file: {}", file_info.file_name);
+        let book_id = Uuid::new_v4();
+        let file_id = Uuid::new_v4();
+
+        match parse_result {
+            Ok(parsed) => {
+                books_to_insert.push(BookInsert {
+                    book_id,
+                    library_id,
+                    kind: utils::kind_from_format(file_info.format).to_string(),
+                    title: parsed.title,
+                    series: parsed.series,
+                    volume: parsed.volume,
+                    page_count: parsed.page_count,
+                    thumbnail_path: None,
+                });
+
+                files_to_insert.push(FileInsert {
+                    file_id,
+                    book_id,
+                    format: file_info.format.as_str().to_string(),
+                    abs_path: file_info.abs_path.clone(),
+                    size_bytes: file_info.metadata.len() as i64,
+                    mtime: file_info.mtime,
+                    fingerprint: file_info.fingerprint,
+                    parse_status: "ok".to_string(),
+                    parse_error: None,
+                });
+
+                stats.indexed_files += 1;
+            }
+            Err(err) => {
+                warn!("[PARSER] Failed to parse {}: {}", file_info.file_name, err);
+                stats.errors += 1;
+
+                // Unparseable files still get a book row, titled from the file name.
+                books_to_insert.push(BookInsert {
+                    book_id,
+                    library_id,
+                    kind: utils::kind_from_format(file_info.format).to_string(),
+                    title: utils::file_display_name(&file_info.path),
+                    series: None,
+                    volume: None,
+                    page_count: None,
+                    thumbnail_path: None,
+                });
+
+                files_to_insert.push(FileInsert {
+                    file_id,
+                    book_id,
+                    format: file_info.format.as_str().to_string(),
+                    abs_path: file_info.abs_path.clone(),
+                    size_bytes: file_info.metadata.len() as i64,
+                    mtime: file_info.mtime,
+                    fingerprint: file_info.fingerprint,
+                    parse_status: "error".to_string(),
+                    parse_error: Some(err.to_string()),
+                });
+
+                errors_to_insert.push(ErrorInsert {
+                    job_id,
+                    file_path: file_info.abs_path,
+                    error_message: err.to_string(),
+                });
+            }
+        }
+
+        // Flush if batch is full
+        if books_to_insert.len() >= BATCH_SIZE || files_to_insert.len() >= BATCH_SIZE {
+            flush_all_batches(
+                &state.pool,
+                &mut books_to_update,
+                &mut files_to_update,
+                &mut books_to_insert,
+                &mut files_to_insert,
+                &mut errors_to_insert,
+            )
+            .await?;
+        }
+    }
+
+    // Final flush of any remaining items
+    flush_all_batches(
+        &state.pool,
+        &mut books_to_update,
+        &mut files_to_update,
+        &mut books_to_insert,
+        &mut files_to_insert,
+        &mut errors_to_insert,
+    )
+    .await?;
+
+    info!("[SCAN] Library {} scan complete: {} files scanned, {} indexed, {} errors",
+          library_id, library_processed_count, stats.indexed_files, stats.errors);
+
+    // Handle deletions
+    if walk_errors > 0 {
+        // Files below an unreadable directory were never marked as seen; deleting
+        // "unseen" rows now could drop books that still exist on disk.
+        warn!(
+            "[SCAN] {} directory entries could not be read; skipping stale file removal for library {}",
+            walk_errors, library_id
+        );
+        return Ok(());
+    }
+
+    let mut removed_count = 0usize;
+    for (abs_path, (file_id, book_id, _)) in existing {
+        if seen.contains(&abs_path) {
+            continue;
+        }
+        sqlx::query("DELETE FROM book_files WHERE id = $1")
+            .bind(file_id)
+            .execute(&state.pool)
+            .await?;
+        // Drop the book only once its last file row is gone.
+        sqlx::query("DELETE FROM books WHERE id = $1 AND NOT EXISTS (SELECT 1 FROM book_files WHERE book_id = $1)")
+            .bind(book_id)
+            .execute(&state.pool)
+            .await?;
+        stats.removed_files += 1;
+        removed_count += 1;
+    }
+
+    if removed_count > 0 {
+        info!("[SCAN] Removed {} stale files from database", removed_count);
+    }
+
+    Ok(())
+}