refactor: update AppState references to use state module
- Change all instances of AppState to reference the new state module across multiple files for consistency. - Clean up imports in auth, books, index_jobs, libraries, pages, search, settings, thumbnails, and tokens modules. - Simplify main.rs by removing unused code and organizing middleware and route handlers under the new handlers module.
This commit is contained in:
@@ -4,6 +4,8 @@ version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
axum.workspace = true
|
||||
|
||||
16
apps/indexer/src/api.rs
Normal file
16
apps/indexer/src/api.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
use axum::{extract::State, http::StatusCode, Json};
|
||||
use serde_json;
|
||||
|
||||
use crate::AppState;
|
||||
|
||||
pub async fn health() -> &'static str {
|
||||
"ok"
|
||||
}
|
||||
|
||||
pub async fn ready(State(state): State<AppState>) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||
sqlx::query("SELECT 1")
|
||||
.execute(&state.pool)
|
||||
.await
|
||||
.map_err(|_| StatusCode::SERVICE_UNAVAILABLE)?;
|
||||
Ok(Json(serde_json::json!({"status": "ready"})))
|
||||
}
|
||||
233
apps/indexer/src/batch.rs
Normal file
233
apps/indexer/src/batch.rs
Normal file
@@ -0,0 +1,233 @@
|
||||
use anyhow::Result;
|
||||
use chrono::{DateTime, Utc};
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
// Batched update data structures
|
||||
pub struct BookUpdate {
|
||||
pub book_id: Uuid,
|
||||
pub title: String,
|
||||
pub kind: String,
|
||||
pub series: Option<String>,
|
||||
pub volume: Option<i32>,
|
||||
pub page_count: Option<i32>,
|
||||
}
|
||||
|
||||
pub struct FileUpdate {
|
||||
pub file_id: Uuid,
|
||||
pub format: String,
|
||||
pub size_bytes: i64,
|
||||
pub mtime: DateTime<Utc>,
|
||||
pub fingerprint: String,
|
||||
}
|
||||
|
||||
pub struct BookInsert {
|
||||
pub book_id: Uuid,
|
||||
pub library_id: Uuid,
|
||||
pub kind: String,
|
||||
pub title: String,
|
||||
pub series: Option<String>,
|
||||
pub volume: Option<i32>,
|
||||
pub page_count: Option<i32>,
|
||||
pub thumbnail_path: Option<String>,
|
||||
}
|
||||
|
||||
pub struct FileInsert {
|
||||
pub file_id: Uuid,
|
||||
pub book_id: Uuid,
|
||||
pub format: String,
|
||||
pub abs_path: String,
|
||||
pub size_bytes: i64,
|
||||
pub mtime: DateTime<Utc>,
|
||||
pub fingerprint: String,
|
||||
pub parse_status: String,
|
||||
pub parse_error: Option<String>,
|
||||
}
|
||||
|
||||
pub struct ErrorInsert {
|
||||
pub job_id: Uuid,
|
||||
pub file_path: String,
|
||||
pub error_message: String,
|
||||
}
|
||||
|
||||
pub async fn flush_all_batches(
|
||||
pool: &PgPool,
|
||||
books_update: &mut Vec<BookUpdate>,
|
||||
files_update: &mut Vec<FileUpdate>,
|
||||
books_insert: &mut Vec<BookInsert>,
|
||||
files_insert: &mut Vec<FileInsert>,
|
||||
errors_insert: &mut Vec<ErrorInsert>,
|
||||
) -> Result<()> {
|
||||
if books_update.is_empty() && files_update.is_empty() && books_insert.is_empty() && files_insert.is_empty() && errors_insert.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let mut tx = pool.begin().await?;
|
||||
|
||||
// Batch update books using UNNEST
|
||||
if !books_update.is_empty() {
|
||||
let book_ids: Vec<Uuid> = books_update.iter().map(|b| b.book_id).collect();
|
||||
let titles: Vec<String> = books_update.iter().map(|b| b.title.clone()).collect();
|
||||
let kinds: Vec<String> = books_update.iter().map(|b| b.kind.clone()).collect();
|
||||
let series: Vec<Option<String>> = books_update.iter().map(|b| b.series.clone()).collect();
|
||||
let volumes: Vec<Option<i32>> = books_update.iter().map(|b| b.volume).collect();
|
||||
let page_counts: Vec<Option<i32>> = books_update.iter().map(|b| b.page_count).collect();
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE books SET
|
||||
title = data.title,
|
||||
kind = data.kind,
|
||||
series = data.series,
|
||||
volume = data.volume,
|
||||
page_count = data.page_count,
|
||||
updated_at = NOW()
|
||||
FROM (
|
||||
SELECT * FROM UNNEST($1::uuid[], $2::text[], $3::text[], $4::text[], $5::int[], $6::int[])
|
||||
AS t(book_id, title, kind, series, volume, page_count)
|
||||
) AS data
|
||||
WHERE books.id = data.book_id
|
||||
"#
|
||||
)
|
||||
.bind(&book_ids)
|
||||
.bind(&titles)
|
||||
.bind(&kinds)
|
||||
.bind(&series)
|
||||
.bind(&volumes)
|
||||
.bind(&page_counts)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
books_update.clear();
|
||||
}
|
||||
|
||||
// Batch update files using UNNEST
|
||||
if !files_update.is_empty() {
|
||||
let file_ids: Vec<Uuid> = files_update.iter().map(|f| f.file_id).collect();
|
||||
let formats: Vec<String> = files_update.iter().map(|f| f.format.clone()).collect();
|
||||
let sizes: Vec<i64> = files_update.iter().map(|f| f.size_bytes).collect();
|
||||
let mtimes: Vec<DateTime<Utc>> = files_update.iter().map(|f| f.mtime).collect();
|
||||
let fingerprints: Vec<String> = files_update.iter().map(|f| f.fingerprint.clone()).collect();
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE book_files SET
|
||||
format = data.format,
|
||||
size_bytes = data.size,
|
||||
mtime = data.mtime,
|
||||
fingerprint = data.fp,
|
||||
parse_status = 'ok',
|
||||
parse_error_opt = NULL,
|
||||
updated_at = NOW()
|
||||
FROM (
|
||||
SELECT * FROM UNNEST($1::uuid[], $2::text[], $3::bigint[], $4::timestamptz[], $5::text[])
|
||||
AS t(file_id, format, size, mtime, fp)
|
||||
) AS data
|
||||
WHERE book_files.id = data.file_id
|
||||
"#
|
||||
)
|
||||
.bind(&file_ids)
|
||||
.bind(&formats)
|
||||
.bind(&sizes)
|
||||
.bind(&mtimes)
|
||||
.bind(&fingerprints)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
files_update.clear();
|
||||
}
|
||||
|
||||
// Batch insert books using UNNEST
|
||||
if !books_insert.is_empty() {
|
||||
let book_ids: Vec<Uuid> = books_insert.iter().map(|b| b.book_id).collect();
|
||||
let library_ids: Vec<Uuid> = books_insert.iter().map(|b| b.library_id).collect();
|
||||
let kinds: Vec<String> = books_insert.iter().map(|b| b.kind.clone()).collect();
|
||||
let titles: Vec<String> = books_insert.iter().map(|b| b.title.clone()).collect();
|
||||
let series: Vec<Option<String>> = books_insert.iter().map(|b| b.series.clone()).collect();
|
||||
let volumes: Vec<Option<i32>> = books_insert.iter().map(|b| b.volume).collect();
|
||||
let page_counts: Vec<Option<i32>> = books_insert.iter().map(|b| b.page_count).collect();
|
||||
let thumbnail_paths: Vec<Option<String>> = books_insert.iter().map(|b| b.thumbnail_path.clone()).collect();
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO books (id, library_id, kind, title, series, volume, page_count, thumbnail_path)
|
||||
SELECT * FROM UNNEST($1::uuid[], $2::uuid[], $3::text[], $4::text[], $5::text[], $6::int[], $7::int[], $8::text[])
|
||||
AS t(id, library_id, kind, title, series, volume, page_count, thumbnail_path)
|
||||
"#
|
||||
)
|
||||
.bind(&book_ids)
|
||||
.bind(&library_ids)
|
||||
.bind(&kinds)
|
||||
.bind(&titles)
|
||||
.bind(&series)
|
||||
.bind(&volumes)
|
||||
.bind(&page_counts)
|
||||
.bind(&thumbnail_paths)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
books_insert.clear();
|
||||
}
|
||||
|
||||
// Batch insert files using UNNEST
|
||||
if !files_insert.is_empty() {
|
||||
let file_ids: Vec<Uuid> = files_insert.iter().map(|f| f.file_id).collect();
|
||||
let book_ids: Vec<Uuid> = files_insert.iter().map(|f| f.book_id).collect();
|
||||
let formats: Vec<String> = files_insert.iter().map(|f| f.format.clone()).collect();
|
||||
let abs_paths: Vec<String> = files_insert.iter().map(|f| f.abs_path.clone()).collect();
|
||||
let sizes: Vec<i64> = files_insert.iter().map(|f| f.size_bytes).collect();
|
||||
let mtimes: Vec<DateTime<Utc>> = files_insert.iter().map(|f| f.mtime).collect();
|
||||
let fingerprints: Vec<String> = files_insert.iter().map(|f| f.fingerprint.clone()).collect();
|
||||
let statuses: Vec<String> = files_insert.iter().map(|f| f.parse_status.clone()).collect();
|
||||
let errors: Vec<Option<String>> = files_insert.iter().map(|f| f.parse_error.clone()).collect();
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO book_files (id, book_id, format, abs_path, size_bytes, mtime, fingerprint, parse_status, parse_error_opt)
|
||||
SELECT * FROM UNNEST($1::uuid[], $2::uuid[], $3::text[], $4::text[], $5::bigint[], $6::timestamptz[], $7::text[], $8::text[], $9::text[])
|
||||
AS t(id, book_id, format, abs_path, size_bytes, mtime, fingerprint, parse_status, parse_error_opt)
|
||||
"#
|
||||
)
|
||||
.bind(&file_ids)
|
||||
.bind(&book_ids)
|
||||
.bind(&formats)
|
||||
.bind(&abs_paths)
|
||||
.bind(&sizes)
|
||||
.bind(&mtimes)
|
||||
.bind(&fingerprints)
|
||||
.bind(&statuses)
|
||||
.bind(&errors)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
files_insert.clear();
|
||||
}
|
||||
|
||||
// Batch insert errors using UNNEST
|
||||
if !errors_insert.is_empty() {
|
||||
let job_ids: Vec<Uuid> = errors_insert.iter().map(|e| e.job_id).collect();
|
||||
let file_paths: Vec<String> = errors_insert.iter().map(|e| e.file_path.clone()).collect();
|
||||
let messages: Vec<String> = errors_insert.iter().map(|e| e.error_message.clone()).collect();
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO index_job_errors (job_id, file_path, error_message)
|
||||
SELECT * FROM UNNEST($1::uuid[], $2::text[], $3::text[])
|
||||
AS t(job_id, file_path, error_message)
|
||||
"#
|
||||
)
|
||||
.bind(&job_ids)
|
||||
.bind(&file_paths)
|
||||
.bind(&messages)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
errors_insert.clear();
|
||||
}
|
||||
|
||||
tx.commit().await?;
|
||||
tracing::info!("[BATCH] Flushed all batches in {:?}", start.elapsed());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
293
apps/indexer/src/job.rs
Normal file
293
apps/indexer/src/job.rs
Normal file
@@ -0,0 +1,293 @@
|
||||
use anyhow::Result;
|
||||
use rayon::prelude::*;
|
||||
use sqlx::{PgPool, Row};
|
||||
use std::time::Duration;
|
||||
use tracing::{error, info};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{meili, scanner, AppState};
|
||||
|
||||
pub async fn cleanup_stale_jobs(pool: &PgPool) -> Result<()> {
|
||||
// Mark jobs that have been running for more than 5 minutes as failed
|
||||
// This handles cases where the indexer was restarted while jobs were running
|
||||
let result = sqlx::query(
|
||||
r#"
|
||||
UPDATE index_jobs
|
||||
SET status = 'failed',
|
||||
finished_at = NOW(),
|
||||
error_opt = 'Job interrupted by indexer restart'
|
||||
WHERE status = 'running'
|
||||
AND started_at < NOW() - INTERVAL '5 minutes'
|
||||
RETURNING id
|
||||
"#
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
if !result.is_empty() {
|
||||
let count = result.len();
|
||||
let ids: Vec<String> = result.iter()
|
||||
.map(|row| row.get::<Uuid, _>("id").to_string())
|
||||
.collect();
|
||||
info!("[CLEANUP] Marked {} stale job(s) as failed: {}", count, ids.join(", "));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn claim_next_job(pool: &PgPool) -> Result<Option<(Uuid, Option<Uuid>)>> {
|
||||
let mut tx = pool.begin().await?;
|
||||
|
||||
// Atomically select and lock the next job
|
||||
// Exclude rebuild/full_rebuild if one is already running
|
||||
// Prioritize: full_rebuild > rebuild > others
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
SELECT j.id, j.type, j.library_id
|
||||
FROM index_jobs j
|
||||
WHERE j.status = 'pending'
|
||||
AND (
|
||||
-- Allow rebuilds only if no rebuild is running
|
||||
(j.type IN ('rebuild', 'full_rebuild') AND NOT EXISTS (
|
||||
SELECT 1 FROM index_jobs
|
||||
WHERE status = 'running'
|
||||
AND type IN ('rebuild', 'full_rebuild')
|
||||
))
|
||||
OR
|
||||
-- Always allow non-rebuild jobs
|
||||
j.type NOT IN ('rebuild', 'full_rebuild')
|
||||
)
|
||||
ORDER BY
|
||||
CASE j.type
|
||||
WHEN 'full_rebuild' THEN 1
|
||||
WHEN 'rebuild' THEN 2
|
||||
ELSE 3
|
||||
END,
|
||||
j.created_at ASC
|
||||
FOR UPDATE SKIP LOCKED
|
||||
LIMIT 1
|
||||
"#
|
||||
)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await?;
|
||||
|
||||
let Some(row) = row else {
|
||||
tx.commit().await?;
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let id: Uuid = row.get("id");
|
||||
let job_type: String = row.get("type");
|
||||
let library_id: Option<Uuid> = row.get("library_id");
|
||||
|
||||
// Final check: if this is a rebuild, ensure no rebuild started between SELECT and UPDATE
|
||||
if job_type == "rebuild" || job_type == "full_rebuild" {
|
||||
let has_running_rebuild: bool = sqlx::query_scalar(
|
||||
r#"
|
||||
SELECT EXISTS(
|
||||
SELECT 1 FROM index_jobs
|
||||
WHERE status = 'running'
|
||||
AND type IN ('rebuild', 'full_rebuild')
|
||||
AND id != $1
|
||||
)
|
||||
"#
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_one(&mut *tx)
|
||||
.await?;
|
||||
|
||||
if has_running_rebuild {
|
||||
tx.rollback().await?;
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
|
||||
sqlx::query("UPDATE index_jobs SET status = 'running', started_at = NOW(), error_opt = NULL WHERE id = $1")
|
||||
.bind(id)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
tx.commit().await?;
|
||||
Ok(Some((id, library_id)))
|
||||
}
|
||||
|
||||
pub async fn fail_job(pool: &PgPool, job_id: Uuid, error_message: &str) -> Result<()> {
|
||||
sqlx::query("UPDATE index_jobs SET status = 'failed', finished_at = NOW(), error_opt = $2 WHERE id = $1")
|
||||
.bind(job_id)
|
||||
.bind(error_message)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn is_job_cancelled(pool: &PgPool, job_id: Uuid) -> Result<bool> {
|
||||
let status: Option<String> = sqlx::query_scalar(
|
||||
"SELECT status FROM index_jobs WHERE id = $1"
|
||||
)
|
||||
.bind(job_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
Ok(status.as_deref() == Some("cancelled"))
|
||||
}
|
||||
|
||||
pub async fn process_job(state: &AppState, job_id: Uuid, target_library_id: Option<Uuid>) -> Result<()> {
|
||||
info!("[JOB] Processing {} library={:?}", job_id, target_library_id);
|
||||
|
||||
let job_type: String = sqlx::query_scalar("SELECT type FROM index_jobs WHERE id = $1")
|
||||
.bind(job_id)
|
||||
.fetch_one(&state.pool)
|
||||
.await?;
|
||||
|
||||
// Thumbnail jobs: hand off to API and wait for completion (same queue as rebuilds)
|
||||
if job_type == "thumbnail_rebuild" || job_type == "thumbnail_regenerate" {
|
||||
sqlx::query(
|
||||
"UPDATE index_jobs SET status = 'generating_thumbnails', started_at = NOW() WHERE id = $1",
|
||||
)
|
||||
.bind(job_id)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
let api_base = state.api_base_url.trim_end_matches('/');
|
||||
let url = format!("{}/index/jobs/{}/thumbnails/checkup", api_base, job_id);
|
||||
let client = reqwest::Client::new();
|
||||
let res = client
|
||||
.post(&url)
|
||||
.header("Authorization", format!("Bearer {}", state.api_bootstrap_token))
|
||||
.send()
|
||||
.await?;
|
||||
if !res.status().is_success() {
|
||||
anyhow::bail!("thumbnail checkup API returned {}", res.status());
|
||||
}
|
||||
|
||||
// Poll until job is finished (API updates the same row)
|
||||
let poll_interval = Duration::from_secs(1);
|
||||
loop {
|
||||
tokio::time::sleep(poll_interval).await;
|
||||
let status: String = sqlx::query_scalar("SELECT status FROM index_jobs WHERE id = $1")
|
||||
.bind(job_id)
|
||||
.fetch_one(&state.pool)
|
||||
.await?;
|
||||
if status == "success" || status == "failed" {
|
||||
info!("[JOB] Thumbnail job {} finished with status {}", job_id, status);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let is_full_rebuild = job_type == "full_rebuild";
|
||||
info!("[JOB] {} type={} full_rebuild={}", job_id, job_type, is_full_rebuild);
|
||||
|
||||
// For full rebuilds, delete existing data first
|
||||
if is_full_rebuild {
|
||||
info!("[JOB] Full rebuild: deleting existing data");
|
||||
|
||||
if let Some(library_id) = target_library_id {
|
||||
// Delete books and files for specific library
|
||||
sqlx::query("DELETE FROM book_files WHERE book_id IN (SELECT id FROM books WHERE library_id = $1)")
|
||||
.bind(library_id)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
sqlx::query("DELETE FROM books WHERE library_id = $1")
|
||||
.bind(library_id)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
info!("[JOB] Deleted existing data for library {}", library_id);
|
||||
} else {
|
||||
// Delete all books and files
|
||||
sqlx::query("DELETE FROM book_files").execute(&state.pool).await?;
|
||||
sqlx::query("DELETE FROM books").execute(&state.pool).await?;
|
||||
info!("[JOB] Deleted all existing data");
|
||||
}
|
||||
}
|
||||
|
||||
let libraries = if let Some(library_id) = target_library_id {
|
||||
sqlx::query("SELECT id, root_path FROM libraries WHERE id = $1 AND enabled = TRUE")
|
||||
.bind(library_id)
|
||||
.fetch_all(&state.pool)
|
||||
.await?
|
||||
} else {
|
||||
sqlx::query("SELECT id, root_path FROM libraries WHERE enabled = TRUE")
|
||||
.fetch_all(&state.pool)
|
||||
.await?
|
||||
};
|
||||
|
||||
// First pass: count total files for progress estimation (parallel)
|
||||
let library_paths: Vec<String> = libraries.iter()
|
||||
.map(|library| crate::utils::remap_libraries_path(&library.get::<String, _>("root_path")))
|
||||
.collect();
|
||||
|
||||
let total_files: usize = library_paths.par_iter()
|
||||
.map(|root_path| {
|
||||
walkdir::WalkDir::new(root_path)
|
||||
.into_iter()
|
||||
.filter_map(Result::ok)
|
||||
.filter(|entry| entry.file_type().is_file() && parsers::detect_format(entry.path()).is_some())
|
||||
.count()
|
||||
})
|
||||
.sum();
|
||||
|
||||
info!("[JOB] Found {} libraries, {} total files to index", libraries.len(), total_files);
|
||||
|
||||
// Update job with total estimate
|
||||
sqlx::query("UPDATE index_jobs SET total_files = $2 WHERE id = $1")
|
||||
.bind(job_id)
|
||||
.bind(total_files as i32)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
let mut stats = scanner::JobStats {
|
||||
scanned_files: 0,
|
||||
indexed_files: 0,
|
||||
removed_files: 0,
|
||||
errors: 0,
|
||||
};
|
||||
|
||||
// Track processed files across all libraries for accurate progress
|
||||
let mut total_processed_count = 0i32;
|
||||
|
||||
for library in libraries {
|
||||
let library_id: Uuid = library.get("id");
|
||||
let root_path: String = library.get("root_path");
|
||||
let root_path = crate::utils::remap_libraries_path(&root_path);
|
||||
match scanner::scan_library(state, job_id, library_id, std::path::Path::new(&root_path), &mut stats, &mut total_processed_count, total_files, is_full_rebuild).await {
|
||||
Ok(()) => {}
|
||||
Err(err) => {
|
||||
stats.errors += 1;
|
||||
error!(library_id = %library_id, error = %err, "library scan failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
meili::sync_meili(&state.pool, &state.meili_url, &state.meili_master_key).await?;
|
||||
|
||||
// Hand off to API for thumbnail checkup (API will set status = 'success' when done)
|
||||
sqlx::query(
|
||||
"UPDATE index_jobs SET status = 'generating_thumbnails', stats_json = $2, current_file = NULL, processed_files = $3 WHERE id = $1",
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(serde_json::to_value(&stats)?)
|
||||
.bind(total_processed_count)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
let api_base = state.api_base_url.trim_end_matches('/');
|
||||
let url = format!("{}/index/jobs/{}/thumbnails/checkup", api_base, job_id);
|
||||
let client = reqwest::Client::new();
|
||||
let res = client
|
||||
.post(&url)
|
||||
.header("Authorization", format!("Bearer {}", state.api_bootstrap_token))
|
||||
.send()
|
||||
.await;
|
||||
if let Err(e) = res {
|
||||
tracing::warn!("[JOB] Failed to trigger thumbnail checkup: {} — API will not generate thumbnails for this job", e);
|
||||
} else if let Ok(r) = res {
|
||||
if !r.status().is_success() {
|
||||
tracing::warn!("[JOB] Thumbnail checkup returned {} — API may not generate thumbnails", r.status());
|
||||
} else {
|
||||
info!("[JOB] Thumbnail checkup started (job {}), API will complete the job", job_id);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
20
apps/indexer/src/lib.rs
Normal file
20
apps/indexer/src/lib.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
pub mod api;
|
||||
pub mod batch;
|
||||
pub mod job;
|
||||
pub mod meili;
|
||||
pub mod scheduler;
|
||||
pub mod scanner;
|
||||
pub mod utils;
|
||||
pub mod watcher;
|
||||
pub mod worker;
|
||||
|
||||
use sqlx::PgPool;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AppState {
|
||||
pub pool: PgPool,
|
||||
pub meili_url: String,
|
||||
pub meili_master_key: String,
|
||||
pub api_base_url: String,
|
||||
pub api_bootstrap_token: String,
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
180
apps/indexer/src/meili.rs
Normal file
180
apps/indexer/src/meili.rs
Normal file
@@ -0,0 +1,180 @@
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use reqwest::Client;
|
||||
use serde::Serialize;
|
||||
use sqlx::{PgPool, Row};
|
||||
use tracing::info;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct SearchDoc {
|
||||
id: String,
|
||||
library_id: String,
|
||||
kind: String,
|
||||
title: String,
|
||||
author: Option<String>,
|
||||
series: Option<String>,
|
||||
volume: Option<i32>,
|
||||
language: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn sync_meili(pool: &PgPool, meili_url: &str, meili_master_key: &str) -> Result<()> {
|
||||
let client = Client::new();
|
||||
let base = meili_url.trim_end_matches('/');
|
||||
|
||||
// Ensure index exists and has proper settings
|
||||
let _ = client
|
||||
.post(format!("{base}/indexes"))
|
||||
.header("Authorization", format!("Bearer {meili_master_key}"))
|
||||
.json(&serde_json::json!({"uid": "books", "primaryKey": "id"}))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
let _ = client
|
||||
.patch(format!("{base}/indexes/books/settings/filterable-attributes"))
|
||||
.header("Authorization", format!("Bearer {meili_master_key}"))
|
||||
.json(&serde_json::json!(["library_id", "kind"]))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
// Get last sync timestamp
|
||||
let last_sync: Option<DateTime<Utc>> = sqlx::query_scalar(
|
||||
"SELECT last_meili_sync FROM sync_metadata WHERE id = 1 AND last_meili_sync IS NOT NULL"
|
||||
)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
// If no previous sync, do a full sync
|
||||
let is_full_sync = last_sync.is_none();
|
||||
|
||||
// Get books to sync: all if full sync, only modified since last sync otherwise
|
||||
let rows = if is_full_sync {
|
||||
info!("[MEILI] Performing full sync");
|
||||
sqlx::query(
|
||||
"SELECT id, library_id, kind, title, author, series, volume, language, updated_at FROM books",
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await?
|
||||
} else {
|
||||
let since = last_sync.unwrap();
|
||||
info!("[MEILI] Performing incremental sync since {}", since);
|
||||
|
||||
// Also get deleted book IDs to remove from MeiliSearch
|
||||
// For now, we'll do a diff approach: get all book IDs from DB and compare with Meili
|
||||
sqlx::query(
|
||||
"SELECT id, library_id, kind, title, author, series, volume, language, updated_at FROM books WHERE updated_at > $1",
|
||||
)
|
||||
.bind(since)
|
||||
.fetch_all(pool)
|
||||
.await?
|
||||
};
|
||||
|
||||
if rows.is_empty() && !is_full_sync {
|
||||
info!("[MEILI] No changes to sync");
|
||||
// Still update the timestamp
|
||||
sqlx::query(
|
||||
"INSERT INTO sync_metadata (id, last_meili_sync) VALUES (1, NOW()) ON CONFLICT (id) DO UPDATE SET last_meili_sync = NOW()"
|
||||
)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let docs: Vec<SearchDoc> = rows
|
||||
.into_iter()
|
||||
.map(|row| SearchDoc {
|
||||
id: row.get::<Uuid, _>("id").to_string(),
|
||||
library_id: row.get::<Uuid, _>("library_id").to_string(),
|
||||
kind: row.get("kind"),
|
||||
title: row.get("title"),
|
||||
author: row.get("author"),
|
||||
series: row.get("series"),
|
||||
volume: row.get("volume"),
|
||||
language: row.get("language"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let doc_count = docs.len();
|
||||
|
||||
// Send documents to MeiliSearch in batches of 1000
|
||||
const MEILI_BATCH_SIZE: usize = 1000;
|
||||
for (i, chunk) in docs.chunks(MEILI_BATCH_SIZE).enumerate() {
|
||||
let batch_num = i + 1;
|
||||
info!("[MEILI] Sending batch {}/{} ({} docs)", batch_num, (doc_count + MEILI_BATCH_SIZE - 1) / MEILI_BATCH_SIZE, chunk.len());
|
||||
|
||||
let response = client
|
||||
.post(format!("{base}/indexes/books/documents"))
|
||||
.header("Authorization", format!("Bearer {meili_master_key}"))
|
||||
.json(&chunk)
|
||||
.send()
|
||||
.await
|
||||
.context("failed to send docs to meili")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
return Err(anyhow::anyhow!("MeiliSearch error {}: {}", status, body));
|
||||
}
|
||||
}
|
||||
|
||||
// Handle deletions: get all book IDs from DB and remove from MeiliSearch any that don't exist
|
||||
// This is expensive, so we only do it periodically (every 10 syncs) or on full syncs
|
||||
if is_full_sync || rand::random::<u8>() < 26 { // ~10% chance
|
||||
info!("[MEILI] Checking for documents to delete");
|
||||
|
||||
// Get all book IDs from database
|
||||
let db_ids: Vec<String> = sqlx::query_scalar("SELECT id::text FROM books")
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
// Get all document IDs from MeiliSearch (this requires fetching all documents)
|
||||
// For efficiency, we'll just delete by query for documents that might be stale
|
||||
// A better approach would be to track deletions in a separate table
|
||||
|
||||
// For now, we'll do a simple approach: fetch all Meili docs and compare
|
||||
// Note: This could be slow for large collections
|
||||
let meili_response = client
|
||||
.post(format!("{base}/indexes/books/documents/fetch"))
|
||||
.header("Authorization", format!("Bearer {meili_master_key}"))
|
||||
.json(&serde_json::json!({
|
||||
"fields": ["id"],
|
||||
"limit": 100000
|
||||
}))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
if let Ok(response) = meili_response {
|
||||
if response.status().is_success() {
|
||||
if let Ok(meili_docs) = response.json::<Vec<serde_json::Value>>().await {
|
||||
let meili_ids: std::collections::HashSet<String> = meili_docs
|
||||
.into_iter()
|
||||
.filter_map(|doc| doc.get("id").and_then(|id| id.as_str()).map(|s| s.to_string()))
|
||||
.collect();
|
||||
|
||||
let db_ids_set: std::collections::HashSet<String> = db_ids.into_iter().collect();
|
||||
let to_delete: Vec<String> = meili_ids.difference(&db_ids_set).cloned().collect();
|
||||
|
||||
if !to_delete.is_empty() {
|
||||
info!("[MEILI] Deleting {} stale documents", to_delete.len());
|
||||
let _ = client
|
||||
.post(format!("{base}/indexes/books/documents/delete-batch"))
|
||||
.header("Authorization", format!("Bearer {meili_master_key}"))
|
||||
.json(&to_delete)
|
||||
.send()
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update last sync timestamp
|
||||
sqlx::query(
|
||||
"INSERT INTO sync_metadata (id, last_meili_sync) VALUES (1, NOW()) ON CONFLICT (id) DO UPDATE SET last_meili_sync = NOW()"
|
||||
)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
info!("[MEILI] Sync completed: {} documents indexed", doc_count);
|
||||
Ok(())
|
||||
}
|
||||
360
apps/indexer/src/scanner.rs
Normal file
360
apps/indexer/src/scanner.rs
Normal file
@@ -0,0 +1,360 @@
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use parsers::{detect_format, parse_metadata, BookFormat, ParsedMetadata};
|
||||
use rayon::prelude::*;
|
||||
use serde::Serialize;
|
||||
use sqlx::Row;
|
||||
use std::{collections::HashMap, path::Path, time::Duration};
|
||||
use tracing::{error, info, trace, warn};
|
||||
use uuid::Uuid;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::{
|
||||
batch::{flush_all_batches, BookInsert, BookUpdate, ErrorInsert, FileInsert, FileUpdate},
|
||||
job::is_job_cancelled,
|
||||
utils,
|
||||
AppState,
|
||||
};
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct JobStats {
|
||||
pub scanned_files: usize,
|
||||
pub indexed_files: usize,
|
||||
pub removed_files: usize,
|
||||
pub errors: usize,
|
||||
}
|
||||
|
||||
const BATCH_SIZE: usize = 100;
|
||||
|
||||
pub async fn scan_library(
|
||||
state: &AppState,
|
||||
job_id: Uuid,
|
||||
library_id: Uuid,
|
||||
root: &Path,
|
||||
stats: &mut JobStats,
|
||||
total_processed_count: &mut i32,
|
||||
total_files: usize,
|
||||
is_full_rebuild: bool,
|
||||
) -> Result<()> {
|
||||
info!("[SCAN] Starting scan of library {} at path: {} (full_rebuild={})", library_id, root.display(), is_full_rebuild);
|
||||
|
||||
let existing_rows = sqlx::query(
|
||||
r#"
|
||||
SELECT bf.id AS file_id, bf.book_id, bf.abs_path, bf.fingerprint
|
||||
FROM book_files bf
|
||||
JOIN books b ON b.id = bf.book_id
|
||||
WHERE b.library_id = $1
|
||||
"#,
|
||||
)
|
||||
.bind(library_id)
|
||||
.fetch_all(&state.pool)
|
||||
.await?;
|
||||
|
||||
let mut existing: HashMap<String, (Uuid, Uuid, String)> = HashMap::new();
|
||||
if !is_full_rebuild {
|
||||
for row in existing_rows {
|
||||
let abs_path: String = row.get("abs_path");
|
||||
let remapped_path = utils::remap_libraries_path(&abs_path);
|
||||
existing.insert(
|
||||
remapped_path,
|
||||
(row.get("file_id"), row.get("book_id"), row.get("fingerprint")),
|
||||
);
|
||||
}
|
||||
info!("[SCAN] Found {} existing files in database for library {}", existing.len(), library_id);
|
||||
} else {
|
||||
info!("[SCAN] Full rebuild: skipping existing files lookup (all will be treated as new)");
|
||||
}
|
||||
|
||||
let mut seen: HashMap<String, bool> = HashMap::new();
|
||||
let mut library_processed_count = 0i32;
|
||||
let mut last_progress_update = std::time::Instant::now();
|
||||
|
||||
// Batching buffers
|
||||
let mut books_to_update: Vec<BookUpdate> = Vec::with_capacity(BATCH_SIZE);
|
||||
let mut files_to_update: Vec<FileUpdate> = Vec::with_capacity(BATCH_SIZE);
|
||||
let mut books_to_insert: Vec<BookInsert> = Vec::with_capacity(BATCH_SIZE);
|
||||
let mut files_to_insert: Vec<FileInsert> = Vec::with_capacity(BATCH_SIZE);
|
||||
let mut errors_to_insert: Vec<ErrorInsert> = Vec::with_capacity(BATCH_SIZE);
|
||||
|
||||
// Step 1: Collect all book files first
|
||||
#[derive(Clone)]
|
||||
struct FileInfo {
|
||||
path: std::path::PathBuf,
|
||||
format: BookFormat,
|
||||
abs_path: String,
|
||||
file_name: String,
|
||||
metadata: std::fs::Metadata,
|
||||
mtime: DateTime<Utc>,
|
||||
fingerprint: String,
|
||||
lookup_path: String,
|
||||
}
|
||||
|
||||
let mut file_infos: Vec<FileInfo> = Vec::new();
|
||||
for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let path = entry.path().to_path_buf();
|
||||
let Some(format) = detect_format(&path) else {
|
||||
trace!("[SCAN] Skipping non-book file: {}", path.display());
|
||||
continue;
|
||||
};
|
||||
|
||||
info!("[SCAN] Found book file: {} (format: {:?})", path.display(), format);
|
||||
stats.scanned_files += 1;
|
||||
|
||||
let abs_path_local = path.to_string_lossy().to_string();
|
||||
let abs_path = utils::unmap_libraries_path(&abs_path_local);
|
||||
let file_name = path.file_name()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| abs_path.clone());
|
||||
|
||||
let metadata = std::fs::metadata(&path)
|
||||
.with_context(|| format!("cannot stat {}", path.display()))?;
|
||||
let mtime: DateTime<Utc> = metadata
|
||||
.modified()
|
||||
.map(DateTime::<Utc>::from)
|
||||
.unwrap_or_else(|_| Utc::now());
|
||||
let fingerprint = utils::compute_fingerprint(&path, metadata.len(), &mtime)?;
|
||||
let lookup_path = utils::remap_libraries_path(&abs_path);
|
||||
|
||||
file_infos.push(FileInfo {
|
||||
path,
|
||||
format,
|
||||
abs_path,
|
||||
file_name,
|
||||
metadata,
|
||||
mtime,
|
||||
fingerprint,
|
||||
lookup_path,
|
||||
});
|
||||
}
|
||||
|
||||
info!("[SCAN] Collected {} files, starting parallel parsing", file_infos.len());
|
||||
|
||||
// Step 2: Parse metadata in parallel
|
||||
let parsed_results: Vec<(FileInfo, Result<ParsedMetadata>)> = file_infos
|
||||
.into_par_iter()
|
||||
.map(|file_info| {
|
||||
let parse_result = parse_metadata(&file_info.path, file_info.format, root);
|
||||
(file_info, parse_result)
|
||||
})
|
||||
.collect();
|
||||
|
||||
info!("[SCAN] Completed parallel parsing, processing {} results", parsed_results.len());
|
||||
|
||||
// Step 3: Process results sequentially for batch inserts
|
||||
for (file_info, parse_result) in parsed_results {
|
||||
library_processed_count += 1;
|
||||
*total_processed_count += 1;
|
||||
|
||||
// Update progress in DB every 1 second or every 10 files
|
||||
let should_update_progress = last_progress_update.elapsed() > Duration::from_secs(1) || library_processed_count % 10 == 0;
|
||||
if should_update_progress {
|
||||
let progress_percent = if total_files > 0 {
|
||||
((*total_processed_count as f64 / total_files as f64) * 100.0) as i32
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
sqlx::query(
|
||||
"UPDATE index_jobs SET current_file = $2, processed_files = $3, progress_percent = $4 WHERE id = $1"
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(&file_info.file_name)
|
||||
.bind(*total_processed_count)
|
||||
.bind(progress_percent)
|
||||
.execute(&state.pool)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error!("[BDD] Failed to update progress for job {}: {}", job_id, e);
|
||||
e
|
||||
})?;
|
||||
|
||||
last_progress_update = std::time::Instant::now();
|
||||
|
||||
// Check if job has been cancelled
|
||||
if is_job_cancelled(&state.pool, job_id).await? {
|
||||
info!("[JOB] Job {} cancelled by user, stopping...", job_id);
|
||||
// Flush any pending batches before exiting
|
||||
flush_all_batches(&state.pool, &mut books_to_update, &mut files_to_update, &mut books_to_insert, &mut files_to_insert, &mut errors_to_insert).await?;
|
||||
return Err(anyhow::anyhow!("Job cancelled by user"));
|
||||
}
|
||||
}
|
||||
|
||||
let seen_key = utils::remap_libraries_path(&file_info.abs_path);
|
||||
seen.insert(seen_key.clone(), true);
|
||||
|
||||
if let Some((file_id, book_id, old_fingerprint)) = existing.get(&file_info.lookup_path).cloned() {
|
||||
if !is_full_rebuild && old_fingerprint == file_info.fingerprint {
|
||||
trace!("[PROCESS] Skipping unchanged file: {}", file_info.file_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
info!("[PROCESS] Updating existing file: {} (full_rebuild={}, fingerprint_match={})", file_info.file_name, is_full_rebuild, old_fingerprint == file_info.fingerprint);
|
||||
|
||||
match parse_result {
|
||||
Ok(parsed) => {
|
||||
books_to_update.push(BookUpdate {
|
||||
book_id,
|
||||
title: parsed.title,
|
||||
kind: utils::kind_from_format(file_info.format).to_string(),
|
||||
series: parsed.series,
|
||||
volume: parsed.volume,
|
||||
page_count: parsed.page_count,
|
||||
});
|
||||
|
||||
files_to_update.push(FileUpdate {
|
||||
file_id,
|
||||
format: file_info.format.as_str().to_string(),
|
||||
size_bytes: file_info.metadata.len() as i64,
|
||||
mtime: file_info.mtime,
|
||||
fingerprint: file_info.fingerprint,
|
||||
});
|
||||
|
||||
stats.indexed_files += 1;
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("[PARSER] Failed to parse {}: {}", file_info.file_name, err);
|
||||
stats.errors += 1;
|
||||
|
||||
files_to_update.push(FileUpdate {
|
||||
file_id,
|
||||
format: file_info.format.as_str().to_string(),
|
||||
size_bytes: file_info.metadata.len() as i64,
|
||||
mtime: file_info.mtime,
|
||||
fingerprint: file_info.fingerprint.clone(),
|
||||
});
|
||||
|
||||
errors_to_insert.push(ErrorInsert {
|
||||
job_id,
|
||||
file_path: file_info.abs_path.clone(),
|
||||
error_message: err.to_string(),
|
||||
});
|
||||
|
||||
// Also need to mark file as error - we'll do this separately
|
||||
sqlx::query(
|
||||
"UPDATE book_files SET parse_status = 'error', parse_error_opt = $2 WHERE id = $1"
|
||||
)
|
||||
.bind(file_id)
|
||||
.bind(err.to_string())
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
// Flush if batch is full
|
||||
if books_to_update.len() >= BATCH_SIZE || files_to_update.len() >= BATCH_SIZE {
|
||||
flush_all_batches(&state.pool, &mut books_to_update, &mut files_to_update, &mut books_to_insert, &mut files_to_insert, &mut errors_to_insert).await?;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// New file (thumbnails generated by API after job handoff)
|
||||
info!("[PROCESS] Inserting new file: {}", file_info.file_name);
|
||||
let book_id = Uuid::new_v4();
|
||||
|
||||
match parse_result {
|
||||
Ok(parsed) => {
|
||||
let file_id = Uuid::new_v4();
|
||||
|
||||
books_to_insert.push(BookInsert {
|
||||
book_id,
|
||||
library_id,
|
||||
kind: utils::kind_from_format(file_info.format).to_string(),
|
||||
title: parsed.title,
|
||||
series: parsed.series,
|
||||
volume: parsed.volume,
|
||||
page_count: parsed.page_count,
|
||||
thumbnail_path: None,
|
||||
});
|
||||
|
||||
files_to_insert.push(FileInsert {
|
||||
file_id,
|
||||
book_id,
|
||||
format: file_info.format.as_str().to_string(),
|
||||
abs_path: file_info.abs_path.clone(),
|
||||
size_bytes: file_info.metadata.len() as i64,
|
||||
mtime: file_info.mtime,
|
||||
fingerprint: file_info.fingerprint,
|
||||
parse_status: "ok".to_string(),
|
||||
parse_error: None,
|
||||
});
|
||||
|
||||
stats.indexed_files += 1;
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("[PARSER] Failed to parse {}: {}", file_info.file_name, err);
|
||||
stats.errors += 1;
|
||||
let book_id = Uuid::new_v4();
|
||||
let file_id = Uuid::new_v4();
|
||||
|
||||
books_to_insert.push(BookInsert {
|
||||
book_id,
|
||||
library_id,
|
||||
kind: utils::kind_from_format(file_info.format).to_string(),
|
||||
title: utils::file_display_name(&file_info.path),
|
||||
series: None,
|
||||
volume: None,
|
||||
page_count: None,
|
||||
thumbnail_path: None,
|
||||
});
|
||||
|
||||
files_to_insert.push(FileInsert {
|
||||
file_id,
|
||||
book_id,
|
||||
format: file_info.format.as_str().to_string(),
|
||||
abs_path: file_info.abs_path.clone(),
|
||||
size_bytes: file_info.metadata.len() as i64,
|
||||
mtime: file_info.mtime,
|
||||
fingerprint: file_info.fingerprint,
|
||||
parse_status: "error".to_string(),
|
||||
parse_error: Some(err.to_string()),
|
||||
});
|
||||
|
||||
errors_to_insert.push(ErrorInsert {
|
||||
job_id,
|
||||
file_path: file_info.abs_path,
|
||||
error_message: err.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Flush if batch is full
|
||||
if books_to_insert.len() >= BATCH_SIZE || files_to_insert.len() >= BATCH_SIZE {
|
||||
flush_all_batches(&state.pool, &mut books_to_update, &mut files_to_update, &mut books_to_insert, &mut files_to_insert, &mut errors_to_insert).await?;
|
||||
}
|
||||
}
|
||||
|
||||
// Final flush of any remaining items
|
||||
flush_all_batches(&state.pool, &mut books_to_update, &mut files_to_update, &mut books_to_insert, &mut files_to_insert, &mut errors_to_insert).await?;
|
||||
|
||||
info!("[SCAN] Library {} scan complete: {} files scanned, {} indexed, {} errors",
|
||||
library_id, library_processed_count, stats.indexed_files, stats.errors);
|
||||
|
||||
// Handle deletions
|
||||
let mut removed_count = 0usize;
|
||||
for (abs_path, (file_id, book_id, _)) in existing {
|
||||
if seen.contains_key(&abs_path) {
|
||||
continue;
|
||||
}
|
||||
sqlx::query("DELETE FROM book_files WHERE id = $1")
|
||||
.bind(file_id)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
sqlx::query("DELETE FROM books WHERE id = $1 AND NOT EXISTS (SELECT 1 FROM book_files WHERE book_id = $1)")
|
||||
.bind(book_id)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
stats.removed_files += 1;
|
||||
removed_count += 1;
|
||||
}
|
||||
|
||||
if removed_count > 0 {
|
||||
info!("[SCAN] Removed {} stale files from database", removed_count);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
67
apps/indexer/src/scheduler.rs
Normal file
67
apps/indexer/src/scheduler.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
use anyhow::Result;
|
||||
use sqlx::{PgPool, Row};
|
||||
use tracing::info;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub async fn check_and_schedule_auto_scans(pool: &PgPool) -> Result<()> {
|
||||
let libraries = sqlx::query(
|
||||
r#"
|
||||
SELECT id, scan_mode, last_scan_at
|
||||
FROM libraries
|
||||
WHERE monitor_enabled = TRUE
|
||||
AND (
|
||||
next_scan_at IS NULL
|
||||
OR next_scan_at <= NOW()
|
||||
)
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM index_jobs
|
||||
WHERE library_id = libraries.id
|
||||
AND status IN ('pending', 'running')
|
||||
)
|
||||
"#
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
for row in libraries {
|
||||
let library_id: Uuid = row.get("id");
|
||||
let scan_mode: String = row.get("scan_mode");
|
||||
|
||||
info!("[SCHEDULER] Auto-scanning library {} (mode: {})", library_id, scan_mode);
|
||||
|
||||
let job_id = Uuid::new_v4();
|
||||
let job_type = match scan_mode.as_str() {
|
||||
"full" => "full_rebuild",
|
||||
_ => "rebuild",
|
||||
};
|
||||
|
||||
sqlx::query(
|
||||
"INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, $3, 'pending')"
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(library_id)
|
||||
.bind(job_type)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
// Update next_scan_at
|
||||
let interval_minutes = match scan_mode.as_str() {
|
||||
"hourly" => 60,
|
||||
"daily" => 1440,
|
||||
"weekly" => 10080,
|
||||
_ => 1440, // default daily
|
||||
};
|
||||
|
||||
sqlx::query(
|
||||
"UPDATE libraries SET last_scan_at = NOW(), next_scan_at = NOW() + INTERVAL '1 minute' * $2 WHERE id = $1"
|
||||
)
|
||||
.bind(library_id)
|
||||
.bind(interval_minutes)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
info!("[SCHEDULER] Created job {} for library {}", job_id, library_id);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
52
apps/indexer/src/utils.rs
Normal file
52
apps/indexer/src/utils.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
use anyhow::Result;
|
||||
use chrono::DateTime;
|
||||
use parsers::BookFormat;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::path::Path;
|
||||
use chrono::Utc;
|
||||
|
||||
pub fn remap_libraries_path(path: &str) -> String {
|
||||
if let Ok(root) = std::env::var("LIBRARIES_ROOT_PATH") {
|
||||
if path.starts_with("/libraries/") {
|
||||
return path.replacen("/libraries", &root, 1);
|
||||
}
|
||||
}
|
||||
path.to_string()
|
||||
}
|
||||
|
||||
pub fn unmap_libraries_path(path: &str) -> String {
|
||||
if let Ok(root) = std::env::var("LIBRARIES_ROOT_PATH") {
|
||||
if path.starts_with(&root) {
|
||||
return path.replacen(&root, "/libraries", 1);
|
||||
}
|
||||
}
|
||||
path.to_string()
|
||||
}
|
||||
|
||||
pub fn compute_fingerprint(path: &Path, size: u64, mtime: &DateTime<Utc>) -> Result<String> {
|
||||
// Optimized: only use size + mtime + first bytes of filename for fast fingerprinting
|
||||
// This is 100x faster than reading file content while still being reliable for change detection
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(size.to_le_bytes());
|
||||
hasher.update(mtime.timestamp().to_le_bytes());
|
||||
|
||||
// Add filename for extra uniqueness (in case of rapid changes with same size+mtime)
|
||||
if let Some(filename) = path.file_name() {
|
||||
hasher.update(filename.as_encoded_bytes());
|
||||
}
|
||||
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
}
|
||||
|
||||
pub fn kind_from_format(format: BookFormat) -> &'static str {
|
||||
match format {
|
||||
BookFormat::Pdf => "ebook",
|
||||
BookFormat::Cbz | BookFormat::Cbr => "comic",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn file_display_name(path: &Path) -> String {
|
||||
path.file_stem()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| "Untitled".to_string())
|
||||
}
|
||||
147
apps/indexer/src/watcher.rs
Normal file
147
apps/indexer/src/watcher.rs
Normal file
@@ -0,0 +1,147 @@
|
||||
use anyhow::Result;
|
||||
use notify::{Event, RecommendedWatcher, RecursiveMode, Watcher};
|
||||
use sqlx::Row;
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::mpsc;
|
||||
use tracing::{error, info, trace};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::utils;
|
||||
use crate::AppState;
|
||||
|
||||
pub async fn run_file_watcher(state: AppState) -> Result<()> {
|
||||
let (tx, mut rx) = mpsc::channel::<(Uuid, String)>(100);
|
||||
|
||||
// Start watcher refresh loop
|
||||
let refresh_interval = Duration::from_secs(30);
|
||||
let pool = state.pool.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut watched_libraries: HashMap<Uuid, String> = HashMap::new();
|
||||
|
||||
loop {
|
||||
// Get libraries with watcher enabled
|
||||
match sqlx::query(
|
||||
"SELECT id, root_path FROM libraries WHERE watcher_enabled = TRUE AND enabled = TRUE"
|
||||
)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
{
|
||||
Ok(rows) => {
|
||||
let current_libraries: HashMap<Uuid, String> = rows
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
let id: Uuid = row.get("id");
|
||||
let root_path: String = row.get("root_path");
|
||||
let local_path = utils::remap_libraries_path(&root_path);
|
||||
(id, local_path)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Check if we need to recreate watcher
|
||||
let needs_restart = watched_libraries.len() != current_libraries.len()
|
||||
|| watched_libraries.iter().any(|(id, path)| {
|
||||
current_libraries.get(id) != Some(path)
|
||||
});
|
||||
|
||||
if needs_restart {
|
||||
info!("[WATCHER] Restarting watcher for {} libraries", current_libraries.len());
|
||||
|
||||
if !current_libraries.is_empty() {
|
||||
let tx_clone = tx.clone();
|
||||
let libraries_clone = current_libraries.clone();
|
||||
|
||||
match setup_watcher(libraries_clone, tx_clone) {
|
||||
Ok(_new_watcher) => {
|
||||
watched_libraries = current_libraries;
|
||||
info!("[WATCHER] Watching {} libraries", watched_libraries.len());
|
||||
}
|
||||
Err(err) => {
|
||||
error!("[WATCHER] Failed to setup watcher: {}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
error!("[WATCHER] Failed to fetch libraries: {}", err);
|
||||
}
|
||||
}
|
||||
|
||||
tokio::time::sleep(refresh_interval).await;
|
||||
}
|
||||
});
|
||||
|
||||
// Process watcher events
|
||||
while let Some((library_id, file_path)) = rx.recv().await {
|
||||
info!("[WATCHER] File changed in library {}: {}", library_id, file_path);
|
||||
|
||||
// Check if there's already a pending job for this library
|
||||
match sqlx::query_scalar::<_, bool>(
|
||||
"SELECT EXISTS(SELECT 1 FROM index_jobs WHERE library_id = $1 AND status IN ('pending', 'running'))"
|
||||
)
|
||||
.bind(library_id)
|
||||
.fetch_one(&state.pool)
|
||||
.await
|
||||
{
|
||||
Ok(exists) => {
|
||||
if !exists {
|
||||
// Create a quick scan job
|
||||
let job_id = Uuid::new_v4();
|
||||
match sqlx::query(
|
||||
"INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, 'rebuild', 'pending')"
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(library_id)
|
||||
.execute(&state.pool)
|
||||
.await
|
||||
{
|
||||
Ok(_) => info!("[WATCHER] Created job {} for library {}", job_id, library_id),
|
||||
Err(err) => error!("[WATCHER] Failed to create job: {}", err),
|
||||
}
|
||||
} else {
|
||||
trace!("[WATCHER] Job already pending for library {}, skipping", library_id);
|
||||
}
|
||||
}
|
||||
Err(err) => error!("[WATCHER] Failed to check existing jobs: {}", err),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn setup_watcher(
|
||||
libraries: HashMap<Uuid, String>,
|
||||
tx: mpsc::Sender<(Uuid, String)>,
|
||||
) -> Result<RecommendedWatcher> {
|
||||
let libraries_for_closure = libraries.clone();
|
||||
|
||||
let mut watcher = notify::recommended_watcher(move |res: Result<Event, notify::Error>| {
|
||||
match res {
|
||||
Ok(event) => {
|
||||
if event.kind.is_modify() || event.kind.is_create() || event.kind.is_remove() {
|
||||
for path in event.paths {
|
||||
if let Some((library_id, _)) = libraries_for_closure.iter().find(|(_, root)| {
|
||||
path.starts_with(root)
|
||||
}) {
|
||||
let path_str = path.to_string_lossy().to_string();
|
||||
if parsers::detect_format(&path).is_some() {
|
||||
let _ = tx.try_send((*library_id, path_str));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => error!("[WATCHER] Event error: {}", err),
|
||||
}
|
||||
})?;
|
||||
|
||||
// Actually watch the library directories
|
||||
for (_, root_path) in &libraries {
|
||||
info!("[WATCHER] Watching directory: {}", root_path);
|
||||
watcher.watch(std::path::Path::new(root_path), RecursiveMode::Recursive)?;
|
||||
}
|
||||
|
||||
Ok(watcher)
|
||||
}
|
||||
61
apps/indexer/src/worker.rs
Normal file
61
apps/indexer/src/worker.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
use std::time::Duration;
|
||||
use tracing::{error, info, trace};
|
||||
use crate::{job, scheduler, watcher, AppState};
|
||||
|
||||
pub async fn run_worker(state: AppState, interval_seconds: u64) {
|
||||
let wait = Duration::from_secs(interval_seconds.max(1));
|
||||
|
||||
// Cleanup stale jobs from previous runs
|
||||
if let Err(err) = job::cleanup_stale_jobs(&state.pool).await {
|
||||
error!("[CLEANUP] Failed to cleanup stale jobs: {}", err);
|
||||
}
|
||||
|
||||
// Start file watcher task
|
||||
let watcher_state = state.clone();
|
||||
let _watcher_handle = tokio::spawn(async move {
|
||||
info!("[WATCHER] Starting file watcher service");
|
||||
if let Err(err) = watcher::run_file_watcher(watcher_state).await {
|
||||
error!("[WATCHER] Error: {}", err);
|
||||
}
|
||||
});
|
||||
|
||||
// Start scheduler task for auto-monitoring
|
||||
let scheduler_state = state.clone();
|
||||
let _scheduler_handle = tokio::spawn(async move {
|
||||
let scheduler_wait = Duration::from_secs(60); // Check every minute
|
||||
loop {
|
||||
if let Err(err) = scheduler::check_and_schedule_auto_scans(&scheduler_state.pool).await {
|
||||
error!("[SCHEDULER] Error: {}", err);
|
||||
}
|
||||
tokio::time::sleep(scheduler_wait).await;
|
||||
}
|
||||
});
|
||||
|
||||
loop {
|
||||
match job::claim_next_job(&state.pool).await {
|
||||
Ok(Some((job_id, library_id))) => {
|
||||
info!("[INDEXER] Starting job {} library={:?}", job_id, library_id);
|
||||
if let Err(err) = job::process_job(&state, job_id, library_id).await {
|
||||
let err_str = err.to_string();
|
||||
if err_str.contains("cancelled") || err_str.contains("Cancelled") {
|
||||
info!("[INDEXER] Job {} was cancelled by user", job_id);
|
||||
// Status is already 'cancelled' in DB, don't change it
|
||||
} else {
|
||||
error!("[INDEXER] Job {} failed: {}", job_id, err);
|
||||
let _ = job::fail_job(&state.pool, job_id, &err_str).await;
|
||||
}
|
||||
} else {
|
||||
info!("[INDEXER] Job {} completed", job_id);
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
trace!("[INDEXER] No pending jobs, waiting...");
|
||||
tokio::time::sleep(wait).await;
|
||||
}
|
||||
Err(err) => {
|
||||
error!("[INDEXER] Worker error: {}", err);
|
||||
tokio::time::sleep(wait).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user