add indexing jobs, parsers, and search APIs
This commit is contained in:
@@ -12,6 +12,7 @@ base64.workspace = true
|
||||
chrono.workspace = true
|
||||
stripstream-core = { path = "../../crates/core" }
|
||||
rand.workspace = true
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
sqlx.workspace = true
|
||||
|
||||
@@ -32,6 +32,18 @@ pub async fn require_admin(
|
||||
Ok(next.run(req).await)
|
||||
}
|
||||
|
||||
pub async fn require_read(
|
||||
State(state): State<AppState>,
|
||||
mut req: Request,
|
||||
next: Next,
|
||||
) -> Result<Response, ApiError> {
|
||||
let token = bearer_token(&req).ok_or_else(|| ApiError::unauthorized("missing bearer token"))?;
|
||||
let scope = authenticate(&state, token).await?;
|
||||
|
||||
req.extensions_mut().insert(scope);
|
||||
Ok(next.run(req).await)
|
||||
}
|
||||
|
||||
fn bearer_token(req: &Request) -> Option<&str> {
|
||||
req.headers()
|
||||
.get(AUTHORIZATION)
|
||||
|
||||
143
apps/api/src/books.rs
Normal file
143
apps/api/src/books.rs
Normal file
@@ -0,0 +1,143 @@
|
||||
use axum::{extract::{Path, Query, State}, Json};
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::Row;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{error::ApiError, AppState};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ListBooksQuery {
|
||||
pub library_id: Option<Uuid>,
|
||||
pub kind: Option<String>,
|
||||
pub cursor: Option<Uuid>,
|
||||
pub limit: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct BookItem {
|
||||
pub id: Uuid,
|
||||
pub library_id: Uuid,
|
||||
pub kind: String,
|
||||
pub title: String,
|
||||
pub author: Option<String>,
|
||||
pub series: Option<String>,
|
||||
pub volume: Option<String>,
|
||||
pub language: Option<String>,
|
||||
pub page_count: Option<i32>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct BooksPage {
|
||||
pub items: Vec<BookItem>,
|
||||
pub next_cursor: Option<Uuid>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct BookDetails {
|
||||
pub id: Uuid,
|
||||
pub library_id: Uuid,
|
||||
pub kind: String,
|
||||
pub title: String,
|
||||
pub author: Option<String>,
|
||||
pub series: Option<String>,
|
||||
pub volume: Option<String>,
|
||||
pub language: Option<String>,
|
||||
pub page_count: Option<i32>,
|
||||
pub file_path: Option<String>,
|
||||
pub file_format: Option<String>,
|
||||
pub file_parse_status: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn list_books(
|
||||
State(state): State<AppState>,
|
||||
Query(query): Query<ListBooksQuery>,
|
||||
) -> Result<Json<BooksPage>, ApiError> {
|
||||
let limit = query.limit.unwrap_or(50).clamp(1, 200);
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, library_id, kind, title, author, series, volume, language, page_count, updated_at
|
||||
FROM books
|
||||
WHERE ($1::uuid IS NULL OR library_id = $1)
|
||||
AND ($2::text IS NULL OR kind = $2)
|
||||
AND ($3::uuid IS NULL OR id > $3)
|
||||
ORDER BY id ASC
|
||||
LIMIT $4
|
||||
"#,
|
||||
)
|
||||
.bind(query.library_id)
|
||||
.bind(query.kind.as_deref())
|
||||
.bind(query.cursor)
|
||||
.bind(limit + 1)
|
||||
.fetch_all(&state.pool)
|
||||
.await?;
|
||||
|
||||
let mut items: Vec<BookItem> = rows
|
||||
.iter()
|
||||
.take(limit as usize)
|
||||
.map(|row| BookItem {
|
||||
id: row.get("id"),
|
||||
library_id: row.get("library_id"),
|
||||
kind: row.get("kind"),
|
||||
title: row.get("title"),
|
||||
author: row.get("author"),
|
||||
series: row.get("series"),
|
||||
volume: row.get("volume"),
|
||||
language: row.get("language"),
|
||||
page_count: row.get("page_count"),
|
||||
updated_at: row.get("updated_at"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let next_cursor = if rows.len() > limit as usize {
|
||||
items.last().map(|b| b.id)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(Json(BooksPage {
|
||||
items: std::mem::take(&mut items),
|
||||
next_cursor,
|
||||
}))
|
||||
}
|
||||
|
||||
pub async fn get_book(
|
||||
State(state): State<AppState>,
|
||||
Path(id): Path<Uuid>,
|
||||
) -> Result<Json<BookDetails>, ApiError> {
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
SELECT b.id, b.library_id, b.kind, b.title, b.author, b.series, b.volume, b.language, b.page_count,
|
||||
bf.abs_path, bf.format, bf.parse_status
|
||||
FROM books b
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT abs_path, format, parse_status
|
||||
FROM book_files
|
||||
WHERE book_id = b.id
|
||||
ORDER BY updated_at DESC
|
||||
LIMIT 1
|
||||
) bf ON TRUE
|
||||
WHERE b.id = $1
|
||||
"#,
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_optional(&state.pool)
|
||||
.await?;
|
||||
|
||||
let row = row.ok_or_else(|| ApiError::not_found("book not found"))?;
|
||||
Ok(Json(BookDetails {
|
||||
id: row.get("id"),
|
||||
library_id: row.get("library_id"),
|
||||
kind: row.get("kind"),
|
||||
title: row.get("title"),
|
||||
author: row.get("author"),
|
||||
series: row.get("series"),
|
||||
volume: row.get("volume"),
|
||||
language: row.get("language"),
|
||||
page_count: row.get("page_count"),
|
||||
file_path: row.get("abs_path"),
|
||||
file_format: row.get("format"),
|
||||
file_parse_status: row.get("parse_status"),
|
||||
}))
|
||||
}
|
||||
74
apps/api/src/index_jobs.rs
Normal file
74
apps/api/src/index_jobs.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
use axum::{extract::State, Json};
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::Row;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{error::ApiError, AppState};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct RebuildRequest {
|
||||
pub library_id: Option<Uuid>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct IndexJobItem {
|
||||
pub id: Uuid,
|
||||
pub library_id: Option<Uuid>,
|
||||
pub r#type: String,
|
||||
pub status: String,
|
||||
pub started_at: Option<DateTime<Utc>>,
|
||||
pub finished_at: Option<DateTime<Utc>>,
|
||||
pub stats_json: Option<serde_json::Value>,
|
||||
pub error_opt: Option<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
pub async fn enqueue_rebuild(
|
||||
State(state): State<AppState>,
|
||||
payload: Option<Json<RebuildRequest>>,
|
||||
) -> Result<Json<IndexJobItem>, ApiError> {
|
||||
let library_id = payload.and_then(|p| p.0.library_id);
|
||||
let id = Uuid::new_v4();
|
||||
|
||||
sqlx::query(
|
||||
"INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, 'rebuild', 'pending')",
|
||||
)
|
||||
.bind(id)
|
||||
.bind(library_id)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
let row = sqlx::query(
|
||||
"SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at FROM index_jobs WHERE id = $1",
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_one(&state.pool)
|
||||
.await?;
|
||||
|
||||
Ok(Json(map_row(row)))
|
||||
}
|
||||
|
||||
pub async fn list_index_jobs(State(state): State<AppState>) -> Result<Json<Vec<IndexJobItem>>, ApiError> {
|
||||
let rows = sqlx::query(
|
||||
"SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at FROM index_jobs ORDER BY created_at DESC LIMIT 100",
|
||||
)
|
||||
.fetch_all(&state.pool)
|
||||
.await?;
|
||||
|
||||
Ok(Json(rows.into_iter().map(map_row).collect()))
|
||||
}
|
||||
|
||||
fn map_row(row: sqlx::postgres::PgRow) -> IndexJobItem {
|
||||
IndexJobItem {
|
||||
id: row.get("id"),
|
||||
library_id: row.get("library_id"),
|
||||
r#type: row.get("type"),
|
||||
status: row.get("status"),
|
||||
started_at: row.get("started_at"),
|
||||
finished_at: row.get("finished_at"),
|
||||
stats_json: row.get("stats_json"),
|
||||
error_opt: row.get("error_opt"),
|
||||
created_at: row.get("created_at"),
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,9 @@
|
||||
mod auth;
|
||||
mod books;
|
||||
mod error;
|
||||
mod index_jobs;
|
||||
mod libraries;
|
||||
mod search;
|
||||
mod tokens;
|
||||
|
||||
use std::sync::Arc;
|
||||
@@ -14,6 +17,8 @@ use tracing::info;
|
||||
struct AppState {
|
||||
pool: sqlx::PgPool,
|
||||
bootstrap_token: Arc<str>,
|
||||
meili_url: Arc<str>,
|
||||
meili_master_key: Arc<str>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
@@ -33,18 +38,29 @@ async fn main() -> anyhow::Result<()> {
|
||||
let state = AppState {
|
||||
pool,
|
||||
bootstrap_token: Arc::from(config.api_bootstrap_token),
|
||||
meili_url: Arc::from(config.meili_url),
|
||||
meili_master_key: Arc::from(config.meili_master_key),
|
||||
};
|
||||
|
||||
let protected = Router::new()
|
||||
let admin_routes = Router::new()
|
||||
.route("/libraries", get(libraries::list_libraries).post(libraries::create_library))
|
||||
.route("/libraries/:id", delete(libraries::delete_library))
|
||||
.route("/index/rebuild", axum::routing::post(index_jobs::enqueue_rebuild))
|
||||
.route("/index/status", get(index_jobs::list_index_jobs))
|
||||
.route("/admin/tokens", get(tokens::list_tokens).post(tokens::create_token))
|
||||
.route("/admin/tokens/:id", delete(tokens::revoke_token))
|
||||
.layer(middleware::from_fn_with_state(state.clone(), auth::require_admin));
|
||||
|
||||
let read_routes = Router::new()
|
||||
.route("/books", get(books::list_books))
|
||||
.route("/books/:id", get(books::get_book))
|
||||
.route("/search", get(search::search_books))
|
||||
.layer(middleware::from_fn_with_state(state.clone(), auth::require_read));
|
||||
|
||||
let app = Router::new()
|
||||
.route("/health", get(health))
|
||||
.merge(protected)
|
||||
.merge(admin_routes)
|
||||
.merge(read_routes)
|
||||
.with_state(state);
|
||||
|
||||
let listener = tokio::net::TcpListener::bind(&config.listen_addr).await?;
|
||||
|
||||
77
apps/api/src/search.rs
Normal file
77
apps/api/src/search.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
use axum::{extract::{Query, State}, Json};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{error::ApiError, AppState};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct SearchQuery {
|
||||
pub q: String,
|
||||
pub library_id: Option<String>,
|
||||
pub r#type: Option<String>,
|
||||
pub kind: Option<String>,
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct SearchResponse {
|
||||
pub hits: serde_json::Value,
|
||||
pub estimated_total_hits: Option<u64>,
|
||||
pub processing_time_ms: Option<u64>,
|
||||
}
|
||||
|
||||
pub async fn search_books(
|
||||
State(state): State<AppState>,
|
||||
Query(query): Query<SearchQuery>,
|
||||
) -> Result<Json<SearchResponse>, ApiError> {
|
||||
if query.q.trim().is_empty() {
|
||||
return Err(ApiError::bad_request("q is required"));
|
||||
}
|
||||
|
||||
let mut filters: Vec<String> = Vec::new();
|
||||
if let Some(library_id) = query.library_id.as_deref() {
|
||||
filters.push(format!("library_id = '{}'", library_id.replace('"', "")));
|
||||
}
|
||||
let kind_filter = query.r#type.as_deref().or(query.kind.as_deref());
|
||||
if let Some(kind) = kind_filter {
|
||||
filters.push(format!("kind = '{}'", kind.replace('"', "")));
|
||||
}
|
||||
|
||||
let body = serde_json::json!({
|
||||
"q": query.q,
|
||||
"limit": query.limit.unwrap_or(20).clamp(1, 100),
|
||||
"filter": if filters.is_empty() { serde_json::Value::Null } else { serde_json::Value::String(filters.join(" AND ")) }
|
||||
});
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!("{}/indexes/books/search", state.meili_url.trim_end_matches('/'));
|
||||
let response = client
|
||||
.post(url)
|
||||
.header("Authorization", format!("Bearer {}", state.meili_master_key))
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| ApiError::internal(format!("meili request failed: {e}")))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let body = response.text().await.unwrap_or_else(|_| "unknown meili error".to_string());
|
||||
if body.contains("index_not_found") {
|
||||
return Ok(Json(SearchResponse {
|
||||
hits: serde_json::json!([]),
|
||||
estimated_total_hits: Some(0),
|
||||
processing_time_ms: Some(0),
|
||||
}));
|
||||
}
|
||||
return Err(ApiError::internal(format!("meili error: {body}")));
|
||||
}
|
||||
|
||||
let payload: serde_json::Value = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| ApiError::internal(format!("invalid meili response: {e}")))?;
|
||||
|
||||
Ok(Json(SearchResponse {
|
||||
hits: payload.get("hits").cloned().unwrap_or_else(|| serde_json::json!([])),
|
||||
estimated_total_hits: payload.get("estimatedTotalHits").and_then(|v| v.as_u64()),
|
||||
processing_time_ms: payload.get("processingTimeMs").and_then(|v| v.as_u64()),
|
||||
}))
|
||||
}
|
||||
@@ -7,7 +7,16 @@ license.workspace = true
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
axum.workspace = true
|
||||
chrono.workspace = true
|
||||
parsers = { path = "../../crates/parsers" }
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
sha2.workspace = true
|
||||
sqlx.workspace = true
|
||||
stripstream-core = { path = "../../crates/core" }
|
||||
tokio.workspace = true
|
||||
tracing.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
uuid.workspace = true
|
||||
walkdir.workspace = true
|
||||
|
||||
@@ -1,6 +1,30 @@
|
||||
use anyhow::Context;
|
||||
use axum::{routing::get, Router};
|
||||
use chrono::{DateTime, Utc};
|
||||
use parsers::{detect_format, parse_metadata, BookFormat};
|
||||
use serde::Serialize;
|
||||
use sha2::{Digest, Sha256};
|
||||
use sqlx::{postgres::PgPoolOptions, Row};
|
||||
use std::{collections::HashMap, path::Path, time::Duration};
|
||||
use stripstream_core::config::IndexerConfig;
|
||||
use tracing::info;
|
||||
use tracing::{error, info};
|
||||
use uuid::Uuid;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AppState {
|
||||
pool: sqlx::PgPool,
|
||||
meili_url: String,
|
||||
meili_master_key: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct JobStats {
|
||||
scanned_files: usize,
|
||||
indexed_files: usize,
|
||||
removed_files: usize,
|
||||
errors: usize,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
@@ -10,7 +34,20 @@ async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.init();
|
||||
|
||||
let config = IndexerConfig::from_env();
|
||||
let config = IndexerConfig::from_env()?;
|
||||
let pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&config.database_url)
|
||||
.await?;
|
||||
|
||||
let state = AppState {
|
||||
pool,
|
||||
meili_url: config.meili_url.clone(),
|
||||
meili_master_key: config.meili_master_key.clone(),
|
||||
};
|
||||
|
||||
tokio::spawn(run_worker(state.clone(), config.scan_interval_seconds));
|
||||
|
||||
let app = Router::new().route("/health", get(health));
|
||||
|
||||
let listener = tokio::net::TcpListener::bind(&config.listen_addr).await?;
|
||||
@@ -22,3 +59,363 @@ async fn main() -> anyhow::Result<()> {
|
||||
async fn health() -> &'static str {
|
||||
"ok"
|
||||
}
|
||||
|
||||
async fn run_worker(state: AppState, interval_seconds: u64) {
|
||||
let wait = Duration::from_secs(interval_seconds.max(1));
|
||||
loop {
|
||||
match claim_next_job(&state.pool).await {
|
||||
Ok(Some((job_id, library_id))) => {
|
||||
if let Err(err) = process_job(&state, job_id, library_id).await {
|
||||
error!(job_id = %job_id, error = %err, "index job failed");
|
||||
let _ = fail_job(&state.pool, job_id, &err.to_string()).await;
|
||||
}
|
||||
}
|
||||
Ok(None) => tokio::time::sleep(wait).await,
|
||||
Err(err) => {
|
||||
error!(error = %err, "worker loop error");
|
||||
tokio::time::sleep(wait).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn claim_next_job(pool: &sqlx::PgPool) -> anyhow::Result<Option<(Uuid, Option<Uuid>)>> {
|
||||
let mut tx = pool.begin().await?;
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
SELECT id, library_id
|
||||
FROM index_jobs
|
||||
WHERE status = 'pending'
|
||||
ORDER BY created_at ASC
|
||||
FOR UPDATE SKIP LOCKED
|
||||
LIMIT 1
|
||||
"#,
|
||||
)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await?;
|
||||
|
||||
let Some(row) = row else {
|
||||
tx.commit().await?;
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let id: Uuid = row.get("id");
|
||||
let library_id: Option<Uuid> = row.get("library_id");
|
||||
|
||||
sqlx::query("UPDATE index_jobs SET status = 'running', started_at = NOW(), error_opt = NULL WHERE id = $1")
|
||||
.bind(id)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
tx.commit().await?;
|
||||
Ok(Some((id, library_id)))
|
||||
}
|
||||
|
||||
async fn process_job(state: &AppState, job_id: Uuid, target_library_id: Option<Uuid>) -> anyhow::Result<()> {
|
||||
let libraries = if let Some(library_id) = target_library_id {
|
||||
sqlx::query("SELECT id, root_path FROM libraries WHERE id = $1 AND enabled = TRUE")
|
||||
.bind(library_id)
|
||||
.fetch_all(&state.pool)
|
||||
.await?
|
||||
} else {
|
||||
sqlx::query("SELECT id, root_path FROM libraries WHERE enabled = TRUE")
|
||||
.fetch_all(&state.pool)
|
||||
.await?
|
||||
};
|
||||
|
||||
let mut stats = JobStats {
|
||||
scanned_files: 0,
|
||||
indexed_files: 0,
|
||||
removed_files: 0,
|
||||
errors: 0,
|
||||
};
|
||||
|
||||
for library in libraries {
|
||||
let library_id: Uuid = library.get("id");
|
||||
let root_path: String = library.get("root_path");
|
||||
match scan_library(state, library_id, Path::new(&root_path), &mut stats).await {
|
||||
Ok(()) => {}
|
||||
Err(err) => {
|
||||
stats.errors += 1;
|
||||
error!(library_id = %library_id, error = %err, "library scan failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sync_meili(&state.pool, &state.meili_url, &state.meili_master_key).await?;
|
||||
|
||||
sqlx::query("UPDATE index_jobs SET status = 'success', finished_at = NOW(), stats_json = $2 WHERE id = $1")
|
||||
.bind(job_id)
|
||||
.bind(serde_json::to_value(&stats)?)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn fail_job(pool: &sqlx::PgPool, job_id: Uuid, error_message: &str) -> anyhow::Result<()> {
|
||||
sqlx::query("UPDATE index_jobs SET status = 'failed', finished_at = NOW(), error_opt = $2 WHERE id = $1")
|
||||
.bind(job_id)
|
||||
.bind(error_message)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn scan_library(
|
||||
state: &AppState,
|
||||
library_id: Uuid,
|
||||
root: &Path,
|
||||
stats: &mut JobStats,
|
||||
) -> anyhow::Result<()> {
|
||||
let existing_rows = sqlx::query(
|
||||
r#"
|
||||
SELECT bf.id AS file_id, bf.book_id, bf.abs_path, bf.fingerprint
|
||||
FROM book_files bf
|
||||
JOIN books b ON b.id = bf.book_id
|
||||
WHERE b.library_id = $1
|
||||
"#,
|
||||
)
|
||||
.bind(library_id)
|
||||
.fetch_all(&state.pool)
|
||||
.await?;
|
||||
|
||||
let mut existing: HashMap<String, (Uuid, Uuid, String)> = HashMap::new();
|
||||
for row in existing_rows {
|
||||
existing.insert(
|
||||
row.get("abs_path"),
|
||||
(row.get("file_id"), row.get("book_id"), row.get("fingerprint")),
|
||||
);
|
||||
}
|
||||
|
||||
let mut seen: HashMap<String, bool> = HashMap::new();
|
||||
for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let path = entry.path();
|
||||
let Some(format) = detect_format(path) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
stats.scanned_files += 1;
|
||||
let abs_path = path.to_string_lossy().to_string();
|
||||
seen.insert(abs_path.clone(), true);
|
||||
|
||||
let metadata = std::fs::metadata(path)
|
||||
.with_context(|| format!("cannot stat {}", path.display()))?;
|
||||
let mtime: DateTime<Utc> = metadata
|
||||
.modified()
|
||||
.map(DateTime::<Utc>::from)
|
||||
.unwrap_or_else(|_| Utc::now());
|
||||
let fingerprint = compute_fingerprint(path, metadata.len(), &mtime)?;
|
||||
|
||||
if let Some((file_id, book_id, old_fingerprint)) = existing.get(&abs_path).cloned() {
|
||||
if old_fingerprint == fingerprint {
|
||||
continue;
|
||||
}
|
||||
|
||||
match parse_metadata(path, format) {
|
||||
Ok(parsed) => {
|
||||
sqlx::query(
|
||||
"UPDATE books SET title = $2, kind = $3, page_count = $4, updated_at = NOW() WHERE id = $1",
|
||||
)
|
||||
.bind(book_id)
|
||||
.bind(parsed.title)
|
||||
.bind(kind_from_format(format))
|
||||
.bind(parsed.page_count)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
sqlx::query(
|
||||
"UPDATE book_files SET format = $2, size_bytes = $3, mtime = $4, fingerprint = $5, parse_status = 'ok', parse_error_opt = NULL, updated_at = NOW() WHERE id = $1",
|
||||
)
|
||||
.bind(file_id)
|
||||
.bind(format.as_str())
|
||||
.bind(metadata.len() as i64)
|
||||
.bind(mtime)
|
||||
.bind(fingerprint)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
stats.indexed_files += 1;
|
||||
}
|
||||
Err(err) => {
|
||||
stats.errors += 1;
|
||||
sqlx::query(
|
||||
"UPDATE book_files SET parse_status = 'error', parse_error_opt = $2, updated_at = NOW() WHERE id = $1",
|
||||
)
|
||||
.bind(file_id)
|
||||
.bind(err.to_string())
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
match parse_metadata(path, format) {
|
||||
Ok(parsed) => {
|
||||
let book_id = Uuid::new_v4();
|
||||
let file_id = Uuid::new_v4();
|
||||
sqlx::query(
|
||||
"INSERT INTO books (id, library_id, kind, title, page_count) VALUES ($1, $2, $3, $4, $5)",
|
||||
)
|
||||
.bind(book_id)
|
||||
.bind(library_id)
|
||||
.bind(kind_from_format(format))
|
||||
.bind(parsed.title)
|
||||
.bind(parsed.page_count)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
sqlx::query(
|
||||
"INSERT INTO book_files (id, book_id, format, abs_path, size_bytes, mtime, fingerprint, parse_status) VALUES ($1, $2, $3, $4, $5, $6, $7, 'ok')",
|
||||
)
|
||||
.bind(file_id)
|
||||
.bind(book_id)
|
||||
.bind(format.as_str())
|
||||
.bind(&abs_path)
|
||||
.bind(metadata.len() as i64)
|
||||
.bind(mtime)
|
||||
.bind(fingerprint)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
stats.indexed_files += 1;
|
||||
}
|
||||
Err(err) => {
|
||||
stats.errors += 1;
|
||||
let book_id = Uuid::new_v4();
|
||||
let file_id = Uuid::new_v4();
|
||||
sqlx::query(
|
||||
"INSERT INTO books (id, library_id, kind, title, page_count) VALUES ($1, $2, $3, $4, NULL)",
|
||||
)
|
||||
.bind(book_id)
|
||||
.bind(library_id)
|
||||
.bind(kind_from_format(format))
|
||||
.bind(file_display_name(path))
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
sqlx::query(
|
||||
"INSERT INTO book_files (id, book_id, format, abs_path, size_bytes, mtime, fingerprint, parse_status, parse_error_opt) VALUES ($1, $2, $3, $4, $5, $6, $7, 'error', $8)",
|
||||
)
|
||||
.bind(file_id)
|
||||
.bind(book_id)
|
||||
.bind(format.as_str())
|
||||
.bind(&abs_path)
|
||||
.bind(metadata.len() as i64)
|
||||
.bind(mtime)
|
||||
.bind(fingerprint)
|
||||
.bind(err.to_string())
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (abs_path, (file_id, book_id, _)) in existing {
|
||||
if seen.contains_key(&abs_path) {
|
||||
continue;
|
||||
}
|
||||
sqlx::query("DELETE FROM book_files WHERE id = $1")
|
||||
.bind(file_id)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
sqlx::query("DELETE FROM books WHERE id = $1 AND NOT EXISTS (SELECT 1 FROM book_files WHERE book_id = $1)")
|
||||
.bind(book_id)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
stats.removed_files += 1;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_fingerprint(path: &Path, size: u64, mtime: &DateTime<Utc>) -> anyhow::Result<String> {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(size.to_le_bytes());
|
||||
hasher.update(mtime.timestamp().to_le_bytes());
|
||||
|
||||
let bytes = std::fs::read(path)?;
|
||||
let take = bytes.len().min(65_536);
|
||||
hasher.update(&bytes[..take]);
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
}
|
||||
|
||||
fn kind_from_format(format: BookFormat) -> &'static str {
|
||||
match format {
|
||||
BookFormat::Pdf => "ebook",
|
||||
BookFormat::Cbz | BookFormat::Cbr => "comic",
|
||||
}
|
||||
}
|
||||
|
||||
fn file_display_name(path: &Path) -> String {
|
||||
path.file_stem()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| "Untitled".to_string())
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct SearchDoc {
|
||||
id: String,
|
||||
library_id: String,
|
||||
kind: String,
|
||||
title: String,
|
||||
author: Option<String>,
|
||||
series: Option<String>,
|
||||
volume: Option<String>,
|
||||
language: Option<String>,
|
||||
}
|
||||
|
||||
async fn sync_meili(pool: &sqlx::PgPool, meili_url: &str, meili_master_key: &str) -> anyhow::Result<()> {
|
||||
let client = reqwest::Client::new();
|
||||
let base = meili_url.trim_end_matches('/');
|
||||
|
||||
let _ = client
|
||||
.post(format!("{base}/indexes"))
|
||||
.header("Authorization", format!("Bearer {meili_master_key}"))
|
||||
.json(&serde_json::json!({"uid": "books", "primaryKey": "id"}))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
let _ = client
|
||||
.patch(format!("{base}/indexes/books/settings/filterable-attributes"))
|
||||
.header("Authorization", format!("Bearer {meili_master_key}"))
|
||||
.json(&serde_json::json!(["library_id", "kind"]))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
let rows = sqlx::query(
|
||||
"SELECT id, library_id, kind, title, author, series, volume, language FROM books",
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let docs: Vec<SearchDoc> = rows
|
||||
.into_iter()
|
||||
.map(|row| SearchDoc {
|
||||
id: row.get::<Uuid, _>("id").to_string(),
|
||||
library_id: row.get::<Uuid, _>("library_id").to_string(),
|
||||
kind: row.get("kind"),
|
||||
title: row.get("title"),
|
||||
author: row.get("author"),
|
||||
series: row.get("series"),
|
||||
volume: row.get("volume"),
|
||||
language: row.get("language"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
client
|
||||
.put(format!("{base}/indexes/books/documents?primaryKey=id"))
|
||||
.header("Authorization", format!("Bearer {meili_master_key}"))
|
||||
.json(&docs)
|
||||
.send()
|
||||
.await
|
||||
.context("failed to push docs to meili")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user