feat(indexing): Lot 4 - Progression temps reel, Full Rebuild, Optimisations
- Ajout migrations DB: index_job_errors, library_monitoring, full_rebuild_type - API: endpoints progression temps reel (/jobs/:id/stream), active jobs, details - API: support full_rebuild avec suppression donnees existantes - Indexer: logs detailles avec timing [SCAN][META][PARSER][BDD] - Indexer: optimisation parsing PDF (lopdf -> pdfinfo) 235x plus rapide - Indexer: corrections chemins LIBRARIES_ROOT_PATH pour dev local - Backoffice: composants JobProgress, JobsIndicator (header), JobsList - Backoffice: SSE streaming pour progression temps reel - Backoffice: boutons Index/Index Full sur page libraries - Backoffice: highlight job apres creation avec redirection - Fix: parsing volume type i32, sync meilisearch cleanup Perf: parsing PDF passe de 8.7s a 37ms Perf: indexation 45 fichiers en ~15s vs plusieurs minutes avant
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
use axum::{extract::State, Json};
|
||||
use axum::{extract::State, response::sse::{Event, Sse}, Json};
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::Row;
|
||||
use std::convert::Infallible;
|
||||
use std::time::Duration;
|
||||
use tokio_stream::Stream;
|
||||
use uuid::Uuid;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
@@ -11,6 +14,8 @@ use crate::{error::ApiError, AppState};
|
||||
pub struct RebuildRequest {
|
||||
#[schema(value_type = Option<String>)]
|
||||
pub library_id: Option<Uuid>,
|
||||
#[schema(value_type = Option<bool>, example = false)]
|
||||
pub full: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
@@ -37,6 +42,49 @@ pub struct FolderItem {
|
||||
pub path: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct IndexJobDetailResponse {
|
||||
#[schema(value_type = String)]
|
||||
pub id: Uuid,
|
||||
#[schema(value_type = Option<String>)]
|
||||
pub library_id: Option<Uuid>,
|
||||
pub r#type: String,
|
||||
pub status: String,
|
||||
#[schema(value_type = Option<String>)]
|
||||
pub started_at: Option<DateTime<Utc>>,
|
||||
#[schema(value_type = Option<String>)]
|
||||
pub finished_at: Option<DateTime<Utc>>,
|
||||
pub stats_json: Option<serde_json::Value>,
|
||||
pub error_opt: Option<String>,
|
||||
#[schema(value_type = String)]
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub current_file: Option<String>,
|
||||
pub progress_percent: Option<i32>,
|
||||
pub total_files: Option<i32>,
|
||||
pub processed_files: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct JobErrorResponse {
|
||||
#[schema(value_type = String)]
|
||||
pub id: Uuid,
|
||||
pub file_path: String,
|
||||
pub error_message: String,
|
||||
#[schema(value_type = String)]
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct ProgressEvent {
|
||||
pub job_id: String,
|
||||
pub status: String,
|
||||
pub current_file: Option<String>,
|
||||
pub progress_percent: Option<i32>,
|
||||
pub processed_files: Option<i32>,
|
||||
pub total_files: Option<i32>,
|
||||
pub stats_json: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
/// Enqueue a job to rebuild the search index for a library (or all libraries if no library_id specified)
|
||||
#[utoipa::path(
|
||||
post,
|
||||
@@ -54,14 +102,17 @@ pub async fn enqueue_rebuild(
|
||||
State(state): State<AppState>,
|
||||
payload: Option<Json<RebuildRequest>>,
|
||||
) -> Result<Json<IndexJobResponse>, ApiError> {
|
||||
let library_id = payload.and_then(|p| p.0.library_id);
|
||||
let library_id = payload.as_ref().and_then(|p| p.0.library_id);
|
||||
let is_full = payload.as_ref().and_then(|p| p.0.full).unwrap_or(false);
|
||||
let job_type = if is_full { "full_rebuild" } else { "rebuild" };
|
||||
let id = Uuid::new_v4();
|
||||
|
||||
sqlx::query(
|
||||
"INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, 'rebuild', 'pending')",
|
||||
"INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, $3, 'pending')",
|
||||
)
|
||||
.bind(id)
|
||||
.bind(library_id)
|
||||
.bind(job_type)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
@@ -138,6 +189,10 @@ pub async fn cancel_job(
|
||||
Ok(Json(map_row(row)))
|
||||
}
|
||||
|
||||
fn get_libraries_root() -> String {
|
||||
std::env::var("LIBRARIES_ROOT_PATH").unwrap_or_else(|_| "/libraries".to_string())
|
||||
}
|
||||
|
||||
/// List available folders in /libraries for library creation
|
||||
#[utoipa::path(
|
||||
get,
|
||||
@@ -151,7 +206,8 @@ pub async fn cancel_job(
|
||||
security(("Bearer" = []))
|
||||
)]
|
||||
pub async fn list_folders(State(_state): State<AppState>) -> Result<Json<Vec<FolderItem>>, ApiError> {
|
||||
let libraries_path = std::path::Path::new("/libraries");
|
||||
let libraries_root = get_libraries_root();
|
||||
let libraries_path = std::path::Path::new(&libraries_root);
|
||||
let mut folders = Vec::new();
|
||||
|
||||
if let Ok(entries) = std::fs::read_dir(libraries_path) {
|
||||
@@ -170,7 +226,7 @@ pub async fn list_folders(State(_state): State<AppState>) -> Result<Json<Vec<Fol
|
||||
Ok(Json(folders))
|
||||
}
|
||||
|
||||
fn map_row(row: sqlx::postgres::PgRow) -> IndexJobResponse {
|
||||
pub fn map_row(row: sqlx::postgres::PgRow) -> IndexJobResponse {
|
||||
IndexJobResponse {
|
||||
id: row.get("id"),
|
||||
library_id: row.get("library_id"),
|
||||
@@ -183,3 +239,213 @@ fn map_row(row: sqlx::postgres::PgRow) -> IndexJobResponse {
|
||||
created_at: row.get("created_at"),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_row_detail(row: sqlx::postgres::PgRow) -> IndexJobDetailResponse {
|
||||
IndexJobDetailResponse {
|
||||
id: row.get("id"),
|
||||
library_id: row.get("library_id"),
|
||||
r#type: row.get("type"),
|
||||
status: row.get("status"),
|
||||
started_at: row.get("started_at"),
|
||||
finished_at: row.get("finished_at"),
|
||||
stats_json: row.get("stats_json"),
|
||||
error_opt: row.get("error_opt"),
|
||||
created_at: row.get("created_at"),
|
||||
current_file: row.get("current_file"),
|
||||
progress_percent: row.get("progress_percent"),
|
||||
total_files: row.get("total_files"),
|
||||
processed_files: row.get("processed_files"),
|
||||
}
|
||||
}
|
||||
|
||||
/// List active indexing jobs (pending or running)
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/index/jobs/active",
|
||||
tag = "indexing",
|
||||
responses(
|
||||
(status = 200, body = Vec<IndexJobResponse>),
|
||||
(status = 401, description = "Unauthorized"),
|
||||
(status = 403, description = "Forbidden - Admin scope required"),
|
||||
),
|
||||
security(("Bearer" = []))
|
||||
)]
|
||||
pub async fn get_active_jobs(State(state): State<AppState>) -> Result<Json<Vec<IndexJobResponse>>, ApiError> {
|
||||
let rows = sqlx::query(
|
||||
"SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at
|
||||
FROM index_jobs
|
||||
WHERE status IN ('pending', 'running')
|
||||
ORDER BY created_at ASC"
|
||||
)
|
||||
.fetch_all(&state.pool)
|
||||
.await?;
|
||||
|
||||
Ok(Json(rows.into_iter().map(map_row).collect()))
|
||||
}
|
||||
|
||||
/// Get detailed job information including progress
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/index/jobs/{id}/details",
|
||||
tag = "indexing",
|
||||
params(
|
||||
("id" = String, Path, description = "Job UUID"),
|
||||
),
|
||||
responses(
|
||||
(status = 200, body = IndexJobDetailResponse),
|
||||
(status = 404, description = "Job not found"),
|
||||
(status = 401, description = "Unauthorized"),
|
||||
(status = 403, description = "Forbidden - Admin scope required"),
|
||||
),
|
||||
security(("Bearer" = []))
|
||||
)]
|
||||
pub async fn get_job_details(
|
||||
State(state): State<AppState>,
|
||||
id: axum::extract::Path<Uuid>,
|
||||
) -> Result<Json<IndexJobDetailResponse>, ApiError> {
|
||||
let row = sqlx::query(
|
||||
"SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at,
|
||||
current_file, progress_percent, total_files, processed_files
|
||||
FROM index_jobs WHERE id = $1"
|
||||
)
|
||||
.bind(id.0)
|
||||
.fetch_optional(&state.pool)
|
||||
.await?;
|
||||
|
||||
match row {
|
||||
Some(row) => Ok(Json(map_row_detail(row))),
|
||||
None => Err(ApiError::not_found("job not found")),
|
||||
}
|
||||
}
|
||||
|
||||
/// List errors for a specific job
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/index/jobs/{id}/errors",
|
||||
tag = "indexing",
|
||||
params(
|
||||
("id" = String, Path, description = "Job UUID"),
|
||||
),
|
||||
responses(
|
||||
(status = 200, body = Vec<JobErrorResponse>),
|
||||
(status = 401, description = "Unauthorized"),
|
||||
(status = 403, description = "Forbidden - Admin scope required"),
|
||||
),
|
||||
security(("Bearer" = []))
|
||||
)]
|
||||
pub async fn get_job_errors(
|
||||
State(state): State<AppState>,
|
||||
id: axum::extract::Path<Uuid>,
|
||||
) -> Result<Json<Vec<JobErrorResponse>>, ApiError> {
|
||||
let rows = sqlx::query(
|
||||
"SELECT id, file_path, error_message, created_at
|
||||
FROM index_job_errors
|
||||
WHERE job_id = $1
|
||||
ORDER BY created_at ASC"
|
||||
)
|
||||
.bind(id.0)
|
||||
.fetch_all(&state.pool)
|
||||
.await?;
|
||||
|
||||
let errors: Vec<JobErrorResponse> = rows
|
||||
.into_iter()
|
||||
.map(|row| JobErrorResponse {
|
||||
id: row.get("id"),
|
||||
file_path: row.get("file_path"),
|
||||
error_message: row.get("error_message"),
|
||||
created_at: row.get("created_at"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(errors))
|
||||
}
|
||||
|
||||
/// Stream job progress via SSE
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/index/jobs/{id}/stream",
|
||||
tag = "indexing",
|
||||
params(
|
||||
("id" = String, Path, description = "Job UUID"),
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "SSE stream of progress events"),
|
||||
(status = 404, description = "Job not found"),
|
||||
(status = 401, description = "Unauthorized"),
|
||||
(status = 403, description = "Forbidden - Admin scope required"),
|
||||
),
|
||||
security(("Bearer" = []))
|
||||
)]
|
||||
pub async fn stream_job_progress(
|
||||
State(state): State<AppState>,
|
||||
id: axum::extract::Path<Uuid>,
|
||||
) -> Result<Sse<impl Stream<Item = Result<Event, Infallible>>>, ApiError> {
|
||||
// Verify job exists
|
||||
let job_exists = sqlx::query("SELECT 1 FROM index_jobs WHERE id = $1")
|
||||
.bind(id.0)
|
||||
.fetch_optional(&state.pool)
|
||||
.await?;
|
||||
|
||||
if job_exists.is_none() {
|
||||
return Err(ApiError::not_found("job not found"));
|
||||
}
|
||||
|
||||
let job_id = id.0;
|
||||
let pool = state.pool.clone();
|
||||
|
||||
let stream = async_stream::stream! {
|
||||
let mut last_status: Option<String> = None;
|
||||
let mut last_processed: Option<i32> = None;
|
||||
let mut interval = tokio::time::interval(Duration::from_millis(500));
|
||||
|
||||
loop {
|
||||
interval.tick().await;
|
||||
|
||||
let row = sqlx::query(
|
||||
"SELECT status, current_file, progress_percent, processed_files, total_files, stats_json
|
||||
FROM index_jobs WHERE id = $1"
|
||||
)
|
||||
.bind(job_id)
|
||||
.fetch_one(&pool)
|
||||
.await;
|
||||
|
||||
match row {
|
||||
Ok(row) => {
|
||||
let status: String = row.get("status");
|
||||
let processed_files: Option<i32> = row.get("processed_files");
|
||||
|
||||
// Send update if status changed or progress changed
|
||||
let should_send = last_status.as_ref() != Some(&status)
|
||||
|| last_processed != processed_files;
|
||||
|
||||
if should_send {
|
||||
last_status = Some(status.clone());
|
||||
last_processed = processed_files;
|
||||
|
||||
let event = ProgressEvent {
|
||||
job_id: job_id.to_string(),
|
||||
status: status.clone(),
|
||||
current_file: row.get("current_file"),
|
||||
progress_percent: row.get("progress_percent"),
|
||||
processed_files,
|
||||
total_files: row.get("total_files"),
|
||||
stats_json: row.get("stats_json"),
|
||||
};
|
||||
|
||||
if let Ok(json) = serde_json::to_string(&event) {
|
||||
yield Ok(Event::default().data(json));
|
||||
}
|
||||
|
||||
// Stop streaming if job is finished
|
||||
if status == "success" || status == "failed" || status == "cancelled" {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Sse::new(stream).keep_alive(axum::response::sse::KeepAlive::default()))
|
||||
}
|
||||
|
||||
@@ -152,3 +152,61 @@ fn canonicalize_library_root(root_path: &str) -> Result<PathBuf, ApiError> {
|
||||
|
||||
Ok(canonical)
|
||||
}
|
||||
|
||||
use crate::index_jobs::{IndexJobResponse, RebuildRequest};
|
||||
|
||||
/// Trigger a scan/indexing job for a specific library
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/libraries/{id}/scan",
|
||||
tag = "libraries",
|
||||
params(
|
||||
("id" = String, Path, description = "Library UUID"),
|
||||
),
|
||||
request_body = Option<RebuildRequest>,
|
||||
responses(
|
||||
(status = 200, body = IndexJobResponse),
|
||||
(status = 404, description = "Library not found"),
|
||||
(status = 401, description = "Unauthorized"),
|
||||
(status = 403, description = "Forbidden - Admin scope required"),
|
||||
),
|
||||
security(("Bearer" = []))
|
||||
)]
|
||||
pub async fn scan_library(
|
||||
State(state): State<AppState>,
|
||||
AxumPath(library_id): AxumPath<Uuid>,
|
||||
payload: Option<Json<RebuildRequest>>,
|
||||
) -> Result<Json<IndexJobResponse>, ApiError> {
|
||||
// Verify library exists
|
||||
let library_exists = sqlx::query("SELECT 1 FROM libraries WHERE id = $1")
|
||||
.bind(library_id)
|
||||
.fetch_optional(&state.pool)
|
||||
.await?;
|
||||
|
||||
if library_exists.is_none() {
|
||||
return Err(ApiError::not_found("library not found"));
|
||||
}
|
||||
|
||||
let is_full = payload.as_ref().and_then(|p| p.full).unwrap_or(false);
|
||||
let job_type = if is_full { "full_rebuild" } else { "rebuild" };
|
||||
|
||||
// Create indexing job for this library
|
||||
let job_id = Uuid::new_v4();
|
||||
sqlx::query(
|
||||
"INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, $3, 'pending')",
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(library_id)
|
||||
.bind(job_type)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
let row = sqlx::query(
|
||||
"SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at FROM index_jobs WHERE id = $1",
|
||||
)
|
||||
.bind(job_id)
|
||||
.fetch_one(&state.pool)
|
||||
.await?;
|
||||
|
||||
Ok(Json(crate::index_jobs::map_row(row)))
|
||||
}
|
||||
|
||||
@@ -95,8 +95,13 @@ async fn main() -> anyhow::Result<()> {
|
||||
let admin_routes = Router::new()
|
||||
.route("/libraries", get(libraries::list_libraries).post(libraries::create_library))
|
||||
.route("/libraries/:id", delete(libraries::delete_library))
|
||||
.route("/libraries/:id/scan", axum::routing::post(libraries::scan_library))
|
||||
.route("/index/rebuild", axum::routing::post(index_jobs::enqueue_rebuild))
|
||||
.route("/index/status", get(index_jobs::list_index_jobs))
|
||||
.route("/index/jobs/active", get(index_jobs::get_active_jobs))
|
||||
.route("/index/jobs/:id", get(index_jobs::get_job_details))
|
||||
.route("/index/jobs/:id/stream", get(index_jobs::stream_job_progress))
|
||||
.route("/index/jobs/:id/errors", get(index_jobs::get_job_errors))
|
||||
.route("/index/cancel/:id", axum::routing::post(index_jobs::cancel_job))
|
||||
.route("/folders", get(index_jobs::list_folders))
|
||||
.route("/admin/tokens", get(tokens::list_tokens).post(tokens::create_token))
|
||||
|
||||
@@ -11,11 +11,16 @@ use utoipa::OpenApi;
|
||||
crate::search::search_books,
|
||||
crate::index_jobs::enqueue_rebuild,
|
||||
crate::index_jobs::list_index_jobs,
|
||||
crate::index_jobs::get_active_jobs,
|
||||
crate::index_jobs::get_job_details,
|
||||
crate::index_jobs::stream_job_progress,
|
||||
crate::index_jobs::get_job_errors,
|
||||
crate::index_jobs::cancel_job,
|
||||
crate::index_jobs::list_folders,
|
||||
crate::libraries::list_libraries,
|
||||
crate::libraries::create_library,
|
||||
crate::libraries::delete_library,
|
||||
crate::libraries::scan_library,
|
||||
crate::tokens::list_tokens,
|
||||
crate::tokens::create_token,
|
||||
crate::tokens::revoke_token,
|
||||
@@ -32,6 +37,9 @@ use utoipa::OpenApi;
|
||||
crate::search::SearchResponse,
|
||||
crate::index_jobs::RebuildRequest,
|
||||
crate::index_jobs::IndexJobResponse,
|
||||
crate::index_jobs::IndexJobDetailResponse,
|
||||
crate::index_jobs::JobErrorResponse,
|
||||
crate::index_jobs::ProgressEvent,
|
||||
crate::index_jobs::FolderItem,
|
||||
crate::libraries::LibraryResponse,
|
||||
crate::libraries::CreateLibraryRequest,
|
||||
|
||||
@@ -20,6 +20,15 @@ use uuid::Uuid;
|
||||
|
||||
use crate::{error::ApiError, AppState};
|
||||
|
||||
fn remap_libraries_path(path: &str) -> String {
|
||||
if let Ok(root) = std::env::var("LIBRARIES_ROOT_PATH") {
|
||||
if path.starts_with("/libraries/") {
|
||||
return path.replacen("/libraries", &root, 1);
|
||||
}
|
||||
}
|
||||
path.to_string()
|
||||
}
|
||||
|
||||
#[derive(Deserialize, ToSchema)]
|
||||
pub struct PageQuery {
|
||||
#[schema(value_type = Option<String>, example = "webp")]
|
||||
@@ -122,6 +131,8 @@ pub async fn get_page(
|
||||
|
||||
let row = row.ok_or_else(|| ApiError::not_found("book file not found"))?;
|
||||
let abs_path: String = row.get("abs_path");
|
||||
// Remap /libraries to LIBRARIES_ROOT_PATH for local development
|
||||
let abs_path = remap_libraries_path(&abs_path);
|
||||
let input_format: String = row.get("format");
|
||||
|
||||
let _permit = state
|
||||
|
||||
Reference in New Issue
Block a user