feat(indexing): Lot 4 - Progression temps reel, Full Rebuild, Optimisations

- Ajout migrations DB: index_job_errors, library_monitoring, full_rebuild_type
- API: endpoints progression temps reel (/jobs/:id/stream), active jobs, details
- API: support full_rebuild avec suppression donnees existantes
- Indexer: logs detailles avec timing [SCAN][META][PARSER][BDD]
- Indexer: optimisation parsing PDF (lopdf -> pdfinfo) 235x plus rapide
- Indexer: corrections chemins LIBRARIES_ROOT_PATH pour dev local
- Backoffice: composants JobProgress, JobsIndicator (header), JobsList
- Backoffice: SSE streaming pour progression temps reel
- Backoffice: boutons Index/Index Full sur page libraries
- Backoffice: highlight job apres creation avec redirection
- Fix: parsing volume type i32, sync meilisearch cleanup

Perf: parsing PDF passe de 8.7s a 37ms
Perf: indexation 45 fichiers en ~15s vs plusieurs minutes avant
This commit is contained in:
2026-03-06 11:33:32 +01:00
parent 82294a1bee
commit 5f51955f4d
29 changed files with 1928 additions and 68 deletions

View File

@@ -9,11 +9,14 @@ anyhow.workspace = true
argon2.workspace = true
axum.workspace = true
base64.workspace = true
async-stream = "0.3"
chrono.workspace = true
futures = "0.3"
image.workspace = true
lru.workspace = true
stripstream-core = { path = "../../crates/core" }
rand.workspace = true
tokio-stream = "0.1"
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
@@ -21,6 +24,7 @@ sha2.workspace = true
sqlx.workspace = true
tokio.workspace = true
tower.workspace = true
tower-http = { version = "0.6", features = ["cors"] }
tracing.workspace = true
tracing-subscriber.workspace = true
uuid.workspace = true

View File

@@ -1,7 +1,10 @@
use axum::{extract::State, Json};
use axum::{extract::State, response::sse::{Event, Sse}, Json};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sqlx::Row;
use std::convert::Infallible;
use std::time::Duration;
use tokio_stream::Stream;
use uuid::Uuid;
use utoipa::ToSchema;
@@ -11,6 +14,8 @@ use crate::{error::ApiError, AppState};
/// Request body for enqueueing an index (re)build job.
pub struct RebuildRequest {
/// Target library; `None` enqueues a rebuild covering all libraries.
#[schema(value_type = Option<String>)]
pub library_id: Option<Uuid>,
/// When `true`, run a full rebuild (existing index data is dropped first,
/// per the commit description); defaults to `false` (incremental rebuild).
#[schema(value_type = Option<bool>, example = false)]
pub full: Option<bool>,
}
#[derive(Serialize, ToSchema)]
@@ -37,6 +42,49 @@ pub struct FolderItem {
pub path: String,
}
/// Detailed view of a single indexing job, including the real-time
/// progress columns used for live monitoring.
#[derive(Serialize, ToSchema)]
pub struct IndexJobDetailResponse {
/// Job UUID.
#[schema(value_type = String)]
pub id: Uuid,
/// Library this job targets; `None` when the job spans all libraries.
#[schema(value_type = Option<String>)]
pub library_id: Option<Uuid>,
/// Job type as stored in the DB ("rebuild" or "full_rebuild").
pub r#type: String,
/// Lifecycle status (values seen elsewhere in this file: "pending",
/// "running", "success", "failed", "cancelled").
pub status: String,
/// Start timestamp; presumably set when a worker picks the job up — TODO confirm.
#[schema(value_type = Option<String>)]
pub started_at: Option<DateTime<Utc>>,
/// Completion timestamp; `None` while the job is still active.
#[schema(value_type = Option<String>)]
pub finished_at: Option<DateTime<Utc>>,
/// Free-form JSON statistics reported by the indexer, if any.
pub stats_json: Option<serde_json::Value>,
/// Error description when the job failed.
pub error_opt: Option<String>,
/// When the job row was created (enqueue time).
#[schema(value_type = String)]
pub created_at: DateTime<Utc>,
/// Path of the file currently being processed, when running.
pub current_file: Option<String>,
/// Overall completion percentage — NOTE(review): assumed 0-100, confirm against the indexer.
pub progress_percent: Option<i32>,
/// Total number of files the job will process, once known.
pub total_files: Option<i32>,
/// Number of files processed so far.
pub processed_files: Option<i32>,
}
/// One per-file error recorded for an indexing job
/// (backed by the `index_job_errors` table).
#[derive(Serialize, ToSchema)]
pub struct JobErrorResponse {
/// Error row UUID.
#[schema(value_type = String)]
pub id: Uuid,
/// Path of the file that failed to index.
pub file_path: String,
/// Human-readable error message captured at failure time.
pub error_message: String,
/// When the error was recorded.
#[schema(value_type = String)]
pub created_at: DateTime<Utc>,
}
/// JSON payload emitted for each SSE event on the job progress stream
/// (`stream_job_progress` serializes one of these per update).
#[derive(Serialize, ToSchema)]
pub struct ProgressEvent {
/// Job UUID rendered as a string.
pub job_id: String,
/// Current job status at the time of the event.
pub status: String,
/// File currently being processed, if any.
pub current_file: Option<String>,
/// Overall completion percentage, when known.
pub progress_percent: Option<i32>,
/// Files processed so far.
pub processed_files: Option<i32>,
/// Total files to process, once counted.
pub total_files: Option<i32>,
/// Free-form statistics JSON from the indexer, if present.
pub stats_json: Option<serde_json::Value>,
}
/// Enqueue a job to rebuild the search index for a library (or all libraries if no library_id specified)
#[utoipa::path(
post,
@@ -54,14 +102,17 @@ pub async fn enqueue_rebuild(
State(state): State<AppState>,
payload: Option<Json<RebuildRequest>>,
) -> Result<Json<IndexJobResponse>, ApiError> {
let library_id = payload.and_then(|p| p.0.library_id);
let library_id = payload.as_ref().and_then(|p| p.0.library_id);
let is_full = payload.as_ref().and_then(|p| p.0.full).unwrap_or(false);
let job_type = if is_full { "full_rebuild" } else { "rebuild" };
let id = Uuid::new_v4();
sqlx::query(
"INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, 'rebuild', 'pending')",
"INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, $3, 'pending')",
)
.bind(id)
.bind(library_id)
.bind(job_type)
.execute(&state.pool)
.await?;
@@ -138,6 +189,10 @@ pub async fn cancel_job(
Ok(Json(map_row(row)))
}
/// Root directory holding the media libraries.
///
/// Reads `LIBRARIES_ROOT_PATH` from the environment (override for local
/// development) and falls back to the in-container default `/libraries`.
fn get_libraries_root() -> String {
    match std::env::var("LIBRARIES_ROOT_PATH") {
        Ok(root) => root,
        Err(_) => String::from("/libraries"),
    }
}
/// List available folders in /libraries for library creation
#[utoipa::path(
get,
@@ -151,7 +206,8 @@ pub async fn cancel_job(
security(("Bearer" = []))
)]
pub async fn list_folders(State(_state): State<AppState>) -> Result<Json<Vec<FolderItem>>, ApiError> {
let libraries_path = std::path::Path::new("/libraries");
let libraries_root = get_libraries_root();
let libraries_path = std::path::Path::new(&libraries_root);
let mut folders = Vec::new();
if let Ok(entries) = std::fs::read_dir(libraries_path) {
@@ -170,7 +226,7 @@ pub async fn list_folders(State(_state): State<AppState>) -> Result<Json<Vec<Fol
Ok(Json(folders))
}
fn map_row(row: sqlx::postgres::PgRow) -> IndexJobResponse {
pub fn map_row(row: sqlx::postgres::PgRow) -> IndexJobResponse {
IndexJobResponse {
id: row.get("id"),
library_id: row.get("library_id"),
@@ -183,3 +239,213 @@ fn map_row(row: sqlx::postgres::PgRow) -> IndexJobResponse {
created_at: row.get("created_at"),
}
}
/// Map an `index_jobs` row (including the progress columns) into the
/// detailed API response shape. Panics if an expected column is missing,
/// matching sqlx `Row::get` semantics.
fn map_row_detail(row: sqlx::postgres::PgRow) -> IndexJobDetailResponse {
    let id = row.get("id");
    let library_id = row.get("library_id");
    let r#type = row.get("type");
    let status = row.get("status");
    let started_at = row.get("started_at");
    let finished_at = row.get("finished_at");
    let stats_json = row.get("stats_json");
    let error_opt = row.get("error_opt");
    let created_at = row.get("created_at");
    let current_file = row.get("current_file");
    let progress_percent = row.get("progress_percent");
    let total_files = row.get("total_files");
    let processed_files = row.get("processed_files");
    IndexJobDetailResponse {
        id,
        library_id,
        r#type,
        status,
        started_at,
        finished_at,
        stats_json,
        error_opt,
        created_at,
        current_file,
        progress_percent,
        total_files,
        processed_files,
    }
}
/// List active indexing jobs (pending or running)
#[utoipa::path(
get,
path = "/index/jobs/active",
tag = "indexing",
responses(
(status = 200, body = Vec<IndexJobResponse>),
(status = 401, description = "Unauthorized"),
(status = 403, description = "Forbidden - Admin scope required"),
),
security(("Bearer" = []))
)]
pub async fn get_active_jobs(State(state): State<AppState>) -> Result<Json<Vec<IndexJobResponse>>, ApiError> {
let rows = sqlx::query(
"SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at
FROM index_jobs
WHERE status IN ('pending', 'running')
ORDER BY created_at ASC"
)
.fetch_all(&state.pool)
.await?;
Ok(Json(rows.into_iter().map(map_row).collect()))
}
// NOTE: the router registers this handler at "/index/jobs/:id" (no
// "/details" suffix); the OpenAPI path below must match that registration,
// otherwise the generated spec advertises a route that returns 404.
/// Get detailed job information including progress
#[utoipa::path(
    get,
    path = "/index/jobs/{id}",
    tag = "indexing",
    params(
        ("id" = String, Path, description = "Job UUID"),
    ),
    responses(
        (status = 200, body = IndexJobDetailResponse),
        (status = 404, description = "Job not found"),
        (status = 401, description = "Unauthorized"),
        (status = 403, description = "Forbidden - Admin scope required"),
    ),
    security(("Bearer" = []))
)]
pub async fn get_job_details(
    State(state): State<AppState>,
    id: axum::extract::Path<Uuid>,
) -> Result<Json<IndexJobDetailResponse>, ApiError> {
    // Fetch the job row including the live-progress columns.
    let row = sqlx::query(
        "SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at,
                current_file, progress_percent, total_files, processed_files
         FROM index_jobs WHERE id = $1"
    )
    .bind(id.0)
    .fetch_optional(&state.pool)
    .await?;

    match row {
        Some(row) => Ok(Json(map_row_detail(row))),
        None => Err(ApiError::not_found("job not found")),
    }
}
/// List errors for a specific job
#[utoipa::path(
    get,
    path = "/index/jobs/{id}/errors",
    tag = "indexing",
    params(
        ("id" = String, Path, description = "Job UUID"),
    ),
    responses(
        (status = 200, body = Vec<JobErrorResponse>),
        (status = 401, description = "Unauthorized"),
        (status = 403, description = "Forbidden - Admin scope required"),
    ),
    security(("Bearer" = []))
)]
pub async fn get_job_errors(
    State(state): State<AppState>,
    id: axum::extract::Path<Uuid>,
) -> Result<Json<Vec<JobErrorResponse>>, ApiError> {
    let rows = sqlx::query(
        "SELECT id, file_path, error_message, created_at
         FROM index_job_errors
         WHERE job_id = $1
         ORDER BY created_at ASC"
    )
    .bind(id.0)
    .fetch_all(&state.pool)
    .await?;

    // Build the response list; chronological order comes from the query.
    let mut errors = Vec::with_capacity(rows.len());
    for row in rows {
        errors.push(JobErrorResponse {
            id: row.get("id"),
            file_path: row.get("file_path"),
            error_message: row.get("error_message"),
            created_at: row.get("created_at"),
        });
    }
    Ok(Json(errors))
}
/// Stream job progress via SSE
#[utoipa::path(
get,
path = "/index/jobs/{id}/stream",
tag = "indexing",
params(
("id" = String, Path, description = "Job UUID"),
),
responses(
(status = 200, description = "SSE stream of progress events"),
(status = 404, description = "Job not found"),
(status = 401, description = "Unauthorized"),
(status = 403, description = "Forbidden - Admin scope required"),
),
security(("Bearer" = []))
)]
// Polls the index_jobs row every 500ms and yields a ProgressEvent whenever
// the status or processed-file count changes; the stream ends when the job
// reaches a terminal status or the row can no longer be read.
pub async fn stream_job_progress(
State(state): State<AppState>,
id: axum::extract::Path<Uuid>,
) -> Result<Sse<impl Stream<Item = Result<Event, Infallible>>>, ApiError> {
// Verify job exists up front so the client gets a proper 404 instead of
// an SSE response that immediately ends.
let job_exists = sqlx::query("SELECT 1 FROM index_jobs WHERE id = $1")
.bind(id.0)
.fetch_optional(&state.pool)
.await?;
if job_exists.is_none() {
return Err(ApiError::not_found("job not found"));
}
let job_id = id.0;
// Clone the pool handle so the generator below can own it independently
// of the request state.
let pool = state.pool.clone();
let stream = async_stream::stream! {
// Last values sent to the client — used to suppress duplicate events.
let mut last_status: Option<String> = None;
let mut last_processed: Option<i32> = None;
let mut interval = tokio::time::interval(Duration::from_millis(500));
loop {
interval.tick().await;
let row = sqlx::query(
"SELECT status, current_file, progress_percent, processed_files, total_files, stats_json
FROM index_jobs WHERE id = $1"
)
.bind(job_id)
.fetch_one(&pool)
.await;
match row {
Ok(row) => {
let status: String = row.get("status");
let processed_files: Option<i32> = row.get("processed_files");
// Send update if status changed or progress changed
let should_send = last_status.as_ref() != Some(&status)
|| last_processed != processed_files;
if should_send {
last_status = Some(status.clone());
last_processed = processed_files;
let event = ProgressEvent {
job_id: job_id.to_string(),
status: status.clone(),
current_file: row.get("current_file"),
progress_percent: row.get("progress_percent"),
processed_files,
total_files: row.get("total_files"),
stats_json: row.get("stats_json"),
};
// Serialization failure is silently skipped; the next tick retries.
if let Ok(json) = serde_json::to_string(&event) {
yield Ok(Event::default().data(json));
}
// Stop streaming if job is finished. Although this break sits
// inside `should_send`, a terminal status always differs from the
// previously-sent one, so it is reached on the tick that first
// observes the terminal state.
if status == "success" || status == "failed" || status == "cancelled" {
break;
}
}
}
// fetch_one failed (row deleted or DB error) — end the stream.
Err(_) => break,
}
}
};
// Keep-alive comments stop intermediaries from closing an idle connection.
Ok(Sse::new(stream).keep_alive(axum::response::sse::KeepAlive::default()))
}

View File

@@ -152,3 +152,61 @@ fn canonicalize_library_root(root_path: &str) -> Result<PathBuf, ApiError> {
Ok(canonical)
}
use crate::index_jobs::{IndexJobResponse, RebuildRequest};
/// Trigger a scan/indexing job for a specific library
#[utoipa::path(
    post,
    path = "/libraries/{id}/scan",
    tag = "libraries",
    params(
        ("id" = String, Path, description = "Library UUID"),
    ),
    request_body = Option<RebuildRequest>,
    responses(
        (status = 200, body = IndexJobResponse),
        (status = 404, description = "Library not found"),
        (status = 401, description = "Unauthorized"),
        (status = 403, description = "Forbidden - Admin scope required"),
    ),
    security(("Bearer" = []))
)]
pub async fn scan_library(
    State(state): State<AppState>,
    AxumPath(library_id): AxumPath<Uuid>,
    payload: Option<Json<RebuildRequest>>,
) -> Result<Json<IndexJobResponse>, ApiError> {
    // Verify library exists before creating a job for it.
    let library_exists = sqlx::query("SELECT 1 FROM libraries WHERE id = $1")
        .bind(library_id)
        .fetch_optional(&state.pool)
        .await?;
    if library_exists.is_none() {
        return Err(ApiError::not_found("library not found"));
    }

    // `full` requests a full rebuild (existing data dropped first);
    // absent body or absent flag means an incremental rebuild.
    let is_full = payload.as_ref().and_then(|p| p.full).unwrap_or(false);
    let job_type = if is_full { "full_rebuild" } else { "rebuild" };

    // Insert the job and read the created row back in a single statement.
    // RETURNING avoids the extra round trip and the small race window of an
    // INSERT followed by a separate SELECT (a worker could mutate the row in
    // between), while still returning DB-side defaults like created_at.
    let job_id = Uuid::new_v4();
    let row = sqlx::query(
        "INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, $3, 'pending')
         RETURNING id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at",
    )
    .bind(job_id)
    .bind(library_id)
    .bind(job_type)
    .fetch_one(&state.pool)
    .await?;

    Ok(Json(crate::index_jobs::map_row(row)))
}

View File

@@ -95,8 +95,13 @@ async fn main() -> anyhow::Result<()> {
let admin_routes = Router::new()
.route("/libraries", get(libraries::list_libraries).post(libraries::create_library))
.route("/libraries/:id", delete(libraries::delete_library))
.route("/libraries/:id/scan", axum::routing::post(libraries::scan_library))
.route("/index/rebuild", axum::routing::post(index_jobs::enqueue_rebuild))
.route("/index/status", get(index_jobs::list_index_jobs))
.route("/index/jobs/active", get(index_jobs::get_active_jobs))
.route("/index/jobs/:id", get(index_jobs::get_job_details))
.route("/index/jobs/:id/stream", get(index_jobs::stream_job_progress))
.route("/index/jobs/:id/errors", get(index_jobs::get_job_errors))
.route("/index/cancel/:id", axum::routing::post(index_jobs::cancel_job))
.route("/folders", get(index_jobs::list_folders))
.route("/admin/tokens", get(tokens::list_tokens).post(tokens::create_token))

View File

@@ -11,11 +11,16 @@ use utoipa::OpenApi;
crate::search::search_books,
crate::index_jobs::enqueue_rebuild,
crate::index_jobs::list_index_jobs,
crate::index_jobs::get_active_jobs,
crate::index_jobs::get_job_details,
crate::index_jobs::stream_job_progress,
crate::index_jobs::get_job_errors,
crate::index_jobs::cancel_job,
crate::index_jobs::list_folders,
crate::libraries::list_libraries,
crate::libraries::create_library,
crate::libraries::delete_library,
crate::libraries::scan_library,
crate::tokens::list_tokens,
crate::tokens::create_token,
crate::tokens::revoke_token,
@@ -32,6 +37,9 @@ use utoipa::OpenApi;
crate::search::SearchResponse,
crate::index_jobs::RebuildRequest,
crate::index_jobs::IndexJobResponse,
crate::index_jobs::IndexJobDetailResponse,
crate::index_jobs::JobErrorResponse,
crate::index_jobs::ProgressEvent,
crate::index_jobs::FolderItem,
crate::libraries::LibraryResponse,
crate::libraries::CreateLibraryRequest,

View File

@@ -20,6 +20,15 @@ use uuid::Uuid;
use crate::{error::ApiError, AppState};
/// Remap an absolute path under the container-default `/libraries` root to
/// `LIBRARIES_ROOT_PATH`, for local development where libraries live
/// elsewhere on disk.
///
/// Paths not under `/libraries/`, or any path when the env var is unset,
/// are returned unchanged. Uses `strip_prefix` instead of
/// `starts_with` + `replacen` — same behavior, one fewer scan, clearer intent.
fn remap_libraries_path(path: &str) -> String {
    if let Ok(root) = std::env::var("LIBRARIES_ROOT_PATH") {
        if let Some(rest) = path.strip_prefix("/libraries/") {
            return format!("{root}/{rest}");
        }
    }
    path.to_string()
}
#[derive(Deserialize, ToSchema)]
pub struct PageQuery {
#[schema(value_type = Option<String>, example = "webp")]
@@ -122,6 +131,8 @@ pub async fn get_page(
let row = row.ok_or_else(|| ApiError::not_found("book file not found"))?;
let abs_path: String = row.get("abs_path");
// Remap /libraries to LIBRARIES_ROOT_PATH for local development
let abs_path = remap_libraries_path(&abs_path);
let input_format: String = row.get("format");
let _permit = state