feat(indexing): Lot 4 - Progression temps reel, Full Rebuild, Optimisations
- Ajout migrations DB: index_job_errors, library_monitoring, full_rebuild_type
- API: endpoints progression temps reel (/jobs/:id/stream), active jobs, details
- API: support full_rebuild avec suppression donnees existantes
- Indexer: logs detailles avec timing [SCAN][META][PARSER][BDD]
- Indexer: optimisation parsing PDF (lopdf -> pdfinfo) 235x plus rapide
- Indexer: corrections chemins LIBRARIES_ROOT_PATH pour dev local
- Backoffice: composants JobProgress, JobsIndicator (header), JobsList
- Backoffice: SSE streaming pour progression temps reel
- Backoffice: boutons Index/Index Full sur page libraries
- Backoffice: highlight job apres creation avec redirection
- Fix: parsing volume type i32, sync meilisearch cleanup

Perf: parsing PDF passe de 8.7s a 37ms
Perf: indexation 45 fichiers en ~15s vs plusieurs minutes avant
This commit is contained in:
@@ -176,9 +176,30 @@ fn parse_cbr_page_count(path: &Path) -> Result<i32> {
|
||||
}
|
||||
|
||||
fn parse_pdf_page_count(path: &Path) -> Result<i32> {
|
||||
let doc = lopdf::Document::load(path)
|
||||
.with_context(|| format!("cannot open pdf: {}", path.display()))?;
|
||||
Ok(doc.get_pages().len() as i32)
|
||||
// Use pdfinfo command line tool instead of lopdf for better performance
|
||||
let output = std::process::Command::new("pdfinfo")
|
||||
.arg(path)
|
||||
.output()
|
||||
.with_context(|| format!("failed to execute pdfinfo for {}", path.display()))?;
|
||||
|
||||
if !output.status.success() {
|
||||
return Err(anyhow::anyhow!("pdfinfo failed for {}", path.display()));
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
for line in stdout.lines() {
|
||||
if line.starts_with("Pages:") {
|
||||
if let Some(pages_str) = line.split_whitespace().nth(1) {
|
||||
return pages_str
|
||||
.parse::<i32>()
|
||||
.with_context(|| format!("cannot parse page count: {}", pages_str));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow::anyhow!(
|
||||
"could not find page count in pdfinfo output"
|
||||
))
|
||||
}
|
||||
|
||||
fn is_image_name(name: &str) -> bool {
|
||||
|
||||
Reference in New Issue
Block a user