fix(parsers,api,indexer,backoffice): corriger CBZ Unicode extra fields, centraliser extraction, nettoyer Meili, fixer header

- Parsers: raw ZIP reader (flate2) contournant la validation CRC32 des
  Unicode extra fields (0x7075) qui bloquait certains CBZ
- Parsers: nouvelle API publique extract_page() pour extraire une page
  par index depuis CBZ/CBR/PDF avec fallbacks automatiques
- API: suppression du code d'extraction dupliqué, délégation à parsers::extract_page()
- API: retrait des dépendances directes zip/unrar/pdfium-render/natord
- Indexer: nettoyage Meili systématique à chaque sync (au lieu de ~10%)
  avec pagination pour supporter les grosses collections — corrige les
  doublons dans la recherche
- Indexer: retrait de la dépendance rand (plus utilisée)
- Backoffice: popin jobs rendue via createPortal avec positionnement
  dynamique — corrige le débordement desktop et le header cassé en mobile

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-13 13:26:14 +01:00
parent 5db2a7501b
commit 7cca7e40c2
8 changed files with 692 additions and 605 deletions

View File

@@ -15,7 +15,6 @@ image.workspace = true
notify = "8"
num_cpus.workspace = true
parsers = { path = "../../crates/parsers" }
rand.workspace = true
rayon.workspace = true
reqwest.workspace = true
serde.workspace = true

View File

@@ -117,59 +117,69 @@ pub async fn sync_meili(pool: &PgPool, meili_url: &str, meili_master_key: &str)
}
}
// Handle deletions: get all book IDs from DB and remove from MeiliSearch any that don't exist
// This is expensive, so we only do it periodically (every 10 syncs) or on full syncs
if is_full_sync || rand::random::<u8>() < 26 { // ~10% chance
info!("[MEILI] Checking for documents to delete");
// Get all book IDs from database
// Clean up stale documents: remove from Meilisearch any IDs that no longer exist in DB.
// Runs on every sync — cost is bounded: one id-only DB query plus paginated id-only fetches from Meili.
{
let db_ids: Vec<String> = sqlx::query_scalar("SELECT id::text FROM books")
.fetch_all(pool)
.await?;
// Get all document IDs from MeiliSearch (this requires fetching all documents)
// For efficiency, we'll just delete by query for documents that might be stale
// A better approach would be to track deletions in a separate table
// For now, we'll do a simple approach: fetch all Meili docs and compare
// Note: This could be slow for large collections
let meili_response = client
.post(format!("{base}/indexes/books/documents/fetch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!({
"fields": ["id"],
"limit": 100000
}))
.send()
.await;
if let Ok(response) = meili_response {
if response.status().is_success() {
// Meilisearch returns { "results": [...], "offset": ..., "total": ... }
if let Ok(payload) = response.json::<serde_json::Value>().await {
let docs = payload.get("results")
.and_then(|v| v.as_array())
.cloned()
.unwrap_or_default();
let meili_ids: std::collections::HashSet<String> = docs
.into_iter()
.filter_map(|doc| doc.get("id").and_then(|id| id.as_str()).map(|s| s.to_string()))
.collect();
let db_ids_set: std::collections::HashSet<String> = db_ids.into_iter().collect();
let to_delete: Vec<String> = meili_ids.difference(&db_ids_set).cloned().collect();
if !to_delete.is_empty() {
info!("[MEILI] Deleting {} stale documents", to_delete.len());
let _ = client
.post(format!("{base}/indexes/books/documents/delete-batch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&to_delete)
.send()
.await;
}
// Fetch all document IDs from Meilisearch (paginated to handle large collections)
let mut meili_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut offset: usize = 0;
const PAGE_SIZE: usize = 10000;
loop {
let response = client
.post(format!("{base}/indexes/books/documents/fetch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!({
"fields": ["id"],
"limit": PAGE_SIZE,
"offset": offset
}))
.send()
.await;
let response = match response {
Ok(r) if r.status().is_success() => r,
_ => break,
};
let payload: serde_json::Value = match response.json().await {
Ok(v) => v,
Err(_) => break,
};
let results = payload.get("results")
.and_then(|v| v.as_array())
.cloned()
.unwrap_or_default();
let page_count = results.len();
for doc in results {
if let Some(id) = doc.get("id").and_then(|v| v.as_str()) {
meili_ids.insert(id.to_string());
}
}
if page_count < PAGE_SIZE {
break; // Last page
}
offset += PAGE_SIZE;
}
let db_ids_set: std::collections::HashSet<String> = db_ids.into_iter().collect();
let to_delete: Vec<String> = meili_ids.difference(&db_ids_set).cloned().collect();
if !to_delete.is_empty() {
info!("[MEILI] Deleting {} stale documents", to_delete.len());
let _ = client
.post(format!("{base}/indexes/books/documents/delete-batch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&to_delete)
.send()
.await;
}
}