refactor: replace Meilisearch with PostgreSQL full-text search

Remove Meilisearch dependency entirely. Search is now handled by
PostgreSQL ILIKE with pg_trgm indexes, joining series_metadata for
series-level authors. No external search engine needed.

- Replace search.rs Meilisearch HTTP calls with PostgreSQL queries
- Remove meili.rs from indexer, sync_meili call from job pipeline
- Remove MEILI_URL/MEILI_MASTER_KEY from config, state, env files
- Remove meilisearch service from docker-compose.yml
- Add migration 0027: drop sync_metadata, enable pg_trgm, add trigram indexes (see sketch below)
- Remove search resync button/endpoint (no longer needed)
- Update all documentation (CLAUDE.md, README.md, AGENTS.md, PLAN.md)

API contract unchanged — same SearchResponse shape returned.
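
Migration 0027 itself is not part of the hunks shown below. A minimal sketch of what it likely contains, assuming one GIN trigram index per ILIKE'd column; the index names and exact targets are illustrative, not taken from the actual migration file:

```sql
-- Hypothetical sketch of migration 0027 (the migration file is not shown in this diff).
-- sync_metadata only tracked the last Meilisearch sync, so it can be dropped entirely.
DROP TABLE IF EXISTS sync_metadata;

-- pg_trgm provides trigram operator classes so GIN indexes can serve ILIKE '%term%' lookups.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Index names and targets are assumptions based on the ILIKE predicates in search.rs.
CREATE INDEX IF NOT EXISTS idx_books_title_trgm  ON books USING gin (title gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_books_series_trgm ON books USING gin (series gin_trgm_ops);
```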

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 10:59:25 +01:00
parent 2985ef5561
commit 389d71b42f
20 changed files with 97 additions and 452 deletions

View File

@@ -68,8 +68,6 @@ async fn main() -> anyhow::Result<()> {
let state = AppState {
pool,
bootstrap_token: Arc::from(config.api_bootstrap_token),
meili_url: Arc::from(config.meili_url),
meili_master_key: Arc::from(config.meili_master_key),
page_cache: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(512).expect("non-zero")))),
page_render_limit: Arc::new(Semaphore::new(concurrent_renders)),
metrics: Arc::new(Metrics::new()),

View File

@@ -39,13 +39,13 @@ pub struct SearchResponse {
pub processing_time_ms: Option<u64>,
}
/// Search books across all libraries using Meilisearch
/// Search books across all libraries
#[utoipa::path(
get,
path = "/search",
tag = "books",
params(
("q" = String, Query, description = "Search query (books via Meilisearch + series via ILIKE)"),
("q" = String, Query, description = "Search query (books + series via PostgreSQL full-text)"),
("library_id" = Option<String>, Query, description = "Filter by library ID"),
("type" = Option<String>, Query, description = "Filter by type (cbz, cbr, pdf)"),
("kind" = Option<String>, Query, description = "Filter by kind (alias for type)"),
@@ -65,34 +65,38 @@ pub async fn search_books(
return Err(ApiError::bad_request("q is required"));
}
let mut filters: Vec<String> = Vec::new();
if let Some(library_id) = query.library_id.as_deref() {
filters.push(format!("library_id = '{}'", library_id.replace('"', "")));
}
let kind_filter = query.r#type.as_deref().or(query.kind.as_deref());
if let Some(kind) = kind_filter {
filters.push(format!("kind = '{}'", kind.replace('"', "")));
}
let body = serde_json::json!({
"q": query.q,
"limit": query.limit.unwrap_or(20).clamp(1, 100),
"filter": if filters.is_empty() { serde_json::Value::Null } else { serde_json::Value::String(filters.join(" AND ")) }
});
let limit_val = query.limit.unwrap_or(20).clamp(1, 100);
let limit_val = query.limit.unwrap_or(20).clamp(1, 100) as i64;
let q_pattern = format!("%{}%", query.q);
let library_id_uuid: Option<uuid::Uuid> = query.library_id.as_deref()
let library_id_uuid: Option<Uuid> = query.library_id.as_deref()
.and_then(|s| s.parse().ok());
let kind_filter: Option<&str> = query.r#type.as_deref().or(query.kind.as_deref());
// Meilisearch search (books) + PG series search in parallel
let client = reqwest::Client::new();
let url = format!("{}/indexes/books/search", state.meili_url.trim_end_matches('/'));
let meili_fut = client
.post(&url)
.header("Authorization", format!("Bearer {}", state.meili_master_key))
.json(&body)
.send();
let start = std::time::Instant::now();
// Book search via PostgreSQL ILIKE on title, authors, series
let books_sql = r#"
SELECT b.id, b.library_id, b.kind, b.title,
COALESCE(b.authors, CASE WHEN b.author IS NOT NULL AND b.author != '' THEN ARRAY[b.author] ELSE ARRAY[]::text[] END) as authors,
b.series, b.volume, b.language
FROM books b
LEFT JOIN series_metadata sm
ON sm.library_id = b.library_id
AND sm.name = COALESCE(NULLIF(b.series, ''), 'unclassified')
WHERE (
b.title ILIKE $1
OR b.series ILIKE $1
OR EXISTS (SELECT 1 FROM unnest(
COALESCE(b.authors, CASE WHEN b.author IS NOT NULL AND b.author != '' THEN ARRAY[b.author] ELSE ARRAY[]::text[] END)
|| COALESCE(sm.authors, ARRAY[]::text[])
) AS a WHERE a ILIKE $1)
)
AND ($2::uuid IS NULL OR b.library_id = $2)
AND ($3::text IS NULL OR b.kind = $3)
ORDER BY
CASE WHEN b.title ILIKE $1 THEN 0 ELSE 1 END,
b.title ASC
LIMIT $4
"#;
let series_sql = r#"
WITH sorted_books AS (
@@ -108,7 +112,7 @@ pub async fn search_books(
title ASC
) as rn
FROM books
WHERE ($1::uuid IS NULL OR library_id = $1)
WHERE ($2::uuid IS NULL OR library_id = $2)
),
series_counts AS (
SELECT
@@ -123,39 +127,49 @@ pub async fn search_books(
SELECT sc.library_id, sc.name, sc.book_count, sc.books_read_count, sb.id as first_book_id
FROM series_counts sc
JOIN sorted_books sb ON sb.library_id = sc.library_id AND sb.name = sc.name AND sb.rn = 1
WHERE sc.name ILIKE $2
WHERE sc.name ILIKE $1
ORDER BY sc.name ASC
LIMIT $3
LIMIT $4
"#;
let series_fut = sqlx::query(series_sql)
.bind(library_id_uuid)
.bind(&q_pattern)
.bind(limit_val as i64)
.fetch_all(&state.pool);
let (books_rows, series_rows) = tokio::join!(
sqlx::query(books_sql)
.bind(&q_pattern)
.bind(library_id_uuid)
.bind(kind_filter)
.bind(limit_val)
.fetch_all(&state.pool),
sqlx::query(series_sql)
.bind(&q_pattern)
.bind(library_id_uuid)
.bind(kind_filter) // unused in series query but keeps bind positions consistent
.bind(limit_val)
.fetch_all(&state.pool)
);
let (meili_resp, series_rows) = tokio::join!(meili_fut, series_fut);
let elapsed_ms = start.elapsed().as_millis() as u64;
// Meilisearch response handling
let meili_resp = meili_resp.map_err(|e| ApiError::internal(format!("meili request failed: {e}")))?;
let (hits, estimated_total_hits, processing_time_ms) = if !meili_resp.status().is_success() {
let body = meili_resp.text().await.unwrap_or_default();
if body.contains("index_not_found") {
(serde_json::json!([]), Some(0u64), Some(0u64))
} else {
return Err(ApiError::internal(format!("meili error: {body}")));
}
} else {
let payload: serde_json::Value = meili_resp.json().await
.map_err(|e| ApiError::internal(format!("invalid meili response: {e}")))?;
(
payload.get("hits").cloned().unwrap_or_else(|| serde_json::json!([])),
payload.get("estimatedTotalHits").and_then(|v| v.as_u64()),
payload.get("processingTimeMs").and_then(|v| v.as_u64()),
)
};
// Build book hits as JSON array (same shape as before)
let books_rows = books_rows.map_err(|e| ApiError::internal(format!("book search failed: {e}")))?;
let hits: Vec<serde_json::Value> = books_rows
.iter()
.map(|row| {
serde_json::json!({
"id": row.get::<Uuid, _>("id").to_string(),
"library_id": row.get::<Uuid, _>("library_id").to_string(),
"kind": row.get::<String, _>("kind"),
"title": row.get::<String, _>("title"),
"authors": row.get::<Vec<String>, _>("authors"),
"series": row.get::<Option<String>, _>("series"),
"volume": row.get::<Option<i32>, _>("volume"),
"language": row.get::<Option<String>, _>("language"),
})
})
.collect();
// Series handling
let estimated_total_hits = hits.len() as u64;
// Series hits
let series_hits: Vec<SeriesHit> = series_rows
.unwrap_or_default()
.iter()
@@ -169,9 +183,9 @@ pub async fn search_books(
.collect();
Ok(Json(SearchResponse {
hits,
hits: serde_json::Value::Array(hits),
series_hits,
estimated_total_hits,
processing_time_ms,
estimated_total_hits: Some(estimated_total_hits),
processing_time_ms: Some(elapsed_ms),
}))
}
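
To sanity-check that the trigram indexes actually back the new predicates, a single-column probe is the simplest test. The index name here follows the migration sketch above and is an assumption; the full search query mixes OR branches, an EXISTS over unnest(authors), and a LIMIT, so the planner may still prefer a sequential scan on small tables.

```sql
-- Illustrative plan check for one ILIKE predicate (not the full search query).
EXPLAIN (ANALYZE, BUFFERS)
SELECT id, title
FROM books
WHERE title ILIKE '%dune%'
ORDER BY title
LIMIT 20;
-- On a sizeable table, expect a Bitmap Index Scan on idx_books_title_trgm feeding a
-- Bitmap Heap Scan, rather than a full Seq Scan on books.
```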

View File

@@ -42,7 +42,6 @@ pub fn settings_routes() -> Router<AppState> {
.route("/settings/cache/clear", post(clear_cache))
.route("/settings/cache/stats", get(get_cache_stats))
.route("/settings/thumbnail/stats", get(get_thumbnail_stats))
.route("/settings/search/resync", post(force_search_resync))
}
/// List all settings
@@ -325,27 +324,3 @@ pub async fn get_thumbnail_stats(State(_state): State<AppState>) -> Result<Json<
Ok(Json(stats))
}
/// Force a full Meilisearch resync by resetting the sync timestamp
#[utoipa::path(
post,
path = "/settings/search/resync",
tag = "settings",
responses(
(status = 200, description = "Resync scheduled"),
(status = 401, description = "Unauthorized"),
),
security(("Bearer" = []))
)]
pub async fn force_search_resync(
State(state): State<AppState>,
) -> Result<Json<Value>, ApiError> {
sqlx::query("UPDATE sync_metadata SET last_meili_sync = NULL WHERE id = 1")
.execute(&state.pool)
.await?;
Ok(Json(serde_json::json!({
"success": true,
"message": "Search resync scheduled. The indexer will perform a full sync on its next cycle."
})))
}

View File

@@ -12,8 +12,6 @@ use tokio::sync::{Mutex, RwLock, Semaphore};
pub struct AppState {
pub pool: sqlx::PgPool,
pub bootstrap_token: Arc<str>,
pub meili_url: Arc<str>,
pub meili_master_key: Arc<str>,
pub page_cache: Arc<Mutex<LruCache<String, Arc<Vec<u8>>>>>,
pub page_render_limit: Arc<Semaphore>,
pub metrics: Arc<Metrics>,

View File

@@ -1,11 +0,0 @@
import { NextResponse } from "next/server";
import { forceSearchResync } from "@/lib/api";
export async function POST() {
try {
const data = await forceSearchResync();
return NextResponse.json(data);
} catch (error) {
return NextResponse.json({ error: "Failed to trigger search resync" }, { status: 500 });
}
}

View File

@@ -21,9 +21,6 @@ export default function SettingsPage({ initialSettings, initialCacheStats, initi
const [clearResult, setClearResult] = useState<ClearCacheResponse | null>(null);
const [isSaving, setIsSaving] = useState(false);
const [saveMessage, setSaveMessage] = useState<string | null>(null);
const [isResyncing, setIsResyncing] = useState(false);
const [resyncResult, setResyncResult] = useState<{ success: boolean; message: string } | null>(null);
// Komga sync state — URL and username are persisted in settings
const [komgaUrl, setKomgaUrl] = useState("");
const [komgaUsername, setKomgaUsername] = useState("");
@@ -89,20 +86,6 @@ export default function SettingsPage({ initialSettings, initialCacheStats, initi
}
}
async function handleSearchResync() {
setIsResyncing(true);
setResyncResult(null);
try {
const response = await fetch("/api/settings/search/resync", { method: "POST" });
const result = await response.json();
setResyncResult(result);
} catch {
setResyncResult({ success: false, message: "Failed to trigger search resync" });
} finally {
setIsResyncing(false);
}
}
const fetchReports = useCallback(async () => {
try {
const resp = await fetch("/api/komga/reports");
@@ -365,43 +348,6 @@ export default function SettingsPage({ initialSettings, initialCacheStats, initi
</CardContent>
</Card>
{/* Search Index */}
<Card className="mb-6">
<CardHeader>
<CardTitle className="flex items-center gap-2">
<Icon name="search" size="md" />
Search Index
</CardTitle>
<CardDescription>Force a full resync of the Meilisearch index. This will re-index all books on the next indexer cycle.</CardDescription>
</CardHeader>
<CardContent>
<div className="space-y-4">
{resyncResult && (
<div className={`p-3 rounded-lg ${resyncResult.success ? 'bg-success/10 text-success' : 'bg-destructive/10 text-destructive'}`}>
{resyncResult.message}
</div>
)}
<Button
onClick={handleSearchResync}
disabled={isResyncing}
>
{isResyncing ? (
<>
<Icon name="spinner" size="sm" className="animate-spin -ml-1 mr-2" />
Scheduling...
</>
) : (
<>
<Icon name="refresh" size="sm" className="mr-2" />
Force Search Resync
</>
)}
</Button>
</div>
</CardContent>
</Card>
{/* Limits Settings */}
<Card className="mb-6">
<CardHeader>

View File

@@ -406,12 +406,6 @@ export async function getThumbnailStats() {
return apiFetch<ThumbnailStats>("/settings/thumbnail/stats");
}
export async function forceSearchResync() {
return apiFetch<{ success: boolean; message: string }>("/settings/search/resync", {
method: "POST",
});
}
export async function convertBook(bookId: string) {
return apiFetch<IndexJobDto>(`/books/${bookId}/convert`, { method: "POST" });
}

View File

@@ -7,7 +7,7 @@ Background service on port **7081**. See the root `AGENTS.md` for conventions
| File | Role |
|---------|------|
| `main.rs` | Entry point, initialization, worker startup |
| `lib.rs` | `AppState` (pool, meili_url, meili_master_key) |
| `lib.rs` | `AppState` (pool) |
| `worker.rs` | Main loop: claim job → process → cleanup stale |
| `job.rs` | `claim_next_job`, `process_job`, `fail_job`, `cleanup_stale_jobs` |
| `scanner.rs` | Phase 1 discovery: WalkDir + `parse_metadata_fast` (zero archive I/O), skip unchanged directories via mtime, DB batching |
@@ -15,7 +15,6 @@ Background service on port **7081**. See the root `AGENTS.md` for conventions
| `batch.rs` | `flush_all_batches` with UNNEST, `BookInsert/Update/FileInsert/Update/ErrorInsert` structs |
| `scheduler.rs` | Auto-scan: checks every 60s for libraries to monitor |
| `watcher.rs` | Real-time file watcher |
| `meili.rs` | Meilisearch indexing/sync |
| `api.rs` | Indexer HTTP endpoints (/health, /ready) |
| `utils.rs` | `remap_libraries_path`, `unmap_libraries_path`, `compute_fingerprint`, `kind_from_format` |
@@ -28,7 +27,6 @@ claim_next_job (UPDATE ... RETURNING, status pending→running)
│ ├─ WalkDir + parse_metadata_fast (zero archive I/O)
│ ├─ skip directories via directory_mtimes (DB table)
│ └─ INSERT books (page_count=NULL) → books visible immediately
├─ meili::sync_meili
├─ analyzer::cleanup_orphaned_thumbnails (full_rebuild only)
└─ Phase 2: analyzer::analyze_library_books
├─ SELECT books WHERE page_count IS NULL
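
The claim step at the top of this pipeline is described as a single UPDATE ... RETURNING that flips a job from pending to running. A minimal sketch of that pattern, with table and column names assumed for illustration (the actual schema is not shown here); the SKIP LOCKED subselect is the usual way to keep concurrent workers from claiming the same job, and may or may not match the real implementation:

```sql
-- Hypothetical claim query: atomically take the oldest pending job and mark it running.
UPDATE index_jobs
SET status = 'running', started_at = NOW()
WHERE id = (
    SELECT id
    FROM index_jobs
    WHERE status = 'pending'
    ORDER BY created_at
    FOR UPDATE SKIP LOCKED
    LIMIT 1
)
RETURNING id, library_id, job_type;
```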

View File

@@ -3,7 +3,7 @@ use sqlx::{PgPool, Row};
use tracing::{error, info};
use uuid::Uuid;
use crate::{analyzer, converter, meili, scanner, AppState};
use crate::{analyzer, converter, scanner, AppState};
pub async fn cleanup_stale_jobs(pool: &PgPool) -> Result<()> {
let result = sqlx::query(
@@ -337,9 +337,6 @@ pub async fn process_job(
}
}
// Sync search index after discovery (books are visible immediately)
meili::sync_meili(&state.pool, &state.meili_url, &state.meili_master_key).await?;
// For full rebuild: clean up orphaned thumbnail files (old UUIDs)
if is_full_rebuild {
analyzer::cleanup_orphaned_thumbnails(state).await?;

View File

@@ -3,7 +3,6 @@ pub mod api;
pub mod batch;
pub mod converter;
pub mod job;
pub mod meili;
pub mod scheduler;
pub mod scanner;
pub mod utils;
@@ -15,6 +14,4 @@ use sqlx::PgPool;
#[derive(Clone)]
pub struct AppState {
pub pool: PgPool,
pub meili_url: String,
pub meili_master_key: String,
}

View File

@@ -30,11 +30,7 @@ async fn async_main() -> anyhow::Result<()> {
.connect(&config.database_url)
.await?;
let state = AppState {
pool,
meili_url: config.meili_url.clone(),
meili_master_key: config.meili_master_key.clone(),
};
let state = AppState { pool };
tokio::spawn(indexer::worker::run_worker(state.clone(), config.scan_interval_seconds));

View File

@@ -1,214 +0,0 @@
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use reqwest::Client;
use serde::Serialize;
use sqlx::{PgPool, Row};
use tracing::info;
use uuid::Uuid;
#[derive(Serialize)]
struct SearchDoc {
id: String,
library_id: String,
kind: String,
title: String,
authors: Vec<String>,
series: Option<String>,
volume: Option<i32>,
language: Option<String>,
}
pub async fn sync_meili(pool: &PgPool, meili_url: &str, meili_master_key: &str) -> Result<()> {
let client = Client::new();
let base = meili_url.trim_end_matches('/');
// Ensure index exists and has proper settings
let _ = client
.post(format!("{base}/indexes"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!({"uid": "books", "primaryKey": "id"}))
.send()
.await;
let _ = client
.patch(format!("{base}/indexes/books/settings/filterable-attributes"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!(["library_id", "kind"]))
.send()
.await;
let _ = client
.put(format!("{base}/indexes/books/settings/searchable-attributes"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!(["title", "authors", "series"]))
.send()
.await;
// Get last sync timestamp
let last_sync: Option<DateTime<Utc>> = sqlx::query_scalar(
"SELECT last_meili_sync FROM sync_metadata WHERE id = 1 AND last_meili_sync IS NOT NULL"
)
.fetch_optional(pool)
.await?;
// If no previous sync, do a full sync
let is_full_sync = last_sync.is_none();
// Get books to sync: all if full sync, only modified since last sync otherwise.
// Join series_metadata to merge series-level authors into the search document.
let books_query = r#"
SELECT b.id, b.library_id, b.kind, b.title, b.series, b.volume, b.language, b.updated_at,
ARRAY(
SELECT DISTINCT unnest(
COALESCE(b.authors, CASE WHEN b.author IS NOT NULL AND b.author != '' THEN ARRAY[b.author] ELSE ARRAY[]::text[] END)
|| COALESCE(sm.authors, ARRAY[]::text[])
)
) as authors
FROM books b
LEFT JOIN series_metadata sm
ON sm.library_id = b.library_id
AND sm.name = COALESCE(NULLIF(b.series, ''), 'unclassified')
"#;
let rows = if is_full_sync {
info!("[MEILI] Performing full sync");
sqlx::query(books_query)
.fetch_all(pool)
.await?
} else {
let since = last_sync.unwrap();
info!("[MEILI] Performing incremental sync since {}", since);
// Include books that changed OR whose series_metadata changed
sqlx::query(&format!(
"{books_query} WHERE b.updated_at > $1 OR sm.updated_at > $1"
))
.bind(since)
.fetch_all(pool)
.await?
};
if rows.is_empty() && !is_full_sync {
info!("[MEILI] No changes to sync");
// Still update the timestamp
sqlx::query(
"INSERT INTO sync_metadata (id, last_meili_sync) VALUES (1, NOW()) ON CONFLICT (id) DO UPDATE SET last_meili_sync = NOW()"
)
.execute(pool)
.await?;
return Ok(());
}
let docs: Vec<SearchDoc> = rows
.into_iter()
.map(|row| SearchDoc {
id: row.get::<Uuid, _>("id").to_string(),
library_id: row.get::<Uuid, _>("library_id").to_string(),
kind: row.get("kind"),
title: row.get("title"),
authors: row.get::<Vec<String>, _>("authors"),
series: row.get("series"),
volume: row.get("volume"),
language: row.get("language"),
})
.collect();
let doc_count = docs.len();
// Send documents to MeiliSearch in batches of 1000
const MEILI_BATCH_SIZE: usize = 1000;
for (i, chunk) in docs.chunks(MEILI_BATCH_SIZE).enumerate() {
let batch_num = i + 1;
info!("[MEILI] Sending batch {}/{} ({} docs)", batch_num, doc_count.div_ceil(MEILI_BATCH_SIZE), chunk.len());
let response = client
.post(format!("{base}/indexes/books/documents"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&chunk)
.send()
.await
.context("failed to send docs to meili")?;
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
return Err(anyhow::anyhow!("MeiliSearch error {}: {}", status, body));
}
}
// Clean up stale documents: remove from Meilisearch any IDs that no longer exist in DB.
// Runs on every sync — the cost is minimal (single fetch of IDs only).
{
let db_ids: Vec<String> = sqlx::query_scalar("SELECT id::text FROM books")
.fetch_all(pool)
.await?;
// Fetch all document IDs from Meilisearch (paginated to handle large collections)
let mut meili_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut offset: usize = 0;
const PAGE_SIZE: usize = 10000;
loop {
let response = client
.post(format!("{base}/indexes/books/documents/fetch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!({
"fields": ["id"],
"limit": PAGE_SIZE,
"offset": offset
}))
.send()
.await;
let response = match response {
Ok(r) if r.status().is_success() => r,
_ => break,
};
let payload: serde_json::Value = match response.json().await {
Ok(v) => v,
Err(_) => break,
};
let results = payload.get("results")
.and_then(|v| v.as_array())
.cloned()
.unwrap_or_default();
let page_count = results.len();
for doc in results {
if let Some(id) = doc.get("id").and_then(|v| v.as_str()) {
meili_ids.insert(id.to_string());
}
}
if page_count < PAGE_SIZE {
break; // Last page
}
offset += PAGE_SIZE;
}
let db_ids_set: std::collections::HashSet<String> = db_ids.into_iter().collect();
let to_delete: Vec<String> = meili_ids.difference(&db_ids_set).cloned().collect();
if !to_delete.is_empty() {
info!("[MEILI] Deleting {} stale documents", to_delete.len());
let _ = client
.post(format!("{base}/indexes/books/documents/delete-batch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&to_delete)
.send()
.await;
}
}
// Update last sync timestamp
sqlx::query(
"INSERT INTO sync_metadata (id, last_meili_sync) VALUES (1, NOW()) ON CONFLICT (id) DO UPDATE SET last_meili_sync = NOW()"
)
.execute(pool)
.await?;
info!("[MEILI] Sync completed: {} documents indexed", doc_count);
Ok(())
}