fix(parsers,api,indexer,backoffice): corriger CBZ Unicode extra fields, centraliser extraction, nettoyer Meili, fixer header

- Parsers: raw ZIP reader (flate2) contournant la validation CRC32 des
  Unicode extra fields (0x7075) qui bloquait certains CBZ
- Parsers: nouvelle API publique extract_page() pour extraire une page
  par index depuis CBZ/CBR/PDF avec fallbacks automatiques
- API: suppression du code d'extraction dupliqué, délégation à parsers::extract_page()
- API: retrait des dépendances directes zip/unrar/pdfium-render/natord
- Indexer: nettoyage Meili systématique à chaque sync (au lieu de ~10%)
  avec pagination pour supporter les grosses collections — corrige les
  doublons dans la recherche
- Indexer: retrait de la dépendance rand (plus utilisée)
- Backoffice: popin jobs rendue via createPortal avec positionnement
  dynamique — corrige le débordement desktop et le header cassé en mobile

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-13 13:26:14 +01:00
parent 5db2a7501b
commit 7cca7e40c2
8 changed files with 692 additions and 605 deletions

View File

@@ -15,6 +15,7 @@ futures = "0.3"
image.workspace = true
lru.workspace = true
stripstream-core = { path = "../../crates/core" }
parsers = { path = "../../crates/parsers" }
rand.workspace = true
tokio-stream = "0.1"
reqwest.workspace = true
@@ -28,10 +29,6 @@ tower-http = { version = "0.6", features = ["cors"] }
tracing.workspace = true
tracing-subscriber.workspace = true
uuid.workspace = true
natord.workspace = true
pdfium-render.workspace = true
unrar.workspace = true
zip = { version = "8", default-features = false, features = ["deflate"] }
utoipa.workspace = true
utoipa-swagger-ui = { workspace = true, features = ["axum"] }
webp.workspace = true

View File

@@ -1,5 +1,5 @@
use std::{
io::{Read, Write},
io::Write,
path::{Path, PathBuf},
sync::{atomic::Ordering, Arc},
time::Duration,
@@ -351,241 +351,28 @@ fn render_page(
width: u32,
filter: image::imageops::FilterType,
) -> Result<Vec<u8>, ApiError> {
let page_bytes = match input_format {
"cbz" => extract_cbz_page(abs_path, page_number, true)?,
"cbr" => extract_cbr_page(abs_path, page_number, true)?,
"pdf" => render_pdf_page(abs_path, page_number, width)?,
let format = match input_format {
"cbz" => parsers::BookFormat::Cbz,
"cbr" => parsers::BookFormat::Cbr,
"pdf" => parsers::BookFormat::Pdf,
_ => return Err(ApiError::bad_request("unsupported source format")),
};
let pdf_render_width = if width > 0 { width } else { 1200 };
let page_bytes = parsers::extract_page(
std::path::Path::new(abs_path),
format,
page_number,
pdf_render_width,
)
.map_err(|e| {
error!("Failed to extract page {} from {}: {}", page_number, abs_path, e);
ApiError::internal(format!("page extraction failed: {e}"))
})?;
transcode_image(&page_bytes, out_format, quality, width, filter)
}
/// Extracts a single page image from a CBZ (ZIP) archive.
///
/// `page_number` is 1-based. Image entries are ordered with natural sort
/// (natord) so `page2.jpg` precedes `page10.jpg`. When `allow_fallback`
/// is true and the central directory cannot be read, the file is first
/// retried as a RAR (mislabelled .cbz), then via a streaming ZIP read
/// that ignores the central directory.
fn extract_cbz_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>, ApiError> {
    debug!("Opening CBZ archive: {}", abs_path);
    let file = std::fs::File::open(abs_path).map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            ApiError::not_found("book file not accessible")
        } else {
            error!("Cannot open CBZ file {}: {}", abs_path, e);
            ApiError::internal(format!("cannot open cbz: {e}"))
        }
    })?;
    let mut archive = match zip::ZipArchive::new(file) {
        Ok(a) => a,
        Err(zip_err) => {
            if allow_fallback {
                // Try RAR fallback (file might be a RAR with .cbz extension)
                if let Ok(data) = extract_cbr_page(abs_path, page_number, false) {
                    return Ok(data);
                }
                // Streaming fallback: read local file headers without central directory
                warn!("CBZ central dir failed for {}, trying streaming: {}", abs_path, zip_err);
                return extract_cbz_page_streaming(abs_path, page_number);
            }
            error!("Invalid CBZ archive {}: {}", abs_path, zip_err);
            return Err(ApiError::internal(format!("invalid cbz: {zip_err}")));
        }
    };
    let mut image_names: Vec<String> = Vec::new();
    for i in 0..archive.len() {
        let entry = match archive.by_index(i) {
            Ok(e) => e,
            Err(e) => {
                // Tolerate individual corrupt entries; the rest may be fine.
                warn!("Skipping corrupted CBZ entry {} in {}: {}", i, abs_path, e);
                continue;
            }
        };
        let name = entry.name().to_ascii_lowercase();
        if is_image_name(&name) {
            image_names.push(entry.name().to_string());
        }
    }
    image_names.sort_by(|a, b| natord::compare(a, b));
    debug!("Found {} images in CBZ {}", image_names.len(), abs_path);
    // Pages are 1-based: checked_sub rejects page_number == 0 instead of
    // underflowing (panic in debug, huge wrapped index in release).
    let index = (page_number as usize)
        .checked_sub(1)
        .ok_or_else(|| ApiError::not_found("page out of range"))?;
    let selected = image_names.get(index).ok_or_else(|| {
        error!("Page {} out of range in {} (total: {})", page_number, abs_path, image_names.len());
        ApiError::not_found("page out of range")
    })?;
    debug!("Extracting page {} ({}) from {}", page_number, selected, abs_path);
    let mut entry = archive.by_name(selected).map_err(|e| {
        error!("Failed to read CBZ page {} from {}: {}", selected, abs_path, e);
        ApiError::internal(format!("cbz page read failed: {e}"))
    })?;
    let mut buf = Vec::new();
    entry.read_to_end(&mut buf).map_err(|e| {
        error!("Failed to load CBZ page {} from {}: {}", selected, abs_path, e);
        ApiError::internal(format!("cbz page load failed: {e}"))
    })?;
    Ok(buf)
}
/// Fallback CBZ extraction that reads local file headers sequentially,
/// for archives whose central directory is missing or rejected by the
/// zip crate.
///
/// Two streaming passes: the first collects and naturally sorts image
/// names, the second pulls out the selected (1-based) page.
fn extract_cbz_page_streaming(abs_path: &str, page_number: u32) -> Result<Vec<u8>, ApiError> {
    let file = std::fs::File::open(abs_path).map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            ApiError::not_found("book file not accessible")
        } else {
            ApiError::internal(format!("cannot open cbz: {e}"))
        }
    })?;
    let mut reader = std::io::BufReader::new(file);
    let mut image_names: Vec<String> = Vec::new();
    loop {
        match zip::read::read_zipfile_from_stream(&mut reader) {
            Ok(Some(mut entry)) => {
                let name = entry.name().to_string();
                if is_image_name(&name.to_ascii_lowercase()) {
                    image_names.push(name);
                }
                // The entry must be fully consumed before the stream can
                // advance to the next local header.
                std::io::copy(&mut entry, &mut std::io::sink())
                    .map_err(|e| ApiError::internal(format!("cbz stream skip: {e}")))?;
            }
            Ok(None) => break,
            Err(_) => {
                // Tolerate a trailing corrupt entry if images were already found.
                if !image_names.is_empty() {
                    break;
                }
                return Err(ApiError::internal("cbz streaming read failed".to_string()));
            }
        }
    }
    image_names.sort_by(|a, b| natord::compare(a, b));
    // Pages are 1-based: checked_sub rejects page_number == 0 instead of
    // underflowing (panic in debug, huge wrapped index in release).
    let index = (page_number as usize)
        .checked_sub(1)
        .ok_or_else(|| ApiError::not_found("page out of range"))?;
    let target = image_names
        .get(index)
        .ok_or_else(|| ApiError::not_found("page out of range"))?
        .clone();
    // Second pass: extract the target page
    let file2 = std::fs::File::open(abs_path)
        .map_err(|e| ApiError::internal(format!("cannot reopen cbz: {e}")))?;
    let mut reader2 = std::io::BufReader::new(file2);
    loop {
        match zip::read::read_zipfile_from_stream(&mut reader2) {
            Ok(Some(mut entry)) => {
                if entry.name() == target {
                    let mut buf = Vec::new();
                    entry
                        .read_to_end(&mut buf)
                        .map_err(|e| ApiError::internal(format!("cbz stream read: {e}")))?;
                    return Ok(buf);
                }
                std::io::copy(&mut entry, &mut std::io::sink())
                    .map_err(|e| ApiError::internal(format!("cbz stream skip: {e}")))?;
            }
            Ok(None) => break,
            Err(_) => break,
        }
    }
    Err(ApiError::not_found("page not found in archive"))
}
/// Extracts a single page image from a CBR (RAR) archive.
///
/// `page_number` is 1-based; image entries are ordered with natural sort
/// (natord). When `allow_fallback` is true and the file cannot be opened
/// as RAR, it is retried as a ZIP (mislabelled .cbr). Extraction is two
/// passes because the unrar API separates listing from processing.
fn extract_cbr_page(abs_path: &str, page_number: u32, allow_fallback: bool) -> Result<Vec<u8>, ApiError> {
    info!("Opening CBR archive: {}", abs_path);
    // Pages are 1-based: checked_sub rejects page_number == 0 instead of
    // underflowing (panic in debug, huge wrapped index in release).
    let index = (page_number as usize)
        .checked_sub(1)
        .ok_or_else(|| ApiError::not_found("page out of range"))?;
    // Pass 1: list all image names (in-process, no subprocess)
    let mut image_names: Vec<String> = {
        let archive = match unrar::Archive::new(abs_path).open_for_listing() {
            Ok(a) => a,
            Err(e) => {
                if allow_fallback {
                    warn!("CBR open failed for {}, trying ZIP fallback: {}", abs_path, e);
                    return extract_cbz_page(abs_path, page_number, false);
                }
                return Err(ApiError::internal(format!("unrar listing failed: {}", e)));
            }
        };
        let mut names = Vec::new();
        for entry in archive {
            let entry = entry.map_err(|e| ApiError::internal(format!("unrar entry error: {}", e)))?;
            let name = entry.filename.to_string_lossy().to_string();
            if is_image_name(&name.to_ascii_lowercase()) {
                names.push(name);
            }
        }
        names
    };
    image_names.sort_by(|a, b| natord::compare(a, b));
    let target = image_names
        .get(index)
        .ok_or_else(|| {
            error!("Page {} out of range (total: {})", page_number, image_names.len());
            ApiError::not_found("page out of range")
        })?
        .clone();
    // Pass 2: extract only the target page to memory. The unrar typestate
    // API requires re-binding `archive` after each skip().
    let mut archive = unrar::Archive::new(abs_path)
        .open_for_processing()
        .map_err(|e| ApiError::internal(format!("unrar processing failed: {}", e)))?;
    while let Some(header) = archive
        .read_header()
        .map_err(|e| ApiError::internal(format!("unrar read header: {}", e)))?
    {
        let entry_name = header.entry().filename.to_string_lossy().to_string();
        if entry_name == target {
            let (data, _) = header
                .read()
                .map_err(|e| ApiError::internal(format!("unrar read: {}", e)))?;
            info!("Extracted CBR page {} ({} bytes)", page_number, data.len());
            return Ok(data);
        }
        archive = header
            .skip()
            .map_err(|e| ApiError::internal(format!("unrar skip: {}", e)))?;
    }
    Err(ApiError::not_found("page not found in archive"))
}
/// Renders one PDF page (1-based `page_number`) to PNG bytes at the
/// requested pixel `width` (0 falls back to 1200px), using the
/// system-installed Pdfium library.
fn render_pdf_page(abs_path: &str, page_number: u32, width: u32) -> Result<Vec<u8>, ApiError> {
    use pdfium_render::prelude::*;
    debug!("Rendering PDF page {} of {} (width: {})", page_number, abs_path, width);
    let pdfium = Pdfium::new(
        Pdfium::bind_to_system_library()
            .map_err(|e| ApiError::internal(format!("pdfium not available: {:?}", e)))?,
    );
    let document = pdfium
        .load_pdf_from_file(abs_path, None)
        .map_err(|e| ApiError::internal(format!("pdf load failed: {:?}", e)))?;
    // checked_sub rejects page 0 (underflow) and try_from rejects indexes
    // beyond u16::MAX — the old `(page_number - 1) as u16` silently
    // truncated large values and could have rendered the wrong page.
    let page_index: u16 = page_number
        .checked_sub(1)
        .and_then(|i| u16::try_from(i).ok())
        .ok_or_else(|| ApiError::not_found("page out of range"))?;
    let page = document
        .pages()
        .get(page_index)
        .map_err(|_| ApiError::not_found("page out of range"))?;
    let render_width = if width > 0 { width as i32 } else { 1200 };
    let config = PdfRenderConfig::new().set_target_width(render_width);
    let bitmap = page
        .render_with_config(&config)
        .map_err(|e| ApiError::internal(format!("pdf render failed: {:?}", e)))?;
    let image = bitmap.as_image();
    let mut buf = std::io::Cursor::new(Vec::new());
    image
        .write_to(&mut buf, image::ImageFormat::Png)
        .map_err(|e| ApiError::internal(format!("png encode failed: {}", e)))?;
    debug!("Rendered PDF page {} ({} bytes)", page_number, buf.get_ref().len());
    Ok(buf.into_inner())
}
fn transcode_image(input: &[u8], out_format: &OutputFormat, quality: u8, width: u32, filter: image::imageops::FilterType) -> Result<Vec<u8>, ApiError> {
debug!("Transcoding image: {} bytes, format: {:?}, quality: {}, width: {}", input.len(), out_format, quality, width);
@@ -650,20 +437,3 @@ fn format_matches(source: &ImageFormat, target: &OutputFormat) -> bool {
)
}
/// Returns true when `name` looks like a raster image file, judged by
/// its file extension (case-insensitive). Used to filter archive entries.
///
/// Splitting on the last '.' and comparing ASCII-case-insensitively
/// avoids allocating a lowercased copy of the whole name per call.
fn is_image_name(name: &str) -> bool {
    const IMAGE_EXTS: [&str; 9] = [
        "jpg", "jpeg", "png", "webp", "avif", "gif", "tif", "tiff", "bmp",
    ];
    match name.rsplit_once('.') {
        Some((_, ext)) => IMAGE_EXTS.iter().any(|e| ext.eq_ignore_ascii_case(e)),
        None => false,
    }
}
/// Reports whether `value` denotes an absolute filesystem path.
#[allow(dead_code)]
fn _is_absolute_path(value: &str) -> bool {
    let candidate = Path::new(value);
    candidate.is_absolute()
}

View File

@@ -1,8 +1,8 @@
"use client";
import { useEffect, useState, useRef } from "react";
import { useEffect, useState, useRef, useCallback } from "react";
import { createPortal } from "react-dom";
import Link from "next/link";
import { Button } from "./ui/Button";
import { Badge } from "./ui/Badge";
import { ProgressBar } from "./ui/ProgressBar";
@@ -46,7 +46,9 @@ const ChevronIcon = ({ className }: { className?: string }) => (
export function JobsIndicator() {
const [activeJobs, setActiveJobs] = useState<Job[]>([]);
const [isOpen, setIsOpen] = useState(false);
const dropdownRef = useRef<HTMLDivElement>(null);
const buttonRef = useRef<HTMLButtonElement>(null);
const popinRef = useRef<HTMLDivElement>(null);
const [popinStyle, setPopinStyle] = useState<React.CSSProperties>({});
useEffect(() => {
const fetchActiveJobs = async () => {
@@ -66,38 +68,87 @@ export function JobsIndicator() {
return () => clearInterval(interval);
}, []);
// Close dropdown when clicking outside
// Position the popin relative to the button
const updatePosition = useCallback(() => {
if (!buttonRef.current) return;
const rect = buttonRef.current.getBoundingClientRect();
const isMobile = window.innerWidth < 640;
if (isMobile) {
setPopinStyle({
position: "fixed",
top: `${rect.bottom + 8}px`,
left: "12px",
right: "12px",
});
} else {
// Align right edge of popin with right edge of button
const rightEdge = window.innerWidth - rect.right;
setPopinStyle({
position: "fixed",
top: `${rect.bottom + 8}px`,
right: `${Math.max(rightEdge, 12)}px`,
width: "384px", // w-96
});
}
}, []);
useEffect(() => {
if (!isOpen) return;
updatePosition();
window.addEventListener("resize", updatePosition);
window.addEventListener("scroll", updatePosition, true);
return () => {
window.removeEventListener("resize", updatePosition);
window.removeEventListener("scroll", updatePosition, true);
};
}, [isOpen, updatePosition]);
// Close when clicking outside
useEffect(() => {
if (!isOpen) return;
const handleClickOutside = (event: MouseEvent) => {
if (dropdownRef.current && !dropdownRef.current.contains(event.target as Node)) {
const target = event.target as Node;
if (
buttonRef.current && !buttonRef.current.contains(target) &&
popinRef.current && !popinRef.current.contains(target)
) {
setIsOpen(false);
}
};
document.addEventListener("mousedown", handleClickOutside);
return () => document.removeEventListener("mousedown", handleClickOutside);
}, []);
}, [isOpen]);
// Close on Escape
useEffect(() => {
if (!isOpen) return;
const handleEsc = (e: KeyboardEvent) => {
if (e.key === "Escape") setIsOpen(false);
};
document.addEventListener("keydown", handleEsc);
return () => document.removeEventListener("keydown", handleEsc);
}, [isOpen]);
const runningJobs = activeJobs.filter(j => j.status === "running" || j.status === "extracting_pages" || j.status === "generating_thumbnails");
const pendingJobs = activeJobs.filter(j => j.status === "pending");
const totalCount = activeJobs.length;
// Calculate overall progress
const totalProgress = runningJobs.reduce((acc, job) => {
return acc + (job.progress_percent || 0);
}, 0) / (runningJobs.length || 1);
if (totalCount === 0) {
return (
<Link
href="/jobs"
<Link
href="/jobs"
className="
flex items-center justify-center
w-9 h-9
rounded-md
text-muted-foreground
hover:text-foreground
hover:bg-accent
flex items-center justify-center
w-9 h-9
rounded-md
text-muted-foreground
hover:text-foreground
hover:bg-accent
transition-colors duration-200
"
title="View all jobs"
@@ -107,17 +158,143 @@ export function JobsIndicator() {
);
}
const popin = isOpen && (
<>
{/* Mobile backdrop */}
<div
className="fixed inset-0 z-[80] sm:hidden bg-background/60 backdrop-blur-sm"
onClick={() => setIsOpen(false)}
aria-hidden="true"
/>
{/* Popin */}
<div
ref={popinRef}
style={popinStyle}
className="
z-[90]
bg-popover/95 backdrop-blur-md
rounded-xl
shadow-elevation-2
border border-border/60
overflow-hidden
animate-fade-in
"
>
{/* Header */}
<div className="flex items-center justify-between px-4 py-3 border-b border-border/60 bg-muted/50">
<div className="flex items-center gap-3">
<span className="text-xl">📊</span>
<div>
<h3 className="font-semibold text-foreground">Active Jobs</h3>
<p className="text-xs text-muted-foreground">
{runningJobs.length > 0
? `${runningJobs.length} running, ${pendingJobs.length} pending`
: `${pendingJobs.length} job${pendingJobs.length !== 1 ? 's' : ''} pending`
}
</p>
</div>
</div>
<Link
href="/jobs"
className="text-sm font-medium text-primary hover:text-primary/80 transition-colors"
onClick={() => setIsOpen(false)}
>
View All
</Link>
</div>
{/* Overall progress bar if running */}
{runningJobs.length > 0 && (
<div className="px-4 py-3 border-b border-border/60">
<div className="flex items-center justify-between text-sm mb-2">
<span className="text-muted-foreground">Overall Progress</span>
<span className="font-semibold text-foreground">{Math.round(totalProgress)}%</span>
</div>
<ProgressBar value={totalProgress} size="sm" variant="success" />
</div>
)}
{/* Job List */}
<div className="max-h-80 overflow-y-auto scrollbar-hide">
{activeJobs.length === 0 ? (
<div className="flex flex-col items-center justify-center py-8 text-muted-foreground">
<span className="text-4xl mb-2"></span>
<p>No active jobs</p>
</div>
) : (
<ul className="divide-y divide-border/60">
{activeJobs.map(job => (
<li key={job.id}>
<Link
href={`/jobs/${job.id}`}
className="block px-4 py-3 hover:bg-accent/50 transition-colors duration-200"
onClick={() => setIsOpen(false)}
>
<div className="flex items-start gap-3">
<div className="mt-0.5">
{(job.status === "running" || job.status === "extracting_pages" || job.status === "generating_thumbnails") && <span className="animate-spin inline-block"></span>}
{job.status === "pending" && <span></span>}
</div>
<div className="flex-1 min-w-0">
<div className="flex items-center gap-2 mb-1">
<code className="text-xs px-1.5 py-0.5 bg-muted rounded font-mono">{job.id.slice(0, 8)}</code>
<Badge variant={job.type === 'rebuild' ? 'primary' : job.type === 'thumbnail_regenerate' ? 'warning' : 'secondary'} className="text-[10px]">
{job.type === 'thumbnail_rebuild' ? 'Thumbnails' : job.type === 'thumbnail_regenerate' ? 'Regenerate' : job.type}
</Badge>
</div>
{(job.status === "running" || job.status === "extracting_pages" || job.status === "generating_thumbnails") && job.progress_percent != null && (
<div className="flex items-center gap-2 mt-2">
<MiniProgressBar value={job.progress_percent} />
<span className="text-xs font-medium text-muted-foreground">{job.progress_percent}%</span>
</div>
)}
{job.current_file && (
<p className="text-xs text-muted-foreground mt-1.5 truncate" title={job.current_file}>
📄 {job.current_file}
</p>
)}
{job.stats_json && (
<div className="flex items-center gap-3 mt-1.5 text-xs text-muted-foreground">
<span> {job.stats_json.indexed_files}</span>
{job.stats_json.errors > 0 && (
<span className="text-destructive"> {job.stats_json.errors}</span>
)}
</div>
)}
</div>
</div>
</Link>
</li>
))}
</ul>
)}
</div>
{/* Footer */}
<div className="px-4 py-2 border-t border-border/60 bg-muted/50">
<p className="text-xs text-muted-foreground text-center">Auto-refreshing every 2s</p>
</div>
</div>
</>
);
return (
<div className="relative" ref={dropdownRef}>
<button
<>
<button
ref={buttonRef}
className={`
flex items-center gap-2
px-3 py-2
rounded-md
font-medium text-sm
flex items-center gap-1.5
p-2 sm:px-3 sm:py-2
rounded-md
font-medium text-sm
transition-all duration-200
${runningJobs.length > 0
? 'bg-success/10 text-success hover:bg-success/20'
${runningJobs.length > 0
? 'bg-success/10 text-success hover:bg-success/20'
: 'bg-warning/10 text-warning hover:bg-warning/20'
}
${isOpen ? 'ring-2 ring-ring ring-offset-2 ring-offset-background' : ''}
@@ -131,146 +308,23 @@ export function JobsIndicator() {
<SpinnerIcon className="w-4 h-4" />
</div>
)}
{/* Icon */}
<JobsIcon className="w-4 h-4" />
{/* Badge with count */}
<span className="flex items-center justify-center min-w-5 h-5 px-1.5 text-xs font-bold bg-current rounded-full">
<span className="text-background">{totalCount > 99 ? "99+" : totalCount}</span>
</span>
{/* Chevron */}
<ChevronIcon
className={`w-4 h-4 transition-transform duration-200 ${isOpen ? 'rotate-180' : ''}`}
{/* Chevron - hidden on small screens */}
<ChevronIcon
className={`w-4 h-4 hidden sm:block transition-transform duration-200 ${isOpen ? 'rotate-180' : ''}`}
/>
</button>
{/* Backdrop mobile */}
{isOpen && (
<div
className="fixed inset-0 z-40 sm:hidden bg-background/60 backdrop-blur-sm"
onClick={() => setIsOpen(false)}
aria-hidden="true"
/>
)}
{/* Popin/Dropdown with glassmorphism */}
{isOpen && (
<div className="
fixed sm:absolute
inset-x-3 sm:inset-x-auto
top-[4.5rem] sm:top-full sm:mt-2
sm:w-96
bg-popover/95 backdrop-blur-md
rounded-xl
shadow-elevation-2
border border-border/60
overflow-hidden
z-50
animate-scale-in
">
{/* Header */}
<div className="flex items-center justify-between px-4 py-3 border-b border-border/60 bg-muted/50">
<div className="flex items-center gap-3">
<span className="text-xl">📊</span>
<div>
<h3 className="font-semibold text-foreground">Active Jobs</h3>
<p className="text-xs text-muted-foreground">
{runningJobs.length > 0
? `${runningJobs.length} running, ${pendingJobs.length} pending`
: `${pendingJobs.length} job${pendingJobs.length !== 1 ? 's' : ''} pending`
}
</p>
</div>
</div>
<Link
href="/jobs"
className="text-sm font-medium text-primary hover:text-primary/80 transition-colors"
onClick={() => setIsOpen(false)}
>
View All
</Link>
</div>
{/* Overall progress bar if running */}
{runningJobs.length > 0 && (
<div className="px-4 py-3 border-b border-border/60">
<div className="flex items-center justify-between text-sm mb-2">
<span className="text-muted-foreground">Overall Progress</span>
<span className="font-semibold text-foreground">{Math.round(totalProgress)}%</span>
</div>
<ProgressBar value={totalProgress} size="sm" variant="success" />
</div>
)}
{/* Job List */}
<div className="max-h-80 overflow-y-auto scrollbar-hide">
{activeJobs.length === 0 ? (
<div className="flex flex-col items-center justify-center py-8 text-muted-foreground">
<span className="text-4xl mb-2"></span>
<p>No active jobs</p>
</div>
) : (
<ul className="divide-y divide-border/60">
{activeJobs.map(job => (
<li key={job.id}>
<Link
href={`/jobs/${job.id}`}
className="block px-4 py-3 hover:bg-accent/50 transition-colors duration-200"
onClick={() => setIsOpen(false)}
>
<div className="flex items-start gap-3">
<div className="mt-0.5">
{(job.status === "running" || job.status === "extracting_pages" || job.status === "generating_thumbnails") && <span className="animate-spin inline-block"></span>}
{job.status === "pending" && <span></span>}
</div>
<div className="flex-1 min-w-0">
<div className="flex items-center gap-2 mb-1">
<code className="text-xs px-1.5 py-0.5 bg-muted rounded font-mono">{job.id.slice(0, 8)}</code>
<Badge variant={job.type === 'rebuild' ? 'primary' : job.type === 'thumbnail_regenerate' ? 'warning' : 'secondary'} className="text-[10px]">
{job.type === 'thumbnail_rebuild' ? 'Thumbnails' : job.type === 'thumbnail_regenerate' ? 'Regenerate' : job.type}
</Badge>
</div>
{(job.status === "running" || job.status === "extracting_pages" || job.status === "generating_thumbnails") && job.progress_percent != null && (
<div className="flex items-center gap-2 mt-2">
<MiniProgressBar value={job.progress_percent} />
<span className="text-xs font-medium text-muted-foreground">{job.progress_percent}%</span>
</div>
)}
{job.current_file && (
<p className="text-xs text-muted-foreground mt-1.5 truncate" title={job.current_file}>
📄 {job.current_file}
</p>
)}
{job.stats_json && (
<div className="flex items-center gap-3 mt-1.5 text-xs text-muted-foreground">
<span> {job.stats_json.indexed_files}</span>
{job.stats_json.errors > 0 && (
<span className="text-destructive"> {job.stats_json.errors}</span>
)}
</div>
)}
</div>
</div>
</Link>
</li>
))}
</ul>
)}
</div>
{/* Footer */}
<div className="px-4 py-2 border-t border-border/60 bg-muted/50">
<p className="text-xs text-muted-foreground text-center">Auto-refreshing every 2s</p>
</div>
</div>
)}
</div>
{typeof document !== "undefined" && createPortal(popin, document.body)}
</>
);
}
@@ -278,7 +332,7 @@ export function JobsIndicator() {
function MiniProgressBar({ value }: { value: number }) {
return (
<div className="flex-1 h-1.5 bg-muted rounded-full overflow-hidden">
<div
<div
className="h-full bg-success rounded-full transition-all duration-300"
style={{ width: `${value}%` }}
/>

View File

@@ -15,7 +15,6 @@ image.workspace = true
notify = "8"
num_cpus.workspace = true
parsers = { path = "../../crates/parsers" }
rand.workspace = true
rayon.workspace = true
reqwest.workspace = true
serde.workspace = true

View File

@@ -117,59 +117,69 @@ pub async fn sync_meili(pool: &PgPool, meili_url: &str, meili_master_key: &str)
}
}
// Handle deletions: get all book IDs from DB and remove from MeiliSearch any that don't exist
// This is expensive, so we only do it periodically (every 10 syncs) or on full syncs
if is_full_sync || rand::random::<u8>() < 26 { // ~10% chance
info!("[MEILI] Checking for documents to delete");
// Get all book IDs from database
// Clean up stale documents: remove from Meilisearch any IDs that no longer exist in DB.
// Runs on every sync — the cost is minimal (single fetch of IDs only).
{
let db_ids: Vec<String> = sqlx::query_scalar("SELECT id::text FROM books")
.fetch_all(pool)
.await?;
// Get all document IDs from MeiliSearch (this requires fetching all documents)
// For efficiency, we'll just delete by query for documents that might be stale
// A better approach would be to track deletions in a separate table
// For now, we'll do a simple approach: fetch all Meili docs and compare
// Note: This could be slow for large collections
let meili_response = client
.post(format!("{base}/indexes/books/documents/fetch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!({
"fields": ["id"],
"limit": 100000
}))
.send()
.await;
if let Ok(response) = meili_response {
if response.status().is_success() {
// Meilisearch returns { "results": [...], "offset": ..., "total": ... }
if let Ok(payload) = response.json::<serde_json::Value>().await {
let docs = payload.get("results")
.and_then(|v| v.as_array())
.cloned()
.unwrap_or_default();
let meili_ids: std::collections::HashSet<String> = docs
.into_iter()
.filter_map(|doc| doc.get("id").and_then(|id| id.as_str()).map(|s| s.to_string()))
.collect();
let db_ids_set: std::collections::HashSet<String> = db_ids.into_iter().collect();
let to_delete: Vec<String> = meili_ids.difference(&db_ids_set).cloned().collect();
if !to_delete.is_empty() {
info!("[MEILI] Deleting {} stale documents", to_delete.len());
let _ = client
.post(format!("{base}/indexes/books/documents/delete-batch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&to_delete)
.send()
.await;
}
// Fetch all document IDs from Meilisearch (paginated to handle large collections)
let mut meili_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut offset: usize = 0;
const PAGE_SIZE: usize = 10000;
loop {
let response = client
.post(format!("{base}/indexes/books/documents/fetch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!({
"fields": ["id"],
"limit": PAGE_SIZE,
"offset": offset
}))
.send()
.await;
let response = match response {
Ok(r) if r.status().is_success() => r,
_ => break,
};
let payload: serde_json::Value = match response.json().await {
Ok(v) => v,
Err(_) => break,
};
let results = payload.get("results")
.and_then(|v| v.as_array())
.cloned()
.unwrap_or_default();
let page_count = results.len();
for doc in results {
if let Some(id) = doc.get("id").and_then(|v| v.as_str()) {
meili_ids.insert(id.to_string());
}
}
if page_count < PAGE_SIZE {
break; // Last page
}
offset += PAGE_SIZE;
}
let db_ids_set: std::collections::HashSet<String> = db_ids.into_iter().collect();
let to_delete: Vec<String> = meili_ids.difference(&db_ids_set).cloned().collect();
if !to_delete.is_empty() {
info!("[MEILI] Deleting {} stale documents", to_delete.len());
let _ = client
.post(format!("{base}/indexes/books/documents/delete-batch"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&to_delete)
.send()
.await;
}
}