feat: add external metadata sync system with multiple providers

Add a complete metadata synchronization system allowing users to search
and sync series/book metadata from external providers (Google Books,
Open Library, ComicVine, AniList, Bédéthèque). Each library can use a
different provider. Matches require manual approval, and detailed sync
reports show what was updated or skipped (locked fields are protected
from being overwritten).

Key changes:
- DB migrations: external_metadata_links, external_book_metadata tables,
  library metadata_provider column, locked_fields, total_volumes, book
  metadata fields (summary, isbn, publish_date)
- Rust API: MetadataProvider trait + 5 provider implementations,
  7 metadata endpoints (search, match, approve, reject, links, missing,
  delete), sync report system, provider language preference support
- Backoffice: MetadataSearchModal, ProviderIcon, SafeHtml components,
  settings UI for provider/language config, enriched book detail page,
  edit forms with locked fields support, API proxy routes
- OpenAPI/Swagger documentation for all new endpoints and schemas

Closes #3

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-18 14:59:24 +01:00
parent a99bfb5a91
commit c9ccf5cd90
42 changed files with 5492 additions and 198 deletions

View File

@@ -0,0 +1,322 @@
use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate};
/// Metadata provider backed by the AniList GraphQL API (graphql.anilist.co).
pub struct AniListProvider;
impl MetadataProvider for AniListProvider {
    /// Stable provider identifier used in library configuration.
    fn name(&self) -> &str {
        "anilist"
    }

    /// Searches AniList for manga series matching `query`.
    ///
    /// Copies the borrowed arguments into owned values so the returned
    /// boxed future is self-contained apart from `&self`.
    fn search_series(
        &self,
        query: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
    > {
        let owned_query = query.to_owned();
        let owned_config = config.clone();
        Box::pin(async move { search_series_impl(&owned_query, &owned_config).await })
    }

    /// Fetches the book (volume) candidates for an AniList series id.
    fn get_series_books(
        &self,
        external_id: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
    > {
        let owned_id = external_id.to_owned();
        let owned_config = config.clone();
        Box::pin(async move { get_series_books_impl(&owned_id, &owned_config).await })
    }
}
/// GraphQL query for full-text series search: first 20 MANGA entries in
/// best-match order, selecting titles, description, cover, start year,
/// volume/chapter counts, staff roles, site URL and genres.
const SEARCH_QUERY: &str = r#"
query ($search: String) {
Page(perPage: 20) {
media(search: $search, type: MANGA, sort: SEARCH_MATCH) {
id
title { romaji english native }
description(asHtml: false)
coverImage { large medium }
startDate { year }
volumes
chapters
staff { edges { node { name { full } } role } }
siteUrl
genres
}
}
}
"#;
/// GraphQL query for a single MANGA entry by numeric id; selects the same
/// field set as the search query so both paths parse identically.
const DETAIL_QUERY: &str = r#"
query ($id: Int) {
Media(id: $id, type: MANGA) {
id
title { romaji english native }
description(asHtml: false)
coverImage { large medium }
startDate { year }
volumes
chapters
staff { edges { node { name { full } } role } }
siteUrl
genres
}
}
"#;
/// Posts a GraphQL query with its variables to the AniList endpoint and
/// returns the parsed JSON body.
///
/// All failure modes are flattened to human-readable `String` errors:
/// transport failures, non-success HTTP status (including the response
/// text), and JSON deserialization failures.
async fn graphql_request(
    client: &reqwest::Client,
    query: &str,
    variables: serde_json::Value,
) -> Result<serde_json::Value, String> {
    let payload = serde_json::json!({
        "query": query,
        "variables": variables,
    });
    let response = client
        .post("https://graphql.anilist.co")
        .header("Content-Type", "application/json")
        .json(&payload)
        .send()
        .await
        .map_err(|e| format!("AniList request failed: {e}"))?;
    let status = response.status();
    if !status.is_success() {
        // Best-effort body capture for diagnostics; an unreadable body
        // degrades to an empty string rather than masking the status error.
        let text = response.text().await.unwrap_or_default();
        return Err(format!("AniList returned {status}: {text}"));
    }
    response
        .json()
        .await
        .map_err(|e| format!("Failed to parse AniList response: {e}"))
}
/// Searches AniList for manga series matching `query`.
///
/// Returns up to 10 `SeriesCandidate`s sorted by descending confidence.
/// A response without a `data.Page.media` array yields an empty list
/// rather than an error.
async fn search_series_impl(
    query: &str,
    _config: &ProviderConfig,
) -> Result<Vec<SeriesCandidate>, String> {
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(15))
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))?;
    let data = graphql_request(
        &client,
        SEARCH_QUERY,
        serde_json::json!({ "search": query }),
    )
    .await?;
    // Missing data/Page/media is treated as "no results", not a failure.
    let media = match data
        .get("data")
        .and_then(|d| d.get("Page"))
        .and_then(|p| p.get("media"))
        .and_then(|m| m.as_array())
    {
        Some(media) => media,
        None => return Ok(vec![]),
    };
    let query_lower = query.to_lowercase();
    let mut candidates: Vec<SeriesCandidate> = media
        .iter()
        .filter_map(|m| {
            // `as_i64()` already returns i64 — the original `as i64` re-cast
            // was redundant (clippy::unnecessary_cast).
            let id = m.get("id").and_then(|id| id.as_i64())?;
            let title_obj = m.get("title")?;
            // Prefer the English title, fall back to romaji; entries with
            // neither are skipped entirely.
            let title = title_obj
                .get("english")
                .and_then(|t| t.as_str())
                .or_else(|| title_obj.get("romaji").and_then(|t| t.as_str()))?
                .to_string();
            // NOTE(review): this replaces the two-character sequence `\n`
            // (backslash + n), not an already-decoded newline — serde has
            // decoded JSON escapes by this point. Confirm AniList payloads
            // actually contain literal backslash-n text.
            let description = m
                .get("description")
                .and_then(|d| d.as_str())
                .map(|d| d.replace("\\n", "\n").trim().to_string())
                .filter(|d| !d.is_empty());
            let cover_url = m
                .get("coverImage")
                .and_then(|ci| ci.get("large").or_else(|| ci.get("medium")))
                .and_then(|u| u.as_str())
                .map(String::from);
            let start_year = m
                .get("startDate")
                .and_then(|sd| sd.get("year"))
                .and_then(|y| y.as_i64())
                .map(|y| y as i32);
            let volumes = m
                .get("volumes")
                .and_then(|v| v.as_i64())
                .map(|v| v as i32);
            let site_url = m
                .get("siteUrl")
                .and_then(|u| u.as_str())
                .map(String::from);
            let authors = extract_authors(m);
            let confidence = compute_confidence(&title, &query_lower);
            Some(SeriesCandidate {
                external_id: id.to_string(),
                title,
                authors,
                description,
                publishers: vec![],
                start_year,
                total_volumes: volumes,
                cover_url,
                external_url: site_url,
                confidence,
                metadata_json: serde_json::json!({}),
            })
        })
        .collect();
    // Best matches first; NaN-safe via the Equal fallback.
    candidates.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    candidates.truncate(10);
    Ok(candidates)
}
/// Fetches one AniList media entry and expands it into per-volume
/// `BookCandidate`s.
///
/// AniList carries no per-volume data: when the total volume count is
/// known, one synthetic candidate is generated per volume (description and
/// cover attached to volume 1 only); otherwise a single entry represents
/// the whole series. An unparsable id or a missing `data.Media` node yields
/// an error / empty list respectively.
async fn get_series_books_impl(
    external_id: &str,
    _config: &ProviderConfig,
) -> Result<Vec<BookCandidate>, String> {
    let id: i64 = external_id
        .parse()
        .map_err(|_| "invalid AniList ID".to_string())?;
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(15))
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))?;
    let data = graphql_request(
        &client,
        DETAIL_QUERY,
        serde_json::json!({ "id": id }),
    )
    .await?;
    let media = match data.get("data").and_then(|d| d.get("Media")) {
        Some(m) => m,
        None => return Ok(vec![]),
    };
    let title_obj = media.get("title").cloned().unwrap_or(serde_json::json!({}));
    let title = title_obj
        .get("english")
        .and_then(|t| t.as_str())
        .or_else(|| title_obj.get("romaji").and_then(|t| t.as_str()))
        .unwrap_or("")
        .to_string();
    let volumes = media
        .get("volumes")
        .and_then(|v| v.as_i64())
        .map(|v| v as i32);
    let cover_url = media
        .get("coverImage")
        .and_then(|ci| ci.get("large").or_else(|| ci.get("medium")))
        .and_then(|u| u.as_str())
        .map(String::from);
    // Fix: drop empty descriptions (consistent with search_series_impl) so a
    // book never carries a Some("") summary.
    // NOTE(review): the replace targets a literal backslash-n sequence, not a
    // decoded newline — confirm against real AniList payloads.
    let description = media
        .get("description")
        .and_then(|d| d.as_str())
        .map(|d| d.replace("\\n", "\n").trim().to_string())
        .filter(|d| !d.is_empty());
    let authors = extract_authors(media);
    let mut books = Vec::new();
    if let Some(total) = volumes {
        // One synthetic candidate per volume, numbered from 1.
        for vol in 1..=total {
            books.push(BookCandidate {
                external_book_id: format!("{}-vol-{}", external_id, vol),
                title: format!("{} Vol. {}", title, vol),
                volume_number: Some(vol),
                authors: authors.clone(),
                isbn: None,
                summary: if vol == 1 { description.clone() } else { None },
                cover_url: if vol == 1 { cover_url.clone() } else { None },
                page_count: None,
                language: Some("ja".to_string()),
                publish_date: None,
                metadata_json: serde_json::json!({}),
            });
        }
    } else {
        // Volume count unknown: single entry for the whole manga.
        books.push(BookCandidate {
            external_book_id: external_id.to_string(),
            title,
            volume_number: Some(1),
            authors,
            isbn: None,
            summary: description,
            cover_url,
            page_count: None,
            language: Some("ja".to_string()),
            publish_date: None,
            metadata_json: serde_json::json!({}),
        });
    }
    Ok(books)
}
/// Collects distinct author names from an AniList media node's staff edges.
///
/// Keeps staff whose role string mentions "story", "art" or "original"
/// (case-insensitive), preserving first-appearance order and skipping
/// duplicates. Returns an empty vec when the staff structure is absent.
fn extract_authors(media: &serde_json::Value) -> Vec<String> {
    let mut authors: Vec<String> = Vec::new();
    let edges = media
        .get("staff")
        .and_then(|s| s.get("edges"))
        .and_then(|e| e.as_array());
    if let Some(edges) = edges {
        for edge in edges {
            let role_lower = edge
                .get("role")
                .and_then(|r| r.as_str())
                .unwrap_or("")
                .to_lowercase();
            let is_author_role = role_lower.contains("story")
                || role_lower.contains("art")
                || role_lower.contains("original");
            if !is_author_role {
                continue;
            }
            if let Some(name) = edge
                .get("node")
                .and_then(|n| n.get("name"))
                .and_then(|n| n.get("full"))
                .and_then(|f| f.as_str())
            {
                // Fix: compare borrowed strings instead of allocating a fresh
                // String per membership check (clippy::cmp_owned).
                if !authors.iter().any(|a| a == name) {
                    authors.push(name.to_string());
                }
            }
        }
    }
    authors
}
/// Scores how well `title` matches the (already-lowercased) search `query`.
///
/// Returns 1.0 for an exact case-insensitive match, 0.8 when one string is
/// a prefix of the other, 0.7 for containment, and otherwise a crude
/// character-overlap ratio clamped to [0.1, 0.6].
fn compute_confidence(title: &str, query: &str) -> f32 {
    let title_lower = title.to_lowercase();
    if title_lower == query {
        1.0
    } else if title_lower.starts_with(query) || query.starts_with(&title_lower) {
        0.8
    } else if title_lower.contains(query) || query.contains(&title_lower) {
        0.7
    } else {
        // Fallback: fraction of query characters that occur anywhere in the
        // title. Fix: count the denominator in chars as well — the original
        // divided a char count by a byte `len()`, deflating scores for
        // multi-byte (non-ASCII) titles and queries.
        let common: usize = query.chars().filter(|c| title_lower.contains(*c)).count();
        let max_len = query
            .chars()
            .count()
            .max(title_lower.chars().count())
            .max(1);
        (common as f32 / max_len as f32).clamp(0.1, 0.6)
    }
}