feat: add external metadata sync system with multiple providers
Add a complete metadata synchronization system allowing users to search and sync series/book metadata from external providers (Google Books, Open Library, ComicVine, AniList, Bédéthèque). Each library can use a different provider. Matching requires manual approval, with detailed sync reports showing what was updated or skipped (locked-fields protection).

Key changes:
- DB migrations: external_metadata_links and external_book_metadata tables, library metadata_provider column, locked_fields, total_volumes, book metadata fields (summary, isbn, publish_date)
- Rust API: MetadataProvider trait + 5 provider implementations, 7 metadata endpoints (search, match, approve, reject, links, missing, delete), sync report system, provider language preference support
- Backoffice: MetadataSearchModal, ProviderIcon, SafeHtml components, settings UI for provider/language config, enriched book detail page, edit forms with locked-fields support, API proxy routes
- OpenAPI/Swagger documentation for all new endpoints and schemas

Closes #3

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
351
apps/api/src/metadata_providers/open_library.rs
Normal file
351
apps/api/src/metadata_providers/open_library.rs
Normal file
@@ -0,0 +1,351 @@
|
||||
use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate};
|
||||
|
||||
/// Metadata provider that talks to the Open Library HTTP API (`openlibrary.org`).
///
/// This is a field-less unit type: it carries no state of its own, and every
/// call receives its settings through the `ProviderConfig` argument.
pub struct OpenLibraryProvider;
|
||||
|
||||
impl MetadataProvider for OpenLibraryProvider {
|
||||
fn name(&self) -> &str {
|
||||
"open_library"
|
||||
}
|
||||
|
||||
fn search_series(
|
||||
&self,
|
||||
query: &str,
|
||||
config: &ProviderConfig,
|
||||
) -> std::pin::Pin<
|
||||
Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
|
||||
> {
|
||||
let query = query.to_string();
|
||||
let config = config.clone();
|
||||
Box::pin(async move { search_series_impl(&query, &config).await })
|
||||
}
|
||||
|
||||
fn get_series_books(
|
||||
&self,
|
||||
external_id: &str,
|
||||
config: &ProviderConfig,
|
||||
) -> std::pin::Pin<
|
||||
Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
|
||||
> {
|
||||
let external_id = external_id.to_string();
|
||||
let config = config.clone();
|
||||
Box::pin(async move { get_series_books_impl(&external_id, &config).await })
|
||||
}
|
||||
}
|
||||
|
||||
async fn search_series_impl(
|
||||
query: &str,
|
||||
config: &ProviderConfig,
|
||||
) -> Result<Vec<SeriesCandidate>, String> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(15))
|
||||
.build()
|
||||
.map_err(|e| format!("failed to build HTTP client: {e}"))?;
|
||||
|
||||
// Open Library uses 3-letter language codes
|
||||
let ol_lang = match config.language.as_str() {
|
||||
"fr" => "fre",
|
||||
"es" => "spa",
|
||||
_ => "eng",
|
||||
};
|
||||
|
||||
let url = format!(
|
||||
"https://openlibrary.org/search.json?title={}&limit=20&language={}",
|
||||
urlencoded(query),
|
||||
ol_lang,
|
||||
);
|
||||
|
||||
let resp = client
|
||||
.get(&url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Open Library request failed: {e}"))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
return Err(format!("Open Library returned {status}: {text}"));
|
||||
}
|
||||
|
||||
let data: serde_json::Value = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to parse Open Library response: {e}"))?;
|
||||
|
||||
let docs = match data.get("docs").and_then(|d| d.as_array()) {
|
||||
Some(docs) => docs,
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let query_lower = query.to_lowercase();
|
||||
let mut series_map: std::collections::HashMap<String, SeriesCandidateBuilder> =
|
||||
std::collections::HashMap::new();
|
||||
|
||||
for doc in docs {
|
||||
let title = doc
|
||||
.get("title")
|
||||
.and_then(|t| t.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let authors: Vec<String> = doc
|
||||
.get("author_name")
|
||||
.and_then(|a| a.as_array())
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
||||
.unwrap_or_default();
|
||||
let publishers: Vec<String> = doc
|
||||
.get("publisher")
|
||||
.and_then(|a| a.as_array())
|
||||
.map(|arr| {
|
||||
let mut pubs: Vec<String> = arr.iter().filter_map(|v| v.as_str().map(String::from)).collect();
|
||||
pubs.truncate(3);
|
||||
pubs
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let first_publish_year = doc
|
||||
.get("first_publish_year")
|
||||
.and_then(|y| y.as_i64())
|
||||
.map(|y| y as i32);
|
||||
let cover_i = doc.get("cover_i").and_then(|c| c.as_i64());
|
||||
let cover_url = cover_i.map(|id| format!("https://covers.openlibrary.org/b/id/{}-M.jpg", id));
|
||||
let key = doc
|
||||
.get("key")
|
||||
.and_then(|k| k.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let series_name = extract_series_name(&title);
|
||||
|
||||
let entry = series_map
|
||||
.entry(series_name.clone())
|
||||
.or_insert_with(|| SeriesCandidateBuilder {
|
||||
title: series_name.clone(),
|
||||
authors: vec![],
|
||||
description: None,
|
||||
publishers: vec![],
|
||||
start_year: None,
|
||||
volume_count: 0,
|
||||
cover_url: None,
|
||||
external_id: key.clone(),
|
||||
external_url: if key.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(format!("https://openlibrary.org{}", key))
|
||||
},
|
||||
});
|
||||
|
||||
entry.volume_count += 1;
|
||||
|
||||
for a in &authors {
|
||||
if !entry.authors.contains(a) {
|
||||
entry.authors.push(a.clone());
|
||||
}
|
||||
}
|
||||
for p in &publishers {
|
||||
if !entry.publishers.contains(p) {
|
||||
entry.publishers.push(p.clone());
|
||||
}
|
||||
}
|
||||
if entry.start_year.is_none() || first_publish_year.map_or(false, |y| entry.start_year.unwrap() > y) {
|
||||
if first_publish_year.is_some() {
|
||||
entry.start_year = first_publish_year;
|
||||
}
|
||||
}
|
||||
if entry.cover_url.is_none() {
|
||||
entry.cover_url = cover_url;
|
||||
}
|
||||
}
|
||||
|
||||
let mut candidates: Vec<SeriesCandidate> = series_map
|
||||
.into_values()
|
||||
.map(|b| {
|
||||
let confidence = compute_confidence(&b.title, &query_lower);
|
||||
SeriesCandidate {
|
||||
external_id: b.external_id,
|
||||
title: b.title,
|
||||
authors: b.authors,
|
||||
description: b.description,
|
||||
publishers: b.publishers,
|
||||
start_year: b.start_year,
|
||||
total_volumes: if b.volume_count > 1 { Some(b.volume_count) } else { None },
|
||||
cover_url: b.cover_url,
|
||||
external_url: b.external_url,
|
||||
confidence,
|
||||
metadata_json: serde_json::json!({}),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
|
||||
candidates.truncate(10);
|
||||
Ok(candidates)
|
||||
}
|
||||
|
||||
async fn get_series_books_impl(
|
||||
external_id: &str,
|
||||
_config: &ProviderConfig,
|
||||
) -> Result<Vec<BookCandidate>, String> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(15))
|
||||
.build()
|
||||
.map_err(|e| format!("failed to build HTTP client: {e}"))?;
|
||||
|
||||
// Fetch the work to get its title for series search
|
||||
let url = format!("https://openlibrary.org{}.json", external_id);
|
||||
let resp = client.get(&url).send().await.map_err(|e| format!("Open Library request failed: {e}"))?;
|
||||
|
||||
let work: serde_json::Value = if resp.status().is_success() {
|
||||
resp.json().await.map_err(|e| format!("Failed to parse response: {e}"))?
|
||||
} else {
|
||||
serde_json::json!({})
|
||||
};
|
||||
|
||||
let title = work.get("title").and_then(|t| t.as_str()).unwrap_or("");
|
||||
let series_name = extract_series_name(title);
|
||||
|
||||
// Search for editions of this series
|
||||
let search_url = format!(
|
||||
"https://openlibrary.org/search.json?title={}&limit=40",
|
||||
urlencoded(&series_name)
|
||||
);
|
||||
let resp = client.get(&search_url).send().await.map_err(|e| format!("Open Library search failed: {e}"))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let data: serde_json::Value = resp.json().await.map_err(|e| format!("Failed to parse response: {e}"))?;
|
||||
let docs = match data.get("docs").and_then(|d| d.as_array()) {
|
||||
Some(docs) => docs,
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let mut books: Vec<BookCandidate> = docs
|
||||
.iter()
|
||||
.map(|doc| {
|
||||
let title = doc.get("title").and_then(|t| t.as_str()).unwrap_or("").to_string();
|
||||
let authors: Vec<String> = doc
|
||||
.get("author_name")
|
||||
.and_then(|a| a.as_array())
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
||||
.unwrap_or_default();
|
||||
let isbn = doc
|
||||
.get("isbn")
|
||||
.and_then(|a| a.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from);
|
||||
let page_count = doc
|
||||
.get("number_of_pages_median")
|
||||
.and_then(|n| n.as_i64())
|
||||
.map(|n| n as i32);
|
||||
let cover_i = doc.get("cover_i").and_then(|c| c.as_i64());
|
||||
let cover_url = cover_i.map(|id| format!("https://covers.openlibrary.org/b/id/{}-M.jpg", id));
|
||||
let language = doc
|
||||
.get("language")
|
||||
.and_then(|a| a.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from);
|
||||
let publish_date = doc
|
||||
.get("first_publish_year")
|
||||
.and_then(|y| y.as_i64())
|
||||
.map(|y| y.to_string());
|
||||
let key = doc.get("key").and_then(|k| k.as_str()).unwrap_or("").to_string();
|
||||
let volume_number = extract_volume_number(&title);
|
||||
|
||||
BookCandidate {
|
||||
external_book_id: key,
|
||||
title,
|
||||
volume_number,
|
||||
authors,
|
||||
isbn,
|
||||
summary: None,
|
||||
cover_url,
|
||||
page_count,
|
||||
language,
|
||||
publish_date,
|
||||
metadata_json: serde_json::json!({}),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
books.sort_by_key(|b| b.volume_number.unwrap_or(999));
|
||||
Ok(books)
|
||||
}
|
||||
|
||||
fn extract_series_name(title: &str) -> String {
|
||||
let re_patterns = [
|
||||
r"(?i)\s*[-–—]\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
|
||||
r"(?i)\s*,?\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
|
||||
r"\s*\(\d+\)\s*$",
|
||||
r"\s+\d+\s*$",
|
||||
];
|
||||
let mut result = title.to_string();
|
||||
for pattern in &re_patterns {
|
||||
if let Ok(re) = regex::Regex::new(pattern) {
|
||||
let cleaned = re.replace(&result, "").to_string();
|
||||
if !cleaned.is_empty() {
|
||||
result = cleaned;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
result.trim().to_string()
|
||||
}
|
||||
|
||||
fn extract_volume_number(title: &str) -> Option<i32> {
|
||||
let patterns = [
|
||||
r"(?i)(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*(\d+)",
|
||||
r"\((\d+)\)\s*$",
|
||||
r"\b(\d+)\s*$",
|
||||
];
|
||||
for pattern in &patterns {
|
||||
if let Ok(re) = regex::Regex::new(pattern) {
|
||||
if let Some(caps) = re.captures(title) {
|
||||
if let Some(num) = caps.get(1).and_then(|m| m.as_str().parse::<i32>().ok()) {
|
||||
return Some(num);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Scores how well a candidate `title` matches a search `query`, from 0.1 to 1.0.
///
/// `title` is lowercased here; `query` is compared as given, so callers should
/// pass it already lowercased.
///
/// * `1.0` — the strings are equal.
/// * `0.1` — exactly one of them is empty (an empty string would otherwise
///   count as a prefix of anything and score `0.8`).
/// * `0.8` — one string is a prefix of the other.
/// * `0.7` — one string contains the other.
/// * otherwise — the share of query characters that occur anywhere in the
///   title, relative to the longer of the two strings, clamped to `0.1..=0.6`.
fn compute_confidence(title: &str, query: &str) -> f32 {
    let title_lower = title.to_lowercase();
    let title_lower = title_lower.as_str();

    if title_lower == query {
        return 1.0;
    }
    if title_lower.is_empty() || query.is_empty() {
        return 0.1;
    }

    if title_lower.starts_with(query) || query.starts_with(title_lower) {
        0.8
    } else if title_lower.contains(query) || query.contains(title_lower) {
        0.7
    } else {
        let common = query.chars().filter(|c| title_lower.contains(*c)).count();
        // Lengths are measured in characters, like `common`; byte lengths would
        // understate the ratio for titles containing non-ASCII characters.
        let max_len = query
            .chars()
            .count()
            .max(title_lower.chars().count())
            .max(1);
        (common as f32 / max_len as f32).clamp(0.1, 0.6)
    }
}
|
||||
|
||||
/// Percent-encodes `s` for use as a URL query value.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are copied through unchanged;
/// every other byte of the UTF-8 representation becomes `%XX` with uppercase
/// hexadecimal digits (so a space is `%20` and `é` is `%C3%A9`).
fn urlencoded(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    // At least one output byte per input byte; avoids the first few regrowths.
    let mut result = String::with_capacity(s.len());
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                result.push(char::from(byte));
            }
            _ => {
                // Written digit by digit so no temporary String is allocated per byte.
                result.push('%');
                result.push(char::from(HEX[usize::from(byte >> 4)]));
                result.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    result
}
|
||||
|
||||
/// Accumulator used while grouping Open Library search documents by series.
///
/// One value exists per distinct series name; it is filled in as documents of
/// that series are encountered and finally converted into a `SeriesCandidate`.
#[derive(Debug, Clone)]
struct SeriesCandidateBuilder {
    /// Series name, i.e. the document title with its volume marker stripped.
    title: String,
    /// Distinct author names collected from the grouped documents.
    authors: Vec<String>,
    /// Series description; the search in this file always leaves it `None`.
    description: Option<String>,
    /// Distinct publisher names collected from the grouped documents.
    publishers: Vec<String>,
    /// Earliest first-publication year among the grouped documents.
    start_year: Option<i32>,
    /// Number of search documents merged into this series.
    volume_count: i32,
    /// Cover image URL of the first grouped document that has one.
    cover_url: Option<String>,
    /// Open Library key (e.g. `/works/...`) that identifies the series.
    external_id: String,
    /// Public Open Library page for `external_id`, when a key is known.
    external_url: Option<String>,
}
|
||||
Reference in New Issue
Block a user