All checks were successful
Deploy with Docker Compose / deploy (push) Successful in 6s
Add a status_mappings table to replace hardcoded provider status normalization. Users can now configure how provider statuses (e.g. "releasing", "finie") map to target statuses (e.g. "ongoing", "ended") via the Settings > Integrations page. - Migration 0038: status_mappings table with pre-seeded mappings - Migration 0039: re-normalize existing series_metadata.status values - API: CRUD endpoints for status mappings, DB-based normalize function - API: new GET /series/provider-statuses endpoint - Backoffice: StatusMappingsCard component with create target, assign, and delete capabilities - Fix all clippy warnings across the API crate - Fix missing OpenAPI schema refs (MetadataStats, ProviderCount) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
352 lines
11 KiB
Rust
352 lines
11 KiB
Rust
use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate};
|
|
|
|
/// Metadata provider that queries the public Open Library HTTP API (`openlibrary.org`).
pub struct OpenLibraryProvider;
|
|
|
|
impl MetadataProvider for OpenLibraryProvider {
|
|
fn name(&self) -> &str {
|
|
"open_library"
|
|
}
|
|
|
|
fn search_series(
|
|
&self,
|
|
query: &str,
|
|
config: &ProviderConfig,
|
|
) -> std::pin::Pin<
|
|
Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
|
|
> {
|
|
let query = query.to_string();
|
|
let config = config.clone();
|
|
Box::pin(async move { search_series_impl(&query, &config).await })
|
|
}
|
|
|
|
fn get_series_books(
|
|
&self,
|
|
external_id: &str,
|
|
config: &ProviderConfig,
|
|
) -> std::pin::Pin<
|
|
Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
|
|
> {
|
|
let external_id = external_id.to_string();
|
|
let config = config.clone();
|
|
Box::pin(async move { get_series_books_impl(&external_id, &config).await })
|
|
}
|
|
}
|
|
|
|
async fn search_series_impl(
|
|
query: &str,
|
|
config: &ProviderConfig,
|
|
) -> Result<Vec<SeriesCandidate>, String> {
|
|
let client = reqwest::Client::builder()
|
|
.timeout(std::time::Duration::from_secs(15))
|
|
.build()
|
|
.map_err(|e| format!("failed to build HTTP client: {e}"))?;
|
|
|
|
// Open Library uses 3-letter language codes
|
|
let ol_lang = match config.language.as_str() {
|
|
"fr" => "fre",
|
|
"es" => "spa",
|
|
_ => "eng",
|
|
};
|
|
|
|
let url = format!(
|
|
"https://openlibrary.org/search.json?title={}&limit=20&language={}",
|
|
urlencoded(query),
|
|
ol_lang,
|
|
);
|
|
|
|
let resp = client
|
|
.get(&url)
|
|
.send()
|
|
.await
|
|
.map_err(|e| format!("Open Library request failed: {e}"))?;
|
|
|
|
if !resp.status().is_success() {
|
|
let status = resp.status();
|
|
let text = resp.text().await.unwrap_or_default();
|
|
return Err(format!("Open Library returned {status}: {text}"));
|
|
}
|
|
|
|
let data: serde_json::Value = resp
|
|
.json()
|
|
.await
|
|
.map_err(|e| format!("Failed to parse Open Library response: {e}"))?;
|
|
|
|
let docs = match data.get("docs").and_then(|d| d.as_array()) {
|
|
Some(docs) => docs,
|
|
None => return Ok(vec![]),
|
|
};
|
|
|
|
let query_lower = query.to_lowercase();
|
|
let mut series_map: std::collections::HashMap<String, SeriesCandidateBuilder> =
|
|
std::collections::HashMap::new();
|
|
|
|
for doc in docs {
|
|
let title = doc
|
|
.get("title")
|
|
.and_then(|t| t.as_str())
|
|
.unwrap_or("")
|
|
.to_string();
|
|
let authors: Vec<String> = doc
|
|
.get("author_name")
|
|
.and_then(|a| a.as_array())
|
|
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
|
.unwrap_or_default();
|
|
let publishers: Vec<String> = doc
|
|
.get("publisher")
|
|
.and_then(|a| a.as_array())
|
|
.map(|arr| {
|
|
let mut pubs: Vec<String> = arr.iter().filter_map(|v| v.as_str().map(String::from)).collect();
|
|
pubs.truncate(3);
|
|
pubs
|
|
})
|
|
.unwrap_or_default();
|
|
let first_publish_year = doc
|
|
.get("first_publish_year")
|
|
.and_then(|y| y.as_i64())
|
|
.map(|y| y as i32);
|
|
let cover_i = doc.get("cover_i").and_then(|c| c.as_i64());
|
|
let cover_url = cover_i.map(|id| format!("https://covers.openlibrary.org/b/id/{}-M.jpg", id));
|
|
let key = doc
|
|
.get("key")
|
|
.and_then(|k| k.as_str())
|
|
.unwrap_or("")
|
|
.to_string();
|
|
|
|
let series_name = extract_series_name(&title);
|
|
|
|
let entry = series_map
|
|
.entry(series_name.clone())
|
|
.or_insert_with(|| SeriesCandidateBuilder {
|
|
title: series_name.clone(),
|
|
authors: vec![],
|
|
description: None,
|
|
publishers: vec![],
|
|
start_year: None,
|
|
volume_count: 0,
|
|
cover_url: None,
|
|
external_id: key.clone(),
|
|
external_url: if key.is_empty() {
|
|
None
|
|
} else {
|
|
Some(format!("https://openlibrary.org{}", key))
|
|
},
|
|
});
|
|
|
|
entry.volume_count += 1;
|
|
|
|
for a in &authors {
|
|
if !entry.authors.contains(a) {
|
|
entry.authors.push(a.clone());
|
|
}
|
|
}
|
|
for p in &publishers {
|
|
if !entry.publishers.contains(p) {
|
|
entry.publishers.push(p.clone());
|
|
}
|
|
}
|
|
if (entry.start_year.is_none() || first_publish_year.is_some_and(|y| entry.start_year.unwrap() > y))
|
|
&& first_publish_year.is_some()
|
|
{
|
|
entry.start_year = first_publish_year;
|
|
}
|
|
if entry.cover_url.is_none() {
|
|
entry.cover_url = cover_url;
|
|
}
|
|
}
|
|
|
|
let mut candidates: Vec<SeriesCandidate> = series_map
|
|
.into_values()
|
|
.map(|b| {
|
|
let confidence = compute_confidence(&b.title, &query_lower);
|
|
SeriesCandidate {
|
|
external_id: b.external_id,
|
|
title: b.title,
|
|
authors: b.authors,
|
|
description: b.description,
|
|
publishers: b.publishers,
|
|
start_year: b.start_year,
|
|
total_volumes: if b.volume_count > 1 { Some(b.volume_count) } else { None },
|
|
cover_url: b.cover_url,
|
|
external_url: b.external_url,
|
|
confidence,
|
|
metadata_json: serde_json::json!({}),
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
|
|
candidates.truncate(10);
|
|
Ok(candidates)
|
|
}
|
|
|
|
async fn get_series_books_impl(
|
|
external_id: &str,
|
|
_config: &ProviderConfig,
|
|
) -> Result<Vec<BookCandidate>, String> {
|
|
let client = reqwest::Client::builder()
|
|
.timeout(std::time::Duration::from_secs(15))
|
|
.build()
|
|
.map_err(|e| format!("failed to build HTTP client: {e}"))?;
|
|
|
|
// Fetch the work to get its title for series search
|
|
let url = format!("https://openlibrary.org{}.json", external_id);
|
|
let resp = client.get(&url).send().await.map_err(|e| format!("Open Library request failed: {e}"))?;
|
|
|
|
let work: serde_json::Value = if resp.status().is_success() {
|
|
resp.json().await.map_err(|e| format!("Failed to parse response: {e}"))?
|
|
} else {
|
|
serde_json::json!({})
|
|
};
|
|
|
|
let title = work.get("title").and_then(|t| t.as_str()).unwrap_or("");
|
|
let series_name = extract_series_name(title);
|
|
|
|
// Search for editions of this series
|
|
let search_url = format!(
|
|
"https://openlibrary.org/search.json?title={}&limit=40",
|
|
urlencoded(&series_name)
|
|
);
|
|
let resp = client.get(&search_url).send().await.map_err(|e| format!("Open Library search failed: {e}"))?;
|
|
|
|
if !resp.status().is_success() {
|
|
return Ok(vec![]);
|
|
}
|
|
|
|
let data: serde_json::Value = resp.json().await.map_err(|e| format!("Failed to parse response: {e}"))?;
|
|
let docs = match data.get("docs").and_then(|d| d.as_array()) {
|
|
Some(docs) => docs,
|
|
None => return Ok(vec![]),
|
|
};
|
|
|
|
let mut books: Vec<BookCandidate> = docs
|
|
.iter()
|
|
.map(|doc| {
|
|
let title = doc.get("title").and_then(|t| t.as_str()).unwrap_or("").to_string();
|
|
let authors: Vec<String> = doc
|
|
.get("author_name")
|
|
.and_then(|a| a.as_array())
|
|
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
|
.unwrap_or_default();
|
|
let isbn = doc
|
|
.get("isbn")
|
|
.and_then(|a| a.as_array())
|
|
.and_then(|arr| arr.first())
|
|
.and_then(|v| v.as_str())
|
|
.map(String::from);
|
|
let page_count = doc
|
|
.get("number_of_pages_median")
|
|
.and_then(|n| n.as_i64())
|
|
.map(|n| n as i32);
|
|
let cover_i = doc.get("cover_i").and_then(|c| c.as_i64());
|
|
let cover_url = cover_i.map(|id| format!("https://covers.openlibrary.org/b/id/{}-M.jpg", id));
|
|
let language = doc
|
|
.get("language")
|
|
.and_then(|a| a.as_array())
|
|
.and_then(|arr| arr.first())
|
|
.and_then(|v| v.as_str())
|
|
.map(String::from);
|
|
let publish_date = doc
|
|
.get("first_publish_year")
|
|
.and_then(|y| y.as_i64())
|
|
.map(|y| y.to_string());
|
|
let key = doc.get("key").and_then(|k| k.as_str()).unwrap_or("").to_string();
|
|
let volume_number = extract_volume_number(&title);
|
|
|
|
BookCandidate {
|
|
external_book_id: key,
|
|
title,
|
|
volume_number,
|
|
authors,
|
|
isbn,
|
|
summary: None,
|
|
cover_url,
|
|
page_count,
|
|
language,
|
|
publish_date,
|
|
metadata_json: serde_json::json!({}),
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
books.sort_by_key(|b| b.volume_number.unwrap_or(999));
|
|
Ok(books)
|
|
}
|
|
|
|
fn extract_series_name(title: &str) -> String {
|
|
let re_patterns = [
|
|
r"(?i)\s*[-–—]\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
|
|
r"(?i)\s*,?\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
|
|
r"\s*\(\d+\)\s*$",
|
|
r"\s+\d+\s*$",
|
|
];
|
|
let mut result = title.to_string();
|
|
for pattern in &re_patterns {
|
|
if let Ok(re) = regex::Regex::new(pattern) {
|
|
let cleaned = re.replace(&result, "").to_string();
|
|
if !cleaned.is_empty() {
|
|
result = cleaned;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
result.trim().to_string()
|
|
}
|
|
|
|
fn extract_volume_number(title: &str) -> Option<i32> {
|
|
let patterns = [
|
|
r"(?i)(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*(\d+)",
|
|
r"\((\d+)\)\s*$",
|
|
r"\b(\d+)\s*$",
|
|
];
|
|
for pattern in &patterns {
|
|
if let Ok(re) = regex::Regex::new(pattern) {
|
|
if let Some(caps) = re.captures(title) {
|
|
if let Some(num) = caps.get(1).and_then(|m| m.as_str().parse::<i32>().ok()) {
|
|
return Some(num);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Scores how well a candidate `title` matches the search `query`, case-insensitively.
///
/// | situation                                   | score                     |
/// |---------------------------------------------|---------------------------|
/// | either string is empty                      | `0.1`                     |
/// | equal                                       | `1.0`                     |
/// | one is a prefix of the other                | `0.8`                     |
/// | one contains the other                      | `0.7`                     |
/// | otherwise                                   | overlap ratio, `0.1..=0.6` |
///
/// The overlap ratio is the number of query characters that also occur in the title,
/// divided by the character count of the longer string.
fn compute_confidence(title: &str, query: &str) -> f32 {
    const LOWEST: f32 = 0.1;
    const HIGHEST_FUZZY: f32 = 0.6;

    let title_lower = title.to_lowercase();
    // Lowercased here as well so the result does not depend on the caller's casing.
    let query_lower = query.to_lowercase();

    // The empty string is a prefix of everything; without this guard an untitled
    // candidate or an empty query would be scored as a strong prefix match.
    if title_lower.is_empty() || query_lower.is_empty() {
        return LOWEST;
    }

    if title_lower == query_lower {
        return 1.0;
    }
    if title_lower.starts_with(&query_lower) || query_lower.starts_with(&title_lower) {
        return 0.8;
    }
    if title_lower.contains(&query_lower) || query_lower.contains(&title_lower) {
        return 0.7;
    }

    let shared = query_lower
        .chars()
        .filter(|c| title_lower.contains(*c))
        .count();
    // Count characters, not bytes, so accented titles are not penalised.
    let longest = query_lower.chars().count().max(title_lower.chars().count());
    (shared as f32 / longest as f32).clamp(LOWEST, HIGHEST_FUZZY)
}
|
|
|
|
/// Percent-encodes `s` for use inside a URL query component.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are copied as they are; every other
/// byte of the UTF-8 encoding is written as `%XX` with uppercase hexadecimal digits
/// (so a space becomes `%20` and `é` becomes `%C3%A9`).
fn urlencoded(s: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    // At least one output byte per input byte; escapes grow the buffer as needed.
    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                encoded.push(char::from(byte));
            }
            _ => {
                // Both nibbles are in 0..=15, so the table lookups cannot go out of range.
                encoded.push('%');
                encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
            }
        }
    }
    encoded
}
|
|
|
|
/// Accumulator for one series while search results are being grouped by series name.
struct SeriesCandidateBuilder {
    // --- identity ---
    /// Series name shared by the grouped search documents.
    title: String,
    /// Open Library key of the first document seen for this series (may be empty).
    external_id: String,
    /// Open Library URL built from that key; `None` when the key is empty.
    external_url: Option<String>,

    // --- descriptive data ---
    /// Distinct author names, in the order they were first seen.
    authors: Vec<String>,
    /// Distinct publisher names, in the order they were first seen.
    publishers: Vec<String>,
    /// Free-text description; the search code in this file always leaves it `None`.
    description: Option<String>,
    /// First cover image URL found among the grouped documents.
    cover_url: Option<String>,

    // --- aggregates ---
    /// Earliest `first_publish_year` among the grouped documents.
    start_year: Option<i32>,
    /// Number of search documents merged into this series.
    volume_count: i32,
}
|