Files
stripstream-librarian/apps/api/src/metadata_providers/google_books.rs
Froidefond Julien cfc98819ab
All checks were successful
Deploy with Docker Compose / deploy (push) Successful in 6s
feat: add configurable status mappings for metadata providers
Add a status_mappings table to replace hardcoded provider status
normalization. Users can now configure how provider statuses (e.g.
"releasing", "finie") map to target statuses (e.g. "ongoing", "ended")
via the Settings > Integrations page.

- Migration 0038: status_mappings table with pre-seeded mappings
- Migration 0039: re-normalize existing series_metadata.status values
- API: CRUD endpoints for status mappings, DB-based normalize function
- API: new GET /series/provider-statuses endpoint
- Backoffice: StatusMappingsCard component with create target, assign,
  and delete capabilities
- Fix all clippy warnings across the API crate
- Fix missing OpenAPI schema refs (MetadataStats, ProviderCount)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 12:44:22 +01:00

473 lines
14 KiB
Rust

use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate};
/// Metadata provider backed by the public Google Books volumes API.
pub struct GoogleBooksProvider;

impl MetadataProvider for GoogleBooksProvider {
    /// Stable identifier for this provider (used as the configuration key).
    fn name(&self) -> &str {
        "google_books"
    }

    /// Returns a boxed future resolving to series candidates for `query`.
    /// Both arguments are copied into owned values so the returned future is
    /// self-contained and does not borrow from the caller.
    fn search_series(
        &self,
        query: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
    > {
        let owned_query = query.to_owned();
        let owned_config = config.clone();
        Box::pin(async move { search_series_impl(&owned_query, &owned_config).await })
    }

    /// Returns a boxed future resolving to the books of the series that
    /// contains the volume identified by `external_id`. Same ownership
    /// strategy as `search_series`.
    fn get_series_books(
        &self,
        external_id: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
    > {
        let owned_id = external_id.to_owned();
        let owned_config = config.clone();
        Box::pin(async move { get_series_books_impl(&owned_id, &owned_config).await })
    }
}
/// Searches Google Books for volumes whose title matches `query` and groups
/// the hits into series-level candidates.
///
/// Grouping key: the volume's `seriesInfo.title` when present, otherwise a
/// series name derived heuristically from the volume title. Returns at most
/// 10 candidates sorted by descending confidence; all failures are reported
/// as human-readable error strings.
async fn search_series_impl(
    query: &str,
    config: &ProviderConfig,
) -> Result<Vec<SeriesCandidate>, String> {
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(15))
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))?;
    // `intitle:` restricts matching to the title field.
    let search_query = format!("intitle:{query}");
    let mut url = format!(
        "https://www.googleapis.com/books/v1/volumes?q={}&maxResults=20&printType=books&langRestrict={}",
        urlencoded(&search_query),
        urlencoded(&config.language),
    );
    if let Some(ref key) = config.api_key {
        url.push_str(&format!("&key={key}"));
    }
    let resp = client
        .get(&url)
        .send()
        .await
        .map_err(|e| format!("Google Books request failed: {e}"))?;
    if !resp.status().is_success() {
        let status = resp.status();
        let text = resp.text().await.unwrap_or_default();
        return Err(format!("Google Books returned {status}: {text}"));
    }
    let data: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse Google Books response: {e}"))?;
    // "items" is absent when nothing matched — an empty result, not an error.
    let items = match data.get("items").and_then(|i| i.as_array()) {
        Some(items) => items,
        None => return Ok(vec![]),
    };
    // Group volumes by series name to produce series candidates.
    let query_lower = query.to_lowercase();
    let mut series_map: std::collections::HashMap<String, SeriesCandidateBuilder> =
        std::collections::HashMap::new();
    for item in items {
        let volume_info = match item.get("volumeInfo") {
            Some(vi) => vi,
            None => continue,
        };
        let title = volume_info
            .get("title")
            .and_then(|t| t.as_str())
            .unwrap_or("")
            .to_string();
        let authors: Vec<String> = volume_info
            .get("authors")
            .and_then(|a| a.as_array())
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| v.as_str().map(String::from))
                    .collect()
            })
            .unwrap_or_default();
        let publisher = volume_info
            .get("publisher")
            .and_then(|p| p.as_str())
            .map(String::from);
        let published_date = volume_info
            .get("publishedDate")
            .and_then(|d| d.as_str())
            .map(String::from);
        let description = volume_info
            .get("description")
            .and_then(|d| d.as_str())
            .map(String::from);
        // Prefer the explicit seriesInfo title; fall back to stripping
        // volume markers ("Vol. 1", "Tome 2", ...) from the book title.
        let series_name = volume_info
            .get("seriesInfo")
            .and_then(|si| si.get("title"))
            .and_then(|t| t.as_str())
            .map(String::from)
            .unwrap_or_else(|| extract_series_name(&title));
        // Force https: Google sometimes returns http image links.
        let cover_url = volume_info
            .get("imageLinks")
            .and_then(|il| il.get("thumbnail").or_else(|| il.get("smallThumbnail")))
            .and_then(|u| u.as_str())
            .map(|s| s.replace("http://", "https://"));
        let google_id = item
            .get("id")
            .and_then(|id| id.as_str())
            .unwrap_or("")
            .to_string();
        let entry = series_map
            .entry(series_name.clone())
            .or_insert_with(|| SeriesCandidateBuilder {
                title: series_name.clone(),
                authors: vec![],
                description: None,
                publishers: vec![],
                start_year: None,
                volume_count: 0,
                cover_url: None,
                external_id: google_id.clone(),
                // FIX: set the URL once, from the same volume that supplies
                // external_id. Previously it was overwritten on every
                // iteration and could end up pointing at a different volume
                // than the one external_id refers to.
                external_url: Some(format!("https://books.google.com/books?id={google_id}")),
                metadata_json: serde_json::json!({}),
            });
        entry.volume_count += 1;
        // Merge authors, deduplicated, in first-seen order.
        for a in &authors {
            if !entry.authors.contains(a) {
                entry.authors.push(a.clone());
            }
        }
        // Keep the first description encountered.
        if entry.description.is_none() {
            entry.description = description;
        }
        // Merge publishers, deduplicated.
        if let Some(ref pub_name) = publisher {
            if !entry.publishers.contains(pub_name) {
                entry.publishers.push(pub_name.clone());
            }
        }
        // Track the earliest publication year as the series start year.
        if let Some(ref date) = published_date {
            if let Some(year) = extract_year(date) {
                if entry.start_year.is_none() || entry.start_year.unwrap() > year {
                    entry.start_year = Some(year);
                }
            }
        }
        // Keep the first cover encountered.
        if entry.cover_url.is_none() {
            entry.cover_url = cover_url;
        }
    }
    let mut candidates: Vec<SeriesCandidate> = series_map
        .into_values()
        .map(|b| {
            let confidence = compute_confidence(&b.title, &query_lower);
            SeriesCandidate {
                external_id: b.external_id,
                title: b.title,
                authors: b.authors,
                description: b.description,
                publishers: b.publishers,
                start_year: b.start_year,
                // Only report a total when we actually grouped several volumes.
                total_volumes: if b.volume_count > 1 {
                    Some(b.volume_count)
                } else {
                    None
                },
                cover_url: b.cover_url,
                external_url: b.external_url,
                confidence,
                metadata_json: b.metadata_json,
            }
        })
        .collect();
    // total_cmp is a NaN-safe total order on f32, so no Ordering::Equal fallback.
    candidates.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
    candidates.truncate(10);
    Ok(candidates)
}
/// Fetches the list of books for the series containing volume `external_id`.
///
/// Two-step strategy:
/// 1. Fetch the volume itself to learn its title.
/// 2. Derive a series name from that title and run a title search for
///    sibling volumes.
/// If the follow-up search fails or returns no items, degrade gracefully to
/// a single-element list containing only the originally requested volume.
async fn get_series_books_impl(
    external_id: &str,
    config: &ProviderConfig,
) -> Result<Vec<BookCandidate>, String> {
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(15))
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))?;
    // First fetch the volume to get its series info
    let mut url = format!(
        "https://www.googleapis.com/books/v1/volumes/{}",
        external_id
    );
    // '?' (not '&'): the key is the first query parameter on this URL.
    if let Some(ref key) = config.api_key {
        url.push_str(&format!("?key={}", key));
    }
    let resp = client
        .get(&url)
        .send()
        .await
        .map_err(|e| format!("Google Books request failed: {e}"))?;
    // A failed volume lookup is fatal — without the title we cannot search.
    if !resp.status().is_success() {
        let status = resp.status();
        let text = resp.text().await.unwrap_or_default();
        return Err(format!("Google Books returned {status}: {text}"));
    }
    let volume: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse Google Books response: {e}"))?;
    let volume_info = volume.get("volumeInfo").cloned().unwrap_or(serde_json::json!({}));
    let title = volume_info
        .get("title")
        .and_then(|t| t.as_str())
        .unwrap_or("");
    // Search for more volumes in this series
    let series_name = extract_series_name(title);
    let search_query = format!("intitle:{}", series_name);
    let mut search_url = format!(
        "https://www.googleapis.com/books/v1/volumes?q={}&maxResults=40&printType=books&langRestrict={}",
        urlencoded(&search_query),
        urlencoded(&config.language),
    );
    if let Some(ref key) = config.api_key {
        search_url.push_str(&format!("&key={}", key));
    }
    let resp = client
        .get(&search_url)
        .send()
        .await
        .map_err(|e| format!("Google Books search failed: {e}"))?;
    if !resp.status().is_success() {
        // Return just the single volume as a book
        return Ok(vec![volume_to_book_candidate(&volume)]);
    }
    let data: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse search response: {e}"))?;
    // "items" is absent when the search matched nothing — fall back as above.
    let items = match data.get("items").and_then(|i| i.as_array()) {
        Some(items) => items,
        None => return Ok(vec![volume_to_book_candidate(&volume)]),
    };
    // NOTE(review): search hits are not filtered against series_name, so
    // unrelated title matches may be included — confirm this is intended.
    let mut books: Vec<BookCandidate> = items
        .iter()
        .map(volume_to_book_candidate)
        .collect();
    // Sort by volume number
    // (volumes without a detectable number sort last via the 999 sentinel).
    books.sort_by_key(|b| b.volume_number.unwrap_or(999));
    Ok(books)
}
/// Converts one Google Books volume JSON object into a [`BookCandidate`].
///
/// Missing fields degrade to empty strings / `None`. The ISBN prefers the
/// canonical ISBN-13 identifier and falls back to ISBN-10.
fn volume_to_book_candidate(item: &serde_json::Value) -> BookCandidate {
    let volume_info = item.get("volumeInfo").cloned().unwrap_or(serde_json::json!({}));
    let title = volume_info
        .get("title")
        .and_then(|t| t.as_str())
        .unwrap_or("")
        .to_string();
    let authors: Vec<String> = volume_info
        .get("authors")
        .and_then(|a| a.as_array())
        .map(|arr| {
            arr.iter()
                .filter_map(|v| v.as_str().map(String::from))
                .collect()
        })
        .unwrap_or_default();
    // FIX: prefer ISBN_13 over ISBN_10 regardless of array order. The old
    // code took whichever identifier appeared first, so a leading ISBN_10
    // entry shadowed the canonical ISBN_13.
    let identifiers = volume_info
        .get("industryIdentifiers")
        .and_then(|ids| ids.as_array());
    let find_isbn = |kind: &str| -> Option<String> {
        identifiers?
            .iter()
            .find(|id| id.get("type").and_then(|t| t.as_str()) == Some(kind))
            .and_then(|id| id.get("identifier").and_then(|i| i.as_str()))
            .map(String::from)
    };
    let isbn = find_isbn("ISBN_13").or_else(|| find_isbn("ISBN_10"));
    let summary = volume_info
        .get("description")
        .and_then(|d| d.as_str())
        .map(String::from);
    // Force https on cover links; Google sometimes returns http URLs.
    let cover_url = volume_info
        .get("imageLinks")
        .and_then(|il| il.get("thumbnail").or_else(|| il.get("smallThumbnail")))
        .and_then(|u| u.as_str())
        .map(|s| s.replace("http://", "https://"));
    let page_count = volume_info
        .get("pageCount")
        .and_then(|p| p.as_i64())
        .map(|p| p as i32);
    let language = volume_info
        .get("language")
        .and_then(|l| l.as_str())
        .map(String::from);
    let publish_date = volume_info
        .get("publishedDate")
        .and_then(|d| d.as_str())
        .map(String::from);
    let google_id = item
        .get("id")
        .and_then(|id| id.as_str())
        .unwrap_or("")
        .to_string();
    // Volume number is inferred from the title ("Vol. 3", "Tome 2", "#4", …).
    let volume_number = extract_volume_number(&title);
    BookCandidate {
        external_book_id: google_id,
        title,
        volume_number,
        authors,
        isbn,
        summary,
        cover_url,
        page_count,
        language,
        publish_date,
        metadata_json: serde_json::json!({}),
    }
}
/// Derives a series name from a book title by stripping a trailing volume
/// indicator like "Vol. 1", "Tome 2", "#3", "- Volume 1", "(4)" or a bare
/// trailing number. Patterns are tried in priority order; the first one that
/// actually strips something (without emptying the title) wins.
fn extract_series_name(title: &str) -> String {
    // Remove trailing volume indicators like "Vol. 1", "Tome 2", "#3", "- Volume 1"
    let re_patterns = [
        r"(?i)\s*[-–—]\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
        r"(?i)\s*,?\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
        r"\s*\(\d+\)\s*$",
        r"\s+\d+\s*$",
    ];
    let mut result = title.to_string();
    for pattern in &re_patterns {
        if let Ok(re) = regex::Regex::new(pattern) {
            let cleaned = re.replace(&result, "").to_string();
            // FIX: only commit and stop when the pattern actually changed the
            // string. Previously a non-matching pattern left `cleaned` equal
            // to the (non-empty) input, so the loop broke on the very first
            // pattern and patterns 2-4 were never tried.
            if cleaned != result && !cleaned.is_empty() {
                result = cleaned;
                break;
            }
        }
    }
    result.trim().to_string()
}
/// Extracts a volume number from a book title, trying markers from most to
/// least explicit: "Vol./Volume/Tome/T./# N", then "(N)" at the end, then a
/// bare trailing number. Returns `None` when no pattern yields a parseable
/// number.
fn extract_volume_number(title: &str) -> Option<i32> {
    const PATTERNS: [&str; 3] = [
        r"(?i)(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*(\d+)",
        r"\((\d+)\)\s*$",
        r"\b(\d+)\s*$",
    ];
    // A pattern that fails to compile, match, or parse simply yields None
    // here, letting find_map fall through to the next pattern.
    PATTERNS.iter().find_map(|pattern| {
        let re = regex::Regex::new(pattern).ok()?;
        let caps = re.captures(title)?;
        caps.get(1)?.as_str().parse::<i32>().ok()
    })
}
/// Parses the first four characters of a date string ("YYYY…") as a year.
/// Returns `None` for strings shorter than four bytes, non-numeric prefixes,
/// or when byte index 4 falls inside a multi-byte character.
fn extract_year(date: &str) -> Option<i32> {
    let prefix = date.get(..4)?;
    prefix.parse().ok()
}
/// Scores how well a candidate `title` matches the (already lowercased)
/// `query`, in [0.1, 1.0].
///
/// Exact match → 1.0; prefix in either direction → 0.8; substring in either
/// direction → 0.7; otherwise a crude character-overlap ratio clamped to
/// [0.1, 0.6].
fn compute_confidence(title: &str, query: &str) -> f32 {
    let title_lower = title.to_lowercase();
    if title_lower == query {
        1.0
    } else if title_lower.starts_with(query) || query.starts_with(&title_lower) {
        0.8
    } else if title_lower.contains(query) || query.contains(&title_lower) {
        0.7
    } else {
        // Fraction of query characters present anywhere in the title.
        let common = query.chars().filter(|c| title_lower.contains(*c)).count();
        // FIX: measure both lengths in characters. The numerator counts
        // characters, so using byte lengths (str::len) in the denominator
        // skewed the ratio downward for any non-ASCII title or query.
        let max_len = query
            .chars()
            .count()
            .max(title_lower.chars().count())
            .max(1);
        (common as f32 / max_len as f32).clamp(0.1, 0.6)
    }
}
/// Percent-encodes `s` for use in a URL query component: ASCII alphanumerics
/// and the RFC 3986 unreserved marks (`-`, `_`, `.`, `~`) pass through, every
/// other byte becomes `%XX` (uppercase hex, per UTF-8 byte).
fn urlencoded(s: &str) -> String {
    let mut encoded = String::with_capacity(s.len());
    for b in s.bytes() {
        if b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_' | b'.' | b'~') {
            encoded.push(b as char);
        } else {
            encoded.push_str(&format!("%{b:02X}"));
        }
    }
    encoded
}
/// Mutable accumulator used while grouping Google Books volumes into a
/// single series candidate (see `search_series_impl`).
struct SeriesCandidateBuilder {
    // Series title, also used as the grouping key.
    title: String,
    // Union of all volume authors, deduplicated, in first-seen order.
    authors: Vec<String>,
    // First description encountered among the grouped volumes.
    description: Option<String>,
    // Union of publishers, deduplicated.
    publishers: Vec<String>,
    // Earliest publication year seen across grouped volumes.
    start_year: Option<i32>,
    // Number of volumes grouped under this series.
    volume_count: i32,
    // First cover URL encountered.
    cover_url: Option<String>,
    // Google volume id of the first grouped volume.
    external_id: String,
    // Link to a representative volume on books.google.com.
    external_url: Option<String>,
    // Reserved for extra provider metadata; currently always `{}`.
    metadata_json: serde_json::Value,
}