use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate}; pub struct GoogleBooksProvider; impl MetadataProvider for GoogleBooksProvider { fn name(&self) -> &str { "google_books" } fn search_series( &self, query: &str, config: &ProviderConfig, ) -> std::pin::Pin< Box, String>> + Send + '_>, > { let query = query.to_string(); let config = config.clone(); Box::pin(async move { search_series_impl(&query, &config).await }) } fn get_series_books( &self, external_id: &str, config: &ProviderConfig, ) -> std::pin::Pin< Box, String>> + Send + '_>, > { let external_id = external_id.to_string(); let config = config.clone(); Box::pin(async move { get_series_books_impl(&external_id, &config).await }) } } async fn search_series_impl( query: &str, config: &ProviderConfig, ) -> Result, String> { let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(15)) .build() .map_err(|e| format!("failed to build HTTP client: {e}"))?; let search_query = format!("intitle:{}", query); let mut url = format!( "https://www.googleapis.com/books/v1/volumes?q={}&maxResults=20&printType=books&langRestrict={}", urlencoded(&search_query), urlencoded(&config.language), ); if let Some(ref key) = config.api_key { url.push_str(&format!("&key={}", key)); } let resp = client .get(&url) .send() .await .map_err(|e| format!("Google Books request failed: {e}"))?; if !resp.status().is_success() { let status = resp.status(); let text = resp.text().await.unwrap_or_default(); return Err(format!("Google Books returned {status}: {text}")); } let data: serde_json::Value = resp .json() .await .map_err(|e| format!("Failed to parse Google Books response: {e}"))?; let items = match data.get("items").and_then(|i| i.as_array()) { Some(items) => items, None => return Ok(vec![]), }; // Group volumes by series name to produce series candidates let query_lower = query.to_lowercase(); let mut series_map: std::collections::HashMap = std::collections::HashMap::new(); for item in items { let volume_info = match item.get("volumeInfo") { Some(vi) => vi, None => continue, }; let title = volume_info .get("title") .and_then(|t| t.as_str()) .unwrap_or("") .to_string(); let authors: Vec = volume_info .get("authors") .and_then(|a| a.as_array()) .map(|arr| { arr.iter() .filter_map(|v| v.as_str().map(String::from)) .collect() }) .unwrap_or_default(); let publisher = volume_info .get("publisher") .and_then(|p| p.as_str()) .map(String::from); let published_date = volume_info .get("publishedDate") .and_then(|d| d.as_str()) .map(String::from); let description = volume_info .get("description") .and_then(|d| d.as_str()) .map(String::from); // Extract series info from title or seriesInfo let series_name = volume_info .get("seriesInfo") .and_then(|si| si.get("title")) .and_then(|t| t.as_str()) .map(String::from) .unwrap_or_else(|| extract_series_name(&title)); let cover_url = volume_info .get("imageLinks") .and_then(|il| { il.get("thumbnail") .or_else(|| il.get("smallThumbnail")) }) .and_then(|u| u.as_str()) .map(|s| s.replace("http://", "https://")); let google_id = item .get("id") .and_then(|id| id.as_str()) .unwrap_or("") .to_string(); let entry = series_map .entry(series_name.clone()) .or_insert_with(|| SeriesCandidateBuilder { title: series_name.clone(), authors: vec![], description: None, publishers: vec![], start_year: None, volume_count: 0, cover_url: None, external_id: google_id.clone(), external_url: None, metadata_json: serde_json::json!({}), }); entry.volume_count += 1; // Merge authors for a in &authors { if !entry.authors.contains(a) { entry.authors.push(a.clone()); } } // Set description if not yet set if entry.description.is_none() { entry.description = description; } // Merge publisher if let Some(ref pub_name) = publisher { if !entry.publishers.contains(pub_name) { entry.publishers.push(pub_name.clone()); } } // Extract year if let Some(ref date) = published_date { if let Some(year) = extract_year(date) { if entry.start_year.is_none() || entry.start_year.unwrap() > year { entry.start_year = Some(year); } } } if entry.cover_url.is_none() { entry.cover_url = cover_url; } entry.external_url = Some(format!( "https://books.google.com/books?id={}", google_id )); } let mut candidates: Vec = series_map .into_values() .map(|b| { let confidence = compute_confidence(&b.title, &query_lower); SeriesCandidate { external_id: b.external_id, title: b.title, authors: b.authors, description: b.description, publishers: b.publishers, start_year: b.start_year, total_volumes: if b.volume_count > 1 { Some(b.volume_count) } else { None }, cover_url: b.cover_url, external_url: b.external_url, confidence, metadata_json: b.metadata_json, } }) .collect(); candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)); candidates.truncate(10); Ok(candidates) } async fn get_series_books_impl( external_id: &str, config: &ProviderConfig, ) -> Result, String> { let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(15)) .build() .map_err(|e| format!("failed to build HTTP client: {e}"))?; // First fetch the volume to get its series info let mut url = format!( "https://www.googleapis.com/books/v1/volumes/{}", external_id ); if let Some(ref key) = config.api_key { url.push_str(&format!("?key={}", key)); } let resp = client .get(&url) .send() .await .map_err(|e| format!("Google Books request failed: {e}"))?; if !resp.status().is_success() { let status = resp.status(); let text = resp.text().await.unwrap_or_default(); return Err(format!("Google Books returned {status}: {text}")); } let volume: serde_json::Value = resp .json() .await .map_err(|e| format!("Failed to parse Google Books response: {e}"))?; let volume_info = volume.get("volumeInfo").cloned().unwrap_or(serde_json::json!({})); let title = volume_info .get("title") .and_then(|t| t.as_str()) .unwrap_or(""); // Search for more volumes in this series let series_name = extract_series_name(title); let search_query = format!("intitle:{}", series_name); let mut search_url = format!( "https://www.googleapis.com/books/v1/volumes?q={}&maxResults=40&printType=books&langRestrict={}", urlencoded(&search_query), urlencoded(&config.language), ); if let Some(ref key) = config.api_key { search_url.push_str(&format!("&key={}", key)); } let resp = client .get(&search_url) .send() .await .map_err(|e| format!("Google Books search failed: {e}"))?; if !resp.status().is_success() { // Return just the single volume as a book return Ok(vec![volume_to_book_candidate(&volume)]); } let data: serde_json::Value = resp .json() .await .map_err(|e| format!("Failed to parse search response: {e}"))?; let items = match data.get("items").and_then(|i| i.as_array()) { Some(items) => items, None => return Ok(vec![volume_to_book_candidate(&volume)]), }; let mut books: Vec = items .iter() .map(volume_to_book_candidate) .collect(); // Sort by volume number books.sort_by_key(|b| b.volume_number.unwrap_or(999)); Ok(books) } fn volume_to_book_candidate(item: &serde_json::Value) -> BookCandidate { let volume_info = item.get("volumeInfo").cloned().unwrap_or(serde_json::json!({})); let title = volume_info .get("title") .and_then(|t| t.as_str()) .unwrap_or("") .to_string(); let authors: Vec = volume_info .get("authors") .and_then(|a| a.as_array()) .map(|arr| { arr.iter() .filter_map(|v| v.as_str().map(String::from)) .collect() }) .unwrap_or_default(); let isbn = volume_info .get("industryIdentifiers") .and_then(|ids| ids.as_array()) .and_then(|arr| { arr.iter() .find(|id| { id.get("type") .and_then(|t| t.as_str()) .map(|t| t == "ISBN_13" || t == "ISBN_10") .unwrap_or(false) }) .and_then(|id| id.get("identifier").and_then(|i| i.as_str())) }) .map(String::from); let summary = volume_info .get("description") .and_then(|d| d.as_str()) .map(String::from); let cover_url = volume_info .get("imageLinks") .and_then(|il| il.get("thumbnail").or_else(|| il.get("smallThumbnail"))) .and_then(|u| u.as_str()) .map(|s| s.replace("http://", "https://")); let page_count = volume_info .get("pageCount") .and_then(|p| p.as_i64()) .map(|p| p as i32); let language = volume_info .get("language") .and_then(|l| l.as_str()) .map(String::from); let publish_date = volume_info .get("publishedDate") .and_then(|d| d.as_str()) .map(String::from); let google_id = item .get("id") .and_then(|id| id.as_str()) .unwrap_or("") .to_string(); let volume_number = extract_volume_number(&title); BookCandidate { external_book_id: google_id, title, volume_number, authors, isbn, summary, cover_url, page_count, language, publish_date, metadata_json: serde_json::json!({}), } } fn extract_series_name(title: &str) -> String { // Remove trailing volume indicators like "Vol. 1", "Tome 2", "#3", "- Volume 1" let re_patterns = [ r"(?i)\s*[-–—]\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$", r"(?i)\s*,?\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$", r"\s*\(\d+\)\s*$", r"\s+\d+\s*$", ]; let mut result = title.to_string(); for pattern in &re_patterns { if let Ok(re) = regex::Regex::new(pattern) { let cleaned = re.replace(&result, "").to_string(); if !cleaned.is_empty() { result = cleaned; break; } } } result.trim().to_string() } fn extract_volume_number(title: &str) -> Option { let patterns = [ r"(?i)(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*(\d+)", r"\((\d+)\)\s*$", r"\b(\d+)\s*$", ]; for pattern in &patterns { if let Ok(re) = regex::Regex::new(pattern) { if let Some(caps) = re.captures(title) { if let Some(num) = caps.get(1).and_then(|m| m.as_str().parse::().ok()) { return Some(num); } } } } None } fn extract_year(date: &str) -> Option { date.get(..4).and_then(|s| s.parse::().ok()) } fn compute_confidence(title: &str, query: &str) -> f32 { let title_lower = title.to_lowercase(); if title_lower == query { 1.0 } else if title_lower.starts_with(query) || query.starts_with(&title_lower) { 0.8 } else if title_lower.contains(query) || query.contains(&title_lower) { 0.7 } else { // Simple character overlap ratio let common: usize = query .chars() .filter(|c| title_lower.contains(*c)) .count(); let max_len = query.len().max(title_lower.len()).max(1); (common as f32 / max_len as f32).clamp(0.1, 0.6) } } fn urlencoded(s: &str) -> String { let mut result = String::new(); for byte in s.bytes() { match byte { b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { result.push(byte as char); } _ => { result.push_str(&format!("%{:02X}", byte)); } } } result } struct SeriesCandidateBuilder { title: String, authors: Vec, description: Option, publishers: Vec, start_year: Option, volume_count: i32, cover_url: Option, external_id: String, external_url: Option, metadata_json: serde_json::Value, }