feat: add external metadata sync system with multiple providers
Add a complete metadata synchronization system allowing users to search and sync series/book metadata from external providers (Google Books, Open Library, ComicVine, AniList, Bédéthèque). Each library can use a different provider. Matching requires manual approval with detailed sync reports showing what was updated or skipped (locked fields protection). Key changes: - DB migrations: external_metadata_links, external_book_metadata tables, library metadata_provider column, locked_fields, total_volumes, book metadata fields (summary, isbn, publish_date) - Rust API: MetadataProvider trait + 5 provider implementations, 7 metadata endpoints (search, match, approve, reject, links, missing, delete), sync report system, provider language preference support - Backoffice: MetadataSearchModal, ProviderIcon, SafeHtml components, settings UI for provider/language config, enriched book detail page, edit forms with locked fields support, API proxy routes - OpenAPI/Swagger documentation for all new endpoints and schemas Closes #3 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,7 @@ stripstream-core = { path = "../../crates/core" }
|
||||
parsers = { path = "../../crates/parsers" }
|
||||
rand.workspace = true
|
||||
tokio-stream = "0.1"
|
||||
regex = "1"
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
@@ -33,3 +34,4 @@ uuid.workspace = true
|
||||
utoipa.workspace = true
|
||||
utoipa-swagger-ui = { workspace = true, features = ["axum"] }
|
||||
webp.workspace = true
|
||||
scraper.workspace = true
|
||||
|
||||
@@ -31,6 +31,7 @@ COPY crates/parsers/src crates/parsers/src
|
||||
RUN --mount=type=cache,target=/usr/local/cargo/registry \
|
||||
--mount=type=cache,target=/usr/local/cargo/git \
|
||||
--mount=type=cache,target=/app/target \
|
||||
touch apps/api/src/main.rs crates/core/src/lib.rs crates/parsers/src/lib.rs && \
|
||||
cargo build --release -p api && \
|
||||
cp /app/target/release/api /usr/local/bin/api
|
||||
|
||||
|
||||
@@ -84,6 +84,12 @@ pub struct BookDetails {
|
||||
pub reading_current_page: Option<i32>,
|
||||
#[schema(value_type = Option<String>)]
|
||||
pub reading_last_read_at: Option<DateTime<Utc>>,
|
||||
pub summary: Option<String>,
|
||||
pub isbn: Option<String>,
|
||||
pub publish_date: Option<String>,
|
||||
/// Fields locked from external metadata sync
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub locked_fields: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
/// List books with optional filtering and pagination
|
||||
@@ -249,7 +255,7 @@ pub async fn get_book(
|
||||
) -> Result<Json<BookDetails>, ApiError> {
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
SELECT b.id, b.library_id, b.kind, b.title, b.author, b.authors, b.series, b.volume, b.language, b.page_count, b.thumbnail_path,
|
||||
SELECT b.id, b.library_id, b.kind, b.title, b.author, b.authors, b.series, b.volume, b.language, b.page_count, b.thumbnail_path, b.locked_fields, b.summary, b.isbn, b.publish_date,
|
||||
bf.abs_path, bf.format, bf.parse_status,
|
||||
COALESCE(brp.status, 'unread') AS reading_status,
|
||||
brp.current_page AS reading_current_page,
|
||||
@@ -290,6 +296,10 @@ pub async fn get_book(
|
||||
reading_status: row.get("reading_status"),
|
||||
reading_current_page: row.get("reading_current_page"),
|
||||
reading_last_read_at: row.get("reading_last_read_at"),
|
||||
summary: row.get("summary"),
|
||||
isbn: row.get("isbn"),
|
||||
publish_date: row.get("publish_date"),
|
||||
locked_fields: Some(row.get::<serde_json::Value, _>("locked_fields")),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -961,6 +971,12 @@ pub struct UpdateBookRequest {
|
||||
pub series: Option<String>,
|
||||
pub volume: Option<i32>,
|
||||
pub language: Option<String>,
|
||||
pub summary: Option<String>,
|
||||
pub isbn: Option<String>,
|
||||
pub publish_date: Option<String>,
|
||||
/// Fields locked from external metadata sync
|
||||
#[serde(default)]
|
||||
pub locked_fields: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
/// Update metadata for a specific book
|
||||
@@ -996,12 +1012,18 @@ pub async fn update_book(
|
||||
let series = body.series.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string);
|
||||
let language = body.language.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string);
|
||||
|
||||
let summary = body.summary.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string);
|
||||
let isbn = body.isbn.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string);
|
||||
let publish_date = body.publish_date.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string);
|
||||
let locked_fields = body.locked_fields.clone().unwrap_or(serde_json::json!({}));
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
UPDATE books
|
||||
SET title = $2, author = $3, authors = $4, series = $5, volume = $6, language = $7, updated_at = NOW()
|
||||
SET title = $2, author = $3, authors = $4, series = $5, volume = $6, language = $7,
|
||||
summary = $8, isbn = $9, publish_date = $10, locked_fields = $11, updated_at = NOW()
|
||||
WHERE id = $1
|
||||
RETURNING id, library_id, kind, title, author, authors, series, volume, language, page_count, thumbnail_path,
|
||||
summary, isbn, publish_date,
|
||||
COALESCE((SELECT status FROM book_reading_progress WHERE book_id = $1), 'unread') AS reading_status,
|
||||
(SELECT current_page FROM book_reading_progress WHERE book_id = $1) AS reading_current_page,
|
||||
(SELECT last_read_at FROM book_reading_progress WHERE book_id = $1) AS reading_last_read_at
|
||||
@@ -1014,6 +1036,10 @@ pub async fn update_book(
|
||||
.bind(&series)
|
||||
.bind(body.volume)
|
||||
.bind(&language)
|
||||
.bind(&summary)
|
||||
.bind(&isbn)
|
||||
.bind(&publish_date)
|
||||
.bind(&locked_fields)
|
||||
.fetch_optional(&state.pool)
|
||||
.await?;
|
||||
|
||||
@@ -1038,6 +1064,10 @@ pub async fn update_book(
|
||||
reading_status: row.get("reading_status"),
|
||||
reading_current_page: row.get("reading_current_page"),
|
||||
reading_last_read_at: row.get("reading_last_read_at"),
|
||||
summary: row.get("summary"),
|
||||
isbn: row.get("isbn"),
|
||||
publish_date: row.get("publish_date"),
|
||||
locked_fields: Some(locked_fields),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -1048,9 +1078,12 @@ pub struct SeriesMetadata {
|
||||
pub description: Option<String>,
|
||||
pub publishers: Vec<String>,
|
||||
pub start_year: Option<i32>,
|
||||
pub total_volumes: Option<i32>,
|
||||
/// Convenience: author from first book (for pre-filling the per-book apply section)
|
||||
pub book_author: Option<String>,
|
||||
pub book_language: Option<String>,
|
||||
/// Fields locked from external metadata sync, e.g. {"authors": true, "description": true}
|
||||
pub locked_fields: serde_json::Value,
|
||||
}
|
||||
|
||||
/// Get metadata for a specific series
|
||||
@@ -1087,7 +1120,7 @@ pub async fn get_series_metadata(
|
||||
};
|
||||
|
||||
let meta_row = sqlx::query(
|
||||
"SELECT authors, description, publishers, start_year FROM series_metadata WHERE library_id = $1 AND name = $2"
|
||||
"SELECT authors, description, publishers, start_year, total_volumes, locked_fields FROM series_metadata WHERE library_id = $1 AND name = $2"
|
||||
)
|
||||
.bind(library_id)
|
||||
.bind(&name)
|
||||
@@ -1099,8 +1132,10 @@ pub async fn get_series_metadata(
|
||||
description: meta_row.as_ref().and_then(|r| r.get("description")),
|
||||
publishers: meta_row.as_ref().map(|r| r.get::<Vec<String>, _>("publishers")).unwrap_or_default(),
|
||||
start_year: meta_row.as_ref().and_then(|r| r.get("start_year")),
|
||||
total_volumes: meta_row.as_ref().and_then(|r| r.get("total_volumes")),
|
||||
book_author: books_row.as_ref().and_then(|r| r.get("author")),
|
||||
book_language: books_row.as_ref().and_then(|r| r.get("language")),
|
||||
locked_fields: meta_row.as_ref().map(|r| r.get::<serde_json::Value, _>("locked_fields")).unwrap_or(serde_json::json!({})),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -1122,6 +1157,10 @@ pub struct UpdateSeriesRequest {
|
||||
#[serde(default)]
|
||||
pub publishers: Vec<String>,
|
||||
pub start_year: Option<i32>,
|
||||
pub total_volumes: Option<i32>,
|
||||
/// Fields locked from external metadata sync
|
||||
#[serde(default)]
|
||||
pub locked_fields: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
@@ -1214,15 +1253,18 @@ pub async fn update_series(
|
||||
.map(|a| a.trim().to_string())
|
||||
.filter(|a| !a.is_empty())
|
||||
.collect();
|
||||
let locked_fields = body.locked_fields.clone().unwrap_or(serde_json::json!({}));
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO series_metadata (library_id, name, authors, description, publishers, start_year, updated_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, NOW())
|
||||
INSERT INTO series_metadata (library_id, name, authors, description, publishers, start_year, total_volumes, locked_fields, updated_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, NOW())
|
||||
ON CONFLICT (library_id, name) DO UPDATE
|
||||
SET authors = EXCLUDED.authors,
|
||||
description = EXCLUDED.description,
|
||||
publishers = EXCLUDED.publishers,
|
||||
start_year = EXCLUDED.start_year,
|
||||
total_volumes = EXCLUDED.total_volumes,
|
||||
locked_fields = EXCLUDED.locked_fields,
|
||||
updated_at = NOW()
|
||||
"#
|
||||
)
|
||||
@@ -1232,6 +1274,8 @@ pub async fn update_series(
|
||||
.bind(&description)
|
||||
.bind(&publishers)
|
||||
.bind(body.start_year)
|
||||
.bind(body.total_volumes)
|
||||
.bind(&locked_fields)
|
||||
.execute(&state.pool)
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ pub struct LibraryResponse {
|
||||
#[schema(value_type = Option<String>)]
|
||||
pub next_scan_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
pub watcher_enabled: bool,
|
||||
pub metadata_provider: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, ToSchema)]
|
||||
@@ -45,8 +46,8 @@ pub struct CreateLibraryRequest {
|
||||
)]
|
||||
pub async fn list_libraries(State(state): State<AppState>) -> Result<Json<Vec<LibraryResponse>>, ApiError> {
|
||||
let rows = sqlx::query(
|
||||
"SELECT l.id, l.name, l.root_path, l.enabled, l.monitor_enabled, l.scan_mode, l.next_scan_at, l.watcher_enabled,
|
||||
(SELECT COUNT(*) FROM books b WHERE b.library_id = l.id) as book_count
|
||||
"SELECT l.id, l.name, l.root_path, l.enabled, l.monitor_enabled, l.scan_mode, l.next_scan_at, l.watcher_enabled, l.metadata_provider,
|
||||
(SELECT COUNT(*) FROM books b WHERE b.library_id = l.id) as book_count
|
||||
FROM libraries l ORDER BY l.created_at DESC"
|
||||
)
|
||||
.fetch_all(&state.pool)
|
||||
@@ -64,6 +65,7 @@ pub async fn list_libraries(State(state): State<AppState>) -> Result<Json<Vec<Li
|
||||
scan_mode: row.get("scan_mode"),
|
||||
next_scan_at: row.get("next_scan_at"),
|
||||
watcher_enabled: row.get("watcher_enabled"),
|
||||
metadata_provider: row.get("metadata_provider"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -115,6 +117,7 @@ pub async fn create_library(
|
||||
scan_mode: "manual".to_string(),
|
||||
next_scan_at: None,
|
||||
watcher_enabled: false,
|
||||
metadata_provider: None,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -281,7 +284,7 @@ pub async fn update_monitoring(
|
||||
let watcher_enabled = input.watcher_enabled.unwrap_or(false);
|
||||
|
||||
let result = sqlx::query(
|
||||
"UPDATE libraries SET monitor_enabled = $2, scan_mode = $3, next_scan_at = $4, watcher_enabled = $5 WHERE id = $1 RETURNING id, name, root_path, enabled, monitor_enabled, scan_mode, next_scan_at, watcher_enabled"
|
||||
"UPDATE libraries SET monitor_enabled = $2, scan_mode = $3, next_scan_at = $4, watcher_enabled = $5 WHERE id = $1 RETURNING id, name, root_path, enabled, monitor_enabled, scan_mode, next_scan_at, watcher_enabled, metadata_provider"
|
||||
)
|
||||
.bind(library_id)
|
||||
.bind(input.monitor_enabled)
|
||||
@@ -310,5 +313,66 @@ pub async fn update_monitoring(
|
||||
scan_mode: row.get("scan_mode"),
|
||||
next_scan_at: row.get("next_scan_at"),
|
||||
watcher_enabled: row.get("watcher_enabled"),
|
||||
metadata_provider: row.get("metadata_provider"),
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Deserialize, ToSchema)]
pub struct UpdateMetadataProviderRequest {
    // Provider identifier (e.g. "anilist", "bedetheque"). `None` — or an empty
    // string, which the handler filters out — clears the library's provider.
    pub metadata_provider: Option<String>,
}
|
||||
|
||||
/// Update the metadata provider for a library
#[utoipa::path(
    patch,
    path = "/libraries/{id}/metadata-provider",
    tag = "libraries",
    params(
        ("id" = String, Path, description = "Library UUID"),
    ),
    request_body = UpdateMetadataProviderRequest,
    responses(
        (status = 200, body = LibraryResponse),
        (status = 404, description = "Library not found"),
        (status = 401, description = "Unauthorized"),
        (status = 403, description = "Forbidden - Admin scope required"),
    ),
    security(("Bearer" = []))
)]
pub async fn update_metadata_provider(
    State(state): State<AppState>,
    AxumPath(library_id): AxumPath<Uuid>,
    Json(input): Json<UpdateMetadataProviderRequest>,
) -> Result<Json<LibraryResponse>, ApiError> {
    // Treat an empty string the same as "no provider" so clients can clear
    // the setting by sending "".
    let provider = input.metadata_provider.as_deref().filter(|s| !s.is_empty());

    let result = sqlx::query(
        "UPDATE libraries SET metadata_provider = $2 WHERE id = $1 RETURNING id, name, root_path, enabled, monitor_enabled, scan_mode, next_scan_at, watcher_enabled, metadata_provider"
    )
    .bind(library_id)
    .bind(provider)
    .fetch_optional(&state.pool)
    .await?;

    // RETURNING yields no row when the UUID matches no library.
    let Some(row) = result else {
        return Err(ApiError::not_found("library not found"));
    };

    // book_count is not a column on libraries; compute it separately so the
    // response matches the shape returned by list_libraries.
    let book_count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM books WHERE library_id = $1")
        .bind(library_id)
        .fetch_one(&state.pool)
        .await?;

    Ok(Json(LibraryResponse {
        id: row.get("id"),
        name: row.get("name"),
        root_path: row.get("root_path"),
        enabled: row.get("enabled"),
        book_count,
        monitor_enabled: row.get("monitor_enabled"),
        scan_mode: row.get("scan_mode"),
        next_scan_at: row.get("next_scan_at"),
        watcher_enabled: row.get("watcher_enabled"),
        metadata_provider: row.get("metadata_provider"),
    }))
}
|
||||
|
||||
@@ -5,6 +5,8 @@ mod handlers;
|
||||
mod index_jobs;
|
||||
mod komga;
|
||||
mod libraries;
|
||||
mod metadata;
|
||||
mod metadata_providers;
|
||||
mod api_middleware;
|
||||
mod openapi;
|
||||
mod pages;
|
||||
@@ -83,6 +85,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
.route("/libraries/:id", delete(libraries::delete_library))
|
||||
.route("/libraries/:id/scan", axum::routing::post(libraries::scan_library))
|
||||
.route("/libraries/:id/monitoring", axum::routing::patch(libraries::update_monitoring))
|
||||
.route("/libraries/:id/metadata-provider", axum::routing::patch(libraries::update_metadata_provider))
|
||||
.route("/books/:id", axum::routing::patch(books::update_book))
|
||||
.route("/books/:id/convert", axum::routing::post(books::convert_book))
|
||||
.route("/libraries/:library_id/series/:name", axum::routing::patch(books::update_series))
|
||||
@@ -102,6 +105,13 @@ async fn main() -> anyhow::Result<()> {
|
||||
.route("/komga/sync", axum::routing::post(komga::sync_komga_read_books))
|
||||
.route("/komga/reports", get(komga::list_sync_reports))
|
||||
.route("/komga/reports/:id", get(komga::get_sync_report))
|
||||
.route("/metadata/search", axum::routing::post(metadata::search_metadata))
|
||||
.route("/metadata/match", axum::routing::post(metadata::create_metadata_match))
|
||||
.route("/metadata/approve/:id", axum::routing::post(metadata::approve_metadata))
|
||||
.route("/metadata/reject/:id", axum::routing::post(metadata::reject_metadata))
|
||||
.route("/metadata/links", get(metadata::get_metadata_links))
|
||||
.route("/metadata/missing/:id", get(metadata::get_missing_books))
|
||||
.route("/metadata/links/:id", delete(metadata::delete_metadata_link))
|
||||
.merge(settings::settings_routes())
|
||||
.route_layer(middleware::from_fn_with_state(
|
||||
state.clone(),
|
||||
|
||||
1010
apps/api/src/metadata.rs
Normal file
1010
apps/api/src/metadata.rs
Normal file
File diff suppressed because it is too large
Load Diff
322
apps/api/src/metadata_providers/anilist.rs
Normal file
322
apps/api/src/metadata_providers/anilist.rs
Normal file
@@ -0,0 +1,322 @@
|
||||
use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate};
|
||||
|
||||
/// Metadata provider backed by the public AniList GraphQL API (manga search).
pub struct AniListProvider;
|
||||
|
||||
impl MetadataProvider for AniListProvider {
    /// Stable provider identifier used in library configuration.
    fn name(&self) -> &str {
        "anilist"
    }

    /// Search AniList for manga series matching `query`.
    ///
    /// The inputs are cloned into the returned boxed future so it is `Send`
    /// and does not borrow from the caller's arguments.
    fn search_series(
        &self,
        query: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
    > {
        let query = query.to_string();
        let config = config.clone();
        Box::pin(async move { search_series_impl(&query, &config).await })
    }

    /// List volume candidates for the AniList media id `external_id`.
    fn get_series_books(
        &self,
        external_id: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
    > {
        let external_id = external_id.to_string();
        let config = config.clone();
        Box::pin(async move { get_series_books_impl(&external_id, &config).await })
    }
}
|
||||
|
||||
// GraphQL query: full-text manga search, capped at 20 results per page.
// (GraphQL ignores insignificant whitespace, so formatting here is cosmetic.)
const SEARCH_QUERY: &str = r#"
query ($search: String) {
  Page(perPage: 20) {
    media(search: $search, type: MANGA, sort: SEARCH_MATCH) {
      id
      title { romaji english native }
      description(asHtml: false)
      coverImage { large medium }
      startDate { year }
      volumes
      chapters
      staff { edges { node { name { full } } role } }
      siteUrl
      genres
    }
  }
}
"#;

// GraphQL query: fetch a single manga by AniList id (same field set as the
// search query above, so both responses can share the parsing helpers).
const DETAIL_QUERY: &str = r#"
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    id
    title { romaji english native }
    description(asHtml: false)
    coverImage { large medium }
    startDate { year }
    volumes
    chapters
    staff { edges { node { name { full } } role } }
    siteUrl
    genres
  }
}
"#;
||||
|
||||
async fn graphql_request(
|
||||
client: &reqwest::Client,
|
||||
query: &str,
|
||||
variables: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let resp = client
|
||||
.post("https://graphql.anilist.co")
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&serde_json::json!({
|
||||
"query": query,
|
||||
"variables": variables,
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("AniList request failed: {e}"))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
return Err(format!("AniList returned {status}: {text}"));
|
||||
}
|
||||
|
||||
resp.json()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to parse AniList response: {e}"))
|
||||
}
|
||||
|
||||
/// AniList implementation of series search.
///
/// Runs the GraphQL search, maps each `media` node to a `SeriesCandidate`
/// scored against the query, and returns the 10 best matches sorted by
/// descending confidence.
async fn search_series_impl(
    query: &str,
    _config: &ProviderConfig,
) -> Result<Vec<SeriesCandidate>, String> {
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(15))
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))?;

    let data = graphql_request(
        &client,
        SEARCH_QUERY,
        serde_json::json!({ "search": query }),
    )
    .await?;

    // A missing or unexpected response shape is treated as "no results",
    // not an error.
    let media = match data
        .get("data")
        .and_then(|d| d.get("Page"))
        .and_then(|p| p.get("media"))
        .and_then(|m| m.as_array())
    {
        Some(media) => media,
        None => return Ok(vec![]),
    };

    let query_lower = query.to_lowercase();

    let mut candidates: Vec<SeriesCandidate> = media
        .iter()
        .filter_map(|m| {
            // id and a usable title are mandatory; skip the node otherwise.
            let id = m.get("id").and_then(|id| id.as_i64())? as i64;
            let title_obj = m.get("title")?;
            // Prefer the English title, falling back to romaji.
            let title = title_obj
                .get("english")
                .and_then(|t| t.as_str())
                .or_else(|| title_obj.get("romaji").and_then(|t| t.as_str()))?
                .to_string();

            // Descriptions may contain literal backslash-n sequences; turn
            // them into real newlines and drop empty results.
            let description = m
                .get("description")
                .and_then(|d| d.as_str())
                .map(|d| d.replace("\\n", "\n").trim().to_string())
                .filter(|d| !d.is_empty());

            let cover_url = m
                .get("coverImage")
                .and_then(|ci| ci.get("large").or_else(|| ci.get("medium")))
                .and_then(|u| u.as_str())
                .map(String::from);

            let start_year = m
                .get("startDate")
                .and_then(|sd| sd.get("year"))
                .and_then(|y| y.as_i64())
                .map(|y| y as i32);

            let volumes = m
                .get("volumes")
                .and_then(|v| v.as_i64())
                .map(|v| v as i32);

            let site_url = m
                .get("siteUrl")
                .and_then(|u| u.as_str())
                .map(String::from);

            let authors = extract_authors(m);

            let confidence = compute_confidence(&title, &query_lower);

            Some(SeriesCandidate {
                external_id: id.to_string(),
                title,
                authors,
                description,
                publishers: vec![],
                start_year,
                total_volumes: volumes,
                cover_url,
                external_url: site_url,
                confidence,
                metadata_json: serde_json::json!({}),
            })
        })
        .collect();

    // Best matches first; keep only the top 10.
    candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
    candidates.truncate(10);
    Ok(candidates)
}
|
||||
|
||||
/// AniList implementation of per-series book listing.
///
/// AniList has no per-volume endpoint, so when a total volume count is known
/// one `BookCandidate` is synthesized per volume (summary/cover attached to
/// volume 1 only); otherwise a single entry represents the whole manga.
async fn get_series_books_impl(
    external_id: &str,
    _config: &ProviderConfig,
) -> Result<Vec<BookCandidate>, String> {
    // AniList media ids are numeric; reject anything else up front.
    let id: i64 = external_id
        .parse()
        .map_err(|_| "invalid AniList ID".to_string())?;

    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(15))
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))?;

    let data = graphql_request(
        &client,
        DETAIL_QUERY,
        serde_json::json!({ "id": id }),
    )
    .await?;

    // No Media node means an unknown id — return an empty list, not an error.
    let media = match data.get("data").and_then(|d| d.get("Media")) {
        Some(m) => m,
        None => return Ok(vec![]),
    };

    // Prefer the English title, falling back to romaji.
    let title_obj = media.get("title").cloned().unwrap_or(serde_json::json!({}));
    let title = title_obj
        .get("english")
        .and_then(|t| t.as_str())
        .or_else(|| title_obj.get("romaji").and_then(|t| t.as_str()))
        .unwrap_or("")
        .to_string();

    let volumes = media
        .get("volumes")
        .and_then(|v| v.as_i64())
        .map(|v| v as i32);

    let cover_url = media
        .get("coverImage")
        .and_then(|ci| ci.get("large").or_else(|| ci.get("medium")))
        .and_then(|u| u.as_str())
        .map(String::from);

    // Descriptions may contain literal backslash-n sequences.
    let description = media
        .get("description")
        .and_then(|d| d.as_str())
        .map(|d| d.replace("\\n", "\n").trim().to_string());

    let authors = extract_authors(media);

    // AniList doesn't have per-volume data — generate volume entries if volumes count is known
    let mut books = Vec::new();
    if let Some(total) = volumes {
        for vol in 1..=total {
            books.push(BookCandidate {
                external_book_id: format!("{}-vol-{}", external_id, vol),
                title: format!("{} Vol. {}", title, vol),
                volume_number: Some(vol),
                authors: authors.clone(),
                isbn: None,
                // The series-level summary and cover only make sense once,
                // so they are pinned to volume 1.
                summary: if vol == 1 { description.clone() } else { None },
                cover_url: if vol == 1 { cover_url.clone() } else { None },
                page_count: None,
                language: Some("ja".to_string()),
                publish_date: None,
                metadata_json: serde_json::json!({}),
            });
        }
    } else {
        // Single entry for the whole manga
        books.push(BookCandidate {
            external_book_id: external_id.to_string(),
            title,
            volume_number: Some(1),
            authors,
            isbn: None,
            summary: description,
            cover_url,
            page_count: None,
            language: Some("ja".to_string()),
            publish_date: None,
            metadata_json: serde_json::json!({}),
        });
    }

    Ok(books)
}
|
||||
|
||||
fn extract_authors(media: &serde_json::Value) -> Vec<String> {
|
||||
let mut authors = Vec::new();
|
||||
if let Some(edges) = media
|
||||
.get("staff")
|
||||
.and_then(|s| s.get("edges"))
|
||||
.and_then(|e| e.as_array())
|
||||
{
|
||||
for edge in edges {
|
||||
let role = edge
|
||||
.get("role")
|
||||
.and_then(|r| r.as_str())
|
||||
.unwrap_or("");
|
||||
let role_lower = role.to_lowercase();
|
||||
if role_lower.contains("story") || role_lower.contains("art") || role_lower.contains("original") {
|
||||
if let Some(name) = edge
|
||||
.get("node")
|
||||
.and_then(|n| n.get("name"))
|
||||
.and_then(|n| n.get("full"))
|
||||
.and_then(|f| f.as_str())
|
||||
{
|
||||
if !authors.contains(&name.to_string()) {
|
||||
authors.push(name.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
authors
|
||||
}
|
||||
|
||||
/// Heuristic match confidence between a candidate title and the search query.
///
/// `query` is expected to be lowercased by the caller. Returns 1.0 for an
/// exact (case-insensitive) match, 0.8 for a prefix relationship in either
/// direction, 0.7 for a substring relationship, and otherwise a weak
/// character-overlap ratio clamped to [0.1, 0.6].
fn compute_confidence(title: &str, query: &str) -> f32 {
    let title_lower = title.to_lowercase();
    if title_lower == query {
        1.0
    } else if title_lower.starts_with(query) || query.starts_with(&title_lower) {
        0.8
    } else if title_lower.contains(query) || query.contains(&title_lower) {
        0.7
    } else {
        // Fallback: fraction of query characters that appear anywhere in the
        // title. Count chars for the denominator too — `str::len()` is bytes,
        // which skewed the ratio for multi-byte UTF-8 (e.g. Japanese titles)
        // since the numerator counts chars.
        let common: usize = query.chars().filter(|c| title_lower.contains(*c)).count();
        let max_len = query.chars().count().max(title_lower.chars().count()).max(1);
        (common as f32 / max_len as f32).clamp(0.1, 0.6)
    }
}
|
||||
576
apps/api/src/metadata_providers/bedetheque.rs
Normal file
576
apps/api/src/metadata_providers/bedetheque.rs
Normal file
@@ -0,0 +1,576 @@
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate};
|
||||
|
||||
/// Metadata provider that scrapes bedetheque.com (French comics database).
pub struct BedethequeProvider;
|
||||
|
||||
impl MetadataProvider for BedethequeProvider {
    /// Stable provider identifier used in library configuration.
    fn name(&self) -> &str {
        "bedetheque"
    }

    /// Search Bedetheque for series matching `query`.
    ///
    /// The inputs are cloned into the returned boxed future so it is `Send`
    /// and does not borrow from the caller's arguments.
    fn search_series(
        &self,
        query: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
    > {
        let query = query.to_string();
        let config = config.clone();
        Box::pin(async move { search_series_impl(&query, &config).await })
    }

    /// List album candidates for the Bedetheque series id `external_id`.
    fn get_series_books(
        &self,
        external_id: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
    > {
        let external_id = external_id.to_string();
        let config = config.clone();
        Box::pin(async move { get_series_books_impl(&external_id, &config).await })
    }
}
|
||||
|
||||
/// Build the HTTP client used for all Bedetheque requests.
///
/// Sends a browser-like User-Agent plus Accept/Accept-Language/Referer
/// defaults — NOTE(review): presumably required to avoid the site's bot
/// filtering (see the empty-<title> rate-limit check in search); confirm
/// before changing any header value.
fn build_client() -> Result<reqwest::Client, String> {
    reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(20))
        .user_agent("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0")
        .default_headers({
            let mut h = reqwest::header::HeaderMap::new();
            h.insert(
                reqwest::header::ACCEPT,
                "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
                    .parse()
                    .unwrap(),
            );
            h.insert(
                reqwest::header::ACCEPT_LANGUAGE,
                "fr-FR,fr;q=0.9,en;q=0.5".parse().unwrap(),
            );
            h.insert(reqwest::header::REFERER, "https://www.bedetheque.com/".parse().unwrap());
            h
        })
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))
}
|
||||
|
||||
/// Strip French diacritics so the string matches Bedetheque's ASCII-only URL
/// slugs; characters outside the mapped set pass through unchanged (accented
/// uppercase letters fold to lowercase ASCII, as in the site's slugs).
fn normalize_for_url(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        let mapped = match c {
            'é' | 'è' | 'ê' | 'ë' | 'É' | 'È' | 'Ê' | 'Ë' => 'e',
            'à' | 'â' | 'ä' | 'À' | 'Â' | 'Ä' => 'a',
            'ù' | 'û' | 'ü' | 'Ù' | 'Û' | 'Ü' => 'u',
            'ô' | 'ö' | 'Ô' | 'Ö' => 'o',
            'î' | 'ï' | 'Î' | 'Ï' => 'i',
            'ç' | 'Ç' => 'c',
            'ñ' | 'Ñ' => 'n',
            _ => c,
        };
        out.push(mapped);
    }
    out
}
|
||||
|
||||
/// Percent-encode a string for use in a URL query component.
///
/// RFC 3986 unreserved characters pass through, spaces become '+'
/// (form-encoding style), and every other byte is emitted as %XX.
fn urlencoded(s: &str) -> String {
    s.bytes()
        .map(|b| match b {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                (b as char).to_string()
            }
            b' ' => "+".to_string(),
            other => format!("%{:02X}", other),
        })
        .collect()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Search
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Bedetheque implementation of series search (HTML scraping).
///
/// Fetches the full-text search page, collects unique `/serie-…` links as
/// candidates scored against the query, then enriches the top 3 results by
/// fetching their individual series pages.
async fn search_series_impl(
    query: &str,
    _config: &ProviderConfig,
) -> Result<Vec<SeriesCandidate>, String> {
    let client = build_client()?;

    // Use the full-text search page
    let url = format!(
        "https://www.bedetheque.com/search/tout?RechTexte={}&RechWhere=0",
        urlencoded(&normalize_for_url(query))
    );

    let resp = client
        .get(&url)
        .send()
        .await
        .map_err(|e| format!("Bedetheque request failed: {e}"))?;

    if !resp.status().is_success() {
        let status = resp.status();
        return Err(format!("Bedetheque returned {status}"));
    }

    let html = resp
        .text()
        .await
        .map_err(|e| format!("Failed to read Bedetheque response: {e}"))?;

    // Detect IP blacklist
    // (heuristic: an empty <title> is taken as a rate-limit/block marker)
    if html.contains("<title></title>") || html.contains("<title> </title>") {
        return Err("Bedetheque: IP may be rate-limited, please retry later".to_string());
    }

    // Parse HTML in a block so the non-Send Html type is dropped before any .await
    let candidates = {
        let document = Html::parse_document(&html);
        let link_sel =
            Selector::parse("a[href*='/serie-']").map_err(|e| format!("selector error: {e}"))?;

        let query_lower = query.to_lowercase();
        let mut seen = std::collections::HashSet::new();
        let mut candidates = Vec::new();

        for el in document.select(&link_sel) {
            let href = match el.value().attr("href") {
                Some(h) => h.to_string(),
                None => continue,
            };

            let (series_id, _slug) = match parse_serie_href(&href) {
                Some(v) => v,
                None => continue,
            };

            // The same series may be linked several times on the results page;
            // keep only the first occurrence.
            if !seen.insert(series_id.clone()) {
                continue;
            }

            let title = el.text().collect::<String>().trim().to_string();
            if title.is_empty() {
                continue;
            }

            let confidence = compute_confidence(&title, &query_lower);
            // Cover thumbnails follow a predictable URL scheme keyed by id.
            let cover_url = format!(
                "https://www.bedetheque.com/cache/thb_series/PlancheS_{}.jpg",
                series_id
            );

            candidates.push(SeriesCandidate {
                external_id: series_id.clone(),
                title: title.clone(),
                authors: vec![],
                description: None,
                publishers: vec![],
                start_year: None,
                total_volumes: None,
                cover_url: Some(cover_url),
                external_url: Some(href),
                confidence,
                metadata_json: serde_json::json!({}),
            });
        }

        // Best matches first; keep only the top 10.
        candidates.sort_by(|a, b| {
            b.confidence
                .partial_cmp(&a.confidence)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        candidates.truncate(10);
        candidates
    }; // document is dropped here — safe to .await below

    // For the top candidates, fetch series details to enrich metadata
    // (limit to top 3 to avoid hammering the site)
    let mut enriched = Vec::new();
    for mut c in candidates {
        if enriched.len() < 3 {
            // Enrichment is best-effort: a failed detail fetch keeps the
            // bare candidate rather than failing the whole search.
            if let Ok(details) = fetch_series_details(&client, &c.external_id, c.external_url.as_deref()).await {
                if let Some(desc) = details.description {
                    c.description = Some(desc);
                }
                if !details.authors.is_empty() {
                    c.authors = details.authors;
                }
                if !details.publishers.is_empty() {
                    c.publishers = details.publishers;
                }
                if let Some(year) = details.start_year {
                    c.start_year = Some(year);
                }
                if let Some(count) = details.album_count {
                    c.total_volumes = Some(count);
                }
                c.metadata_json = serde_json::json!({
                    "description": c.description,
                    "authors": c.authors,
                    "publishers": c.publishers,
                    "start_year": c.start_year,
                });
            }
        }
        enriched.push(c);
    }

    Ok(enriched)
}
|
||||
|
||||
/// Parse serie URL to extract (id, slug)
|
||||
fn parse_serie_href(href: &str) -> Option<(String, String)> {
|
||||
// Patterns:
|
||||
// https://www.bedetheque.com/serie-3-BD-Blacksad.html
|
||||
// /serie-3-BD-Blacksad.html
|
||||
let re = regex::Regex::new(r"/serie-(\d+)-[A-Za-z]+-(.+?)(?:__\d+)?\.html").ok()?;
|
||||
let caps = re.captures(href)?;
|
||||
Some((caps[1].to_string(), caps[2].to_string()))
|
||||
}
|
||||
|
||||
/// Enrichment data scraped from a single Bédéthèque series page
/// by [`fetch_series_details`]. All fields are best-effort: a scrape
/// miss leaves the field `None`/empty rather than failing.
struct SeriesDetails {
    /// Series synopsis (from `#full-commentaire`, falling back to
    /// `span.infoedition`).
    description: Option<String>,
    /// Unique writer/artist names ("Scénario" + "Dessin" labels), sorted.
    authors: Vec<String>,
    /// Unique publisher names ("Editeur" label), sorted.
    publishers: Vec<String>,
    /// Year of the first "Dépot légal" date found in the page text.
    start_year: Option<i32>,
    /// Album count parsed from a "Tomes : N" marker in the page text.
    album_count: Option<i32>,
}
|
||||
|
||||
/// Fetch a Bédéthèque series page and scrape enrichment data
/// (description, authors, publishers, start year, album count).
///
/// When `series_url` is known, its `.html` suffix is rewritten to
/// `__10000.html` so that all albums land on a single page; otherwise a
/// URL is synthesized from the numeric `series_id` with a generic slug.
/// Returns `Err` only on network/HTTP failures; individual scraping
/// misses are soft and simply leave the corresponding field unset.
async fn fetch_series_details(
    client: &reqwest::Client,
    series_id: &str,
    series_url: Option<&str>,
) -> Result<SeriesDetails, String> {
    // Build URL — append __10000 to get all albums on one page
    let url = match series_url {
        Some(u) => {
            // Replace .html with __10000.html
            u.replace(".html", "__10000.html")
        }
        None => format!(
            "https://www.bedetheque.com/serie-{}-BD-Serie__10000.html",
            series_id
        ),
    };

    let resp = client
        .get(&url)
        .send()
        .await
        .map_err(|e| format!("Failed to fetch series page: {e}"))?;

    if !resp.status().is_success() {
        return Err(format!("Series page returned {}", resp.status()));
    }

    let html = resp
        .text()
        .await
        .map_err(|e| format!("Failed to read series page: {e}"))?;

    // NOTE: `Html` is not Send; safe here because nothing below awaits
    // after parsing.
    let doc = Html::parse_document(&html);
    let mut details = SeriesDetails {
        description: None,
        authors: vec![],
        publishers: vec![],
        start_year: None,
        album_count: None,
    };

    // Description: look for #full-commentaire or .serie-info
    if let Ok(sel) = Selector::parse("#full-commentaire") {
        if let Some(el) = doc.select(&sel).next() {
            let text = el.text().collect::<String>().trim().to_string();
            if !text.is_empty() {
                details.description = Some(text);
            }
        }
    }

    // Fallback description from span.infoedition
    if details.description.is_none() {
        if let Ok(sel) = Selector::parse("span.infoedition") {
            if let Some(el) = doc.select(&sel).next() {
                let text = el.text().collect::<String>().trim().to_string();
                if !text.is_empty() {
                    details.description = Some(text);
                }
            }
        }
    }

    // Extract authors and publishers from album info blocks
    if let Ok(sel) = Selector::parse(".infos li") {
        // Sets deduplicate across albums; results are sorted below so the
        // output is deterministic despite HashSet iteration order.
        let mut authors_set = std::collections::HashSet::new();
        let mut publishers_set = std::collections::HashSet::new();

        for li in doc.select(&sel) {
            let text = li.text().collect::<String>();
            let text = text.trim();

            if let Some(val) = extract_info_value(text, "Scénario") {
                for a in val.split(',').map(str::trim).filter(|s| !s.is_empty()) {
                    authors_set.insert(a.to_string());
                }
            }
            if let Some(val) = extract_info_value(text, "Dessin") {
                for a in val.split(',').map(str::trim).filter(|s| !s.is_empty()) {
                    authors_set.insert(a.to_string());
                }
            }
            if let Some(val) = extract_info_value(text, "Editeur") {
                for p in val.split(',').map(str::trim).filter(|s| !s.is_empty()) {
                    publishers_set.insert(p.to_string());
                }
            }
        }

        details.authors = authors_set.into_iter().collect();
        details.authors.sort();
        details.publishers = publishers_set.into_iter().collect();
        details.publishers.sort();
    }

    // Album count from serie-info text (e.g. "Tomes : 8")
    // The regexes below scan the full page text (not a specific element),
    // so they pick up the first occurrence anywhere on the page.
    let page_text = doc.root_element().text().collect::<String>();
    if let Ok(re) = regex::Regex::new(r"Tomes?\s*:\s*(\d+)") {
        if let Some(caps) = re.captures(&page_text) {
            if let Ok(n) = caps[1].parse::<i32>() {
                details.album_count = Some(n);
            }
        }
    }

    // Start year from first album date (Dépot légal)
    // Pattern tolerates accented/unaccented spellings; expects "MM/YYYY".
    if let Ok(re) = regex::Regex::new(r"[Dd][ée]p[ôo]t l[ée]gal\s*:\s*\d{2}/(\d{4})") {
        if let Some(caps) = re.captures(&page_text) {
            if let Ok(year) = caps[1].parse::<i32>() {
                details.start_year = Some(year);
            }
        }
    }

    Ok(details)
}
|
||||
|
||||
/// Extract the value following a `"Label :"` / `"Label:"` marker,
/// e.g. `"Scénario : Jean-Claude"` → `"Jean-Claude"`.
///
/// Both the label as given and its lowercased form are tried, each with
/// and without a space before the colon (the previous version omitted
/// the lowercase + no-space combination). Returns the trimmed remainder
/// of `text` after the first marker found, or `None` when no marker is
/// present or it is followed only by whitespace.
fn extract_info_value<'a>(text: &'a str, label: &str) -> Option<&'a str> {
    let lowercase_label = label.to_lowercase();
    for name in [label, lowercase_label.as_str()] {
        for sep in [" :", ":"] {
            let pat = format!("{name}{sep}");
            if let Some(pos) = text.find(&pat) {
                let val = text[pos + pat.len()..].trim();
                if !val.is_empty() {
                    return Some(val);
                }
            }
        }
    }
    None
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Get series books
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Fetch the album list for a Bédéthèque series (`external_id` is the
/// numeric series id) and convert each album block into a [`BookCandidate`].
///
/// Tries the canonical `serie-{id}-BD-Serie__10000.html` URL first and
/// falls back to `serie-{id}__10000.html` (the site redirects); the
/// `__10000` suffix forces all albums onto one page. Results are sorted
/// by volume number, with unnumbered albums pushed to the end (999).
async fn get_series_books_impl(
    external_id: &str,
    _config: &ProviderConfig,
) -> Result<Vec<BookCandidate>, String> {
    let client = build_client()?;

    // We need to find the series URL — try a direct fetch
    // external_id is the numeric series ID
    // We try to fetch the series page to get the album list
    let url = format!(
        "https://www.bedetheque.com/serie-{}-BD-Serie__10000.html",
        external_id
    );

    let resp = client
        .get(&url)
        .send()
        .await
        .map_err(|e| format!("Failed to fetch series: {e}"))?;

    // If the generic slug fails, try without the slug part (bedetheque redirects)
    let html = if resp.status().is_success() {
        resp.text().await.map_err(|e| format!("Failed to read: {e}"))?
    } else {
        // Try alternative URL pattern
        let alt_url = format!(
            "https://www.bedetheque.com/serie-{}__10000.html",
            external_id
        );
        let resp2 = client
            .get(&alt_url)
            .send()
            .await
            .map_err(|e| format!("Failed to fetch series (alt): {e}"))?;
        if !resp2.status().is_success() {
            return Err(format!("Series page not found for id {external_id}"));
        }
        resp2.text().await.map_err(|e| format!("Failed to read: {e}"))?
    };

    // An empty <title> is how the site responds when it has blacklisted
    // the caller's IP — surface that as a distinct error.
    if html.contains("<title></title>") {
        return Err("Bedetheque: IP may be rate-limited".to_string());
    }

    let doc = Html::parse_document(&html);
    let mut books = Vec::new();

    // Albums are in .album-main blocks
    let album_sel = Selector::parse(".album-main").map_err(|e| format!("selector: {e}"))?;

    for album_el in doc.select(&album_sel) {
        // Re-parse each album block as a fragment so selectors below are
        // scoped to this album only.
        let album_html = album_el.html();
        let album_doc = Html::parse_fragment(&album_html);

        // Title from .titre
        let title = select_text(&album_doc, ".titre")
            .or_else(|| {
                Selector::parse(".titre a")
                    .ok()
                    .and_then(|s| album_doc.select(&s).next())
                    .map(|el| el.text().collect::<String>().trim().to_string())
            })
            .unwrap_or_default();

        if title.is_empty() {
            continue;
        }

        // Volume number from title or .num span
        let volume_number = select_text(&album_doc, ".num")
            .and_then(|s| {
                s.trim_end_matches('.')
                    .trim()
                    .parse::<i32>()
                    .ok()
            })
            .or_else(|| extract_volume_from_title(&title));

        // Album URL
        let album_url = Selector::parse("a[href*='/BD-']")
            .ok()
            .and_then(|s| album_doc.select(&s).next())
            .and_then(|el| el.value().attr("href"))
            .map(String::from);

        // External book id from URL
        // NOTE(review): this regex is recompiled for every album — could
        // be hoisted out of the loop.
        let external_book_id = album_url
            .as_deref()
            .and_then(|u| {
                regex::Regex::new(r"-(\d+)\.html")
                    .ok()
                    .and_then(|re| re.captures(u))
                    .map(|c| c[1].to_string())
            })
            .unwrap_or_default();

        // Cover
        let cover_url = Selector::parse("img[src*='cache/thb_couv']")
            .ok()
            .and_then(|s| album_doc.select(&s).next())
            .and_then(|el| el.value().attr("src"))
            .map(|s| {
                if s.starts_with("http") {
                    s.to_string()
                } else {
                    format!("https://www.bedetheque.com{}", s)
                }
            });

        // Extract info fields
        let album_text = album_el.text().collect::<String>();
        let authors = extract_all_authors(&album_text);
        let isbn = extract_info_value(&album_text, "EAN/ISBN")
            .or_else(|| extract_info_value(&album_text, "ISBN"))
            .map(|s| s.trim().to_string());
        let page_count = extract_info_value(&album_text, "Planches")
            .and_then(|s| s.trim().parse::<i32>().ok());
        let publish_date = extract_info_value(&album_text, "Dépot légal")
            .or_else(|| extract_info_value(&album_text, "Depot legal"))
            .map(|s| s.trim().to_string());

        books.push(BookCandidate {
            external_book_id,
            title,
            volume_number,
            authors,
            isbn,
            summary: None,
            cover_url,
            page_count,
            // Bédéthèque catalogs French-language editions.
            language: Some("fr".to_string()),
            publish_date,
            metadata_json: serde_json::json!({}),
        });
    }

    // Unnumbered albums sort last.
    books.sort_by_key(|b| b.volume_number.unwrap_or(999));
    Ok(books)
}
|
||||
|
||||
fn select_text(doc: &Html, selector: &str) -> Option<String> {
|
||||
Selector::parse(selector)
|
||||
.ok()
|
||||
.and_then(|s| doc.select(&s).next())
|
||||
.map(|el| el.text().collect::<String>().trim().to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
}
|
||||
|
||||
fn extract_all_authors(text: &str) -> Vec<String> {
|
||||
let mut authors = Vec::new();
|
||||
for label in ["Scénario", "Scenario", "Dessin"] {
|
||||
if let Some(val) = extract_info_value(text, label) {
|
||||
for a in val.split(',').map(str::trim).filter(|s| !s.is_empty()) {
|
||||
if !authors.contains(&a.to_string()) {
|
||||
authors.push(a.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
authors
|
||||
}
|
||||
|
||||
fn extract_volume_from_title(title: &str) -> Option<i32> {
|
||||
let patterns = [
|
||||
r"(?i)(?:tome|t\.)\s*(\d+)",
|
||||
r"(?i)(?:vol(?:ume)?\.?)\s*(\d+)",
|
||||
r"#\s*(\d+)",
|
||||
];
|
||||
for pattern in &patterns {
|
||||
if let Ok(re) = regex::Regex::new(pattern) {
|
||||
if let Some(caps) = re.captures(title) {
|
||||
if let Ok(n) = caps[1].parse::<i32>() {
|
||||
return Some(n);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Score how well a candidate title matches the (already-lowercased)
/// query, in `[0.1, 1.0]`: exact match → 1.0, prefix relation in either
/// direction → 0.85, substring relation → 0.7, otherwise a weak
/// character-overlap ratio clamped to `[0.1, 0.6]`.
fn compute_confidence(title: &str, query: &str) -> f32 {
    let lowered = title.to_lowercase();
    if lowered == query {
        return 1.0;
    }
    if lowered.starts_with(query) || query.starts_with(&lowered) {
        return 0.85;
    }
    if lowered.contains(query) || query.contains(&lowered) {
        return 0.7;
    }
    // Fallback: fraction of the query's characters that appear anywhere
    // in the title, normalized by the longer byte-length.
    let overlap = query.chars().filter(|&c| lowered.contains(c)).count() as f32;
    let denom = query.len().max(lowered.len()).max(1) as f32;
    (overlap / denom).clamp(0.1, 0.6)
}
|
||||
267
apps/api/src/metadata_providers/comicvine.rs
Normal file
267
apps/api/src/metadata_providers/comicvine.rs
Normal file
@@ -0,0 +1,267 @@
|
||||
use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate};
|
||||
|
||||
pub struct ComicVineProvider;
|
||||
|
||||
impl MetadataProvider for ComicVineProvider {
|
||||
fn name(&self) -> &str {
|
||||
"comicvine"
|
||||
}
|
||||
|
||||
fn search_series(
|
||||
&self,
|
||||
query: &str,
|
||||
config: &ProviderConfig,
|
||||
) -> std::pin::Pin<
|
||||
Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
|
||||
> {
|
||||
let query = query.to_string();
|
||||
let config = config.clone();
|
||||
Box::pin(async move { search_series_impl(&query, &config).await })
|
||||
}
|
||||
|
||||
fn get_series_books(
|
||||
&self,
|
||||
external_id: &str,
|
||||
config: &ProviderConfig,
|
||||
) -> std::pin::Pin<
|
||||
Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
|
||||
> {
|
||||
let external_id = external_id.to_string();
|
||||
let config = config.clone();
|
||||
Box::pin(async move { get_series_books_impl(&external_id, &config).await })
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the HTTP client used for all ComicVine calls: 15-second
/// timeout and a stable User-Agent string. Returns a human-readable
/// error message if the client cannot be constructed.
fn build_client() -> Result<reqwest::Client, String> {
    let builder = reqwest::Client::builder()
        .user_agent("StripstreamLibrarian/1.0")
        .timeout(std::time::Duration::from_secs(15));
    builder
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))
}
|
||||
|
||||
/// Search ComicVine volumes matching `query` and return up to 10
/// [`SeriesCandidate`]s sorted by descending match confidence.
///
/// Requires a non-empty API key in `config`; errors with a user-facing
/// message otherwise. Network/HTTP/parse failures are returned as
/// `Err(String)`; an absent `results` array yields an empty Vec.
async fn search_series_impl(
    query: &str,
    config: &ProviderConfig,
) -> Result<Vec<SeriesCandidate>, String> {
    let api_key = config
        .api_key
        .as_deref()
        .filter(|k| !k.is_empty())
        .ok_or_else(|| "ComicVine requires an API key. Configure it in Settings > Integrations.".to_string())?;

    let client = build_client()?;

    let url = format!(
        "https://comicvine.gamespot.com/api/search/?api_key={}&format=json&resources=volume&query={}&limit=20",
        api_key,
        urlencoded(query)
    );

    let resp = client
        .get(&url)
        .send()
        .await
        .map_err(|e| format!("ComicVine request failed: {e}"))?;

    if !resp.status().is_success() {
        let status = resp.status();
        let text = resp.text().await.unwrap_or_default();
        return Err(format!("ComicVine returned {status}: {text}"));
    }

    let data: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse ComicVine response: {e}"))?;

    let results = match data.get("results").and_then(|r| r.as_array()) {
        Some(results) => results,
        None => return Ok(vec![]),
    };

    let query_lower = query.to_lowercase();

    // Volumes without a "name" or numeric "id" are skipped via `?`.
    let mut candidates: Vec<SeriesCandidate> = results
        .iter()
        .filter_map(|vol| {
            let name = vol.get("name").and_then(|n| n.as_str())?.to_string();
            // NOTE(review): `as i64` is a no-op here — as_i64() already
            // yields i64.
            let id = vol.get("id").and_then(|id| id.as_i64())? as i64;
            // ComicVine descriptions contain HTML markup.
            let description = vol
                .get("description")
                .and_then(|d| d.as_str())
                .map(|d| strip_html(d));
            let publisher = vol
                .get("publisher")
                .and_then(|p| p.get("name"))
                .and_then(|n| n.as_str())
                .map(String::from);
            // ComicVine serves start_year as a string, e.g. "1999".
            let start_year = vol
                .get("start_year")
                .and_then(|y| y.as_str())
                .and_then(|y| y.parse::<i32>().ok());
            let count_of_issues = vol
                .get("count_of_issues")
                .and_then(|c| c.as_i64())
                .map(|c| c as i32);
            let cover_url = vol
                .get("image")
                .and_then(|img| img.get("medium_url").or_else(|| img.get("small_url")))
                .and_then(|u| u.as_str())
                .map(String::from);
            let site_url = vol
                .get("site_detail_url")
                .and_then(|u| u.as_str())
                .map(String::from);

            let confidence = compute_confidence(&name, &query_lower);

            Some(SeriesCandidate {
                external_id: id.to_string(),
                title: name,
                authors: vec![],
                description,
                // Option -> 0-or-1-element Vec.
                publishers: publisher.into_iter().collect(),
                start_year,
                total_volumes: count_of_issues,
                cover_url,
                external_url: site_url,
                confidence,
                metadata_json: serde_json::json!({}),
            })
        })
        .collect();

    candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
    candidates.truncate(10);
    Ok(candidates)
}
|
||||
|
||||
/// Fetch the issues of a ComicVine volume (`external_id` is the numeric
/// volume id) and map each one to a [`BookCandidate`].
///
/// Requests up to 100 issues pre-sorted by issue number ascending, so
/// no client-side sort is needed. Requires a non-empty API key.
async fn get_series_books_impl(
    external_id: &str,
    config: &ProviderConfig,
) -> Result<Vec<BookCandidate>, String> {
    let api_key = config
        .api_key
        .as_deref()
        .filter(|k| !k.is_empty())
        .ok_or_else(|| "ComicVine requires an API key".to_string())?;

    let client = build_client()?;

    let url = format!(
        "https://comicvine.gamespot.com/api/issues/?api_key={}&format=json&filter=volume:{}&sort=issue_number:asc&limit=100&field_list=id,name,issue_number,description,image,cover_date,site_detail_url",
        api_key,
        external_id
    );

    let resp = client
        .get(&url)
        .send()
        .await
        .map_err(|e| format!("ComicVine request failed: {e}"))?;

    if !resp.status().is_success() {
        let status = resp.status();
        let text = resp.text().await.unwrap_or_default();
        return Err(format!("ComicVine returned {status}: {text}"));
    }

    let data: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse ComicVine response: {e}"))?;

    let results = match data.get("results").and_then(|r| r.as_array()) {
        Some(results) => results,
        None => return Ok(vec![]),
    };

    let books: Vec<BookCandidate> = results
        .iter()
        .filter_map(|issue| {
            // NOTE(review): `as i64` is a no-op — as_i64() already yields i64.
            let id = issue.get("id").and_then(|id| id.as_i64())? as i64;
            let name = issue
                .get("name")
                .and_then(|n| n.as_str())
                .unwrap_or("")
                .to_string();
            // issue_number is a string; parsed via f64 then truncated, so
            // fractional issues like "1.5" become 1 — NOTE(review): confirm
            // that truncation is intended.
            let issue_number = issue
                .get("issue_number")
                .and_then(|n| n.as_str())
                .and_then(|n| n.parse::<f64>().ok())
                .map(|n| n as i32);
            let description = issue
                .get("description")
                .and_then(|d| d.as_str())
                .map(|d| strip_html(d));
            let cover_url = issue
                .get("image")
                .and_then(|img| img.get("medium_url").or_else(|| img.get("small_url")))
                .and_then(|u| u.as_str())
                .map(String::from);
            let cover_date = issue
                .get("cover_date")
                .and_then(|d| d.as_str())
                .map(String::from);

            Some(BookCandidate {
                external_book_id: id.to_string(),
                title: name,
                volume_number: issue_number,
                authors: vec![],
                isbn: None,
                summary: description,
                cover_url,
                page_count: None,
                language: None,
                publish_date: cover_date,
                metadata_json: serde_json::json!({}),
            })
        })
        .collect();

    Ok(books)
}
|
||||
|
||||
/// Remove HTML tags from `s`, keeping only text content, and trim the
/// result. Minimal state machine: everything between `<` and `>` is
/// dropped (the delimiters included). HTML entities are not decoded and
/// a stray `>` outside a tag is also dropped, matching the `<`/`>`
/// handling.
fn strip_html(s: &str) -> String {
    let mut in_tag = false;
    let text: String = s
        .chars()
        .filter(|&ch| match ch {
            '<' => {
                in_tag = true;
                false
            }
            '>' => {
                in_tag = false;
                false
            }
            _ => !in_tag,
        })
        .collect();
    text.trim().to_string()
}
|
||||
|
||||
/// Score how closely a ComicVine volume name matches the lowercased
/// search query: 1.0 for an exact match, 0.8 for a prefix relation in
/// either direction, 0.7 for a substring relation, otherwise a rough
/// character-overlap ratio clamped to `[0.1, 0.6]`.
fn compute_confidence(title: &str, query: &str) -> f32 {
    let lowered = title.to_lowercase();
    if lowered == query {
        return 1.0;
    }
    if lowered.starts_with(query) || query.starts_with(&lowered) {
        return 0.8;
    }
    if lowered.contains(query) || query.contains(&lowered) {
        return 0.7;
    }
    // Weak fallback: count of query characters occurring anywhere in
    // the title, normalized by the longer byte length.
    let shared = query.chars().filter(|&c| lowered.contains(c)).count() as f32;
    let longest = query.len().max(lowered.len()).max(1) as f32;
    (shared / longest).clamp(0.1, 0.6)
}
|
||||
|
||||
/// Percent-encode `s` for use in a URL query component (RFC 3986).
///
/// Unreserved characters (`A–Z a–z 0–9 - _ . ~`) pass through
/// unchanged; every other byte of the UTF-8 encoding becomes `%XX`
/// with uppercase hex digits.
fn urlencoded(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";
    // Worst case: every byte expands to three characters.
    let mut out = String::with_capacity(s.len() * 3);
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(byte as char);
            }
            _ => {
                // Push the escape directly instead of allocating a
                // temporary String via `format!` for each byte.
                out.push('%');
                out.push(HEX[(byte >> 4) as usize] as char);
                out.push(HEX[(byte & 0x0F) as usize] as char);
            }
        }
    }
    out
}
|
||||
472
apps/api/src/metadata_providers/google_books.rs
Normal file
472
apps/api/src/metadata_providers/google_books.rs
Normal file
@@ -0,0 +1,472 @@
|
||||
use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate};
|
||||
|
||||
pub struct GoogleBooksProvider;
|
||||
|
||||
impl MetadataProvider for GoogleBooksProvider {
|
||||
fn name(&self) -> &str {
|
||||
"google_books"
|
||||
}
|
||||
|
||||
fn search_series(
|
||||
&self,
|
||||
query: &str,
|
||||
config: &ProviderConfig,
|
||||
) -> std::pin::Pin<
|
||||
Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
|
||||
> {
|
||||
let query = query.to_string();
|
||||
let config = config.clone();
|
||||
Box::pin(async move { search_series_impl(&query, &config).await })
|
||||
}
|
||||
|
||||
fn get_series_books(
|
||||
&self,
|
||||
external_id: &str,
|
||||
config: &ProviderConfig,
|
||||
) -> std::pin::Pin<
|
||||
Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
|
||||
> {
|
||||
let external_id = external_id.to_string();
|
||||
let config = config.clone();
|
||||
Box::pin(async move { get_series_books_impl(&external_id, &config).await })
|
||||
}
|
||||
}
|
||||
|
||||
/// Search Google Books for `query` (title search, restricted to the
/// configured language) and aggregate individual volumes into up to 10
/// [`SeriesCandidate`]s, grouped by inferred series name.
///
/// Grouping key: the volume's `seriesInfo.title` when present, else a
/// heuristic strip of trailing volume markers from the title. Authors
/// and publishers are merged across grouped volumes; the earliest
/// publication year becomes `start_year`.
async fn search_series_impl(
    query: &str,
    config: &ProviderConfig,
) -> Result<Vec<SeriesCandidate>, String> {
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(15))
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))?;

    let search_query = format!("intitle:{}", query);
    let mut url = format!(
        "https://www.googleapis.com/books/v1/volumes?q={}&maxResults=20&printType=books&langRestrict={}",
        urlencoded(&search_query),
        urlencoded(&config.language),
    );
    // API key is optional for this endpoint; appended when configured.
    if let Some(ref key) = config.api_key {
        url.push_str(&format!("&key={}", key));
    }

    let resp = client
        .get(&url)
        .send()
        .await
        .map_err(|e| format!("Google Books request failed: {e}"))?;

    if !resp.status().is_success() {
        let status = resp.status();
        let text = resp.text().await.unwrap_or_default();
        return Err(format!("Google Books returned {status}: {text}"));
    }

    let data: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse Google Books response: {e}"))?;

    let items = match data.get("items").and_then(|i| i.as_array()) {
        Some(items) => items,
        None => return Ok(vec![]),
    };

    // Group volumes by series name to produce series candidates
    let query_lower = query.to_lowercase();
    let mut series_map: std::collections::HashMap<String, SeriesCandidateBuilder> =
        std::collections::HashMap::new();

    for item in items {
        let volume_info = match item.get("volumeInfo") {
            Some(vi) => vi,
            None => continue,
        };

        let title = volume_info
            .get("title")
            .and_then(|t| t.as_str())
            .unwrap_or("")
            .to_string();
        let authors: Vec<String> = volume_info
            .get("authors")
            .and_then(|a| a.as_array())
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| v.as_str().map(String::from))
                    .collect()
            })
            .unwrap_or_default();
        let publisher = volume_info
            .get("publisher")
            .and_then(|p| p.as_str())
            .map(String::from);
        let published_date = volume_info
            .get("publishedDate")
            .and_then(|d| d.as_str())
            .map(String::from);
        let description = volume_info
            .get("description")
            .and_then(|d| d.as_str())
            .map(String::from);

        // Extract series info from title or seriesInfo
        let series_name = volume_info
            .get("seriesInfo")
            .and_then(|si| si.get("title"))
            .and_then(|t| t.as_str())
            .map(String::from)
            .unwrap_or_else(|| extract_series_name(&title));

        let cover_url = volume_info
            .get("imageLinks")
            .and_then(|il| {
                il.get("thumbnail")
                    .or_else(|| il.get("smallThumbnail"))
            })
            .and_then(|u| u.as_str())
            // Google serves http:// thumbnail links; force https.
            .map(|s| s.replace("http://", "https://"));

        let google_id = item
            .get("id")
            .and_then(|id| id.as_str())
            .unwrap_or("")
            .to_string();

        // The builder's external_id is the FIRST grouped volume's Google id.
        let entry = series_map
            .entry(series_name.clone())
            .or_insert_with(|| SeriesCandidateBuilder {
                title: series_name.clone(),
                authors: vec![],
                description: None,
                publishers: vec![],
                start_year: None,
                volume_count: 0,
                cover_url: None,
                external_id: google_id.clone(),
                external_url: None,
                metadata_json: serde_json::json!({}),
            });

        entry.volume_count += 1;

        // Merge authors
        for a in &authors {
            if !entry.authors.contains(a) {
                entry.authors.push(a.clone());
            }
        }

        // Set description if not yet set
        if entry.description.is_none() {
            entry.description = description;
        }

        // Merge publisher
        if let Some(ref pub_name) = publisher {
            if !entry.publishers.contains(pub_name) {
                entry.publishers.push(pub_name.clone());
            }
        }

        // Extract year
        // Keep the earliest year seen across the grouped volumes.
        if let Some(ref date) = published_date {
            if let Some(year) = extract_year(date) {
                if entry.start_year.is_none() || entry.start_year.unwrap() > year {
                    entry.start_year = Some(year);
                }
            }
        }

        if entry.cover_url.is_none() {
            entry.cover_url = cover_url;
        }

        // NOTE(review): this overwrites external_url on every grouped
        // volume, so it ends up pointing at the LAST volume while
        // external_id keeps the first — confirm this asymmetry is intended.
        entry.external_url = Some(format!(
            "https://books.google.com/books?id={}",
            google_id
        ));
    }

    let mut candidates: Vec<SeriesCandidate> = series_map
        .into_values()
        .map(|b| {
            let confidence = compute_confidence(&b.title, &query_lower);
            SeriesCandidate {
                external_id: b.external_id,
                title: b.title,
                authors: b.authors,
                description: b.description,
                publishers: b.publishers,
                start_year: b.start_year,
                // A single hit is treated as "unknown volume count".
                total_volumes: if b.volume_count > 1 {
                    Some(b.volume_count)
                } else {
                    None
                },
                cover_url: b.cover_url,
                external_url: b.external_url,
                confidence,
                metadata_json: b.metadata_json,
            }
        })
        .collect();

    candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
    candidates.truncate(10);

    Ok(candidates)
}
|
||||
|
||||
/// List the books of a Google Books "series": fetch the anchor volume
/// (`external_id` is a Google volume id), derive a series name from its
/// title, then search for sibling volumes with that name.
///
/// Degrades gracefully: if the follow-up search fails or returns no
/// items, the anchor volume alone is returned as a single candidate.
/// Results are sorted by extracted volume number (unnumbered → 999).
async fn get_series_books_impl(
    external_id: &str,
    config: &ProviderConfig,
) -> Result<Vec<BookCandidate>, String> {
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(15))
        .build()
        .map_err(|e| format!("failed to build HTTP client: {e}"))?;

    // First fetch the volume to get its series info
    let mut url = format!(
        "https://www.googleapis.com/books/v1/volumes/{}",
        external_id
    );
    if let Some(ref key) = config.api_key {
        url.push_str(&format!("?key={}", key));
    }

    let resp = client
        .get(&url)
        .send()
        .await
        .map_err(|e| format!("Google Books request failed: {e}"))?;

    if !resp.status().is_success() {
        let status = resp.status();
        let text = resp.text().await.unwrap_or_default();
        return Err(format!("Google Books returned {status}: {text}"));
    }

    let volume: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse Google Books response: {e}"))?;

    let volume_info = volume.get("volumeInfo").cloned().unwrap_or(serde_json::json!({}));
    let title = volume_info
        .get("title")
        .and_then(|t| t.as_str())
        .unwrap_or("");

    // Search for more volumes in this series
    let series_name = extract_series_name(title);
    let search_query = format!("intitle:{}", series_name);
    let mut search_url = format!(
        "https://www.googleapis.com/books/v1/volumes?q={}&maxResults=40&printType=books&langRestrict={}",
        urlencoded(&search_query),
        urlencoded(&config.language),
    );
    if let Some(ref key) = config.api_key {
        search_url.push_str(&format!("&key={}", key));
    }

    let resp = client
        .get(&search_url)
        .send()
        .await
        .map_err(|e| format!("Google Books search failed: {e}"))?;

    if !resp.status().is_success() {
        // Return just the single volume as a book
        return Ok(vec![volume_to_book_candidate(&volume)]);
    }

    let data: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse search response: {e}"))?;

    let items = match data.get("items").and_then(|i| i.as_array()) {
        Some(items) => items,
        None => return Ok(vec![volume_to_book_candidate(&volume)]),
    };

    let mut books: Vec<BookCandidate> = items
        .iter()
        .map(|item| volume_to_book_candidate(item))
        .collect();

    // Sort by volume number
    books.sort_by_key(|b| b.volume_number.unwrap_or(999));

    Ok(books)
}
|
||||
|
||||
/// Map a Google Books volume JSON object to a [`BookCandidate`].
///
/// Every field is best-effort: missing or mistyped JSON values become
/// `None`/empty rather than errors. The volume number is inferred from
/// the title text via [`extract_volume_number`].
fn volume_to_book_candidate(item: &serde_json::Value) -> BookCandidate {
    let volume_info = item.get("volumeInfo").cloned().unwrap_or(serde_json::json!({}));
    let title = volume_info
        .get("title")
        .and_then(|t| t.as_str())
        .unwrap_or("")
        .to_string();
    let authors: Vec<String> = volume_info
        .get("authors")
        .and_then(|a| a.as_array())
        .map(|arr| {
            arr.iter()
                .filter_map(|v| v.as_str().map(String::from))
                .collect()
        })
        .unwrap_or_default();
    // Takes the FIRST identifier that is either ISBN_13 or ISBN_10 in
    // array order (no preference for ISBN_13 over ISBN_10).
    let isbn = volume_info
        .get("industryIdentifiers")
        .and_then(|ids| ids.as_array())
        .and_then(|arr| {
            arr.iter()
                .find(|id| {
                    id.get("type")
                        .and_then(|t| t.as_str())
                        .map(|t| t == "ISBN_13" || t == "ISBN_10")
                        .unwrap_or(false)
                })
                .and_then(|id| id.get("identifier").and_then(|i| i.as_str()))
        })
        .map(String::from);
    let summary = volume_info
        .get("description")
        .and_then(|d| d.as_str())
        .map(String::from);
    let cover_url = volume_info
        .get("imageLinks")
        .and_then(|il| il.get("thumbnail").or_else(|| il.get("smallThumbnail")))
        .and_then(|u| u.as_str())
        // Google serves http:// thumbnail links; force https.
        .map(|s| s.replace("http://", "https://"));
    let page_count = volume_info
        .get("pageCount")
        .and_then(|p| p.as_i64())
        .map(|p| p as i32);
    let language = volume_info
        .get("language")
        .and_then(|l| l.as_str())
        .map(String::from);
    let publish_date = volume_info
        .get("publishedDate")
        .and_then(|d| d.as_str())
        .map(String::from);
    let google_id = item
        .get("id")
        .and_then(|id| id.as_str())
        .unwrap_or("")
        .to_string();
    let volume_number = extract_volume_number(&title);

    BookCandidate {
        external_book_id: google_id,
        title,
        volume_number,
        authors,
        isbn,
        summary,
        cover_url,
        page_count,
        language,
        publish_date,
        metadata_json: serde_json::json!({}),
    }
}
|
||||
|
||||
fn extract_series_name(title: &str) -> String {
|
||||
// Remove trailing volume indicators like "Vol. 1", "Tome 2", "#3", "- Volume 1"
|
||||
let re_patterns = [
|
||||
r"(?i)\s*[-–—]\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
|
||||
r"(?i)\s*,?\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
|
||||
r"\s*\(\d+\)\s*$",
|
||||
r"\s+\d+\s*$",
|
||||
];
|
||||
|
||||
let mut result = title.to_string();
|
||||
for pattern in &re_patterns {
|
||||
if let Ok(re) = regex::Regex::new(pattern) {
|
||||
let cleaned = re.replace(&result, "").to_string();
|
||||
if !cleaned.is_empty() {
|
||||
result = cleaned;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result.trim().to_string()
|
||||
}
|
||||
|
||||
fn extract_volume_number(title: &str) -> Option<i32> {
|
||||
let patterns = [
|
||||
r"(?i)(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*(\d+)",
|
||||
r"\((\d+)\)\s*$",
|
||||
r"\b(\d+)\s*$",
|
||||
];
|
||||
|
||||
for pattern in &patterns {
|
||||
if let Ok(re) = regex::Regex::new(pattern) {
|
||||
if let Some(caps) = re.captures(title) {
|
||||
if let Some(num) = caps.get(1).and_then(|m| m.as_str().parse::<i32>().ok()) {
|
||||
return Some(num);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Parse the leading 4 characters of a date string ("2019-03-01" → 2019).
/// Returns `None` for strings shorter than 4 bytes, strings whose 4th byte
/// is not a char boundary, or non-numeric prefixes.
fn extract_year(date: &str) -> Option<i32> {
    let prefix = date.get(..4)?;
    prefix.parse().ok()
}
|
||||
|
||||
/// Score how well `title` matches `query` in [0.1, 1.0].
///
/// `query` is expected to be lowercased by the caller. Exact match → 1.0,
/// prefix either way → 0.8, substring either way → 0.7; otherwise a rough
/// character-overlap ratio clamped to [0.1, 0.6].
fn compute_confidence(title: &str, query: &str) -> f32 {
    let title_lower = title.to_lowercase();
    if title_lower == query {
        1.0
    } else if title_lower.starts_with(query) || query.starts_with(&title_lower) {
        0.8
    } else if title_lower.contains(query) || query.contains(&title_lower) {
        0.7
    } else {
        // Simple character overlap ratio. Count chars on both sides of the
        // division: the previous code divided a char count by `len()` (bytes),
        // which skewed scores for non-ASCII titles/queries.
        let common = query.chars().filter(|c| title_lower.contains(*c)).count();
        let max_len = query
            .chars()
            .count()
            .max(title_lower.chars().count())
            .max(1);
        (common as f32 / max_len as f32).clamp(0.1, 0.6)
    }
}
|
||||
|
||||
/// Minimal percent-encoder for URL query values: keeps the RFC 3986
/// unreserved set (ALPHA / DIGIT / "-" / "_" / "." / "~") and hex-escapes
/// every other byte of the UTF-8 encoding.
fn urlencoded(s: &str) -> String {
    use std::fmt::Write;

    s.bytes().fold(String::new(), |mut out, b| {
        if b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_' | b'.' | b'~') {
            out.push(b as char);
        } else {
            // Writing to a String cannot fail; ignore the fmt::Result.
            let _ = write!(out, "%{:02X}", b);
        }
        out
    })
}
|
||||
|
||||
/// Intermediate accumulator used while grouping per-volume search hits that
/// share a series title; its aggregated fields are later turned into a
/// `SeriesCandidate`.
struct SeriesCandidateBuilder {
    title: String,
    authors: Vec<String>,
    description: Option<String>,
    publishers: Vec<String>,
    start_year: Option<i32>,
    /// Number of hits grouped under this series so far.
    volume_count: i32,
    cover_url: Option<String>,
    external_id: String,
    external_url: Option<String>,
    metadata_json: serde_json::Value,
}
|
||||
81
apps/api/src/metadata_providers/mod.rs
Normal file
81
apps/api/src/metadata_providers/mod.rs
Normal file
@@ -0,0 +1,81 @@
|
||||
pub mod anilist;
|
||||
pub mod bedetheque;
|
||||
pub mod comicvine;
|
||||
pub mod google_books;
|
||||
pub mod open_library;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Configuration passed to providers (API keys, etc.)
#[derive(Debug, Clone, Default)]
pub struct ProviderConfig {
    /// Optional API key forwarded to the provider; providers that need no
    /// authentication ignore it.
    pub api_key: Option<String>,
    /// Preferred language for metadata results (ISO 639-1: "en", "fr", "es"). Defaults to "en".
    pub language: String,
}
|
||||
|
||||
/// A candidate series returned by a provider search
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeriesCandidate {
    /// Provider-specific identifier, used later to fetch the series' books.
    pub external_id: String,
    pub title: String,
    pub authors: Vec<String>,
    pub description: Option<String>,
    pub publishers: Vec<String>,
    /// Year of first publication, when known.
    pub start_year: Option<i32>,
    /// Number of volumes in the series, when the provider reports one.
    pub total_volumes: Option<i32>,
    pub cover_url: Option<String>,
    /// Human-viewable page on the provider's site, when available.
    pub external_url: Option<String>,
    /// Relative match quality used for ranking results (higher is better).
    pub confidence: f32,
    /// Raw provider-specific payload preserved for later use.
    pub metadata_json: serde_json::Value,
}
|
||||
|
||||
/// A candidate book within a series
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BookCandidate {
    /// Provider-specific identifier for this particular book/edition.
    pub external_book_id: String,
    pub title: String,
    /// Position within the series, when it could be determined.
    pub volume_number: Option<i32>,
    pub authors: Vec<String>,
    /// ISBN-13 or ISBN-10, when the provider reports one.
    pub isbn: Option<String>,
    pub summary: Option<String>,
    pub cover_url: Option<String>,
    pub page_count: Option<i32>,
    /// Language code as reported by the provider; format varies by provider.
    pub language: Option<String>,
    /// Publication date as a string; granularity varies by provider
    /// (bare year or full date).
    pub publish_date: Option<String>,
    /// Raw provider-specific payload preserved for later use.
    pub metadata_json: serde_json::Value,
}
|
||||
|
||||
/// Trait that all metadata providers must implement
///
/// Methods return manually boxed futures (rather than `async fn`) so the
/// trait stays object-safe and can be used behind `Box<dyn MetadataProvider>`.
pub trait MetadataProvider: Send + Sync {
    /// Stable machine name of the provider (e.g. "open_library").
    #[allow(dead_code)]
    fn name(&self) -> &str;

    /// Search the provider for series matching the free-text `query`.
    /// Resolves to candidate series, or an error message string on failure.
    fn search_series(
        &self,
        query: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
    >;

    /// List the books of the series identified by the provider-specific
    /// `external_id`. Resolves to candidate books, or an error message string.
    fn get_series_books(
        &self,
        external_id: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
    >;
}
|
||||
|
||||
/// Factory function to get a provider by name
|
||||
pub fn get_provider(name: &str) -> Option<Box<dyn MetadataProvider>> {
|
||||
match name {
|
||||
"google_books" => Some(Box::new(google_books::GoogleBooksProvider)),
|
||||
"open_library" => Some(Box::new(open_library::OpenLibraryProvider)),
|
||||
"comicvine" => Some(Box::new(comicvine::ComicVineProvider)),
|
||||
"anilist" => Some(Box::new(anilist::AniListProvider)),
|
||||
"bedetheque" => Some(Box::new(bedetheque::BedethequeProvider)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
351
apps/api/src/metadata_providers/open_library.rs
Normal file
351
apps/api/src/metadata_providers/open_library.rs
Normal file
@@ -0,0 +1,351 @@
|
||||
use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate};
|
||||
|
||||
/// Metadata provider backed by the public Open Library search API.
pub struct OpenLibraryProvider;

impl MetadataProvider for OpenLibraryProvider {
    fn name(&self) -> &str {
        "open_library"
    }

    // Each trait method clones its borrowed arguments into owned values so
    // they can be moved into the boxed `async move` future.
    fn search_series(
        &self,
        query: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<SeriesCandidate>, String>> + Send + '_>,
    > {
        let query = query.to_string();
        let config = config.clone();
        Box::pin(async move { search_series_impl(&query, &config).await })
    }

    fn get_series_books(
        &self,
        external_id: &str,
        config: &ProviderConfig,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<Vec<BookCandidate>, String>> + Send + '_>,
    > {
        let external_id = external_id.to_string();
        let config = config.clone();
        Box::pin(async move { get_series_books_impl(&external_id, &config).await })
    }
}
|
||||
|
||||
async fn search_series_impl(
|
||||
query: &str,
|
||||
config: &ProviderConfig,
|
||||
) -> Result<Vec<SeriesCandidate>, String> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(15))
|
||||
.build()
|
||||
.map_err(|e| format!("failed to build HTTP client: {e}"))?;
|
||||
|
||||
// Open Library uses 3-letter language codes
|
||||
let ol_lang = match config.language.as_str() {
|
||||
"fr" => "fre",
|
||||
"es" => "spa",
|
||||
_ => "eng",
|
||||
};
|
||||
|
||||
let url = format!(
|
||||
"https://openlibrary.org/search.json?title={}&limit=20&language={}",
|
||||
urlencoded(query),
|
||||
ol_lang,
|
||||
);
|
||||
|
||||
let resp = client
|
||||
.get(&url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Open Library request failed: {e}"))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
return Err(format!("Open Library returned {status}: {text}"));
|
||||
}
|
||||
|
||||
let data: serde_json::Value = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to parse Open Library response: {e}"))?;
|
||||
|
||||
let docs = match data.get("docs").and_then(|d| d.as_array()) {
|
||||
Some(docs) => docs,
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let query_lower = query.to_lowercase();
|
||||
let mut series_map: std::collections::HashMap<String, SeriesCandidateBuilder> =
|
||||
std::collections::HashMap::new();
|
||||
|
||||
for doc in docs {
|
||||
let title = doc
|
||||
.get("title")
|
||||
.and_then(|t| t.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let authors: Vec<String> = doc
|
||||
.get("author_name")
|
||||
.and_then(|a| a.as_array())
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
||||
.unwrap_or_default();
|
||||
let publishers: Vec<String> = doc
|
||||
.get("publisher")
|
||||
.and_then(|a| a.as_array())
|
||||
.map(|arr| {
|
||||
let mut pubs: Vec<String> = arr.iter().filter_map(|v| v.as_str().map(String::from)).collect();
|
||||
pubs.truncate(3);
|
||||
pubs
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let first_publish_year = doc
|
||||
.get("first_publish_year")
|
||||
.and_then(|y| y.as_i64())
|
||||
.map(|y| y as i32);
|
||||
let cover_i = doc.get("cover_i").and_then(|c| c.as_i64());
|
||||
let cover_url = cover_i.map(|id| format!("https://covers.openlibrary.org/b/id/{}-M.jpg", id));
|
||||
let key = doc
|
||||
.get("key")
|
||||
.and_then(|k| k.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let series_name = extract_series_name(&title);
|
||||
|
||||
let entry = series_map
|
||||
.entry(series_name.clone())
|
||||
.or_insert_with(|| SeriesCandidateBuilder {
|
||||
title: series_name.clone(),
|
||||
authors: vec![],
|
||||
description: None,
|
||||
publishers: vec![],
|
||||
start_year: None,
|
||||
volume_count: 0,
|
||||
cover_url: None,
|
||||
external_id: key.clone(),
|
||||
external_url: if key.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(format!("https://openlibrary.org{}", key))
|
||||
},
|
||||
});
|
||||
|
||||
entry.volume_count += 1;
|
||||
|
||||
for a in &authors {
|
||||
if !entry.authors.contains(a) {
|
||||
entry.authors.push(a.clone());
|
||||
}
|
||||
}
|
||||
for p in &publishers {
|
||||
if !entry.publishers.contains(p) {
|
||||
entry.publishers.push(p.clone());
|
||||
}
|
||||
}
|
||||
if entry.start_year.is_none() || first_publish_year.map_or(false, |y| entry.start_year.unwrap() > y) {
|
||||
if first_publish_year.is_some() {
|
||||
entry.start_year = first_publish_year;
|
||||
}
|
||||
}
|
||||
if entry.cover_url.is_none() {
|
||||
entry.cover_url = cover_url;
|
||||
}
|
||||
}
|
||||
|
||||
let mut candidates: Vec<SeriesCandidate> = series_map
|
||||
.into_values()
|
||||
.map(|b| {
|
||||
let confidence = compute_confidence(&b.title, &query_lower);
|
||||
SeriesCandidate {
|
||||
external_id: b.external_id,
|
||||
title: b.title,
|
||||
authors: b.authors,
|
||||
description: b.description,
|
||||
publishers: b.publishers,
|
||||
start_year: b.start_year,
|
||||
total_volumes: if b.volume_count > 1 { Some(b.volume_count) } else { None },
|
||||
cover_url: b.cover_url,
|
||||
external_url: b.external_url,
|
||||
confidence,
|
||||
metadata_json: serde_json::json!({}),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
|
||||
candidates.truncate(10);
|
||||
Ok(candidates)
|
||||
}
|
||||
|
||||
async fn get_series_books_impl(
|
||||
external_id: &str,
|
||||
_config: &ProviderConfig,
|
||||
) -> Result<Vec<BookCandidate>, String> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(15))
|
||||
.build()
|
||||
.map_err(|e| format!("failed to build HTTP client: {e}"))?;
|
||||
|
||||
// Fetch the work to get its title for series search
|
||||
let url = format!("https://openlibrary.org{}.json", external_id);
|
||||
let resp = client.get(&url).send().await.map_err(|e| format!("Open Library request failed: {e}"))?;
|
||||
|
||||
let work: serde_json::Value = if resp.status().is_success() {
|
||||
resp.json().await.map_err(|e| format!("Failed to parse response: {e}"))?
|
||||
} else {
|
||||
serde_json::json!({})
|
||||
};
|
||||
|
||||
let title = work.get("title").and_then(|t| t.as_str()).unwrap_or("");
|
||||
let series_name = extract_series_name(title);
|
||||
|
||||
// Search for editions of this series
|
||||
let search_url = format!(
|
||||
"https://openlibrary.org/search.json?title={}&limit=40",
|
||||
urlencoded(&series_name)
|
||||
);
|
||||
let resp = client.get(&search_url).send().await.map_err(|e| format!("Open Library search failed: {e}"))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let data: serde_json::Value = resp.json().await.map_err(|e| format!("Failed to parse response: {e}"))?;
|
||||
let docs = match data.get("docs").and_then(|d| d.as_array()) {
|
||||
Some(docs) => docs,
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let mut books: Vec<BookCandidate> = docs
|
||||
.iter()
|
||||
.map(|doc| {
|
||||
let title = doc.get("title").and_then(|t| t.as_str()).unwrap_or("").to_string();
|
||||
let authors: Vec<String> = doc
|
||||
.get("author_name")
|
||||
.and_then(|a| a.as_array())
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
||||
.unwrap_or_default();
|
||||
let isbn = doc
|
||||
.get("isbn")
|
||||
.and_then(|a| a.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from);
|
||||
let page_count = doc
|
||||
.get("number_of_pages_median")
|
||||
.and_then(|n| n.as_i64())
|
||||
.map(|n| n as i32);
|
||||
let cover_i = doc.get("cover_i").and_then(|c| c.as_i64());
|
||||
let cover_url = cover_i.map(|id| format!("https://covers.openlibrary.org/b/id/{}-M.jpg", id));
|
||||
let language = doc
|
||||
.get("language")
|
||||
.and_then(|a| a.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from);
|
||||
let publish_date = doc
|
||||
.get("first_publish_year")
|
||||
.and_then(|y| y.as_i64())
|
||||
.map(|y| y.to_string());
|
||||
let key = doc.get("key").and_then(|k| k.as_str()).unwrap_or("").to_string();
|
||||
let volume_number = extract_volume_number(&title);
|
||||
|
||||
BookCandidate {
|
||||
external_book_id: key,
|
||||
title,
|
||||
volume_number,
|
||||
authors,
|
||||
isbn,
|
||||
summary: None,
|
||||
cover_url,
|
||||
page_count,
|
||||
language,
|
||||
publish_date,
|
||||
metadata_json: serde_json::json!({}),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
books.sort_by_key(|b| b.volume_number.unwrap_or(999));
|
||||
Ok(books)
|
||||
}
|
||||
|
||||
fn extract_series_name(title: &str) -> String {
|
||||
let re_patterns = [
|
||||
r"(?i)\s*[-–—]\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
|
||||
r"(?i)\s*,?\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$",
|
||||
r"\s*\(\d+\)\s*$",
|
||||
r"\s+\d+\s*$",
|
||||
];
|
||||
let mut result = title.to_string();
|
||||
for pattern in &re_patterns {
|
||||
if let Ok(re) = regex::Regex::new(pattern) {
|
||||
let cleaned = re.replace(&result, "").to_string();
|
||||
if !cleaned.is_empty() {
|
||||
result = cleaned;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
result.trim().to_string()
|
||||
}
|
||||
|
||||
fn extract_volume_number(title: &str) -> Option<i32> {
|
||||
let patterns = [
|
||||
r"(?i)(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*(\d+)",
|
||||
r"\((\d+)\)\s*$",
|
||||
r"\b(\d+)\s*$",
|
||||
];
|
||||
for pattern in &patterns {
|
||||
if let Ok(re) = regex::Regex::new(pattern) {
|
||||
if let Some(caps) = re.captures(title) {
|
||||
if let Some(num) = caps.get(1).and_then(|m| m.as_str().parse::<i32>().ok()) {
|
||||
return Some(num);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Score how well `title` matches `query` in [0.1, 1.0]; `query` is expected
/// to be lowercased by the caller. Exact match → 1.0, prefix either way →
/// 0.8, substring either way → 0.7, otherwise a character-overlap ratio
/// clamped to [0.1, 0.6].
fn compute_confidence(title: &str, query: &str) -> f32 {
    let title_lower = title.to_lowercase();
    if title_lower == query {
        1.0
    } else if title_lower.starts_with(query) || query.starts_with(&title_lower) {
        0.8
    } else if title_lower.contains(query) || query.contains(&title_lower) {
        0.7
    } else {
        // Count chars on both sides of the division: the previous code divided
        // a char count by `len()` (bytes), skewing ratios for non-ASCII input.
        let common = query.chars().filter(|c| title_lower.contains(*c)).count();
        let max_len = query
            .chars()
            .count()
            .max(title_lower.chars().count())
            .max(1);
        (common as f32 / max_len as f32).clamp(0.1, 0.6)
    }
}
|
||||
|
||||
/// Percent-encode a string for use as a URL query value: RFC 3986 unreserved
/// bytes (ALPHA / DIGIT / "-" / "_" / "." / "~") pass through, all other
/// UTF-8 bytes become "%XX" hex escapes.
fn urlencoded(s: &str) -> String {
    let mut encoded = String::with_capacity(s.len());
    for b in s.bytes() {
        let is_unreserved =
            b.is_ascii_alphanumeric() || b == b'-' || b == b'_' || b == b'.' || b == b'~';
        if is_unreserved {
            encoded.push(b as char);
        } else {
            encoded.push_str(&format!("%{:02X}", b));
        }
    }
    encoded
}
|
||||
|
||||
/// Intermediate accumulator used while grouping Open Library work hits that
/// share a normalized title; aggregated fields are later turned into a
/// `SeriesCandidate`.
struct SeriesCandidateBuilder {
    title: String,
    authors: Vec<String>,
    description: Option<String>,
    publishers: Vec<String>,
    start_year: Option<i32>,
    /// Number of hits grouped under this series so far.
    volume_count: i32,
    cover_url: Option<String>,
    external_id: String,
    external_url: Option<String>,
}
|
||||
@@ -46,6 +46,13 @@ use utoipa::OpenApi;
|
||||
crate::settings::clear_cache,
|
||||
crate::settings::get_cache_stats,
|
||||
crate::settings::get_thumbnail_stats,
|
||||
crate::metadata::search_metadata,
|
||||
crate::metadata::create_metadata_match,
|
||||
crate::metadata::approve_metadata,
|
||||
crate::metadata::reject_metadata,
|
||||
crate::metadata::get_metadata_links,
|
||||
crate::metadata::get_missing_books,
|
||||
crate::metadata::delete_metadata_link,
|
||||
),
|
||||
components(
|
||||
schemas(
|
||||
@@ -94,6 +101,18 @@ use utoipa::OpenApi;
|
||||
crate::stats::LibraryStats,
|
||||
crate::stats::TopSeries,
|
||||
crate::stats::MonthlyAdditions,
|
||||
crate::metadata::ApproveRequest,
|
||||
crate::metadata::ApproveResponse,
|
||||
crate::metadata::SyncReport,
|
||||
crate::metadata::SeriesSyncReport,
|
||||
crate::metadata::BookSyncReport,
|
||||
crate::metadata::FieldChange,
|
||||
crate::metadata::MetadataSearchRequest,
|
||||
crate::metadata::SeriesCandidateDto,
|
||||
crate::metadata::MetadataMatchRequest,
|
||||
crate::metadata::ExternalMetadataLinkDto,
|
||||
crate::metadata::MissingBooksDto,
|
||||
crate::metadata::MissingBookItem,
|
||||
ErrorResponse,
|
||||
)
|
||||
),
|
||||
|
||||
Reference in New Issue
Block a user