From c9ccf5cd90d54b9d7e33f3d259039a21f83e68ec Mon Sep 17 00:00:00 2001 From: Froidefond Julien Date: Wed, 18 Mar 2026 14:59:24 +0100 Subject: [PATCH] feat: add external metadata sync system with multiple providers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a complete metadata synchronization system allowing users to search and sync series/book metadata from external providers (Google Books, Open Library, ComicVine, AniList, Bédéthèque). Each library can use a different provider. Matching requires manual approval with detailed sync reports showing what was updated or skipped (locked fields protection). Key changes: - DB migrations: external_metadata_links, external_book_metadata tables, library metadata_provider column, locked_fields, total_volumes, book metadata fields (summary, isbn, publish_date) - Rust API: MetadataProvider trait + 5 provider implementations, 7 metadata endpoints (search, match, approve, reject, links, missing, delete), sync report system, provider language preference support - Backoffice: MetadataSearchModal, ProviderIcon, SafeHtml components, settings UI for provider/language config, enriched book detail page, edit forms with locked fields support, API proxy routes - OpenAPI/Swagger documentation for all new endpoints and schemas Closes #3 Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 303 +++++ Cargo.toml | 1 + apps/api/Cargo.toml | 2 + apps/api/Dockerfile | 1 + apps/api/src/books.rs | 54 +- apps/api/src/libraries.rs | 70 +- apps/api/src/main.rs | 10 + apps/api/src/metadata.rs | 1010 +++++++++++++++++ apps/api/src/metadata_providers/anilist.rs | 322 ++++++ apps/api/src/metadata_providers/bedetheque.rs | 576 ++++++++++ apps/api/src/metadata_providers/comicvine.rs | 267 +++++ .../src/metadata_providers/google_books.rs | 472 ++++++++ apps/api/src/metadata_providers/mod.rs | 81 ++ .../src/metadata_providers/open_library.rs | 351 ++++++ apps/api/src/openapi.rs | 19 + .../libraries/[id]/metadata-provider/route.ts | 20 + .../app/api/metadata/approve/route.ts | 17 + .../app/api/metadata/links/route.ts | 35 + .../app/api/metadata/match/route.ts | 16 + .../app/api/metadata/missing/route.ts | 17 + .../app/api/metadata/reject/route.ts | 15 + .../app/api/metadata/search/route.ts | 16 + apps/backoffice/app/books/[id]/page.tsx | 326 +++--- apps/backoffice/app/books/page.tsx | 3 + .../app/components/EditBookForm.tsx | 126 +- .../app/components/EditSeriesForm.tsx | 97 +- .../app/components/LibraryActions.tsx | 56 +- .../app/components/MetadataSearchModal.tsx | 671 +++++++++++ .../app/components/ProviderIcon.tsx | 120 ++ apps/backoffice/app/components/SafeHtml.tsx | 29 + .../app/libraries/[id]/series/[name]/page.tsx | 23 +- apps/backoffice/app/libraries/page.tsx | 1 + apps/backoffice/app/settings/SettingsPage.tsx | 171 +++ apps/backoffice/lib/api.ts | 146 +++ apps/backoffice/package-lock.json | 188 ++- apps/backoffice/package.json | 4 +- apps/indexer/Dockerfile | 1 + .../migrations/0028_add_external_metadata.sql | 41 + .../0029_add_library_metadata_provider.sql | 1 + infra/migrations/0030_add_locked_fields.sql | 6 + ...1_add_total_volumes_to_series_metadata.sql | 1 + .../0032_add_book_metadata_fields.sql | 4 + 42 files changed, 5492 insertions(+), 198 deletions(-) create mode 100644 apps/api/src/metadata.rs create mode 100644 apps/api/src/metadata_providers/anilist.rs create mode 100644 apps/api/src/metadata_providers/bedetheque.rs create mode 100644 apps/api/src/metadata_providers/comicvine.rs create mode 100644 apps/api/src/metadata_providers/google_books.rs create mode 100644 apps/api/src/metadata_providers/mod.rs create mode 100644 apps/api/src/metadata_providers/open_library.rs create mode 100644 apps/backoffice/app/api/libraries/[id]/metadata-provider/route.ts create mode 100644 apps/backoffice/app/api/metadata/approve/route.ts create mode 100644 apps/backoffice/app/api/metadata/links/route.ts create mode 100644 apps/backoffice/app/api/metadata/match/route.ts create mode 100644 apps/backoffice/app/api/metadata/missing/route.ts create mode 100644 apps/backoffice/app/api/metadata/reject/route.ts create mode 100644 apps/backoffice/app/api/metadata/search/route.ts create mode 100644 apps/backoffice/app/components/MetadataSearchModal.tsx create mode 100644 apps/backoffice/app/components/ProviderIcon.tsx create mode 100644 apps/backoffice/app/components/SafeHtml.tsx create mode 100644 infra/migrations/0028_add_external_metadata.sql create mode 100644 infra/migrations/0029_add_library_metadata_provider.sql create mode 100644 infra/migrations/0030_add_locked_fields.sql create mode 100644 infra/migrations/0031_add_total_volumes_to_series_metadata.sql create mode 100644 infra/migrations/0032_add_book_metadata_fields.sql diff --git a/Cargo.lock b/Cargo.lock index 8f08a73..a0c96a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,6 +19,19 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -65,7 +78,9 @@ dependencies = [ "lru", "parsers", "rand 0.8.5", + "regex", "reqwest", + "scraper", "serde", "serde_json", "sha2", @@ -463,6 +478,29 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "der" version = "0.7.10" @@ -483,6 +521,17 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_more" +version = "0.99.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "digest" version = "0.10.7" @@ -512,6 +561,21 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "dtoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + [[package]] name = "ecb" version = "0.1.2" @@ -521,6 +585,12 @@ dependencies = [ "cipher", ] +[[package]] +name = "ego-tree" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c6ba7d4eec39eaa9ab24d44a0e73a7949a1095a8b3f3abb11eddf27dbb56a53" + [[package]] name = "either" version = "1.15.0" @@ -629,6 +699,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.32" @@ -728,6 +808,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -738,6 +827,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -855,6 +953,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "html5ever" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" +dependencies = [ + "log", + "mac", + "markup5ever", + "match_token", +] + [[package]] name = "http" version = "1.4.0" @@ -1406,6 +1516,37 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "match_token" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "matchers" version = "0.2.0" @@ -1496,6 +1637,12 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c" +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nom" version = "8.0.0" @@ -1690,6 +1837,58 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -1793,6 +1992,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "prettyplease" version = "0.2.37" @@ -2230,6 +2435,41 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0e749d29b2064585327af5038a5a8eb73aeebad4a3472e83531a436563f7208" +dependencies = [ + "ahash", + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "precomputed-hash", + "selectors", + "tendril", +] + +[[package]] +name = "selectors" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.27" @@ -2302,6 +2542,15 @@ dependencies = [ "serde", ] +[[package]] +name = "servo_arc" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2365,6 +2614,12 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + [[package]] name = "slab" version = "0.4.12" @@ -2613,6 +2868,31 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "string_cache" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "stringprep" version = "0.1.5" @@ -2679,6 +2959,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "thiserror" version = "2.0.18" @@ -2991,6 +3282,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -3038,6 +3335,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf16string" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 9678070..e9c2286 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,3 +41,4 @@ walkdir = "2.5" webp = "0.3" utoipa = "4.0" utoipa-swagger-ui = "6.0" +scraper = "0.21" diff --git a/apps/api/Cargo.toml b/apps/api/Cargo.toml index 7983517..d487bb6 100644 --- a/apps/api/Cargo.toml +++ b/apps/api/Cargo.toml @@ -19,6 +19,7 @@ stripstream-core = { path = "../../crates/core" } parsers = { path = "../../crates/parsers" } rand.workspace = true tokio-stream = "0.1" +regex = "1" reqwest.workspace = true serde.workspace = true serde_json.workspace = true @@ -33,3 +34,4 @@ uuid.workspace = true utoipa.workspace = true utoipa-swagger-ui = { workspace = true, features = ["axum"] } webp.workspace = true +scraper.workspace = true diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile index 1338d5c..ec50b55 100644 --- a/apps/api/Dockerfile +++ b/apps/api/Dockerfile @@ -31,6 +31,7 @@ COPY crates/parsers/src crates/parsers/src RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/usr/local/cargo/git \ --mount=type=cache,target=/app/target \ + touch apps/api/src/main.rs crates/core/src/lib.rs crates/parsers/src/lib.rs && \ cargo build --release -p api && \ cp /app/target/release/api /usr/local/bin/api diff --git a/apps/api/src/books.rs b/apps/api/src/books.rs index 0e87b56..46b0109 100644 --- a/apps/api/src/books.rs +++ b/apps/api/src/books.rs @@ -84,6 +84,12 @@ pub struct BookDetails { pub reading_current_page: Option, #[schema(value_type = Option)] pub reading_last_read_at: Option>, + pub summary: Option, + pub isbn: Option, + pub publish_date: Option, + /// Fields locked from external metadata sync + #[serde(skip_serializing_if = "Option::is_none")] + pub locked_fields: Option, } /// List books with optional filtering and pagination @@ -249,7 +255,7 @@ pub async fn get_book( ) -> Result, ApiError> { let row = sqlx::query( r#" - SELECT b.id, b.library_id, b.kind, b.title, b.author, b.authors, b.series, b.volume, b.language, b.page_count, b.thumbnail_path, + SELECT b.id, b.library_id, b.kind, b.title, b.author, b.authors, b.series, b.volume, b.language, b.page_count, b.thumbnail_path, b.locked_fields, b.summary, b.isbn, b.publish_date, bf.abs_path, bf.format, bf.parse_status, COALESCE(brp.status, 'unread') AS reading_status, brp.current_page AS reading_current_page, @@ -290,6 +296,10 @@ pub async fn get_book( reading_status: row.get("reading_status"), reading_current_page: row.get("reading_current_page"), reading_last_read_at: row.get("reading_last_read_at"), + summary: row.get("summary"), + isbn: row.get("isbn"), + publish_date: row.get("publish_date"), + locked_fields: Some(row.get::("locked_fields")), })) } @@ -961,6 +971,12 @@ pub struct UpdateBookRequest { pub series: Option, pub volume: Option, pub language: Option, + pub summary: Option, + pub isbn: Option, + pub publish_date: Option, + /// Fields locked from external metadata sync + #[serde(default)] + pub locked_fields: Option, } /// Update metadata for a specific book @@ -996,12 +1012,18 @@ pub async fn update_book( let series = body.series.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string); let language = body.language.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string); + let summary = body.summary.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string); + let isbn = body.isbn.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string); + let publish_date = body.publish_date.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string); + let locked_fields = body.locked_fields.clone().unwrap_or(serde_json::json!({})); let row = sqlx::query( r#" UPDATE books - SET title = $2, author = $3, authors = $4, series = $5, volume = $6, language = $7, updated_at = NOW() + SET title = $2, author = $3, authors = $4, series = $5, volume = $6, language = $7, + summary = $8, isbn = $9, publish_date = $10, locked_fields = $11, updated_at = NOW() WHERE id = $1 RETURNING id, library_id, kind, title, author, authors, series, volume, language, page_count, thumbnail_path, + summary, isbn, publish_date, COALESCE((SELECT status FROM book_reading_progress WHERE book_id = $1), 'unread') AS reading_status, (SELECT current_page FROM book_reading_progress WHERE book_id = $1) AS reading_current_page, (SELECT last_read_at FROM book_reading_progress WHERE book_id = $1) AS reading_last_read_at @@ -1014,6 +1036,10 @@ pub async fn update_book( .bind(&series) .bind(body.volume) .bind(&language) + .bind(&summary) + .bind(&isbn) + .bind(&publish_date) + .bind(&locked_fields) .fetch_optional(&state.pool) .await?; @@ -1038,6 +1064,10 @@ pub async fn update_book( reading_status: row.get("reading_status"), reading_current_page: row.get("reading_current_page"), reading_last_read_at: row.get("reading_last_read_at"), + summary: row.get("summary"), + isbn: row.get("isbn"), + publish_date: row.get("publish_date"), + locked_fields: Some(locked_fields), })) } @@ -1048,9 +1078,12 @@ pub struct SeriesMetadata { pub description: Option, pub publishers: Vec, pub start_year: Option, + pub total_volumes: Option, /// Convenience: author from first book (for pre-filling the per-book apply section) pub book_author: Option, pub book_language: Option, + /// Fields locked from external metadata sync, e.g. {"authors": true, "description": true} + pub locked_fields: serde_json::Value, } /// Get metadata for a specific series @@ -1087,7 +1120,7 @@ pub async fn get_series_metadata( }; let meta_row = sqlx::query( - "SELECT authors, description, publishers, start_year FROM series_metadata WHERE library_id = $1 AND name = $2" + "SELECT authors, description, publishers, start_year, total_volumes, locked_fields FROM series_metadata WHERE library_id = $1 AND name = $2" ) .bind(library_id) .bind(&name) @@ -1099,8 +1132,10 @@ pub async fn get_series_metadata( description: meta_row.as_ref().and_then(|r| r.get("description")), publishers: meta_row.as_ref().map(|r| r.get::, _>("publishers")).unwrap_or_default(), start_year: meta_row.as_ref().and_then(|r| r.get("start_year")), + total_volumes: meta_row.as_ref().and_then(|r| r.get("total_volumes")), book_author: books_row.as_ref().and_then(|r| r.get("author")), book_language: books_row.as_ref().and_then(|r| r.get("language")), + locked_fields: meta_row.as_ref().map(|r| r.get::("locked_fields")).unwrap_or(serde_json::json!({})), })) } @@ -1122,6 +1157,10 @@ pub struct UpdateSeriesRequest { #[serde(default)] pub publishers: Vec, pub start_year: Option, + pub total_volumes: Option, + /// Fields locked from external metadata sync + #[serde(default)] + pub locked_fields: Option, } #[derive(Serialize, ToSchema)] @@ -1214,15 +1253,18 @@ pub async fn update_series( .map(|a| a.trim().to_string()) .filter(|a| !a.is_empty()) .collect(); + let locked_fields = body.locked_fields.clone().unwrap_or(serde_json::json!({})); sqlx::query( r#" - INSERT INTO series_metadata (library_id, name, authors, description, publishers, start_year, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, NOW()) + INSERT INTO series_metadata (library_id, name, authors, description, publishers, start_year, total_volumes, locked_fields, updated_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, NOW()) ON CONFLICT (library_id, name) DO UPDATE SET authors = EXCLUDED.authors, description = EXCLUDED.description, publishers = EXCLUDED.publishers, start_year = EXCLUDED.start_year, + total_volumes = EXCLUDED.total_volumes, + locked_fields = EXCLUDED.locked_fields, updated_at = NOW() "# ) @@ -1232,6 +1274,8 @@ pub async fn update_series( .bind(&description) .bind(&publishers) .bind(body.start_year) + .bind(body.total_volumes) + .bind(&locked_fields) .execute(&state.pool) .await?; diff --git a/apps/api/src/libraries.rs b/apps/api/src/libraries.rs index 8d59f52..c44bbf6 100644 --- a/apps/api/src/libraries.rs +++ b/apps/api/src/libraries.rs @@ -21,6 +21,7 @@ pub struct LibraryResponse { #[schema(value_type = Option)] pub next_scan_at: Option>, pub watcher_enabled: bool, + pub metadata_provider: Option, } #[derive(Deserialize, ToSchema)] @@ -45,8 +46,8 @@ pub struct CreateLibraryRequest { )] pub async fn list_libraries(State(state): State) -> Result>, ApiError> { let rows = sqlx::query( - "SELECT l.id, l.name, l.root_path, l.enabled, l.monitor_enabled, l.scan_mode, l.next_scan_at, l.watcher_enabled, - (SELECT COUNT(*) FROM books b WHERE b.library_id = l.id) as book_count + "SELECT l.id, l.name, l.root_path, l.enabled, l.monitor_enabled, l.scan_mode, l.next_scan_at, l.watcher_enabled, l.metadata_provider, + (SELECT COUNT(*) FROM books b WHERE b.library_id = l.id) as book_count FROM libraries l ORDER BY l.created_at DESC" ) .fetch_all(&state.pool) @@ -64,6 +65,7 @@ pub async fn list_libraries(State(state): State) -> Result, +} + +/// Update the metadata provider for a library +#[utoipa::path( + patch, + path = "/libraries/{id}/metadata-provider", + tag = "libraries", + params( + ("id" = String, Path, description = "Library UUID"), + ), + request_body = UpdateMetadataProviderRequest, + responses( + (status = 200, body = LibraryResponse), + (status = 404, description = "Library not found"), + (status = 401, description = "Unauthorized"), + (status = 403, description = "Forbidden - Admin scope required"), + ), + security(("Bearer" = [])) +)] +pub async fn update_metadata_provider( + State(state): State, + AxumPath(library_id): AxumPath, + Json(input): Json, +) -> Result, ApiError> { + let provider = input.metadata_provider.as_deref().filter(|s| !s.is_empty()); + + let result = sqlx::query( + "UPDATE libraries SET metadata_provider = $2 WHERE id = $1 RETURNING id, name, root_path, enabled, monitor_enabled, scan_mode, next_scan_at, watcher_enabled, metadata_provider" + ) + .bind(library_id) + .bind(provider) + .fetch_optional(&state.pool) + .await?; + + let Some(row) = result else { + return Err(ApiError::not_found("library not found")); + }; + + let book_count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM books WHERE library_id = $1") + .bind(library_id) + .fetch_one(&state.pool) + .await?; + + Ok(Json(LibraryResponse { + id: row.get("id"), + name: row.get("name"), + root_path: row.get("root_path"), + enabled: row.get("enabled"), + book_count, + monitor_enabled: row.get("monitor_enabled"), + scan_mode: row.get("scan_mode"), + next_scan_at: row.get("next_scan_at"), + watcher_enabled: row.get("watcher_enabled"), + metadata_provider: row.get("metadata_provider"), })) } diff --git a/apps/api/src/main.rs b/apps/api/src/main.rs index 229e0f9..93a769f 100644 --- a/apps/api/src/main.rs +++ b/apps/api/src/main.rs @@ -5,6 +5,8 @@ mod handlers; mod index_jobs; mod komga; mod libraries; +mod metadata; +mod metadata_providers; mod api_middleware; mod openapi; mod pages; @@ -83,6 +85,7 @@ async fn main() -> anyhow::Result<()> { .route("/libraries/:id", delete(libraries::delete_library)) .route("/libraries/:id/scan", axum::routing::post(libraries::scan_library)) .route("/libraries/:id/monitoring", axum::routing::patch(libraries::update_monitoring)) + .route("/libraries/:id/metadata-provider", axum::routing::patch(libraries::update_metadata_provider)) .route("/books/:id", axum::routing::patch(books::update_book)) .route("/books/:id/convert", axum::routing::post(books::convert_book)) .route("/libraries/:library_id/series/:name", axum::routing::patch(books::update_series)) @@ -102,6 +105,13 @@ async fn main() -> anyhow::Result<()> { .route("/komga/sync", axum::routing::post(komga::sync_komga_read_books)) .route("/komga/reports", get(komga::list_sync_reports)) .route("/komga/reports/:id", get(komga::get_sync_report)) + .route("/metadata/search", axum::routing::post(metadata::search_metadata)) + .route("/metadata/match", axum::routing::post(metadata::create_metadata_match)) + .route("/metadata/approve/:id", axum::routing::post(metadata::approve_metadata)) + .route("/metadata/reject/:id", axum::routing::post(metadata::reject_metadata)) + .route("/metadata/links", get(metadata::get_metadata_links)) + .route("/metadata/missing/:id", get(metadata::get_missing_books)) + .route("/metadata/links/:id", delete(metadata::delete_metadata_link)) .merge(settings::settings_routes()) .route_layer(middleware::from_fn_with_state( state.clone(), diff --git a/apps/api/src/metadata.rs b/apps/api/src/metadata.rs new file mode 100644 index 0000000..c9f9e8c --- /dev/null +++ b/apps/api/src/metadata.rs @@ -0,0 +1,1010 @@ +use axum::{ + extract::{Path as AxumPath, Query, State}, + Json, +}; +use serde::{Deserialize, Serialize}; +use sqlx::Row; +use uuid::Uuid; +use utoipa::ToSchema; + +use crate::{error::ApiError, metadata_providers, state::AppState}; + +// --------------------------------------------------------------------------- +// DTOs +// --------------------------------------------------------------------------- + +#[derive(Deserialize, ToSchema)] +pub struct MetadataSearchRequest { + pub library_id: String, + pub series_name: String, + /// Optional provider override (defaults to library/global setting) + pub provider: Option, +} + +#[derive(Serialize, ToSchema)] +pub struct SeriesCandidateDto { + pub provider: String, + pub external_id: String, + pub title: String, + pub authors: Vec, + pub description: Option, + pub publishers: Vec, + pub start_year: Option, + pub total_volumes: Option, + pub cover_url: Option, + pub external_url: Option, + pub confidence: f32, + pub metadata_json: serde_json::Value, +} + +#[derive(Deserialize, ToSchema)] +#[allow(dead_code)] +pub struct MetadataMatchRequest { + pub library_id: String, + pub series_name: String, + pub provider: String, + pub external_id: String, + pub external_url: Option, + pub confidence: Option, + pub title: String, + pub metadata_json: serde_json::Value, + pub total_volumes: Option, +} + +#[derive(Serialize, ToSchema)] +pub struct ExternalMetadataLinkDto { + #[schema(value_type = String)] + pub id: Uuid, + #[schema(value_type = String)] + pub library_id: Uuid, + pub series_name: String, + pub provider: String, + pub external_id: String, + pub external_url: Option, + pub status: String, + pub confidence: Option, + pub metadata_json: serde_json::Value, + pub total_volumes_external: Option, + pub matched_at: String, + pub approved_at: Option, + pub synced_at: Option, +} + +#[derive(Deserialize, ToSchema)] +pub struct ApproveRequest { + #[serde(default)] + pub sync_series: bool, + #[serde(default)] + pub sync_books: bool, +} + +#[derive(Serialize, ToSchema)] +pub struct FieldChange { + pub field: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub old_value: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub new_value: Option, +} + +#[derive(Serialize, ToSchema, Default)] +pub struct SeriesSyncReport { + pub fields_updated: Vec, + pub fields_skipped: Vec, +} + +#[derive(Serialize, ToSchema)] +pub struct BookSyncReport { + #[schema(value_type = String)] + pub book_id: Uuid, + pub title: String, + pub volume: Option, + pub fields_updated: Vec, + pub fields_skipped: Vec, +} + +#[derive(Serialize, ToSchema, Default)] +pub struct SyncReport { + pub series: Option, + pub books: Vec, + pub books_matched: i64, + pub books_unmatched: i64, +} + +#[derive(Serialize, ToSchema)] +pub struct ApproveResponse { + pub status: String, + pub report: SyncReport, +} + +#[derive(Serialize, ToSchema)] +pub struct MissingBooksDto { + pub total_external: i64, + pub total_local: i64, + pub missing_count: i64, + pub missing_books: Vec, +} + +#[derive(Serialize, ToSchema)] +pub struct MissingBookItem { + pub title: Option, + pub volume_number: Option, + pub external_book_id: Option, +} + +#[derive(Deserialize)] +pub struct MetadataLinkQuery { + pub library_id: Option, + pub series_name: Option, +} + +// --------------------------------------------------------------------------- +// POST /metadata/search +// --------------------------------------------------------------------------- + +#[utoipa::path( + post, + path = "/metadata/search", + tag = "metadata", + request_body = MetadataSearchRequest, + responses( + (status = 200, body = Vec), + (status = 400, description = "Bad request"), + (status = 500, description = "Provider error"), + ), + security(("Bearer" = [])) +)] +pub async fn search_metadata( + State(state): State, + Json(body): Json, +) -> Result>, ApiError> { + let library_id: Uuid = body + .library_id + .parse() + .map_err(|_| ApiError::bad_request("invalid library_id"))?; + + if body.series_name.trim().is_empty() { + return Err(ApiError::bad_request("series_name is required")); + } + + // Determine provider: explicit override → library-level → global setting → default + let provider_name = if let Some(ref p) = body.provider { + if !p.is_empty() { p.clone() } else { get_provider_for_library(&state, library_id).await? } + } else { + get_provider_for_library(&state, library_id).await? + }; + + // Fall back to google_books if the configured provider isn't implemented yet + let provider = metadata_providers::get_provider(&provider_name) + .or_else(|| metadata_providers::get_provider("google_books")) + .ok_or_else(|| ApiError::bad_request(format!("unknown provider: {provider_name}")))?; + + let provider_config = load_provider_config(&state, &provider_name).await; + + let candidates = provider + .search_series(&body.series_name, &provider_config) + .await + .map_err(|e| ApiError::internal(format!("provider error: {e}")))?; + + let actual_provider = provider.name().to_string(); + let dtos: Vec = candidates + .into_iter() + .map(|c| SeriesCandidateDto { + provider: actual_provider.clone(), + external_id: c.external_id, + title: c.title, + authors: c.authors, + description: c.description, + publishers: c.publishers, + start_year: c.start_year, + total_volumes: c.total_volumes, + cover_url: c.cover_url, + external_url: c.external_url, + confidence: c.confidence, + metadata_json: c.metadata_json, + }) + .collect(); + + Ok(Json(dtos)) +} + +// --------------------------------------------------------------------------- +// POST /metadata/match +// --------------------------------------------------------------------------- + +#[utoipa::path( + post, + path = "/metadata/match", + tag = "metadata", + request_body = MetadataMatchRequest, + responses( + (status = 200, body = ExternalMetadataLinkDto), + (status = 400, description = "Bad request"), + ), + security(("Bearer" = [])) +)] +pub async fn create_metadata_match( + State(state): State, + Json(body): Json, +) -> Result, ApiError> { + let library_id: Uuid = body + .library_id + .parse() + .map_err(|_| ApiError::bad_request("invalid library_id"))?; + + let row = sqlx::query( + r#" + INSERT INTO external_metadata_links + (library_id, series_name, provider, external_id, external_url, status, confidence, metadata_json, total_volumes_external) + VALUES ($1, $2, $3, $4, $5, 'pending', $6, $7, $8) + ON CONFLICT (library_id, series_name, provider) + DO UPDATE SET + external_id = EXCLUDED.external_id, + external_url = EXCLUDED.external_url, + status = 'pending', + confidence = EXCLUDED.confidence, + metadata_json = EXCLUDED.metadata_json, + total_volumes_external = EXCLUDED.total_volumes_external, + matched_at = NOW(), + updated_at = NOW(), + approved_at = NULL, + synced_at = NULL + RETURNING id, library_id, series_name, provider, external_id, external_url, status, confidence, + metadata_json, total_volumes_external, matched_at, approved_at, synced_at + "#, + ) + .bind(library_id) + .bind(&body.series_name) + .bind(&body.provider) + .bind(&body.external_id) + .bind(&body.external_url) + .bind(body.confidence) + .bind(&body.metadata_json) + .bind(body.total_volumes) + .fetch_one(&state.pool) + .await?; + + Ok(Json(row_to_link_dto(&row))) +} + +// --------------------------------------------------------------------------- +// POST /metadata/approve/:id +// --------------------------------------------------------------------------- + +#[utoipa::path( + post, + path = "/metadata/approve/{id}", + tag = "metadata", + params(("id" = String, Path, description = "Link UUID")), + request_body = ApproveRequest, + responses( + (status = 200, body = ApproveResponse), + (status = 404, description = "Link not found"), + ), + security(("Bearer" = [])) +)] +pub async fn approve_metadata( + State(state): State, + AxumPath(id): AxumPath, + Json(body): Json, +) -> Result, ApiError> { + // Update status to approved + let result = sqlx::query( + r#" + UPDATE external_metadata_links + SET status = 'approved', approved_at = NOW(), updated_at = NOW() + WHERE id = $1 + RETURNING library_id, series_name, provider, external_id, metadata_json, total_volumes_external + "#, + ) + .bind(id) + .fetch_optional(&state.pool) + .await?; + + let row = result.ok_or_else(|| ApiError::not_found("link not found"))?; + + let library_id: Uuid = row.get("library_id"); + let series_name: String = row.get("series_name"); + + // Reject any other approved links for the same series (only one active link per series) + // Also clean up their external_book_metadata + let old_link_ids: Vec = sqlx::query_scalar( + r#" + UPDATE external_metadata_links + SET status = 'rejected', updated_at = NOW() + WHERE library_id = $1 AND series_name = $2 AND id != $3 AND status = 'approved' + RETURNING id + "#, + ) + .bind(library_id) + .bind(&series_name) + .bind(id) + .fetch_all(&state.pool) + .await?; + + if !old_link_ids.is_empty() { + sqlx::query("DELETE FROM external_book_metadata WHERE link_id = ANY($1)") + .bind(&old_link_ids) + .execute(&state.pool) + .await?; + } + + let provider_name: String = row.get("provider"); + let external_id: String = row.get("external_id"); + let metadata_json: serde_json::Value = row.get("metadata_json"); + let total_volumes_external: Option = row.get("total_volumes_external"); + + let mut report = SyncReport::default(); + + // Sync series metadata if requested + if body.sync_series { + report.series = Some( + sync_series_metadata(&state, library_id, &series_name, &metadata_json, total_volumes_external).await? + ); + } + + // Sync books if requested + if body.sync_books { + let (matched, book_reports, unmatched) = + sync_books_metadata(&state, id, library_id, &series_name, &provider_name, &external_id) + .await?; + report.books_matched = matched; + report.books = book_reports; + report.books_unmatched = unmatched; + + // Update synced_at + sqlx::query("UPDATE external_metadata_links SET synced_at = NOW(), updated_at = NOW() WHERE id = $1") + .bind(id) + .execute(&state.pool) + .await?; + } + + Ok(Json(ApproveResponse { + status: "approved".to_string(), + report, + })) +} + +// --------------------------------------------------------------------------- +// POST /metadata/reject/:id +// --------------------------------------------------------------------------- + +#[utoipa::path( + post, + path = "/metadata/reject/{id}", + tag = "metadata", + params(("id" = String, Path, description = "Link UUID")), + responses( + (status = 200, description = "Rejected"), + (status = 404, description = "Link not found"), + ), + security(("Bearer" = [])) +)] +pub async fn reject_metadata( + State(state): State, + AxumPath(id): AxumPath, +) -> Result, ApiError> { + let result = sqlx::query( + "UPDATE external_metadata_links SET status = 'rejected', updated_at = NOW() WHERE id = $1", + ) + .bind(id) + .execute(&state.pool) + .await?; + + if result.rows_affected() == 0 { + return Err(ApiError::not_found("link not found")); + } + + Ok(Json(serde_json::json!({"status": "rejected"}))) +} + +// --------------------------------------------------------------------------- +// GET /metadata/links +// --------------------------------------------------------------------------- + +#[utoipa::path( + get, + path = "/metadata/links", + tag = "metadata", + params( + ("library_id" = Option, Query, description = "Library UUID"), + ("series_name" = Option, Query, description = "Series name"), + ), + responses( + (status = 200, body = Vec), + ), + security(("Bearer" = [])) +)] +pub async fn get_metadata_links( + State(state): State, + Query(query): Query, +) -> Result>, ApiError> { + let library_id: Option = query + .library_id + .as_deref() + .and_then(|s| s.parse().ok()); + + let rows = sqlx::query( + r#" + SELECT id, library_id, series_name, provider, external_id, external_url, status, confidence, + metadata_json, total_volumes_external, matched_at, approved_at, synced_at + FROM external_metadata_links + WHERE ($1::uuid IS NULL OR library_id = $1) + AND ($2::text IS NULL OR series_name = $2) + ORDER BY updated_at DESC + "#, + ) + .bind(library_id) + .bind(query.series_name.as_deref()) + .fetch_all(&state.pool) + .await?; + + let links: Vec = rows.iter().map(row_to_link_dto).collect(); + + Ok(Json(links)) +} + +// --------------------------------------------------------------------------- +// GET /metadata/missing/:id +// --------------------------------------------------------------------------- + +#[utoipa::path( + get, + path = "/metadata/missing/{id}", + tag = "metadata", + params(("id" = String, Path, description = "Link UUID")), + responses( + (status = 200, body = MissingBooksDto), + (status = 404, description = "Link not found"), + ), + security(("Bearer" = [])) +)] +pub async fn get_missing_books( + State(state): State, + AxumPath(id): AxumPath, +) -> Result, ApiError> { + // Verify link exists + let link = sqlx::query( + "SELECT library_id, series_name FROM external_metadata_links WHERE id = $1", + ) + .bind(id) + .fetch_optional(&state.pool) + .await? + .ok_or_else(|| ApiError::not_found("link not found"))?; + + let library_id: Uuid = link.get("library_id"); + let series_name: String = link.get("series_name"); + + // Count external books + let total_external: i64 = + sqlx::query_scalar("SELECT COUNT(*) FROM external_book_metadata WHERE link_id = $1") + .bind(id) + .fetch_one(&state.pool) + .await?; + + // Count local books + let total_local: i64 = sqlx::query_scalar( + "SELECT COUNT(*) FROM books WHERE library_id = $1 AND COALESCE(NULLIF(series, ''), 'unclassified') = $2", + ) + .bind(library_id) + .bind(&series_name) + .fetch_one(&state.pool) + .await?; + + // Get unmatched external books (no book_id link) + let missing_rows = sqlx::query( + r#" + SELECT title, volume_number, external_book_id + FROM external_book_metadata + WHERE link_id = $1 AND book_id IS NULL + ORDER BY volume_number NULLS LAST + "#, + ) + .bind(id) + .fetch_all(&state.pool) + .await?; + + let missing_books: Vec = missing_rows + .iter() + .map(|row| MissingBookItem { + title: row.get("title"), + volume_number: row.get("volume_number"), + external_book_id: row.get("external_book_id"), + }) + .collect(); + + let missing_count = missing_books.len() as i64; + + Ok(Json(MissingBooksDto { + total_external, + total_local, + missing_count, + missing_books, + })) +} + +// --------------------------------------------------------------------------- +// DELETE /metadata/links/:id +// --------------------------------------------------------------------------- + +#[utoipa::path( + delete, + path = "/metadata/links/{id}", + tag = "metadata", + params(("id" = String, Path, description = "Link UUID")), + responses( + (status = 200, description = "Deleted"), + (status = 404, description = "Link not found"), + ), + security(("Bearer" = [])) +)] +pub async fn delete_metadata_link( + State(state): State, + AxumPath(id): AxumPath, +) -> Result, ApiError> { + let result = sqlx::query("DELETE FROM external_metadata_links WHERE id = $1") + .bind(id) + .execute(&state.pool) + .await?; + + if result.rows_affected() == 0 { + return Err(ApiError::not_found("link not found")); + } + + Ok(Json(serde_json::json!({"deleted": true, "id": id.to_string()}))) +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn row_to_link_dto(row: &sqlx::postgres::PgRow) -> ExternalMetadataLinkDto { + let matched_at: chrono::DateTime = row.get("matched_at"); + let approved_at: Option> = row.get("approved_at"); + let synced_at: Option> = row.get("synced_at"); + + ExternalMetadataLinkDto { + id: row.get("id"), + library_id: row.get("library_id"), + series_name: row.get("series_name"), + provider: row.get("provider"), + external_id: row.get("external_id"), + external_url: row.get("external_url"), + status: row.get("status"), + confidence: row.get("confidence"), + metadata_json: row.get("metadata_json"), + total_volumes_external: row.get("total_volumes_external"), + matched_at: matched_at.to_rfc3339(), + approved_at: approved_at.map(|d| d.to_rfc3339()), + synced_at: synced_at.map(|d| d.to_rfc3339()), + } +} + +async fn get_provider_for_library(state: &AppState, library_id: Uuid) -> Result { + // Check library-level provider first + let row = sqlx::query("SELECT metadata_provider FROM libraries WHERE id = $1") + .bind(library_id) + .fetch_optional(&state.pool) + .await? + .ok_or_else(|| ApiError::not_found("library not found"))?; + + let lib_provider: Option = row.get("metadata_provider"); + if let Some(p) = lib_provider { + if !p.is_empty() { + return Ok(p); + } + } + + // Fall back to global setting + let global = sqlx::query("SELECT value FROM app_settings WHERE key = 'metadata_providers'") + .fetch_optional(&state.pool) + .await?; + + if let Some(row) = global { + let value: serde_json::Value = row.get("value"); + if let Some(default) = value.get("default_provider").and_then(|v| v.as_str()) { + if !default.is_empty() { + return Ok(default.to_string()); + } + } + } + + // Default to google_books + Ok("google_books".to_string()) +} + +async fn load_provider_config( + state: &AppState, + provider_name: &str, +) -> metadata_providers::ProviderConfig { + let mut config = metadata_providers::ProviderConfig { + language: "en".to_string(), + ..Default::default() + }; + + if let Ok(Some(row)) = + sqlx::query("SELECT value FROM app_settings WHERE key = 'metadata_providers'") + .fetch_optional(&state.pool) + .await + { + let value: serde_json::Value = row.get("value"); + if let Some(api_key) = value + .get(provider_name) + .and_then(|p| p.get("api_key")) + .and_then(|k| k.as_str()) + { + if !api_key.is_empty() { + config.api_key = Some(api_key.to_string()); + } + } + // Load preferred language (fallback: "en") + if let Some(lang) = value + .get("metadata_language") + .and_then(|l| l.as_str()) + { + if !lang.is_empty() { + config.language = lang.to_string(); + } + } + } + + config +} + +async fn sync_series_metadata( + state: &AppState, + library_id: Uuid, + series_name: &str, + metadata_json: &serde_json::Value, + total_volumes: Option, +) -> Result { + let description = metadata_json + .get("description") + .and_then(|d| d.as_str()); + let authors: Vec = metadata_json + .get("authors") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect() + }) + .unwrap_or_default(); + let publishers: Vec = metadata_json + .get("publishers") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect() + }) + .unwrap_or_default(); + let start_year = metadata_json + .get("start_year") + .and_then(|y| y.as_i64()) + .map(|y| y as i32); + + // Fetch existing state before upsert + let existing = sqlx::query( + r#"SELECT description, publishers, start_year, total_volumes, authors, locked_fields + FROM series_metadata WHERE library_id = $1 AND name = $2"#, + ) + .bind(library_id) + .bind(series_name) + .fetch_optional(&state.pool) + .await?; + + // Respect locked_fields: only update fields that are NOT locked + sqlx::query( + r#" + INSERT INTO series_metadata (library_id, name, description, publishers, start_year, total_volumes, authors, created_at, updated_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, NOW(), NOW()) + ON CONFLICT (library_id, name) + DO UPDATE SET + description = CASE + WHEN (series_metadata.locked_fields->>'description')::boolean IS TRUE THEN series_metadata.description + ELSE COALESCE(NULLIF(EXCLUDED.description, ''), series_metadata.description) + END, + publishers = CASE + WHEN (series_metadata.locked_fields->>'publishers')::boolean IS TRUE THEN series_metadata.publishers + WHEN array_length(EXCLUDED.publishers, 1) > 0 THEN EXCLUDED.publishers + ELSE series_metadata.publishers + END, + start_year = CASE + WHEN (series_metadata.locked_fields->>'start_year')::boolean IS TRUE THEN series_metadata.start_year + ELSE COALESCE(EXCLUDED.start_year, series_metadata.start_year) + END, + total_volumes = CASE + WHEN (series_metadata.locked_fields->>'total_volumes')::boolean IS TRUE THEN series_metadata.total_volumes + ELSE COALESCE(EXCLUDED.total_volumes, series_metadata.total_volumes) + END, + authors = CASE + WHEN (series_metadata.locked_fields->>'authors')::boolean IS TRUE THEN series_metadata.authors + WHEN array_length(EXCLUDED.authors, 1) > 0 THEN EXCLUDED.authors + ELSE series_metadata.authors + END, + updated_at = NOW() + "#, + ) + .bind(library_id) + .bind(series_name) + .bind(description) + .bind(&publishers) + .bind(start_year) + .bind(total_volumes) + .bind(&authors) + .execute(&state.pool) + .await?; + + // Build report + let mut report = SeriesSyncReport::default(); + let locked = existing + .as_ref() + .map(|r| r.get::("locked_fields")) + .unwrap_or(serde_json::json!({})); + let is_locked = |field: &str| -> bool { + locked.get(field).and_then(|v| v.as_bool()).unwrap_or(false) + }; + + // Helper: compare and record field changes + struct FieldDef { + name: &'static str, + old: Option, + new: Option, + } + + let fields = vec![ + FieldDef { + name: "description", + old: existing.as_ref().and_then(|r| r.get::, _>("description")).map(|s| serde_json::Value::String(s)), + new: description.map(|s| serde_json::Value::String(s.to_string())), + }, + FieldDef { + name: "authors", + old: existing.as_ref().map(|r| serde_json::json!(r.get::, _>("authors"))), + new: if authors.is_empty() { None } else { Some(serde_json::json!(authors)) }, + }, + FieldDef { + name: "publishers", + old: existing.as_ref().map(|r| serde_json::json!(r.get::, _>("publishers"))), + new: if publishers.is_empty() { None } else { Some(serde_json::json!(publishers)) }, + }, + FieldDef { + name: "start_year", + old: existing.as_ref().and_then(|r| r.get::, _>("start_year")).map(|y| serde_json::json!(y)), + new: start_year.map(|y| serde_json::json!(y)), + }, + FieldDef { + name: "total_volumes", + old: existing.as_ref().and_then(|r| r.get::, _>("total_volumes")).map(|y| serde_json::json!(y)), + new: total_volumes.map(|y| serde_json::json!(y)), + }, + ]; + + for f in fields { + // Skip if no new value to apply + if f.new.is_none() { + continue; + } + let change = FieldChange { + field: f.name.to_string(), + old_value: f.old.clone(), + new_value: f.new.clone(), + }; + if is_locked(f.name) { + report.fields_skipped.push(change); + } else if f.old != f.new { + report.fields_updated.push(change); + } + } + + Ok(report) +} + +async fn sync_books_metadata( + state: &AppState, + link_id: Uuid, + library_id: Uuid, + series_name: &str, + provider_name: &str, + external_id: &str, +) -> Result<(i64, Vec, i64), ApiError> { + let provider = metadata_providers::get_provider(provider_name) + .or_else(|| metadata_providers::get_provider("google_books")) + .ok_or_else(|| ApiError::internal(format!("unknown provider: {provider_name}")))?; + + let provider_config = load_provider_config(state, provider_name).await; + + let books = provider + .get_series_books(external_id, &provider_config) + .await + .map_err(|e| ApiError::internal(format!("provider error: {e}")))?; + + // Delete existing book metadata for this link + sqlx::query("DELETE FROM external_book_metadata WHERE link_id = $1") + .bind(link_id) + .execute(&state.pool) + .await?; + + let mut matched_count: i64 = 0; + let mut book_reports: Vec = Vec::new(); + + for book in &books { + // Try to match with local book by volume_number first, then title + let local_book_id: Option = if let Some(vol) = book.volume_number { + sqlx::query_scalar( + r#" + SELECT id FROM books + WHERE library_id = $1 + AND COALESCE(NULLIF(series, ''), 'unclassified') = $2 + AND volume = $3 + LIMIT 1 + "#, + ) + .bind(library_id) + .bind(series_name) + .bind(vol) + .fetch_optional(&state.pool) + .await? + } else { + None + }; + + let local_book_id = match local_book_id { + Some(id) => Some(id), + None => { + // Try matching by title + let pattern = format!("%{}%", book.title); + sqlx::query_scalar( + r#" + SELECT id FROM books + WHERE library_id = $1 + AND COALESCE(NULLIF(series, ''), 'unclassified') = $2 + AND title ILIKE $3 + LIMIT 1 + "#, + ) + .bind(library_id) + .bind(series_name) + .bind(&pattern) + .fetch_optional(&state.pool) + .await? + } + }; + + sqlx::query( + r#" + INSERT INTO external_book_metadata + (link_id, book_id, external_book_id, volume_number, title, authors, isbn, summary, cover_url, page_count, language, publish_date, metadata_json) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) + "#, + ) + .bind(link_id) + .bind(local_book_id) + .bind(&book.external_book_id) + .bind(book.volume_number) + .bind(&book.title) + .bind(&book.authors) + .bind(&book.isbn) + .bind(&book.summary) + .bind(&book.cover_url) + .bind(book.page_count) + .bind(&book.language) + .bind(&book.publish_date) + .bind(&book.metadata_json) + .execute(&state.pool) + .await?; + + // Push external metadata to matched local book (respecting locked fields) + if let Some(book_id) = local_book_id { + // Fetch current state for report + let current = sqlx::query( + "SELECT title, summary, isbn, publish_date, language, authors, locked_fields FROM books WHERE id = $1" + ) + .bind(book_id) + .fetch_one(&state.pool) + .await?; + + sqlx::query( + r#" + UPDATE books SET + summary = CASE + WHEN (locked_fields->>'summary')::boolean IS TRUE THEN summary + ELSE COALESCE(NULLIF($2, ''), summary) + END, + isbn = CASE + WHEN (locked_fields->>'isbn')::boolean IS TRUE THEN isbn + ELSE COALESCE(NULLIF($3, ''), isbn) + END, + publish_date = CASE + WHEN (locked_fields->>'publish_date')::boolean IS TRUE THEN publish_date + ELSE COALESCE(NULLIF($4, ''), publish_date) + END, + language = CASE + WHEN (locked_fields->>'language')::boolean IS TRUE THEN language + ELSE COALESCE(NULLIF($5, ''), language) + END, + authors = CASE + WHEN (locked_fields->>'authors')::boolean IS TRUE THEN authors + WHEN CARDINALITY($6::text[]) > 0 THEN $6 + ELSE authors + END, + author = CASE + WHEN (locked_fields->>'authors')::boolean IS TRUE THEN author + WHEN CARDINALITY($6::text[]) > 0 THEN $6[1] + ELSE author + END, + updated_at = NOW() + WHERE id = $1 + "#, + ) + .bind(book_id) + .bind(&book.summary) + .bind(&book.isbn) + .bind(&book.publish_date) + .bind(&book.language) + .bind(&book.authors) + .execute(&state.pool) + .await?; + + // Build per-book report + let locked_fields = current.get::("locked_fields"); + let is_locked = |field: &str| -> bool { + locked_fields.get(field).and_then(|v| v.as_bool()).unwrap_or(false) + }; + + let book_title: String = current.get("title"); + let mut fields_updated = Vec::new(); + let mut fields_skipped = Vec::new(); + + // Check each syncable field + let field_checks: Vec<(&str, Option, Option)> = vec![ + ("summary", + current.get::, _>("summary").map(|s| serde_json::json!(s)), + book.summary.as_ref().map(|s| serde_json::json!(s))), + ("isbn", + current.get::, _>("isbn").map(|s| serde_json::json!(s)), + book.isbn.as_ref().map(|s| serde_json::json!(s))), + ("publish_date", + current.get::, _>("publish_date").map(|s| serde_json::json!(s)), + book.publish_date.as_ref().map(|s| serde_json::json!(s))), + ("language", + current.get::, _>("language").map(|s| serde_json::json!(s)), + book.language.as_ref().map(|s| serde_json::json!(s))), + ("authors", + Some(serde_json::json!(current.get::, _>("authors"))), + if book.authors.is_empty() { None } else { Some(serde_json::json!(&book.authors)) }), + ]; + + for (name, old, new) in field_checks { + if new.is_none() { continue; } + let change = FieldChange { + field: name.to_string(), + old_value: old.clone(), + new_value: new.clone(), + }; + if is_locked(name) { + fields_skipped.push(change); + } else if old != new { + fields_updated.push(change); + } + } + + // Only include books that had actual changes or skips + if !fields_updated.is_empty() || !fields_skipped.is_empty() { + book_reports.push(BookSyncReport { + book_id, + title: book_title, + volume: book.volume_number, + fields_updated, + fields_skipped, + }); + } + + matched_count += 1; + } + } + + let unmatched = books.len() as i64 - matched_count; + Ok((matched_count, book_reports, unmatched)) +} diff --git a/apps/api/src/metadata_providers/anilist.rs b/apps/api/src/metadata_providers/anilist.rs new file mode 100644 index 0000000..936ee12 --- /dev/null +++ b/apps/api/src/metadata_providers/anilist.rs @@ -0,0 +1,322 @@ +use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate}; + +pub struct AniListProvider; + +impl MetadataProvider for AniListProvider { + fn name(&self) -> &str { + "anilist" + } + + fn search_series( + &self, + query: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let query = query.to_string(); + let config = config.clone(); + Box::pin(async move { search_series_impl(&query, &config).await }) + } + + fn get_series_books( + &self, + external_id: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let external_id = external_id.to_string(); + let config = config.clone(); + Box::pin(async move { get_series_books_impl(&external_id, &config).await }) + } +} + +const SEARCH_QUERY: &str = r#" +query ($search: String) { + Page(perPage: 20) { + media(search: $search, type: MANGA, sort: SEARCH_MATCH) { + id + title { romaji english native } + description(asHtml: false) + coverImage { large medium } + startDate { year } + volumes + chapters + staff { edges { node { name { full } } role } } + siteUrl + genres + } + } +} +"#; + +const DETAIL_QUERY: &str = r#" +query ($id: Int) { + Media(id: $id, type: MANGA) { + id + title { romaji english native } + description(asHtml: false) + coverImage { large medium } + startDate { year } + volumes + chapters + staff { edges { node { name { full } } role } } + siteUrl + genres + } +} +"#; + +async fn graphql_request( + client: &reqwest::Client, + query: &str, + variables: serde_json::Value, +) -> Result { + let resp = client + .post("https://graphql.anilist.co") + .header("Content-Type", "application/json") + .json(&serde_json::json!({ + "query": query, + "variables": variables, + })) + .send() + .await + .map_err(|e| format!("AniList request failed: {e}"))?; + + if !resp.status().is_success() { + let status = resp.status(); + let text = resp.text().await.unwrap_or_default(); + return Err(format!("AniList returned {status}: {text}")); + } + + resp.json() + .await + .map_err(|e| format!("Failed to parse AniList response: {e}")) +} + +async fn search_series_impl( + query: &str, + _config: &ProviderConfig, +) -> Result, String> { + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(15)) + .build() + .map_err(|e| format!("failed to build HTTP client: {e}"))?; + + let data = graphql_request( + &client, + SEARCH_QUERY, + serde_json::json!({ "search": query }), + ) + .await?; + + let media = match data + .get("data") + .and_then(|d| d.get("Page")) + .and_then(|p| p.get("media")) + .and_then(|m| m.as_array()) + { + Some(media) => media, + None => return Ok(vec![]), + }; + + let query_lower = query.to_lowercase(); + + let mut candidates: Vec = media + .iter() + .filter_map(|m| { + let id = m.get("id").and_then(|id| id.as_i64())? as i64; + let title_obj = m.get("title")?; + let title = title_obj + .get("english") + .and_then(|t| t.as_str()) + .or_else(|| title_obj.get("romaji").and_then(|t| t.as_str()))? + .to_string(); + + let description = m + .get("description") + .and_then(|d| d.as_str()) + .map(|d| d.replace("\\n", "\n").trim().to_string()) + .filter(|d| !d.is_empty()); + + let cover_url = m + .get("coverImage") + .and_then(|ci| ci.get("large").or_else(|| ci.get("medium"))) + .and_then(|u| u.as_str()) + .map(String::from); + + let start_year = m + .get("startDate") + .and_then(|sd| sd.get("year")) + .and_then(|y| y.as_i64()) + .map(|y| y as i32); + + let volumes = m + .get("volumes") + .and_then(|v| v.as_i64()) + .map(|v| v as i32); + + let site_url = m + .get("siteUrl") + .and_then(|u| u.as_str()) + .map(String::from); + + let authors = extract_authors(m); + + let confidence = compute_confidence(&title, &query_lower); + + Some(SeriesCandidate { + external_id: id.to_string(), + title, + authors, + description, + publishers: vec![], + start_year, + total_volumes: volumes, + cover_url, + external_url: site_url, + confidence, + metadata_json: serde_json::json!({}), + }) + }) + .collect(); + + candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)); + candidates.truncate(10); + Ok(candidates) +} + +async fn get_series_books_impl( + external_id: &str, + _config: &ProviderConfig, +) -> Result, String> { + let id: i64 = external_id + .parse() + .map_err(|_| "invalid AniList ID".to_string())?; + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(15)) + .build() + .map_err(|e| format!("failed to build HTTP client: {e}"))?; + + let data = graphql_request( + &client, + DETAIL_QUERY, + serde_json::json!({ "id": id }), + ) + .await?; + + let media = match data.get("data").and_then(|d| d.get("Media")) { + Some(m) => m, + None => return Ok(vec![]), + }; + + let title_obj = media.get("title").cloned().unwrap_or(serde_json::json!({})); + let title = title_obj + .get("english") + .and_then(|t| t.as_str()) + .or_else(|| title_obj.get("romaji").and_then(|t| t.as_str())) + .unwrap_or("") + .to_string(); + + let volumes = media + .get("volumes") + .and_then(|v| v.as_i64()) + .map(|v| v as i32); + + let cover_url = media + .get("coverImage") + .and_then(|ci| ci.get("large").or_else(|| ci.get("medium"))) + .and_then(|u| u.as_str()) + .map(String::from); + + let description = media + .get("description") + .and_then(|d| d.as_str()) + .map(|d| d.replace("\\n", "\n").trim().to_string()); + + let authors = extract_authors(media); + + // AniList doesn't have per-volume data — generate volume entries if volumes count is known + let mut books = Vec::new(); + if let Some(total) = volumes { + for vol in 1..=total { + books.push(BookCandidate { + external_book_id: format!("{}-vol-{}", external_id, vol), + title: format!("{} Vol. {}", title, vol), + volume_number: Some(vol), + authors: authors.clone(), + isbn: None, + summary: if vol == 1 { description.clone() } else { None }, + cover_url: if vol == 1 { cover_url.clone() } else { None }, + page_count: None, + language: Some("ja".to_string()), + publish_date: None, + metadata_json: serde_json::json!({}), + }); + } + } else { + // Single entry for the whole manga + books.push(BookCandidate { + external_book_id: external_id.to_string(), + title, + volume_number: Some(1), + authors, + isbn: None, + summary: description, + cover_url, + page_count: None, + language: Some("ja".to_string()), + publish_date: None, + metadata_json: serde_json::json!({}), + }); + } + + Ok(books) +} + +fn extract_authors(media: &serde_json::Value) -> Vec { + let mut authors = Vec::new(); + if let Some(edges) = media + .get("staff") + .and_then(|s| s.get("edges")) + .and_then(|e| e.as_array()) + { + for edge in edges { + let role = edge + .get("role") + .and_then(|r| r.as_str()) + .unwrap_or(""); + let role_lower = role.to_lowercase(); + if role_lower.contains("story") || role_lower.contains("art") || role_lower.contains("original") { + if let Some(name) = edge + .get("node") + .and_then(|n| n.get("name")) + .and_then(|n| n.get("full")) + .and_then(|f| f.as_str()) + { + if !authors.contains(&name.to_string()) { + authors.push(name.to_string()); + } + } + } + } + } + authors +} + +fn compute_confidence(title: &str, query: &str) -> f32 { + let title_lower = title.to_lowercase(); + if title_lower == query { + 1.0 + } else if title_lower.starts_with(query) || query.starts_with(&title_lower) { + 0.8 + } else if title_lower.contains(query) || query.contains(&title_lower) { + 0.7 + } else { + let common: usize = query.chars().filter(|c| title_lower.contains(*c)).count(); + let max_len = query.len().max(title_lower.len()).max(1); + (common as f32 / max_len as f32).clamp(0.1, 0.6) + } +} diff --git a/apps/api/src/metadata_providers/bedetheque.rs b/apps/api/src/metadata_providers/bedetheque.rs new file mode 100644 index 0000000..285ede4 --- /dev/null +++ b/apps/api/src/metadata_providers/bedetheque.rs @@ -0,0 +1,576 @@ +use scraper::{Html, Selector}; + +use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate}; + +pub struct BedethequeProvider; + +impl MetadataProvider for BedethequeProvider { + fn name(&self) -> &str { + "bedetheque" + } + + fn search_series( + &self, + query: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let query = query.to_string(); + let config = config.clone(); + Box::pin(async move { search_series_impl(&query, &config).await }) + } + + fn get_series_books( + &self, + external_id: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let external_id = external_id.to_string(); + let config = config.clone(); + Box::pin(async move { get_series_books_impl(&external_id, &config).await }) + } +} + +fn build_client() -> Result { + reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(20)) + .user_agent("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0") + .default_headers({ + let mut h = reqwest::header::HeaderMap::new(); + h.insert( + reqwest::header::ACCEPT, + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + .parse() + .unwrap(), + ); + h.insert( + reqwest::header::ACCEPT_LANGUAGE, + "fr-FR,fr;q=0.9,en;q=0.5".parse().unwrap(), + ); + h.insert(reqwest::header::REFERER, "https://www.bedetheque.com/".parse().unwrap()); + h + }) + .build() + .map_err(|e| format!("failed to build HTTP client: {e}")) +} + +/// Remove diacritics for URL construction (bedetheque uses ASCII slugs) +fn normalize_for_url(s: &str) -> String { + s.chars() + .map(|c| match c { + 'é' | 'è' | 'ê' | 'ë' | 'É' | 'È' | 'Ê' | 'Ë' => 'e', + 'à' | 'â' | 'ä' | 'À' | 'Â' | 'Ä' => 'a', + 'ù' | 'û' | 'ü' | 'Ù' | 'Û' | 'Ü' => 'u', + 'ô' | 'ö' | 'Ô' | 'Ö' => 'o', + 'î' | 'ï' | 'Î' | 'Ï' => 'i', + 'ç' | 'Ç' => 'c', + 'ñ' | 'Ñ' => 'n', + _ => c, + }) + .collect() +} + +fn urlencoded(s: &str) -> String { + let mut result = String::new(); + for byte in s.bytes() { + match byte { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + result.push(byte as char); + } + b' ' => result.push('+'), + _ => result.push_str(&format!("%{:02X}", byte)), + } + } + result +} + +// --------------------------------------------------------------------------- +// Search +// --------------------------------------------------------------------------- + +async fn search_series_impl( + query: &str, + _config: &ProviderConfig, +) -> Result, String> { + let client = build_client()?; + + // Use the full-text search page + let url = format!( + "https://www.bedetheque.com/search/tout?RechTexte={}&RechWhere=0", + urlencoded(&normalize_for_url(query)) + ); + + let resp = client + .get(&url) + .send() + .await + .map_err(|e| format!("Bedetheque request failed: {e}"))?; + + if !resp.status().is_success() { + let status = resp.status(); + return Err(format!("Bedetheque returned {status}")); + } + + let html = resp + .text() + .await + .map_err(|e| format!("Failed to read Bedetheque response: {e}"))?; + + // Detect IP blacklist + if html.contains("") || html.contains(" ") { + return Err("Bedetheque: IP may be rate-limited, please retry later".to_string()); + } + + // Parse HTML in a block so the non-Send Html type is dropped before any .await + let candidates = { + let document = Html::parse_document(&html); + let link_sel = + Selector::parse("a[href*='/serie-']").map_err(|e| format!("selector error: {e}"))?; + + let query_lower = query.to_lowercase(); + let mut seen = std::collections::HashSet::new(); + let mut candidates = Vec::new(); + + for el in document.select(&link_sel) { + let href = match el.value().attr("href") { + Some(h) => h.to_string(), + None => continue, + }; + + let (series_id, _slug) = match parse_serie_href(&href) { + Some(v) => v, + None => continue, + }; + + if !seen.insert(series_id.clone()) { + continue; + } + + let title = el.text().collect::().trim().to_string(); + if title.is_empty() { + continue; + } + + let confidence = compute_confidence(&title, &query_lower); + let cover_url = format!( + "https://www.bedetheque.com/cache/thb_series/PlancheS_{}.jpg", + series_id + ); + + candidates.push(SeriesCandidate { + external_id: series_id.clone(), + title: title.clone(), + authors: vec![], + description: None, + publishers: vec![], + start_year: None, + total_volumes: None, + cover_url: Some(cover_url), + external_url: Some(href), + confidence, + metadata_json: serde_json::json!({}), + }); + } + + candidates.sort_by(|a, b| { + b.confidence + .partial_cmp(&a.confidence) + .unwrap_or(std::cmp::Ordering::Equal) + }); + candidates.truncate(10); + candidates + }; // document is dropped here — safe to .await below + + // For the top candidates, fetch series details to enrich metadata + // (limit to top 3 to avoid hammering the site) + let mut enriched = Vec::new(); + for mut c in candidates { + if enriched.len() < 3 { + if let Ok(details) = fetch_series_details(&client, &c.external_id, c.external_url.as_deref()).await { + if let Some(desc) = details.description { + c.description = Some(desc); + } + if !details.authors.is_empty() { + c.authors = details.authors; + } + if !details.publishers.is_empty() { + c.publishers = details.publishers; + } + if let Some(year) = details.start_year { + c.start_year = Some(year); + } + if let Some(count) = details.album_count { + c.total_volumes = Some(count); + } + c.metadata_json = serde_json::json!({ + "description": c.description, + "authors": c.authors, + "publishers": c.publishers, + "start_year": c.start_year, + }); + } + } + enriched.push(c); + } + + Ok(enriched) +} + +/// Parse serie URL to extract (id, slug) +fn parse_serie_href(href: &str) -> Option<(String, String)> { + // Patterns: + // https://www.bedetheque.com/serie-3-BD-Blacksad.html + // /serie-3-BD-Blacksad.html + let re = regex::Regex::new(r"/serie-(\d+)-[A-Za-z]+-(.+?)(?:__\d+)?\.html").ok()?; + let caps = re.captures(href)?; + Some((caps[1].to_string(), caps[2].to_string())) +} + +struct SeriesDetails { + description: Option, + authors: Vec, + publishers: Vec, + start_year: Option, + album_count: Option, +} + +async fn fetch_series_details( + client: &reqwest::Client, + series_id: &str, + series_url: Option<&str>, +) -> Result { + // Build URL — append __10000 to get all albums on one page + let url = match series_url { + Some(u) => { + // Replace .html with __10000.html + u.replace(".html", "__10000.html") + } + None => format!( + "https://www.bedetheque.com/serie-{}-BD-Serie__10000.html", + series_id + ), + }; + + let resp = client + .get(&url) + .send() + .await + .map_err(|e| format!("Failed to fetch series page: {e}"))?; + + if !resp.status().is_success() { + return Err(format!("Series page returned {}", resp.status())); + } + + let html = resp + .text() + .await + .map_err(|e| format!("Failed to read series page: {e}"))?; + + let doc = Html::parse_document(&html); + let mut details = SeriesDetails { + description: None, + authors: vec![], + publishers: vec![], + start_year: None, + album_count: None, + }; + + // Description: look for #full-commentaire or .serie-info + if let Ok(sel) = Selector::parse("#full-commentaire") { + if let Some(el) = doc.select(&sel).next() { + let text = el.text().collect::().trim().to_string(); + if !text.is_empty() { + details.description = Some(text); + } + } + } + + // Fallback description from span.infoedition + if details.description.is_none() { + if let Ok(sel) = Selector::parse("span.infoedition") { + if let Some(el) = doc.select(&sel).next() { + let text = el.text().collect::().trim().to_string(); + if !text.is_empty() { + details.description = Some(text); + } + } + } + } + + // Extract authors and publishers from album info blocks + if let Ok(sel) = Selector::parse(".infos li") { + let mut authors_set = std::collections::HashSet::new(); + let mut publishers_set = std::collections::HashSet::new(); + + for li in doc.select(&sel) { + let text = li.text().collect::(); + let text = text.trim(); + + if let Some(val) = extract_info_value(text, "Scénario") { + for a in val.split(',').map(str::trim).filter(|s| !s.is_empty()) { + authors_set.insert(a.to_string()); + } + } + if let Some(val) = extract_info_value(text, "Dessin") { + for a in val.split(',').map(str::trim).filter(|s| !s.is_empty()) { + authors_set.insert(a.to_string()); + } + } + if let Some(val) = extract_info_value(text, "Editeur") { + for p in val.split(',').map(str::trim).filter(|s| !s.is_empty()) { + publishers_set.insert(p.to_string()); + } + } + } + + details.authors = authors_set.into_iter().collect(); + details.authors.sort(); + details.publishers = publishers_set.into_iter().collect(); + details.publishers.sort(); + } + + // Album count from serie-info text (e.g. "Tomes : 8") + let page_text = doc.root_element().text().collect::(); + if let Ok(re) = regex::Regex::new(r"Tomes?\s*:\s*(\d+)") { + if let Some(caps) = re.captures(&page_text) { + if let Ok(n) = caps[1].parse::() { + details.album_count = Some(n); + } + } + } + + // Start year from first album date (Dépot légal) + if let Ok(re) = regex::Regex::new(r"[Dd][ée]p[ôo]t l[ée]gal\s*:\s*\d{2}/(\d{4})") { + if let Some(caps) = re.captures(&page_text) { + if let Ok(year) = caps[1].parse::() { + details.start_year = Some(year); + } + } + } + + Ok(details) +} + +/// Extract value after a label like "Scénario : Jean-Claude" → "Jean-Claude" +fn extract_info_value<'a>(text: &'a str, label: &str) -> Option<&'a str> { + // Handle both "Label :" and "Label:" + let patterns = [ + format!("{} :", label), + format!("{}:", label), + format!("{} :", &label.to_lowercase()), + ]; + for pat in &patterns { + if let Some(pos) = text.find(pat.as_str()) { + let val = text[pos + pat.len()..].trim(); + if !val.is_empty() { + return Some(val); + } + } + } + None +} + +// --------------------------------------------------------------------------- +// Get series books +// --------------------------------------------------------------------------- + +async fn get_series_books_impl( + external_id: &str, + _config: &ProviderConfig, +) -> Result, String> { + let client = build_client()?; + + // We need to find the series URL — try a direct fetch + // external_id is the numeric series ID + // We try to fetch the series page to get the album list + let url = format!( + "https://www.bedetheque.com/serie-{}-BD-Serie__10000.html", + external_id + ); + + let resp = client + .get(&url) + .send() + .await + .map_err(|e| format!("Failed to fetch series: {e}"))?; + + // If the generic slug fails, try without the slug part (bedetheque redirects) + let html = if resp.status().is_success() { + resp.text().await.map_err(|e| format!("Failed to read: {e}"))? + } else { + // Try alternative URL pattern + let alt_url = format!( + "https://www.bedetheque.com/serie-{}__10000.html", + external_id + ); + let resp2 = client + .get(&alt_url) + .send() + .await + .map_err(|e| format!("Failed to fetch series (alt): {e}"))?; + if !resp2.status().is_success() { + return Err(format!("Series page not found for id {external_id}")); + } + resp2.text().await.map_err(|e| format!("Failed to read: {e}"))? + }; + + if html.contains("") { + return Err("Bedetheque: IP may be rate-limited".to_string()); + } + + let doc = Html::parse_document(&html); + let mut books = Vec::new(); + + // Albums are in .album-main blocks + let album_sel = Selector::parse(".album-main").map_err(|e| format!("selector: {e}"))?; + + for album_el in doc.select(&album_sel) { + let album_html = album_el.html(); + let album_doc = Html::parse_fragment(&album_html); + + // Title from .titre + let title = select_text(&album_doc, ".titre") + .or_else(|| { + Selector::parse(".titre a") + .ok() + .and_then(|s| album_doc.select(&s).next()) + .map(|el| el.text().collect::().trim().to_string()) + }) + .unwrap_or_default(); + + if title.is_empty() { + continue; + } + + // Volume number from title or .num span + let volume_number = select_text(&album_doc, ".num") + .and_then(|s| { + s.trim_end_matches('.') + .trim() + .parse::() + .ok() + }) + .or_else(|| extract_volume_from_title(&title)); + + // Album URL + let album_url = Selector::parse("a[href*='/BD-']") + .ok() + .and_then(|s| album_doc.select(&s).next()) + .and_then(|el| el.value().attr("href")) + .map(String::from); + + // External book id from URL + let external_book_id = album_url + .as_deref() + .and_then(|u| { + regex::Regex::new(r"-(\d+)\.html") + .ok() + .and_then(|re| re.captures(u)) + .map(|c| c[1].to_string()) + }) + .unwrap_or_default(); + + // Cover + let cover_url = Selector::parse("img[src*='cache/thb_couv']") + .ok() + .and_then(|s| album_doc.select(&s).next()) + .and_then(|el| el.value().attr("src")) + .map(|s| { + if s.starts_with("http") { + s.to_string() + } else { + format!("https://www.bedetheque.com{}", s) + } + }); + + // Extract info fields + let album_text = album_el.text().collect::(); + let authors = extract_all_authors(&album_text); + let isbn = extract_info_value(&album_text, "EAN/ISBN") + .or_else(|| extract_info_value(&album_text, "ISBN")) + .map(|s| s.trim().to_string()); + let page_count = extract_info_value(&album_text, "Planches") + .and_then(|s| s.trim().parse::().ok()); + let publish_date = extract_info_value(&album_text, "Dépot légal") + .or_else(|| extract_info_value(&album_text, "Depot legal")) + .map(|s| s.trim().to_string()); + + books.push(BookCandidate { + external_book_id, + title, + volume_number, + authors, + isbn, + summary: None, + cover_url, + page_count, + language: Some("fr".to_string()), + publish_date, + metadata_json: serde_json::json!({}), + }); + } + + books.sort_by_key(|b| b.volume_number.unwrap_or(999)); + Ok(books) +} + +fn select_text(doc: &Html, selector: &str) -> Option { + Selector::parse(selector) + .ok() + .and_then(|s| doc.select(&s).next()) + .map(|el| el.text().collect::().trim().to_string()) + .filter(|s| !s.is_empty()) +} + +fn extract_all_authors(text: &str) -> Vec { + let mut authors = Vec::new(); + for label in ["Scénario", "Scenario", "Dessin"] { + if let Some(val) = extract_info_value(text, label) { + for a in val.split(',').map(str::trim).filter(|s| !s.is_empty()) { + if !authors.contains(&a.to_string()) { + authors.push(a.to_string()); + } + } + } + } + authors +} + +fn extract_volume_from_title(title: &str) -> Option { + let patterns = [ + r"(?i)(?:tome|t\.)\s*(\d+)", + r"(?i)(?:vol(?:ume)?\.?)\s*(\d+)", + r"#\s*(\d+)", + ]; + for pattern in &patterns { + if let Ok(re) = regex::Regex::new(pattern) { + if let Some(caps) = re.captures(title) { + if let Ok(n) = caps[1].parse::() { + return Some(n); + } + } + } + } + None +} + +fn compute_confidence(title: &str, query: &str) -> f32 { + let title_lower = title.to_lowercase(); + if title_lower == query { + 1.0 + } else if title_lower.starts_with(query) || query.starts_with(&title_lower) { + 0.85 + } else if title_lower.contains(query) || query.contains(&title_lower) { + 0.7 + } else { + let common: usize = query + .chars() + .filter(|c| title_lower.contains(*c)) + .count(); + let max_len = query.len().max(title_lower.len()).max(1); + (common as f32 / max_len as f32).clamp(0.1, 0.6) + } +} diff --git a/apps/api/src/metadata_providers/comicvine.rs b/apps/api/src/metadata_providers/comicvine.rs new file mode 100644 index 0000000..c7fabf4 --- /dev/null +++ b/apps/api/src/metadata_providers/comicvine.rs @@ -0,0 +1,267 @@ +use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate}; + +pub struct ComicVineProvider; + +impl MetadataProvider for ComicVineProvider { + fn name(&self) -> &str { + "comicvine" + } + + fn search_series( + &self, + query: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let query = query.to_string(); + let config = config.clone(); + Box::pin(async move { search_series_impl(&query, &config).await }) + } + + fn get_series_books( + &self, + external_id: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let external_id = external_id.to_string(); + let config = config.clone(); + Box::pin(async move { get_series_books_impl(&external_id, &config).await }) + } +} + +fn build_client() -> Result { + reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(15)) + .user_agent("StripstreamLibrarian/1.0") + .build() + .map_err(|e| format!("failed to build HTTP client: {e}")) +} + +async fn search_series_impl( + query: &str, + config: &ProviderConfig, +) -> Result, String> { + let api_key = config + .api_key + .as_deref() + .filter(|k| !k.is_empty()) + .ok_or_else(|| "ComicVine requires an API key. Configure it in Settings > Integrations.".to_string())?; + + let client = build_client()?; + + let url = format!( + "https://comicvine.gamespot.com/api/search/?api_key={}&format=json&resources=volume&query={}&limit=20", + api_key, + urlencoded(query) + ); + + let resp = client + .get(&url) + .send() + .await + .map_err(|e| format!("ComicVine request failed: {e}"))?; + + if !resp.status().is_success() { + let status = resp.status(); + let text = resp.text().await.unwrap_or_default(); + return Err(format!("ComicVine returned {status}: {text}")); + } + + let data: serde_json::Value = resp + .json() + .await + .map_err(|e| format!("Failed to parse ComicVine response: {e}"))?; + + let results = match data.get("results").and_then(|r| r.as_array()) { + Some(results) => results, + None => return Ok(vec![]), + }; + + let query_lower = query.to_lowercase(); + + let mut candidates: Vec = results + .iter() + .filter_map(|vol| { + let name = vol.get("name").and_then(|n| n.as_str())?.to_string(); + let id = vol.get("id").and_then(|id| id.as_i64())? as i64; + let description = vol + .get("description") + .and_then(|d| d.as_str()) + .map(|d| strip_html(d)); + let publisher = vol + .get("publisher") + .and_then(|p| p.get("name")) + .and_then(|n| n.as_str()) + .map(String::from); + let start_year = vol + .get("start_year") + .and_then(|y| y.as_str()) + .and_then(|y| y.parse::().ok()); + let count_of_issues = vol + .get("count_of_issues") + .and_then(|c| c.as_i64()) + .map(|c| c as i32); + let cover_url = vol + .get("image") + .and_then(|img| img.get("medium_url").or_else(|| img.get("small_url"))) + .and_then(|u| u.as_str()) + .map(String::from); + let site_url = vol + .get("site_detail_url") + .and_then(|u| u.as_str()) + .map(String::from); + + let confidence = compute_confidence(&name, &query_lower); + + Some(SeriesCandidate { + external_id: id.to_string(), + title: name, + authors: vec![], + description, + publishers: publisher.into_iter().collect(), + start_year, + total_volumes: count_of_issues, + cover_url, + external_url: site_url, + confidence, + metadata_json: serde_json::json!({}), + }) + }) + .collect(); + + candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)); + candidates.truncate(10); + Ok(candidates) +} + +async fn get_series_books_impl( + external_id: &str, + config: &ProviderConfig, +) -> Result, String> { + let api_key = config + .api_key + .as_deref() + .filter(|k| !k.is_empty()) + .ok_or_else(|| "ComicVine requires an API key".to_string())?; + + let client = build_client()?; + + let url = format!( + "https://comicvine.gamespot.com/api/issues/?api_key={}&format=json&filter=volume:{}&sort=issue_number:asc&limit=100&field_list=id,name,issue_number,description,image,cover_date,site_detail_url", + api_key, + external_id + ); + + let resp = client + .get(&url) + .send() + .await + .map_err(|e| format!("ComicVine request failed: {e}"))?; + + if !resp.status().is_success() { + let status = resp.status(); + let text = resp.text().await.unwrap_or_default(); + return Err(format!("ComicVine returned {status}: {text}")); + } + + let data: serde_json::Value = resp + .json() + .await + .map_err(|e| format!("Failed to parse ComicVine response: {e}"))?; + + let results = match data.get("results").and_then(|r| r.as_array()) { + Some(results) => results, + None => return Ok(vec![]), + }; + + let books: Vec = results + .iter() + .filter_map(|issue| { + let id = issue.get("id").and_then(|id| id.as_i64())? as i64; + let name = issue + .get("name") + .and_then(|n| n.as_str()) + .unwrap_or("") + .to_string(); + let issue_number = issue + .get("issue_number") + .and_then(|n| n.as_str()) + .and_then(|n| n.parse::().ok()) + .map(|n| n as i32); + let description = issue + .get("description") + .and_then(|d| d.as_str()) + .map(|d| strip_html(d)); + let cover_url = issue + .get("image") + .and_then(|img| img.get("medium_url").or_else(|| img.get("small_url"))) + .and_then(|u| u.as_str()) + .map(String::from); + let cover_date = issue + .get("cover_date") + .and_then(|d| d.as_str()) + .map(String::from); + + Some(BookCandidate { + external_book_id: id.to_string(), + title: name, + volume_number: issue_number, + authors: vec![], + isbn: None, + summary: description, + cover_url, + page_count: None, + language: None, + publish_date: cover_date, + metadata_json: serde_json::json!({}), + }) + }) + .collect(); + + Ok(books) +} + +fn strip_html(s: &str) -> String { + let mut result = String::new(); + let mut in_tag = false; + for ch in s.chars() { + match ch { + '<' => in_tag = true, + '>' => in_tag = false, + _ if !in_tag => result.push(ch), + _ => {} + } + } + result.trim().to_string() +} + +fn compute_confidence(title: &str, query: &str) -> f32 { + let title_lower = title.to_lowercase(); + if title_lower == query { + 1.0 + } else if title_lower.starts_with(query) || query.starts_with(&title_lower) { + 0.8 + } else if title_lower.contains(query) || query.contains(&title_lower) { + 0.7 + } else { + let common: usize = query.chars().filter(|c| title_lower.contains(*c)).count(); + let max_len = query.len().max(title_lower.len()).max(1); + (common as f32 / max_len as f32).clamp(0.1, 0.6) + } +} + +fn urlencoded(s: &str) -> String { + let mut result = String::new(); + for byte in s.bytes() { + match byte { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + result.push(byte as char); + } + _ => result.push_str(&format!("%{:02X}", byte)), + } + } + result +} diff --git a/apps/api/src/metadata_providers/google_books.rs b/apps/api/src/metadata_providers/google_books.rs new file mode 100644 index 0000000..4470226 --- /dev/null +++ b/apps/api/src/metadata_providers/google_books.rs @@ -0,0 +1,472 @@ +use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate}; + +pub struct GoogleBooksProvider; + +impl MetadataProvider for GoogleBooksProvider { + fn name(&self) -> &str { + "google_books" + } + + fn search_series( + &self, + query: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let query = query.to_string(); + let config = config.clone(); + Box::pin(async move { search_series_impl(&query, &config).await }) + } + + fn get_series_books( + &self, + external_id: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let external_id = external_id.to_string(); + let config = config.clone(); + Box::pin(async move { get_series_books_impl(&external_id, &config).await }) + } +} + +async fn search_series_impl( + query: &str, + config: &ProviderConfig, +) -> Result, String> { + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(15)) + .build() + .map_err(|e| format!("failed to build HTTP client: {e}"))?; + + let search_query = format!("intitle:{}", query); + let mut url = format!( + "https://www.googleapis.com/books/v1/volumes?q={}&maxResults=20&printType=books&langRestrict={}", + urlencoded(&search_query), + urlencoded(&config.language), + ); + if let Some(ref key) = config.api_key { + url.push_str(&format!("&key={}", key)); + } + + let resp = client + .get(&url) + .send() + .await + .map_err(|e| format!("Google Books request failed: {e}"))?; + + if !resp.status().is_success() { + let status = resp.status(); + let text = resp.text().await.unwrap_or_default(); + return Err(format!("Google Books returned {status}: {text}")); + } + + let data: serde_json::Value = resp + .json() + .await + .map_err(|e| format!("Failed to parse Google Books response: {e}"))?; + + let items = match data.get("items").and_then(|i| i.as_array()) { + Some(items) => items, + None => return Ok(vec![]), + }; + + // Group volumes by series name to produce series candidates + let query_lower = query.to_lowercase(); + let mut series_map: std::collections::HashMap = + std::collections::HashMap::new(); + + for item in items { + let volume_info = match item.get("volumeInfo") { + Some(vi) => vi, + None => continue, + }; + + let title = volume_info + .get("title") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + let authors: Vec = volume_info + .get("authors") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect() + }) + .unwrap_or_default(); + let publisher = volume_info + .get("publisher") + .and_then(|p| p.as_str()) + .map(String::from); + let published_date = volume_info + .get("publishedDate") + .and_then(|d| d.as_str()) + .map(String::from); + let description = volume_info + .get("description") + .and_then(|d| d.as_str()) + .map(String::from); + + // Extract series info from title or seriesInfo + let series_name = volume_info + .get("seriesInfo") + .and_then(|si| si.get("title")) + .and_then(|t| t.as_str()) + .map(String::from) + .unwrap_or_else(|| extract_series_name(&title)); + + let cover_url = volume_info + .get("imageLinks") + .and_then(|il| { + il.get("thumbnail") + .or_else(|| il.get("smallThumbnail")) + }) + .and_then(|u| u.as_str()) + .map(|s| s.replace("http://", "https://")); + + let google_id = item + .get("id") + .and_then(|id| id.as_str()) + .unwrap_or("") + .to_string(); + + let entry = series_map + .entry(series_name.clone()) + .or_insert_with(|| SeriesCandidateBuilder { + title: series_name.clone(), + authors: vec![], + description: None, + publishers: vec![], + start_year: None, + volume_count: 0, + cover_url: None, + external_id: google_id.clone(), + external_url: None, + metadata_json: serde_json::json!({}), + }); + + entry.volume_count += 1; + + // Merge authors + for a in &authors { + if !entry.authors.contains(a) { + entry.authors.push(a.clone()); + } + } + + // Set description if not yet set + if entry.description.is_none() { + entry.description = description; + } + + // Merge publisher + if let Some(ref pub_name) = publisher { + if !entry.publishers.contains(pub_name) { + entry.publishers.push(pub_name.clone()); + } + } + + // Extract year + if let Some(ref date) = published_date { + if let Some(year) = extract_year(date) { + if entry.start_year.is_none() || entry.start_year.unwrap() > year { + entry.start_year = Some(year); + } + } + } + + if entry.cover_url.is_none() { + entry.cover_url = cover_url; + } + + entry.external_url = Some(format!( + "https://books.google.com/books?id={}", + google_id + )); + } + + let mut candidates: Vec = series_map + .into_values() + .map(|b| { + let confidence = compute_confidence(&b.title, &query_lower); + SeriesCandidate { + external_id: b.external_id, + title: b.title, + authors: b.authors, + description: b.description, + publishers: b.publishers, + start_year: b.start_year, + total_volumes: if b.volume_count > 1 { + Some(b.volume_count) + } else { + None + }, + cover_url: b.cover_url, + external_url: b.external_url, + confidence, + metadata_json: b.metadata_json, + } + }) + .collect(); + + candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)); + candidates.truncate(10); + + Ok(candidates) +} + +async fn get_series_books_impl( + external_id: &str, + config: &ProviderConfig, +) -> Result, String> { + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(15)) + .build() + .map_err(|e| format!("failed to build HTTP client: {e}"))?; + + // First fetch the volume to get its series info + let mut url = format!( + "https://www.googleapis.com/books/v1/volumes/{}", + external_id + ); + if let Some(ref key) = config.api_key { + url.push_str(&format!("?key={}", key)); + } + + let resp = client + .get(&url) + .send() + .await + .map_err(|e| format!("Google Books request failed: {e}"))?; + + if !resp.status().is_success() { + let status = resp.status(); + let text = resp.text().await.unwrap_or_default(); + return Err(format!("Google Books returned {status}: {text}")); + } + + let volume: serde_json::Value = resp + .json() + .await + .map_err(|e| format!("Failed to parse Google Books response: {e}"))?; + + let volume_info = volume.get("volumeInfo").cloned().unwrap_or(serde_json::json!({})); + let title = volume_info + .get("title") + .and_then(|t| t.as_str()) + .unwrap_or(""); + + // Search for more volumes in this series + let series_name = extract_series_name(title); + let search_query = format!("intitle:{}", series_name); + let mut search_url = format!( + "https://www.googleapis.com/books/v1/volumes?q={}&maxResults=40&printType=books&langRestrict={}", + urlencoded(&search_query), + urlencoded(&config.language), + ); + if let Some(ref key) = config.api_key { + search_url.push_str(&format!("&key={}", key)); + } + + let resp = client + .get(&search_url) + .send() + .await + .map_err(|e| format!("Google Books search failed: {e}"))?; + + if !resp.status().is_success() { + // Return just the single volume as a book + return Ok(vec![volume_to_book_candidate(&volume)]); + } + + let data: serde_json::Value = resp + .json() + .await + .map_err(|e| format!("Failed to parse search response: {e}"))?; + + let items = match data.get("items").and_then(|i| i.as_array()) { + Some(items) => items, + None => return Ok(vec![volume_to_book_candidate(&volume)]), + }; + + let mut books: Vec = items + .iter() + .map(|item| volume_to_book_candidate(item)) + .collect(); + + // Sort by volume number + books.sort_by_key(|b| b.volume_number.unwrap_or(999)); + + Ok(books) +} + +fn volume_to_book_candidate(item: &serde_json::Value) -> BookCandidate { + let volume_info = item.get("volumeInfo").cloned().unwrap_or(serde_json::json!({})); + let title = volume_info + .get("title") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + let authors: Vec = volume_info + .get("authors") + .and_then(|a| a.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect() + }) + .unwrap_or_default(); + let isbn = volume_info + .get("industryIdentifiers") + .and_then(|ids| ids.as_array()) + .and_then(|arr| { + arr.iter() + .find(|id| { + id.get("type") + .and_then(|t| t.as_str()) + .map(|t| t == "ISBN_13" || t == "ISBN_10") + .unwrap_or(false) + }) + .and_then(|id| id.get("identifier").and_then(|i| i.as_str())) + }) + .map(String::from); + let summary = volume_info + .get("description") + .and_then(|d| d.as_str()) + .map(String::from); + let cover_url = volume_info + .get("imageLinks") + .and_then(|il| il.get("thumbnail").or_else(|| il.get("smallThumbnail"))) + .and_then(|u| u.as_str()) + .map(|s| s.replace("http://", "https://")); + let page_count = volume_info + .get("pageCount") + .and_then(|p| p.as_i64()) + .map(|p| p as i32); + let language = volume_info + .get("language") + .and_then(|l| l.as_str()) + .map(String::from); + let publish_date = volume_info + .get("publishedDate") + .and_then(|d| d.as_str()) + .map(String::from); + let google_id = item + .get("id") + .and_then(|id| id.as_str()) + .unwrap_or("") + .to_string(); + let volume_number = extract_volume_number(&title); + + BookCandidate { + external_book_id: google_id, + title, + volume_number, + authors, + isbn, + summary, + cover_url, + page_count, + language, + publish_date, + metadata_json: serde_json::json!({}), + } +} + +fn extract_series_name(title: &str) -> String { + // Remove trailing volume indicators like "Vol. 1", "Tome 2", "#3", "- Volume 1" + let re_patterns = [ + r"(?i)\s*[-–—]\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$", + r"(?i)\s*,?\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$", + r"\s*\(\d+\)\s*$", + r"\s+\d+\s*$", + ]; + + let mut result = title.to_string(); + for pattern in &re_patterns { + if let Ok(re) = regex::Regex::new(pattern) { + let cleaned = re.replace(&result, "").to_string(); + if !cleaned.is_empty() { + result = cleaned; + break; + } + } + } + + result.trim().to_string() +} + +fn extract_volume_number(title: &str) -> Option { + let patterns = [ + r"(?i)(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*(\d+)", + r"\((\d+)\)\s*$", + r"\b(\d+)\s*$", + ]; + + for pattern in &patterns { + if let Ok(re) = regex::Regex::new(pattern) { + if let Some(caps) = re.captures(title) { + if let Some(num) = caps.get(1).and_then(|m| m.as_str().parse::().ok()) { + return Some(num); + } + } + } + } + + None +} + +fn extract_year(date: &str) -> Option { + date.get(..4).and_then(|s| s.parse::().ok()) +} + +fn compute_confidence(title: &str, query: &str) -> f32 { + let title_lower = title.to_lowercase(); + if title_lower == query { + 1.0 + } else if title_lower.starts_with(query) || query.starts_with(&title_lower) { + 0.8 + } else if title_lower.contains(query) || query.contains(&title_lower) { + 0.7 + } else { + // Simple character overlap ratio + let common: usize = query + .chars() + .filter(|c| title_lower.contains(*c)) + .count(); + let max_len = query.len().max(title_lower.len()).max(1); + (common as f32 / max_len as f32).clamp(0.1, 0.6) + } +} + +fn urlencoded(s: &str) -> String { + let mut result = String::new(); + for byte in s.bytes() { + match byte { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + result.push(byte as char); + } + _ => { + result.push_str(&format!("%{:02X}", byte)); + } + } + } + result +} + +struct SeriesCandidateBuilder { + title: String, + authors: Vec, + description: Option, + publishers: Vec, + start_year: Option, + volume_count: i32, + cover_url: Option, + external_id: String, + external_url: Option, + metadata_json: serde_json::Value, +} diff --git a/apps/api/src/metadata_providers/mod.rs b/apps/api/src/metadata_providers/mod.rs new file mode 100644 index 0000000..349055c --- /dev/null +++ b/apps/api/src/metadata_providers/mod.rs @@ -0,0 +1,81 @@ +pub mod anilist; +pub mod bedetheque; +pub mod comicvine; +pub mod google_books; +pub mod open_library; + +use serde::{Deserialize, Serialize}; + +/// Configuration passed to providers (API keys, etc.) +#[derive(Debug, Clone, Default)] +pub struct ProviderConfig { + pub api_key: Option, + /// Preferred language for metadata results (ISO 639-1: "en", "fr", "es"). Defaults to "en". + pub language: String, +} + +/// A candidate series returned by a provider search +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SeriesCandidate { + pub external_id: String, + pub title: String, + pub authors: Vec, + pub description: Option, + pub publishers: Vec, + pub start_year: Option, + pub total_volumes: Option, + pub cover_url: Option, + pub external_url: Option, + pub confidence: f32, + pub metadata_json: serde_json::Value, +} + +/// A candidate book within a series +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BookCandidate { + pub external_book_id: String, + pub title: String, + pub volume_number: Option, + pub authors: Vec, + pub isbn: Option, + pub summary: Option, + pub cover_url: Option, + pub page_count: Option, + pub language: Option, + pub publish_date: Option, + pub metadata_json: serde_json::Value, +} + +/// Trait that all metadata providers must implement +pub trait MetadataProvider: Send + Sync { + #[allow(dead_code)] + fn name(&self) -> &str; + + fn search_series( + &self, + query: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + >; + + fn get_series_books( + &self, + external_id: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + >; +} + +/// Factory function to get a provider by name +pub fn get_provider(name: &str) -> Option> { + match name { + "google_books" => Some(Box::new(google_books::GoogleBooksProvider)), + "open_library" => Some(Box::new(open_library::OpenLibraryProvider)), + "comicvine" => Some(Box::new(comicvine::ComicVineProvider)), + "anilist" => Some(Box::new(anilist::AniListProvider)), + "bedetheque" => Some(Box::new(bedetheque::BedethequeProvider)), + _ => None, + } +} diff --git a/apps/api/src/metadata_providers/open_library.rs b/apps/api/src/metadata_providers/open_library.rs new file mode 100644 index 0000000..8bc13ab --- /dev/null +++ b/apps/api/src/metadata_providers/open_library.rs @@ -0,0 +1,351 @@ +use super::{BookCandidate, MetadataProvider, ProviderConfig, SeriesCandidate}; + +pub struct OpenLibraryProvider; + +impl MetadataProvider for OpenLibraryProvider { + fn name(&self) -> &str { + "open_library" + } + + fn search_series( + &self, + query: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let query = query.to_string(); + let config = config.clone(); + Box::pin(async move { search_series_impl(&query, &config).await }) + } + + fn get_series_books( + &self, + external_id: &str, + config: &ProviderConfig, + ) -> std::pin::Pin< + Box, String>> + Send + '_>, + > { + let external_id = external_id.to_string(); + let config = config.clone(); + Box::pin(async move { get_series_books_impl(&external_id, &config).await }) + } +} + +async fn search_series_impl( + query: &str, + config: &ProviderConfig, +) -> Result, String> { + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(15)) + .build() + .map_err(|e| format!("failed to build HTTP client: {e}"))?; + + // Open Library uses 3-letter language codes + let ol_lang = match config.language.as_str() { + "fr" => "fre", + "es" => "spa", + _ => "eng", + }; + + let url = format!( + "https://openlibrary.org/search.json?title={}&limit=20&language={}", + urlencoded(query), + ol_lang, + ); + + let resp = client + .get(&url) + .send() + .await + .map_err(|e| format!("Open Library request failed: {e}"))?; + + if !resp.status().is_success() { + let status = resp.status(); + let text = resp.text().await.unwrap_or_default(); + return Err(format!("Open Library returned {status}: {text}")); + } + + let data: serde_json::Value = resp + .json() + .await + .map_err(|e| format!("Failed to parse Open Library response: {e}"))?; + + let docs = match data.get("docs").and_then(|d| d.as_array()) { + Some(docs) => docs, + None => return Ok(vec![]), + }; + + let query_lower = query.to_lowercase(); + let mut series_map: std::collections::HashMap = + std::collections::HashMap::new(); + + for doc in docs { + let title = doc + .get("title") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + let authors: Vec = doc + .get("author_name") + .and_then(|a| a.as_array()) + .map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect()) + .unwrap_or_default(); + let publishers: Vec = doc + .get("publisher") + .and_then(|a| a.as_array()) + .map(|arr| { + let mut pubs: Vec = arr.iter().filter_map(|v| v.as_str().map(String::from)).collect(); + pubs.truncate(3); + pubs + }) + .unwrap_or_default(); + let first_publish_year = doc + .get("first_publish_year") + .and_then(|y| y.as_i64()) + .map(|y| y as i32); + let cover_i = doc.get("cover_i").and_then(|c| c.as_i64()); + let cover_url = cover_i.map(|id| format!("https://covers.openlibrary.org/b/id/{}-M.jpg", id)); + let key = doc + .get("key") + .and_then(|k| k.as_str()) + .unwrap_or("") + .to_string(); + + let series_name = extract_series_name(&title); + + let entry = series_map + .entry(series_name.clone()) + .or_insert_with(|| SeriesCandidateBuilder { + title: series_name.clone(), + authors: vec![], + description: None, + publishers: vec![], + start_year: None, + volume_count: 0, + cover_url: None, + external_id: key.clone(), + external_url: if key.is_empty() { + None + } else { + Some(format!("https://openlibrary.org{}", key)) + }, + }); + + entry.volume_count += 1; + + for a in &authors { + if !entry.authors.contains(a) { + entry.authors.push(a.clone()); + } + } + for p in &publishers { + if !entry.publishers.contains(p) { + entry.publishers.push(p.clone()); + } + } + if entry.start_year.is_none() || first_publish_year.map_or(false, |y| entry.start_year.unwrap() > y) { + if first_publish_year.is_some() { + entry.start_year = first_publish_year; + } + } + if entry.cover_url.is_none() { + entry.cover_url = cover_url; + } + } + + let mut candidates: Vec = series_map + .into_values() + .map(|b| { + let confidence = compute_confidence(&b.title, &query_lower); + SeriesCandidate { + external_id: b.external_id, + title: b.title, + authors: b.authors, + description: b.description, + publishers: b.publishers, + start_year: b.start_year, + total_volumes: if b.volume_count > 1 { Some(b.volume_count) } else { None }, + cover_url: b.cover_url, + external_url: b.external_url, + confidence, + metadata_json: serde_json::json!({}), + } + }) + .collect(); + + candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)); + candidates.truncate(10); + Ok(candidates) +} + +async fn get_series_books_impl( + external_id: &str, + _config: &ProviderConfig, +) -> Result, String> { + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(15)) + .build() + .map_err(|e| format!("failed to build HTTP client: {e}"))?; + + // Fetch the work to get its title for series search + let url = format!("https://openlibrary.org{}.json", external_id); + let resp = client.get(&url).send().await.map_err(|e| format!("Open Library request failed: {e}"))?; + + let work: serde_json::Value = if resp.status().is_success() { + resp.json().await.map_err(|e| format!("Failed to parse response: {e}"))? + } else { + serde_json::json!({}) + }; + + let title = work.get("title").and_then(|t| t.as_str()).unwrap_or(""); + let series_name = extract_series_name(title); + + // Search for editions of this series + let search_url = format!( + "https://openlibrary.org/search.json?title={}&limit=40", + urlencoded(&series_name) + ); + let resp = client.get(&search_url).send().await.map_err(|e| format!("Open Library search failed: {e}"))?; + + if !resp.status().is_success() { + return Ok(vec![]); + } + + let data: serde_json::Value = resp.json().await.map_err(|e| format!("Failed to parse response: {e}"))?; + let docs = match data.get("docs").and_then(|d| d.as_array()) { + Some(docs) => docs, + None => return Ok(vec![]), + }; + + let mut books: Vec = docs + .iter() + .map(|doc| { + let title = doc.get("title").and_then(|t| t.as_str()).unwrap_or("").to_string(); + let authors: Vec = doc + .get("author_name") + .and_then(|a| a.as_array()) + .map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect()) + .unwrap_or_default(); + let isbn = doc + .get("isbn") + .and_then(|a| a.as_array()) + .and_then(|arr| arr.first()) + .and_then(|v| v.as_str()) + .map(String::from); + let page_count = doc + .get("number_of_pages_median") + .and_then(|n| n.as_i64()) + .map(|n| n as i32); + let cover_i = doc.get("cover_i").and_then(|c| c.as_i64()); + let cover_url = cover_i.map(|id| format!("https://covers.openlibrary.org/b/id/{}-M.jpg", id)); + let language = doc + .get("language") + .and_then(|a| a.as_array()) + .and_then(|arr| arr.first()) + .and_then(|v| v.as_str()) + .map(String::from); + let publish_date = doc + .get("first_publish_year") + .and_then(|y| y.as_i64()) + .map(|y| y.to_string()); + let key = doc.get("key").and_then(|k| k.as_str()).unwrap_or("").to_string(); + let volume_number = extract_volume_number(&title); + + BookCandidate { + external_book_id: key, + title, + volume_number, + authors, + isbn, + summary: None, + cover_url, + page_count, + language, + publish_date, + metadata_json: serde_json::json!({}), + } + }) + .collect(); + + books.sort_by_key(|b| b.volume_number.unwrap_or(999)); + Ok(books) +} + +fn extract_series_name(title: &str) -> String { + let re_patterns = [ + r"(?i)\s*[-–—]\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$", + r"(?i)\s*,?\s*(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*\d+.*$", + r"\s*\(\d+\)\s*$", + r"\s+\d+\s*$", + ]; + let mut result = title.to_string(); + for pattern in &re_patterns { + if let Ok(re) = regex::Regex::new(pattern) { + let cleaned = re.replace(&result, "").to_string(); + if !cleaned.is_empty() { + result = cleaned; + break; + } + } + } + result.trim().to_string() +} + +fn extract_volume_number(title: &str) -> Option { + let patterns = [ + r"(?i)(?:vol(?:ume)?\.?\s*|tome\s*|t\.\s*|#)\s*(\d+)", + r"\((\d+)\)\s*$", + r"\b(\d+)\s*$", + ]; + for pattern in &patterns { + if let Ok(re) = regex::Regex::new(pattern) { + if let Some(caps) = re.captures(title) { + if let Some(num) = caps.get(1).and_then(|m| m.as_str().parse::().ok()) { + return Some(num); + } + } + } + } + None +} + +fn compute_confidence(title: &str, query: &str) -> f32 { + let title_lower = title.to_lowercase(); + if title_lower == query { + 1.0 + } else if title_lower.starts_with(query) || query.starts_with(&title_lower) { + 0.8 + } else if title_lower.contains(query) || query.contains(&title_lower) { + 0.7 + } else { + let common: usize = query.chars().filter(|c| title_lower.contains(*c)).count(); + let max_len = query.len().max(title_lower.len()).max(1); + (common as f32 / max_len as f32).clamp(0.1, 0.6) + } +} + +fn urlencoded(s: &str) -> String { + let mut result = String::new(); + for byte in s.bytes() { + match byte { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + result.push(byte as char); + } + _ => result.push_str(&format!("%{:02X}", byte)), + } + } + result +} + +struct SeriesCandidateBuilder { + title: String, + authors: Vec, + description: Option, + publishers: Vec, + start_year: Option, + volume_count: i32, + cover_url: Option, + external_id: String, + external_url: Option, +} diff --git a/apps/api/src/openapi.rs b/apps/api/src/openapi.rs index 5efde5f..124c9f5 100644 --- a/apps/api/src/openapi.rs +++ b/apps/api/src/openapi.rs @@ -46,6 +46,13 @@ use utoipa::OpenApi; crate::settings::clear_cache, crate::settings::get_cache_stats, crate::settings::get_thumbnail_stats, + crate::metadata::search_metadata, + crate::metadata::create_metadata_match, + crate::metadata::approve_metadata, + crate::metadata::reject_metadata, + crate::metadata::get_metadata_links, + crate::metadata::get_missing_books, + crate::metadata::delete_metadata_link, ), components( schemas( @@ -94,6 +101,18 @@ use utoipa::OpenApi; crate::stats::LibraryStats, crate::stats::TopSeries, crate::stats::MonthlyAdditions, + crate::metadata::ApproveRequest, + crate::metadata::ApproveResponse, + crate::metadata::SyncReport, + crate::metadata::SeriesSyncReport, + crate::metadata::BookSyncReport, + crate::metadata::FieldChange, + crate::metadata::MetadataSearchRequest, + crate::metadata::SeriesCandidateDto, + crate::metadata::MetadataMatchRequest, + crate::metadata::ExternalMetadataLinkDto, + crate::metadata::MissingBooksDto, + crate::metadata::MissingBookItem, ErrorResponse, ) ), diff --git a/apps/backoffice/app/api/libraries/[id]/metadata-provider/route.ts b/apps/backoffice/app/api/libraries/[id]/metadata-provider/route.ts new file mode 100644 index 0000000..6350ab5 --- /dev/null +++ b/apps/backoffice/app/api/libraries/[id]/metadata-provider/route.ts @@ -0,0 +1,20 @@ +import { NextRequest, NextResponse } from "next/server"; +import { apiFetch, LibraryDto } from "@/lib/api"; + +export async function PATCH( + request: NextRequest, + { params }: { params: Promise<{ id: string }> } +) { + const { id } = await params; + try { + const body = await request.json(); + const data = await apiFetch(`/libraries/${id}/metadata-provider`, { + method: "PATCH", + body: JSON.stringify(body), + }); + return NextResponse.json(data); + } catch (error) { + const message = error instanceof Error ? error.message : "Failed to update metadata provider"; + return NextResponse.json({ error: message }, { status: 500 }); + } +} diff --git a/apps/backoffice/app/api/metadata/approve/route.ts b/apps/backoffice/app/api/metadata/approve/route.ts new file mode 100644 index 0000000..77bf50c --- /dev/null +++ b/apps/backoffice/app/api/metadata/approve/route.ts @@ -0,0 +1,17 @@ +import { NextRequest, NextResponse } from "next/server"; +import { apiFetch } from "@/lib/api"; + +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + const { id, ...rest } = body; + const data = await apiFetch<{ status: string; books_synced: number }>(`/metadata/approve/${id}`, { + method: "POST", + body: JSON.stringify(rest), + }); + return NextResponse.json(data); + } catch (error) { + const message = error instanceof Error ? error.message : "Failed to approve metadata"; + return NextResponse.json({ error: message }, { status: 500 }); + } +} diff --git a/apps/backoffice/app/api/metadata/links/route.ts b/apps/backoffice/app/api/metadata/links/route.ts new file mode 100644 index 0000000..709a2e1 --- /dev/null +++ b/apps/backoffice/app/api/metadata/links/route.ts @@ -0,0 +1,35 @@ +import { NextRequest, NextResponse } from "next/server"; +import { apiFetch, ExternalMetadataLinkDto } from "@/lib/api"; + +export async function GET(request: NextRequest) { + try { + const { searchParams } = new URL(request.url); + const libraryId = searchParams.get("library_id") || ""; + const seriesName = searchParams.get("series_name") || ""; + const params = new URLSearchParams(); + if (libraryId) params.set("library_id", libraryId); + if (seriesName) params.set("series_name", seriesName); + const data = await apiFetch(`/metadata/links?${params.toString()}`); + return NextResponse.json(data); + } catch (error) { + const message = error instanceof Error ? error.message : "Failed to fetch metadata links"; + return NextResponse.json({ error: message }, { status: 500 }); + } +} + +export async function DELETE(request: NextRequest) { + try { + const { searchParams } = new URL(request.url); + const id = searchParams.get("id"); + if (!id) { + return NextResponse.json({ error: "id is required" }, { status: 400 }); + } + const data = await apiFetch<{ deleted: boolean }>(`/metadata/links/${id}`, { + method: "DELETE", + }); + return NextResponse.json(data); + } catch (error) { + const message = error instanceof Error ? error.message : "Failed to delete metadata link"; + return NextResponse.json({ error: message }, { status: 500 }); + } +} diff --git a/apps/backoffice/app/api/metadata/match/route.ts b/apps/backoffice/app/api/metadata/match/route.ts new file mode 100644 index 0000000..03dfac8 --- /dev/null +++ b/apps/backoffice/app/api/metadata/match/route.ts @@ -0,0 +1,16 @@ +import { NextRequest, NextResponse } from "next/server"; +import { apiFetch, ExternalMetadataLinkDto } from "@/lib/api"; + +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + const data = await apiFetch("/metadata/match", { + method: "POST", + body: JSON.stringify(body), + }); + return NextResponse.json(data); + } catch (error) { + const message = error instanceof Error ? error.message : "Failed to create metadata match"; + return NextResponse.json({ error: message }, { status: 500 }); + } +} diff --git a/apps/backoffice/app/api/metadata/missing/route.ts b/apps/backoffice/app/api/metadata/missing/route.ts new file mode 100644 index 0000000..4ac0377 --- /dev/null +++ b/apps/backoffice/app/api/metadata/missing/route.ts @@ -0,0 +1,17 @@ +import { NextRequest, NextResponse } from "next/server"; +import { apiFetch, MissingBooksDto } from "@/lib/api"; + +export async function GET(request: NextRequest) { + try { + const { searchParams } = new URL(request.url); + const id = searchParams.get("id"); + if (!id) { + return NextResponse.json({ error: "id is required" }, { status: 400 }); + } + const data = await apiFetch(`/metadata/missing/${id}`); + return NextResponse.json(data); + } catch (error) { + const message = error instanceof Error ? error.message : "Failed to fetch missing books"; + return NextResponse.json({ error: message }, { status: 500 }); + } +} diff --git a/apps/backoffice/app/api/metadata/reject/route.ts b/apps/backoffice/app/api/metadata/reject/route.ts new file mode 100644 index 0000000..23bdad0 --- /dev/null +++ b/apps/backoffice/app/api/metadata/reject/route.ts @@ -0,0 +1,15 @@ +import { NextRequest, NextResponse } from "next/server"; +import { apiFetch } from "@/lib/api"; + +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + const data = await apiFetch<{ status: string }>(`/metadata/reject/${body.id}`, { + method: "POST", + }); + return NextResponse.json(data); + } catch (error) { + const message = error instanceof Error ? error.message : "Failed to reject metadata"; + return NextResponse.json({ error: message }, { status: 500 }); + } +} diff --git a/apps/backoffice/app/api/metadata/search/route.ts b/apps/backoffice/app/api/metadata/search/route.ts new file mode 100644 index 0000000..4765735 --- /dev/null +++ b/apps/backoffice/app/api/metadata/search/route.ts @@ -0,0 +1,16 @@ +import { NextRequest, NextResponse } from "next/server"; +import { apiFetch, SeriesCandidateDto } from "@/lib/api"; + +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + const data = await apiFetch("/metadata/search", { + method: "POST", + body: JSON.stringify(body), + }); + return NextResponse.json(data); + } catch (error) { + const message = error instanceof Error ? error.message : "Failed to search metadata"; + return NextResponse.json({ error: message }, { status: 500 }); + } +} diff --git a/apps/backoffice/app/books/[id]/page.tsx b/apps/backoffice/app/books/[id]/page.tsx index bd78ad1..d7b4f6e 100644 --- a/apps/backoffice/app/books/[id]/page.tsx +++ b/apps/backoffice/app/books/[id]/page.tsx @@ -3,6 +3,7 @@ import { BookPreview } from "../../components/BookPreview"; import { ConvertButton } from "../../components/ConvertButton"; import { MarkBookReadButton } from "../../components/MarkBookReadButton"; import { EditBookForm } from "../../components/EditBookForm"; +import { SafeHtml } from "../../components/SafeHtml"; import Image from "next/image"; import Link from "next/link"; import { notFound } from "next/navigation"; @@ -15,31 +16,6 @@ const readingStatusConfig: Record - - {label} - {status === "reading" && currentPage != null && ` · p. ${currentPage}`} - - {lastReadAt && ( - - {new Date(lastReadAt).toLocaleDateString()} - - )} - - ); -} - async function fetchBook(bookId: string): Promise { try { return await apiFetch(`/books/${bookId}`); @@ -64,163 +40,195 @@ export default async function BookDetailPage({ } const library = libraries.find(l => l.id === book.library_id); + const formatBadge = (book.format ?? book.kind).toUpperCase(); + const formatColor = + formatBadge === "CBZ" ? "bg-success/10 text-success border-success/30" : + formatBadge === "CBR" ? "bg-warning/10 text-warning border-warning/30" : + formatBadge === "PDF" ? "bg-destructive/10 text-destructive border-destructive/30" : + "bg-muted/50 text-muted-foreground border-border"; + const { label: statusLabel, className: statusClassName } = readingStatusConfig[book.reading_status]; return ( - <> -
- - ← Back to books +
+ {/* Breadcrumb */} +
+ + Libraries + / + {library && ( + <> + + {library.name} + + / + + )} + {book.series && ( + <> + + {book.series} + + / + + )} + {book.title}
-
+ {/* Hero */} +
+ {/* Cover */}
-
+
{`Cover
-
-
-
+ {/* Info */} +
+
+

{book.title}

- -
- - {book.author && ( -

by {book.author}

- )} - - {book.series && ( -

- {book.series} - {book.volume && Volume {book.volume}} -

- )} - -
- {book.reading_status && ( -
- Lecture : -
- - -
-
- )} - -
- Format: - - {(book.format ?? book.kind).toUpperCase()} - -
- - {book.volume && ( -
- Volume: - {book.volume} -
- )} - - {book.language && ( -
- Language: - {book.language.toUpperCase()} -
- )} - - {book.page_count && ( -
- Pages: - {book.page_count} -
- )} - -
- Library: - {library?.name || book.library_id} -
- - {book.series && ( -
- Series: - {book.series} -
- )} - - {book.file_format && ( -
- File Format: -
- {book.file_format.toUpperCase()} - {book.file_format === "cbr" && } -
-
- )} - - {book.file_parse_status && ( -
- Parse Status: - - {book.file_parse_status} - -
- )} - - {book.file_path && ( -
- File Path: - {book.file_path} -
- )} - -
- Book ID: - {book.id} -
- -
- Library ID: - {book.library_id} -
- - {book.updated_at && ( -
- Updated: - {new Date(book.updated_at).toLocaleString()} -
+ {book.author && ( +

{book.author}

)}
+
+ + {/* Series + Volume link */} + {book.series && ( +
+ + {book.series} + + {book.volume != null && ( + + Vol. {book.volume} + + )} +
+ )} + + {/* Reading status + actions */} +
+ + {statusLabel} + {book.reading_status === "reading" && book.reading_current_page != null && ` · p. ${book.reading_current_page}`} + + {book.reading_last_read_at && ( + + {new Date(book.reading_last_read_at).toLocaleDateString()} + + )} + + {book.file_format === "cbr" && } +
+ + {/* Metadata pills */} +
+ + {formatBadge} + + {book.page_count && ( + + {book.page_count} pages + + )} + {book.language && ( + + {book.language.toUpperCase()} + + )} + {book.isbn && ( + + ISBN {book.isbn} + + )} + {book.publish_date && ( + + {book.publish_date} + + )} +
+ + {/* Description */} + {book.summary && ( + + )}
- {book.page_count && book.page_count > 0 && ( -
- + {/* Technical info (collapsible) */} +
+ + + + + Informations techniques + +
+ {book.file_path && ( +
+ Fichier + {book.file_path} +
+ )} + {book.file_format && ( +
+ Format fichier + {book.file_format.toUpperCase()} +
+ )} + {book.file_parse_status && ( +
+ Parsing + + {book.file_parse_status} + +
+ )} +
+ Book ID + {book.id} +
+
+ Library ID + {book.library_id} +
+ {book.updated_at && ( +
+ Mis à jour + {new Date(book.updated_at).toLocaleString()} +
+ )}
+
+ + {/* Book Preview */} + {book.page_count && book.page_count > 0 && ( + )} - +
); } diff --git a/apps/backoffice/app/books/page.tsx b/apps/backoffice/app/books/page.tsx index 00a8759..a4cf5bb 100644 --- a/apps/backoffice/app/books/page.tsx +++ b/apps/backoffice/app/books/page.tsx @@ -53,6 +53,9 @@ export default async function BooksPage({ reading_status: "unread" as const, reading_current_page: null, reading_last_read_at: null, + summary: null, + isbn: null, + publish_date: null, })); totalHits = searchResponse.estimated_total_hits; } diff --git a/apps/backoffice/app/components/EditBookForm.tsx b/apps/backoffice/app/components/EditBookForm.tsx index 48872a5..96e8154 100644 --- a/apps/backoffice/app/components/EditBookForm.tsx +++ b/apps/backoffice/app/components/EditBookForm.tsx @@ -6,6 +6,40 @@ import { useRouter } from "next/navigation"; import { BookDto } from "@/lib/api"; import { FormField, FormLabel, FormInput } from "./ui/Form"; +function LockButton({ + locked, + onToggle, + disabled, +}: { + locked: boolean; + onToggle: () => void; + disabled?: boolean; +}) { + return ( + + ); +} + interface EditBookFormProps { book: BookDto; } @@ -23,6 +57,14 @@ export function EditBookForm({ book }: EditBookFormProps) { const [series, setSeries] = useState(book.series ?? ""); const [volume, setVolume] = useState(book.volume?.toString() ?? ""); const [language, setLanguage] = useState(book.language ?? ""); + const [summary, setSummary] = useState(book.summary ?? ""); + const [isbn, setIsbn] = useState(book.isbn ?? ""); + const [publishDate, setPublishDate] = useState(book.publish_date ?? ""); + const [lockedFields, setLockedFields] = useState>(book.locked_fields ?? {}); + + const toggleLock = (field: string) => { + setLockedFields((prev) => ({ ...prev, [field]: !prev[field] })); + }; const addAuthor = () => { const v = authorInput.trim(); @@ -51,6 +93,10 @@ export function EditBookForm({ book }: EditBookFormProps) { setSeries(book.series ?? ""); setVolume(book.volume?.toString() ?? ""); setLanguage(book.language ?? ""); + setSummary(book.summary ?? ""); + setIsbn(book.isbn ?? ""); + setPublishDate(book.publish_date ?? ""); + setLockedFields(book.locked_fields ?? {}); setError(null); setIsOpen(false); }, [book]); @@ -85,6 +131,10 @@ export function EditBookForm({ book }: EditBookFormProps) { series: series.trim() || null, volume: volume.trim() ? parseInt(volume.trim(), 10) : null, language: language.trim() || null, + summary: summary.trim() || null, + isbn: isbn.trim() || null, + publish_date: publishDate.trim() || null, + locked_fields: lockedFields, }), }); if (!res.ok) { @@ -130,7 +180,10 @@ export function EditBookForm({ book }: EditBookFormProps) {
- Titre +
+ Titre + toggleLock("title")} disabled={isPending} /> +
setTitle(e.target.value)} @@ -141,7 +194,10 @@ export function EditBookForm({ book }: EditBookFormProps) { {/* Auteurs — multi-valeur */} - Auteur(s) +
+ Auteur(s) + toggleLock("authors")} disabled={isPending} /> +
{authors.length > 0 && (
@@ -187,7 +243,10 @@ export function EditBookForm({ book }: EditBookFormProps) { - Langue +
+ Langue + toggleLock("language")} disabled={isPending} /> +
setLanguage(e.target.value)} @@ -197,7 +256,10 @@ export function EditBookForm({ book }: EditBookFormProps) {
- Série +
+ Série + toggleLock("series")} disabled={isPending} /> +
setSeries(e.target.value)} @@ -207,7 +269,10 @@ export function EditBookForm({ book }: EditBookFormProps) {
- Volume +
+ Volume + toggleLock("volume")} disabled={isPending} /> +
+ + +
+ ISBN + toggleLock("isbn")} disabled={isPending} /> +
+ setIsbn(e.target.value)} + disabled={isPending} + placeholder="ISBN" + /> +
+ + +
+ Date de publication + toggleLock("publish_date")} disabled={isPending} /> +
+ setPublishDate(e.target.value)} + disabled={isPending} + placeholder="ex : 2023-01-15" + /> +
+ + +
+ Description + toggleLock("summary")} disabled={isPending} /> +
+