From 82294a1beed999fe4616bc61d45330658339570f Mon Sep 17 00:00:00 2001 From: Froidefond Julien Date: Thu, 5 Mar 2026 23:32:01 +0100 Subject: [PATCH] feat: change volume from string to integer type Parser: - Change volume type from an optional string to an optional integer - Parse volume as integer to remove leading zeros - Keep original title with volume info Indexer: - Update SQL queries to insert volume as integer - Add volume column to INSERT and UPDATE statements API: - Change BookItem.volume and BookDetails.volume to an optional integer - Add natural sorting for books Backoffice: - Update volume type to number - Update book detail page - Add CSS styles --- Cargo.lock | 1 + apps/api/src/books.rs | 40 +++++++++++--- apps/backoffice/app/books/[id]/page.tsx | 49 ++++++++++++++++- apps/backoffice/app/books/page.tsx | 3 ++ apps/backoffice/app/globals.css | 23 ++++++++ apps/backoffice/lib/api.ts | 7 ++- apps/indexer/src/main.rs | 14 ++--- crates/parsers/Cargo.toml | 1 + crates/parsers/src/lib.rs | 70 ++++++++++++++++++++++++- 9 files changed, 190 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d1a15f9..225b778 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1447,6 +1447,7 @@ version = "0.1.0" dependencies = [ "anyhow", "lopdf", + "regex", "zip 2.4.2", ] diff --git a/apps/api/src/books.rs b/apps/api/src/books.rs index 3a49e73..216db31 100644 --- a/apps/api/src/books.rs +++ b/apps/api/src/books.rs @@ -31,7 +31,7 @@ pub struct BookItem { pub title: String, pub author: Option, pub series: Option, - pub volume: Option, + pub volume: Option, pub language: Option, pub page_count: Option, #[schema(value_type = String)] @@ -55,7 +55,7 @@ pub struct BookDetails { pub title: String, pub author: Option, pub series: Option, - pub volume: Option, + pub volume: Option, pub language: Option, pub page_count: Option, pub file_path: Option, @@ -102,7 +102,16 @@ pub async fn list_books( AND ($2::text IS NULL OR kind = $2) AND ($3::uuid IS NULL OR id > $3) {} - ORDER BY id ASC + ORDER BY + -- Extract 
text part before numbers (case insensitive) + REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'), + -- Extract first number group and convert to integer for numeric sort + COALESCE( + NULLIF(REGEXP_REPLACE(LOWER(title), '^[^0-9]*', '', 'g'), '')::int, + 0 + ), + -- Then by full title as fallback + title ASC LIMIT $4 "#, series_condition @@ -235,11 +244,18 @@ pub async fn list_series( ) -> Result>, ApiError> { let rows = sqlx::query( r#" - WITH series_books AS ( + WITH sorted_books AS ( SELECT COALESCE(NULLIF(series, ''), 'unclassified') as name, id, - ROW_NUMBER() OVER (PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified') ORDER BY id) as rn + -- Natural sort order for books within series + ROW_NUMBER() OVER ( + PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified') + ORDER BY + REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'), + COALESCE(NULLIF(REGEXP_REPLACE(LOWER(title), '^[^0-9]*', '', 'g'), '')::int, 0), + title ASC + ) as rn FROM books WHERE library_id = $1 ), @@ -247,7 +263,7 @@ pub async fn list_series( SELECT name, COUNT(*) as book_count - FROM series_books + FROM sorted_books GROUP BY name ) SELECT @@ -255,8 +271,16 @@ pub async fn list_series( sc.book_count, sb.id as first_book_id FROM series_counts sc - JOIN series_books sb ON sb.name = sc.name AND sb.rn = 1 - ORDER BY sc.name ASC + JOIN sorted_books sb ON sb.name = sc.name AND sb.rn = 1 + ORDER BY + -- Natural sort: extract text part before numbers + REGEXP_REPLACE(LOWER(sc.name), '[0-9]+', '', 'g'), + -- Extract first number group and convert to integer + COALESCE( + NULLIF(REGEXP_REPLACE(LOWER(sc.name), '^[^0-9]*', '', 'g'), '')::int, + 0 + ), + sc.name ASC "#, ) .bind(library_id) diff --git a/apps/backoffice/app/books/[id]/page.tsx b/apps/backoffice/app/books/[id]/page.tsx index fb11f4e..0d95739 100644 --- a/apps/backoffice/app/books/[id]/page.tsx +++ b/apps/backoffice/app/books/[id]/page.tsx @@ -68,6 +68,13 @@ export default async function BookDetailPage({ {book.kind.toUpperCase()} + 
{book.volume && ( +
+ Volume: + {book.volume} +
+ )} + {book.language && (
Language: @@ -87,10 +94,50 @@ export default async function BookDetailPage({ {library?.name || book.library_id}
+ {book.series && ( +
+ Series: + {book.series} +
+ )} + + {book.file_format && ( +
+ File Format: + {book.file_format.toUpperCase()} +
+ )} + + {book.file_parse_status && ( +
+ Parse Status: + {book.file_parse_status} +
+ )} + + {book.file_path && ( +
+ File Path: + {book.file_path} +
+ )} +
- ID: + Book ID: {book.id}
+ +
+ Library ID: + {book.library_id} +
+ + {book.updated_at && ( +
+ Updated: + {new Date(book.updated_at).toLocaleString()} +
+ )} diff --git a/apps/backoffice/app/books/page.tsx b/apps/backoffice/app/books/page.tsx index dfa57a4..fdb510c 100644 --- a/apps/backoffice/app/books/page.tsx +++ b/apps/backoffice/app/books/page.tsx @@ -36,6 +36,9 @@ export default async function BooksPage({ volume: hit.volume, language: hit.language, page_count: null, + file_path: null, + file_format: null, + file_parse_status: null, updated_at: "" })); totalHits = searchResponse.estimated_total_hits; diff --git a/apps/backoffice/app/globals.css b/apps/backoffice/app/globals.css index be96f88..4da0009 100644 --- a/apps/backoffice/app/globals.css +++ b/apps/backoffice/app/globals.css @@ -706,3 +706,26 @@ button:hover { .dark .series-cover { background: linear-gradient(135deg, hsl(221 24% 20%), hsl(221 24% 15%)); } + +/* Status badges */ +.status-ok { + color: hsl(142 60% 45%); + font-weight: 700; +} + +.status-error { + color: hsl(2 72% 48%); + font-weight: 700; +} + +.status-pending { + color: hsl(45 93% 47%); + font-weight: 700; +} + +.file-path { + font-size: 0.8rem; + word-break: break-all; + max-width: 400px; + display: inline-block; +} diff --git a/apps/backoffice/lib/api.ts b/apps/backoffice/lib/api.ts index def3402..06bf4a2 100644 --- a/apps/backoffice/lib/api.ts +++ b/apps/backoffice/lib/api.ts @@ -37,9 +37,12 @@ export type BookDto = { title: string; author: string | null; series: string | null; - volume: string | null; + volume: number | null; language: string | null; page_count: number | null; + file_path: string | null; + file_format: string | null; + file_parse_status: string | null; updated_at: string; }; @@ -54,7 +57,7 @@ export type SearchHitDto = { title: string; author: string | null; series: string | null; - volume: string | null; + volume: number | null; kind: string; language: string | null; }; diff --git a/apps/indexer/src/main.rs b/apps/indexer/src/main.rs index ff0d08a..2b56ff7 100644 --- a/apps/indexer/src/main.rs +++ b/apps/indexer/src/main.rs @@ -231,12 +231,13 @@ async fn 
scan_library( match parse_metadata(path, format, root) { Ok(parsed) => { sqlx::query( - "UPDATE books SET title = $2, kind = $3, series = $4, page_count = $5, updated_at = NOW() WHERE id = $1", + "UPDATE books SET title = $2, kind = $3, series = $4, volume = $5, page_count = $6, updated_at = NOW() WHERE id = $1", ) .bind(book_id) - .bind(parsed.title) + .bind(&parsed.title) .bind(kind_from_format(format)) - .bind(parsed.series) + .bind(&parsed.series) + .bind(&parsed.volume) .bind(parsed.page_count) .execute(&state.pool) .await?; @@ -274,13 +275,14 @@ async fn scan_library( let book_id = Uuid::new_v4(); let file_id = Uuid::new_v4(); sqlx::query( - "INSERT INTO books (id, library_id, kind, title, series, page_count) VALUES ($1, $2, $3, $4, $5, $6)", + "INSERT INTO books (id, library_id, kind, title, series, volume, page_count) VALUES ($1, $2, $3, $4, $5, $6, $7)", ) .bind(book_id) .bind(library_id) .bind(kind_from_format(format)) - .bind(parsed.title) - .bind(parsed.series) + .bind(&parsed.title) + .bind(&parsed.series) + .bind(&parsed.volume) .bind(parsed.page_count) .execute(&state.pool) .await?; diff --git a/crates/parsers/Cargo.toml b/crates/parsers/Cargo.toml index b354cfd..fdb4452 100644 --- a/crates/parsers/Cargo.toml +++ b/crates/parsers/Cargo.toml @@ -7,4 +7,5 @@ license.workspace = true [dependencies] anyhow.workspace = true lopdf = "0.35" +regex = "1" zip = { version = "2.2", default-features = false, features = ["deflate"] } diff --git a/crates/parsers/src/lib.rs b/crates/parsers/src/lib.rs index ca3147e..2f04bcd 100644 --- a/crates/parsers/src/lib.rs +++ b/crates/parsers/src/lib.rs @@ -22,6 +22,7 @@ impl BookFormat { pub struct ParsedMetadata { pub title: String, pub series: Option, + pub volume: Option, pub page_count: Option, } @@ -40,11 +41,17 @@ pub fn parse_metadata( format: BookFormat, library_root: &Path, ) -> Result { - let title = path + let filename = path .file_stem() .map(|s| s.to_string_lossy().to_string()) .unwrap_or_else(|| 
"Untitled".to_string()); + // Extract volume from filename (patterns: T01, T02, Vol 1, Volume 1, #1, - 01, etc.) + let volume = extract_volume(&filename); + + // Keep original filename as title (don't clean it) + let title = filename; + // Determine series from parent folder relative to library root let series = path.parent().and_then(|parent| { // Get the relative path from library root to parent @@ -69,10 +76,71 @@ pub fn parse_metadata( Ok(ParsedMetadata { title, series, + volume, page_count, }) } +fn extract_volume(filename: &str) -> Option { + // Common volume patterns: T01, T02, T1, T2, Vol 1, Vol. 1, Volume 1, #1, #01, - 1, - 01 + let patterns = [ + // T01, T02 pattern (most common for manga/comics) + (r"(?i)T(\d+)", 1), + // Vol 1, Vol. 1, Volume 1 + (r"(?i)Vol\.?\s*(\d+)", 1), + (r"(?i)Volume\s*(\d+)", 1), + // #1, #01 + (r"#(\d+)", 1), + // - 1, - 01 at the end + (r"-\s*(\d+)\s*$", 1), + ]; + + for (pattern, group) in &patterns { + if let Ok(re) = regex::Regex::new(pattern) { + if let Some(caps) = re.captures(filename) { + if let Some(mat) = caps.get(*group) { + // Parse as integer to remove leading zeros + return mat.as_str().parse::().ok(); + } + } + } + } + + None +} + +#[allow(dead_code)] +fn clean_title(filename: &str) -> String { + // Remove volume patterns from title to clean it up + let cleaned = regex::Regex::new(r"(?i)\s*T\d+\s*") + .ok() + .and_then(|re| Some(re.replace_all(filename, " ").to_string())) + .unwrap_or_else(|| filename.to_string()); + + let cleaned = regex::Regex::new(r"(?i)\s*Vol\.?\s*\d+\s*") + .ok() + .and_then(|re| Some(re.replace_all(&cleaned, " ").to_string())) + .unwrap_or_else(|| cleaned); + + let cleaned = regex::Regex::new(r"(?i)\s*Volume\s*\d+\s*") + .ok() + .and_then(|re| Some(re.replace_all(&cleaned, " ").to_string())) + .unwrap_or_else(|| cleaned); + + let cleaned = regex::Regex::new(r"#\d+") + .ok() + .and_then(|re| Some(re.replace_all(&cleaned, " ").to_string())) + .unwrap_or_else(|| cleaned); + + let cleaned = 
regex::Regex::new(r"-\s*\d+\s*$") + .ok() + .and_then(|re| Some(re.replace_all(&cleaned, " ").to_string())) + .unwrap_or_else(|| cleaned); + + // Clean up extra spaces + cleaned.split_whitespace().collect::>().join(" ") +} + fn parse_cbz_page_count(path: &Path) -> Result { let file = std::fs::File::open(path) .with_context(|| format!("cannot open cbz: {}", path.display()))?;