From 82294a1beed999fe4616bc61d45330658339570f Mon Sep 17 00:00:00 2001
From: Froidefond Julien <julienfroidefond@gmail.com>
Date: Thu, 5 Mar 2026 23:32:01 +0100
Subject: [PATCH] feat: change volume from string to integer type

Parser:
- Change volume type from Option<String> to Option<i32>
- Parse volume as integer to remove leading zeros
- Keep original title with volume info

Indexer:
- Update SQL queries to insert volume as integer
- Add volume column to INSERT and UPDATE statements

API:
- Change BookItem.volume and BookDetails.volume to Option<i32>
- Add natural sorting for books

Backoffice:
- Update volume type to number
- Update book detail page
- Add CSS styles
---
 Cargo.lock                              |  1 +
 apps/api/src/books.rs                   | 40 +++++++++++---
 apps/backoffice/app/books/[id]/page.tsx | 49 ++++++++++++++++-
 apps/backoffice/app/books/page.tsx      |  3 ++
 apps/backoffice/app/globals.css         | 23 ++++++++
 apps/backoffice/lib/api.ts              |  7 ++-
 apps/indexer/src/main.rs                | 14 ++---
 crates/parsers/Cargo.toml               |  1 +
 crates/parsers/src/lib.rs               | 70 ++++++++++++++++++++++++-
 9 files changed, 190 insertions(+), 18 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index d1a15f9..225b778 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1447,6 +1447,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "lopdf",
+ "regex",
  "zip 2.4.2",
 ]
 
diff --git a/apps/api/src/books.rs b/apps/api/src/books.rs
index 3a49e73..216db31 100644
--- a/apps/api/src/books.rs
+++ b/apps/api/src/books.rs
@@ -31,7 +31,7 @@ pub struct BookItem {
     pub title: String,
     pub author: Option<String>,
     pub series: Option<String>,
-    pub volume: Option<String>,
+    pub volume: Option<i32>,
     pub language: Option<String>,
     pub page_count: Option<i32>,
     #[schema(value_type = String)]
@@ -55,7 +55,7 @@ pub struct BookDetails {
     pub title: String,
     pub author: Option<String>,
     pub series: Option<String>,
-    pub volume: Option<String>,
+    pub volume: Option<i32>,
     pub language: Option<String>,
     pub page_count: Option<i32>,
     pub file_path: Option<String>,
@@ -102,7 +102,16 @@ pub async fn list_books(
           AND ($2::text IS NULL OR kind = $2)
           AND ($3::uuid IS NULL OR id > $3)
           {}
-        ORDER BY id ASC
+        ORDER BY 
+            -- Extract text part before numbers (case insensitive)
+            REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'),
+            -- Extract first number group and convert to integer for numeric sort
+            COALESCE(
+                NULLIF(REGEXP_REPLACE(LOWER(title), '^[^0-9]*', '', 'g'), '')::int, 
+                0
+            ),
+            -- Then by full title as fallback
+            title ASC
         LIMIT $4
         "#,
         series_condition
@@ -235,11 +244,18 @@ pub async fn list_series(
 ) -> Result<Json<Vec<SeriesItem>>, ApiError> {
     let rows = sqlx::query(
         r#"
-        WITH series_books AS (
+        WITH sorted_books AS (
             SELECT 
                 COALESCE(NULLIF(series, ''), 'unclassified') as name,
                 id,
-                ROW_NUMBER() OVER (PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified') ORDER BY id) as rn
+                -- Natural sort order for books within series
+                ROW_NUMBER() OVER (
+                    PARTITION BY COALESCE(NULLIF(series, ''), 'unclassified') 
+                    ORDER BY 
+                        REGEXP_REPLACE(LOWER(title), '[0-9]+', '', 'g'),
+                        COALESCE(NULLIF(REGEXP_REPLACE(LOWER(title), '^[^0-9]*', '', 'g'), '')::int, 0),
+                        title ASC
+                ) as rn
             FROM books
             WHERE library_id = $1
         ),
@@ -247,7 +263,7 @@ pub async fn list_series(
             SELECT 
                 name,
                 COUNT(*) as book_count
-            FROM series_books
+            FROM sorted_books
             GROUP BY name
         )
         SELECT 
@@ -255,8 +271,16 @@ pub async fn list_series(
             sc.book_count,
             sb.id as first_book_id
         FROM series_counts sc
-        JOIN series_books sb ON sb.name = sc.name AND sb.rn = 1
-        ORDER BY sc.name ASC
+        JOIN sorted_books sb ON sb.name = sc.name AND sb.rn = 1
+        ORDER BY 
+            -- Natural sort: extract text part before numbers
+            REGEXP_REPLACE(LOWER(sc.name), '[0-9]+', '', 'g'),
+            -- Extract first number group and convert to integer
+            COALESCE(
+                NULLIF(REGEXP_REPLACE(LOWER(sc.name), '^[^0-9]*', '', 'g'), '')::int, 
+                0
+            ),
+            sc.name ASC
         "#,
     )
     .bind(library_id)
diff --git a/apps/backoffice/app/books/[id]/page.tsx b/apps/backoffice/app/books/[id]/page.tsx
index fb11f4e..0d95739 100644
--- a/apps/backoffice/app/books/[id]/page.tsx
+++ b/apps/backoffice/app/books/[id]/page.tsx
@@ -68,6 +68,13 @@ export default async function BookDetailPage({
               <span className={`book-kind ${book.kind}`}>{book.kind.toUpperCase()}</span>
             </div>
             
+            {book.volume && (
+              <div className="meta-row">
+                <span className="meta-label">Volume:</span>
+                <span>{book.volume}</span>
+              </div>
+            )}
+            
             {book.language && (
               <div className="meta-row">
                 <span className="meta-label">Language:</span>
@@ -87,10 +94,50 @@ export default async function BookDetailPage({
               <span>{library?.name || book.library_id}</span>
             </div>
 
+            {book.series && (
+              <div className="meta-row">
+                <span className="meta-label">Series:</span>
+                <span>{book.series}</span>
+              </div>
+            )}
+
+            {book.file_format && (
+              <div className="meta-row">
+                <span className="meta-label">File Format:</span>
+                <span>{book.file_format.toUpperCase()}</span>
+              </div>
+            )}
+
+            {book.file_parse_status && (
+              <div className="meta-row">
+                <span className="meta-label">Parse Status:</span>
+                <span className={`status-${book.file_parse_status}`}>{book.file_parse_status}</span>
+              </div>
+            )}
+
+            {book.file_path && (
+              <div className="meta-row">
+                <span className="meta-label">File Path:</span>
+                <code className="file-path">{book.file_path}</code>
+              </div>
+            )}
+
             <div className="meta-row">
-              <span className="meta-label">ID:</span>
+              <span className="meta-label">Book ID:</span>
               <code className="book-id">{book.id}</code>
             </div>
+
+            <div className="meta-row">
+              <span className="meta-label">Library ID:</span>
+              <code className="book-id">{book.library_id}</code>
+            </div>
+
+            {book.updated_at && (
+              <div className="meta-row">
+                <span className="meta-label">Updated:</span>
+                <span>{new Date(book.updated_at).toLocaleString()}</span>
+              </div>
+            )}
           </div>
         </div>
       </div>
diff --git a/apps/backoffice/app/books/page.tsx b/apps/backoffice/app/books/page.tsx
index dfa57a4..fdb510c 100644
--- a/apps/backoffice/app/books/page.tsx
+++ b/apps/backoffice/app/books/page.tsx
@@ -36,6 +36,9 @@ export default async function BooksPage({
         volume: hit.volume,
         language: hit.language,
         page_count: null,
+        file_path: null,
+        file_format: null,
+        file_parse_status: null,
         updated_at: ""
       }));
       totalHits = searchResponse.estimated_total_hits;
diff --git a/apps/backoffice/app/globals.css b/apps/backoffice/app/globals.css
index be96f88..4da0009 100644
--- a/apps/backoffice/app/globals.css
+++ b/apps/backoffice/app/globals.css
@@ -706,3 +706,26 @@ button:hover {
 .dark .series-cover {
   background: linear-gradient(135deg, hsl(221 24% 20%), hsl(221 24% 15%));
 }
+
+/* Status badges */
+.status-ok {
+  color: hsl(142 60% 45%);
+  font-weight: 700;
+}
+
+.status-error {
+  color: hsl(2 72% 48%);
+  font-weight: 700;
+}
+
+.status-pending {
+  color: hsl(45 93% 47%);
+  font-weight: 700;
+}
+
+.file-path {
+  font-size: 0.8rem;
+  word-break: break-all;
+  max-width: 400px;
+  display: inline-block;
+}
diff --git a/apps/backoffice/lib/api.ts b/apps/backoffice/lib/api.ts
index def3402..06bf4a2 100644
--- a/apps/backoffice/lib/api.ts
+++ b/apps/backoffice/lib/api.ts
@@ -37,9 +37,12 @@ export type BookDto = {
   title: string;
   author: string | null;
   series: string | null;
-  volume: string | null;
+  volume: number | null;
   language: string | null;
   page_count: number | null;
+  file_path: string | null;
+  file_format: string | null;
+  file_parse_status: string | null;
   updated_at: string;
 };
 
@@ -54,7 +57,7 @@ export type SearchHitDto = {
   title: string;
   author: string | null;
   series: string | null;
-  volume: string | null;
+  volume: number | null;
   kind: string;
   language: string | null;
 };
diff --git a/apps/indexer/src/main.rs b/apps/indexer/src/main.rs
index ff0d08a..2b56ff7 100644
--- a/apps/indexer/src/main.rs
+++ b/apps/indexer/src/main.rs
@@ -231,12 +231,13 @@ async fn scan_library(
             match parse_metadata(path, format, root) {
                 Ok(parsed) => {
                     sqlx::query(
-                        "UPDATE books SET title = $2, kind = $3, series = $4, page_count = $5, updated_at = NOW() WHERE id = $1",
+                        "UPDATE books SET title = $2, kind = $3, series = $4, volume = $5, page_count = $6, updated_at = NOW() WHERE id = $1",
                     )
                     .bind(book_id)
-                    .bind(parsed.title)
+                    .bind(&parsed.title)
                     .bind(kind_from_format(format))
-                    .bind(parsed.series)
+                    .bind(&parsed.series)
+                    .bind(&parsed.volume)
                     .bind(parsed.page_count)
                     .execute(&state.pool)
                     .await?;
@@ -274,13 +275,14 @@ async fn scan_library(
                 let book_id = Uuid::new_v4();
                 let file_id = Uuid::new_v4();
                 sqlx::query(
-                    "INSERT INTO books (id, library_id, kind, title, series, page_count) VALUES ($1, $2, $3, $4, $5, $6)",
+                    "INSERT INTO books (id, library_id, kind, title, series, volume, page_count) VALUES ($1, $2, $3, $4, $5, $6, $7)",
                 )
                 .bind(book_id)
                 .bind(library_id)
                 .bind(kind_from_format(format))
-                .bind(parsed.title)
-                .bind(parsed.series)
+                .bind(&parsed.title)
+                .bind(&parsed.series)
+                .bind(&parsed.volume)
                 .bind(parsed.page_count)
                 .execute(&state.pool)
                 .await?;
diff --git a/crates/parsers/Cargo.toml b/crates/parsers/Cargo.toml
index b354cfd..fdb4452 100644
--- a/crates/parsers/Cargo.toml
+++ b/crates/parsers/Cargo.toml
@@ -7,4 +7,5 @@ license.workspace = true
 [dependencies]
 anyhow.workspace = true
 lopdf = "0.35"
+regex = "1"
 zip = { version = "2.2", default-features = false, features = ["deflate"] }
diff --git a/crates/parsers/src/lib.rs b/crates/parsers/src/lib.rs
index ca3147e..2f04bcd 100644
--- a/crates/parsers/src/lib.rs
+++ b/crates/parsers/src/lib.rs
@@ -22,6 +22,7 @@ impl BookFormat {
 pub struct ParsedMetadata {
     pub title: String,
     pub series: Option<String>,
+    pub volume: Option<i32>,
     pub page_count: Option<i32>,
 }
 
@@ -40,11 +41,17 @@ pub fn parse_metadata(
     format: BookFormat,
     library_root: &Path,
 ) -> Result<ParsedMetadata> {
-    let title = path
+    let filename = path
         .file_stem()
         .map(|s| s.to_string_lossy().to_string())
         .unwrap_or_else(|| "Untitled".to_string());
 
+    // Extract volume from filename (patterns: T01, T02, Vol 1, Volume 1, #1, - 01, etc.)
+    let volume = extract_volume(&filename);
+
+    // Keep original filename as title (don't clean it)
+    let title = filename;
+
     // Determine series from parent folder relative to library root
     let series = path.parent().and_then(|parent| {
         // Get the relative path from library root to parent
@@ -69,10 +76,71 @@ pub fn parse_metadata(
     Ok(ParsedMetadata {
         title,
         series,
+        volume,
         page_count,
     })
 }
 
+/// Extracts a volume number from a file stem, or `None` when no pattern matches.
+///
+/// Patterns are tried in priority order: `T01`, `Vol 1` / `Vol. 1`, `Volume 1`,
+/// `#1`, then a trailing `- 01`. Digits are parsed as `i32`, which drops leading
+/// zeros; a capture that does not fit in `i32` falls through to the next pattern.
+fn extract_volume(filename: &str) -> Option<i32> {
+    // Compiled once per process: building a regex is far costlier than matching.
+    static PATTERNS: std::sync::OnceLock<Vec<regex::Regex>> = std::sync::OnceLock::new();
+    let patterns = PATTERNS.get_or_init(|| {
+        [
+            // `T` must not follow a letter, otherwise "Part2" or "Ghost3" would match.
+            r"(?:^|\P{L})[Tt](\d+)",
+            r"(?i)Vol\.?\s*(\d+)",
+            r"(?i)Volume\s*(\d+)",
+            r"#(\d+)",
+            r"-\s*(\d+)\s*$",
+        ]
+        .iter()
+        .map(|pattern| regex::Regex::new(pattern).expect("volume patterns are valid regexes"))
+        .collect()
+    });
+
+    patterns
+        .iter()
+        .filter_map(|re| re.captures(filename)?.get(1))
+        .find_map(|digits| digits.as_str().parse::<i32>().ok())
+}
+
+/// Removes volume markers from a file stem to produce a cleaner display title.
+///
+/// Strips `T01`, `Vol. 1`, `Volume 1`, `#1` and a trailing `- 01`, then collapses
+/// the remaining whitespace to single spaces.
+// Not called yet: `parse_metadata` keeps the raw file stem as the title.
+#[allow(dead_code)]
+fn clean_title(filename: &str) -> String {
+    // Compiled once per process instead of on every call.
+    static MARKERS: std::sync::OnceLock<Vec<regex::Regex>> = std::sync::OnceLock::new();
+    let markers = MARKERS.get_or_init(|| {
+        [
+            r"(?i)\s*T\d+\s*",
+            r"(?i)\s*Vol\.?\s*\d+\s*",
+            r"(?i)\s*Volume\s*\d+\s*",
+            r"#\d+",
+            r"-\s*\d+\s*$",
+        ]
+        .iter()
+        .map(|pattern| regex::Regex::new(pattern).expect("title patterns are valid regexes"))
+        .collect()
+    });
+
+    // Apply the patterns in order; each marker becomes a space so that the
+    // words on either side of it stay separated.
+    let stripped = markers.iter().fold(filename.to_string(), |title, re| {
+        re.replace_all(&title, " ").into_owned()
+    });
+
+    // Clean up extra spaces left behind by the replacements.
+    stripped.split_whitespace().collect::<Vec<_>>().join(" ")
+}
+
 fn parse_cbz_page_count(path: &Path) -> Result<i32> {
     let file = std::fs::File::open(path)
         .with_context(|| format!("cannot open cbz: {}", path.display()))?;