feat(books): ajouter le champ format en base et l'exposer dans l'API

- Migration 0020 : colonne format sur books, backfill depuis book_files
- batch.rs / scanner.rs : l'indexer écrit le format dans books
- books.rs : format dans BookItem + filtre ?format= dans list_books
- perf_pages.sh : benchmarks par format CBZ/CBR/PDF

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-13 08:55:18 +01:00
parent 85e0945c9d
commit 5db2a7501b
5 changed files with 236 additions and 13 deletions
--- a/apps/api/src/books.rs
+++ b/apps/api/src/books.rs
@@ -13,6 +13,8 @@ pub struct ListBooksQuery {
    pub library_id: Option<Uuid>,
    #[schema(value_type = Option<String>)]
    pub kind: Option<String>,
+    #[schema(value_type = Option<String>, example = "cbz")]
+    pub format: Option<String>,
    #[schema(value_type = Option<String>)]
    pub series: Option<String>,
    #[schema(value_type = Option<String>, example = "unread,reading")]
@@ -30,6 +32,7 @@ pub struct BookItem {
    #[schema(value_type = String)]
    pub library_id: Uuid,
    pub kind: String,
+    pub format: Option<String>,
    pub title: String,
    pub author: Option<String>,
    pub series: Option<String>,
@@ -110,8 +113,8 @@ pub async fn list_books(
        s.split(',').map(|v| v.trim().to_string()).filter(|v| !v.is_empty()).collect()
    });

-    // Conditions partagées COUNT et DATA — $1=library_id $2=kind, puis optionnels
-    let mut p: usize = 2;
+    // Conditions partagées COUNT et DATA — $1=library_id $2=kind $3=format, puis optionnels
+    let mut p: usize = 3;
    let series_cond = match query.series.as_deref() {
        Some("unclassified") => "AND (b.series IS NULL OR b.series = '')".to_string(),
        Some(_) => { p += 1; format!("AND b.series = ${p}") }
@@ -126,6 +129,7 @@ pub async fn list_books(
           LEFT JOIN book_reading_progress brp ON brp.book_id = b.id
           WHERE ($1::uuid IS NULL OR b.library_id = $1)
             AND ($2::text IS NULL OR b.kind = $2)
+             AND ($3::text IS NULL OR b.format = $3)
             {series_cond}
             {rs_cond}"#
    );
@@ -135,7 +139,7 @@ pub async fn list_books(
    let offset_p = p + 2;
    let data_sql = format!(
        r#"
-        SELECT b.id, b.library_id, b.kind, b.title, b.author, b.series, b.volume, b.language, b.page_count, b.thumbnail_path, b.updated_at,
+        SELECT b.id, b.library_id, b.kind, b.format, b.title, b.author, b.series, b.volume, b.language, b.page_count, b.thumbnail_path, b.updated_at,
               COALESCE(brp.status, 'unread') AS reading_status,
               brp.current_page AS reading_current_page,
               brp.last_read_at AS reading_last_read_at
@@ -143,6 +147,7 @@ pub async fn list_books(
        LEFT JOIN book_reading_progress brp ON brp.book_id = b.id
        WHERE ($1::uuid IS NULL OR b.library_id = $1)
          AND ($2::text IS NULL OR b.kind = $2)
+          AND ($3::text IS NULL OR b.format = $3)
          {series_cond}
          {rs_cond}
        ORDER BY
@@ -158,10 +163,12 @@ pub async fn list_books(

    let mut count_builder = sqlx::query(&count_sql)
        .bind(query.library_id)
-        .bind(query.kind.as_deref());
+        .bind(query.kind.as_deref())
+        .bind(query.format.as_deref());
    let mut data_builder = sqlx::query(&data_sql)
        .bind(query.library_id)
-        .bind(query.kind.as_deref());
+        .bind(query.kind.as_deref())
+        .bind(query.format.as_deref());

    if let Some(s) = query.series.as_deref() {
        if s != "unclassified" {
@@ -190,6 +197,7 @@ pub async fn list_books(
                id: row.get("id"),
                library_id: row.get("library_id"),
                kind: row.get("kind"),
+                format: row.get("format"),
                title: row.get("title"),
                author: row.get("author"),
                series: row.get("series"),
--- a/apps/indexer/src/batch.rs
+++ b/apps/indexer/src/batch.rs
@@ -8,6 +8,7 @@ pub struct BookUpdate {
    pub book_id: Uuid,
    pub title: String,
    pub kind: String,
+    pub format: String,
    pub series: Option<String>,
    pub volume: Option<i32>,
    pub page_count: Option<i32>,
@@ -25,6 +26,7 @@ pub struct BookInsert {
    pub book_id: Uuid,
    pub library_id: Uuid,
    pub kind: String,
+    pub format: String,
    pub title: String,
    pub series: Option<String>,
    pub volume: Option<i32>,
@@ -70,22 +72,24 @@ pub async fn flush_all_batches(
        let book_ids: Vec<Uuid> = books_update.iter().map(|b| b.book_id).collect();
        let titles: Vec<String> = books_update.iter().map(|b| b.title.clone()).collect();
        let kinds: Vec<String> = books_update.iter().map(|b| b.kind.clone()).collect();
+        let formats: Vec<String> = books_update.iter().map(|b| b.format.clone()).collect();
        let series: Vec<Option<String>> = books_update.iter().map(|b| b.series.clone()).collect();
        let volumes: Vec<Option<i32>> = books_update.iter().map(|b| b.volume).collect();
        let page_counts: Vec<Option<i32>> = books_update.iter().map(|b| b.page_count).collect();
-        
+
        sqlx::query(
            r#"
-            UPDATE books SET 
+            UPDATE books SET
                title = data.title,
                kind = data.kind,
+                format = data.format,
                series = data.series,
                volume = data.volume,
                page_count = data.page_count,
                updated_at = NOW()
            FROM (
-                SELECT * FROM UNNEST($1::uuid[], $2::text[], $3::text[], $4::text[], $5::int[], $6::int[])
-                AS t(book_id, title, kind, series, volume, page_count)
+                SELECT * FROM UNNEST($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::int[], $7::int[])
+                AS t(book_id, title, kind, format, series, volume, page_count)
            ) AS data
            WHERE books.id = data.book_id
            "#
@@ -93,6 +97,7 @@ pub async fn flush_all_batches(
        .bind(&book_ids)
        .bind(&titles)
        .bind(&kinds)
+        .bind(&formats)
        .bind(&series)
        .bind(&volumes)
        .bind(&page_counts)
@@ -143,22 +148,24 @@ pub async fn flush_all_batches(
        let book_ids: Vec<Uuid> = books_insert.iter().map(|b| b.book_id).collect();
        let library_ids: Vec<Uuid> = books_insert.iter().map(|b| b.library_id).collect();
        let kinds: Vec<String> = books_insert.iter().map(|b| b.kind.clone()).collect();
+        let formats: Vec<String> = books_insert.iter().map(|b| b.format.clone()).collect();
        let titles: Vec<String> = books_insert.iter().map(|b| b.title.clone()).collect();
        let series: Vec<Option<String>> = books_insert.iter().map(|b| b.series.clone()).collect();
        let volumes: Vec<Option<i32>> = books_insert.iter().map(|b| b.volume).collect();
        let page_counts: Vec<Option<i32>> = books_insert.iter().map(|b| b.page_count).collect();
        let thumbnail_paths: Vec<Option<String>> = books_insert.iter().map(|b| b.thumbnail_path.clone()).collect();
-        
+
        sqlx::query(
            r#"
-            INSERT INTO books (id, library_id, kind, title, series, volume, page_count, thumbnail_path)
-            SELECT * FROM UNNEST($1::uuid[], $2::uuid[], $3::text[], $4::text[], $5::text[], $6::int[], $7::int[], $8::text[])
-            AS t(id, library_id, kind, title, series, volume, page_count, thumbnail_path)
+            INSERT INTO books (id, library_id, kind, format, title, series, volume, page_count, thumbnail_path)
+            SELECT * FROM UNNEST($1::uuid[], $2::uuid[], $3::text[], $4::text[], $5::text[], $6::text[], $7::int[], $8::int[], $9::text[])
+            AS t(id, library_id, kind, format, title, series, volume, page_count, thumbnail_path)
            "#
        )
        .bind(&book_ids)
        .bind(&library_ids)
        .bind(&kinds)
+        .bind(&formats)
        .bind(&titles)
        .bind(&series)
        .bind(&volumes)
--- a/apps/indexer/src/scanner.rs
+++ b/apps/indexer/src/scanner.rs
@@ -281,6 +281,7 @@ pub async fn scan_library_discovery(
                book_id,
                title: parsed.title,
                kind: utils::kind_from_format(format).to_string(),
+                format: format.as_str().to_string(),
                series: parsed.series,
                volume: parsed.volume,
                // Reset page_count so analyzer re-processes this book
@@ -335,6 +336,7 @@ pub async fn scan_library_discovery(
            book_id,
            library_id,
            kind: utils::kind_from_format(format).to_string(),
+            format: format.as_str().to_string(),
            title: parsed.title,
            series: parsed.series,
            volume: parsed.volume,
--- a/infra/migrations/0020_add_format_to_books.sql
+++ b/infra/migrations/0020_add_format_to_books.sql
@@ -0,0 +1,13 @@
+-- Migration 0020: add a `format` column to `books`, denormalized from
+-- `book_files` for easy API access.
+-- The column is nullable; the CHECK restricts non-NULL values to 'pdf', 'cbz'
+-- or 'cbr'. IF NOT EXISTS skips the ALTER when the column is already present.
+ALTER TABLE books ADD COLUMN IF NOT EXISTS format TEXT CHECK (format IN ('pdf', 'cbz', 'cbr'));
+
+-- Backfill existing rows from book_files. DISTINCT ON (book_id) together with
+-- ORDER BY book_id, updated_at DESC keeps one row per book: the file with the
+-- latest updated_at. Only books whose format is still NULL are updated, so
+-- rows that already carry a format are left untouched.
+-- NOTE(review): ties on updated_at are resolved arbitrarily, and a
+-- book_files.format value outside the CHECK list would make this UPDATE fail;
+-- confirm book_files enforces the same set of values.
+UPDATE books b
+SET format = bf.format
+FROM (
+    SELECT DISTINCT ON (book_id) book_id, format
+    FROM book_files
+    ORDER BY book_id, updated_at DESC
+) bf
+WHERE b.id = bf.book_id
+  AND b.format IS NULL;
--- a/infra/perf_pages.sh
+++ b/infra/perf_pages.sh
@@ -0,0 +1,193 @@
+#!/usr/bin/env bash
+# perf_pages.sh — Page render performance benchmark
+#
+# Tests archive reading performance:
+#   - Cold render latency per format (CBZ/CBR/PDF, cache-busted widths)
+#   - Warm render latency (disk cache hit)
+#   - Sequential pages 1-10 (archive open/close overhead)
+#   - Concurrent rendering throughput (N simultaneous requests)
+#
+# Usage:
+#   BASE_API=http://localhost:7080 API_TOKEN=my-token bash infra/perf_pages.sh
+#
+# Optional:
+#   BENCH_N    requests per latency measurement (default 10)
+#   CONC_N     concurrent requests for throughput test (default 10)
+#
+# Requires migration 0020 (format column on books table) for per-format tests.
+
+set -euo pipefail
+
+BASE_API="${BASE_API:-http://127.0.0.1:7080}"
+TOKEN="${API_TOKEN:-stripstream-dev-bootstrap-token}"
+BENCH_N="${BENCH_N:-10}"
+CONC_N="${CONC_N:-10}"
+export BASE_API TOKEN
+
+BOLD="\033[1m"; RESET="\033[0m"; GREEN="\033[32m"; YELLOW="\033[33m"; CYAN="\033[36m"
+header() { echo -e "\n${BOLD}${CYAN}▶ $*${RESET}"; }
+ok()     { echo -e "  ${GREEN}✓${RESET} $*"; }
+warn()   { echo -e "  ${YELLOW}⚠${RESET} $*"; }
+row()    { printf "  %-44s %s\n" "$1" "$2"; }
+
+auth()   { curl -fsS -H "Authorization: Bearer $TOKEN" "$@"; }
+now_ms() { python3 -c "import time; print(int(time.time()*1000))"; }
+
+# ─── health ──────────────────────────────────────────────────────────────────
+
+header "Health"
+# Test curl inside an `if`: on the left-hand side of `&&` a failure is exempt
+# from `set -e`, so the script would carry on without reporting that the API
+# is unreachable.
+if ! curl -fsS "$BASE_API/health" >/dev/null; then
+  echo "API not reachable at $BASE_API — aborting" >&2
+  exit 1
+fi
+ok "API reachable"
+
+# First page of the book listing; the code below reads its `total` and `items` keys.
+BOOKS_JSON="$(auth "$BASE_API/books?limit=100")"
+BOOK_COUNT="$(python3 -c "import sys,json; print(json.load(sys.stdin).get('total',0))" <<<"$BOOKS_JSON")"
+ok "Books in index: $BOOK_COUNT"
+
+if [ "$BOOK_COUNT" -eq 0 ]; then
+  echo "No books found — aborting"; exit 1
+fi
+
+# Default benchmark target: first book of the listing ('?' when it has no format)
+FIRST_BOOK_ID="$(python3 -c "import sys,json; items=json.load(sys.stdin).get('items',[]); print(items[0]['id'] if items else '')" <<<"$BOOKS_JSON")"
+FIRST_BOOK_FORMAT="$(python3 -c "import sys,json; items=json.load(sys.stdin).get('items',[]); print((items[0].get('format') or '?') if items else '?')" <<<"$BOOKS_JSON")"
+# A non-zero total with an empty `items` page would otherwise lead to
+# /books//pages/… URLs in every benchmark below.
+if [ -z "$FIRST_BOOK_ID" ]; then
+  echo "Book listing returned no items — aborting" >&2
+  exit 1
+fi
+ok "Default target: $FIRST_BOOK_ID  (format: $FIRST_BOOK_FORMAT)"
+
+# One book per format — uses ?format= filter (requires migration 0020)
+find_book() {
+  local fmt="$1"
+  auth "$BASE_API/books?format=$fmt&limit=1" 2>/dev/null \
+    | python3 -c "import sys,json; items=json.load(sys.stdin).get('items',[]); print(items[0]['id'] if items else '')" \
+    2>/dev/null || echo ""
+}
+BOOK_CBZ=$(find_book cbz)
+BOOK_CBR=$(find_book cbr)
+BOOK_PDF=$(find_book pdf)
+
+[ -n "$BOOK_CBZ" ] && ok "CBZ sample: $BOOK_CBZ" || warn "No CBZ (run migration 0020 + rebuild?)"
+[ -n "$BOOK_CBR" ] && ok "CBR sample: $BOOK_CBR" || warn "No CBR"
+[ -n "$BOOK_PDF" ] && ok "PDF sample: $BOOK_PDF" || warn "No PDF"
+
+# ─── helpers ─────────────────────────────────────────────────────────────────
+
+# Cold render: cycle widths to bypass disk cache
+measure_cold() {
+  local label="$1" book_id="$2" page="${3:-1}" n="${4:-$BENCH_N}"
+  local total=0 i
+  for i in $(seq 1 "$n"); do
+    local w=$((480 + i))
+    local t
+    t=$(curl -s -o /dev/null -w '%{time_total}' \
+      -H "Authorization: Bearer $TOKEN" \
+      "$BASE_API/books/$book_id/pages/$page?format=webp&quality=80&width=$w")
+    total=$(python3 -c "print($total + $t)")
+  done
+  local avg_ms
+  avg_ms=$(python3 -c "print(round(($total / $n)*1000, 1))")
+  row "$label" "${avg_ms}ms  avg  (cold, n=$n)"
+}
+
+# Warm render: prime cache then measure
+measure_warm() {
+  local label="$1" book_id="$2" n="${3:-$BENCH_N}"
+  local url="$BASE_API/books/$book_id/pages/1?format=webp&quality=80&width=600"
+  curl -s -o /dev/null -H "Authorization: Bearer $TOKEN" "$url" >/dev/null
+  local total=0 i
+  for i in $(seq 1 "$n"); do
+    local t
+    t=$(curl -s -o /dev/null -w '%{time_total}' -H "Authorization: Bearer $TOKEN" "$url")
+    total=$(python3 -c "print($total + $t)")
+  done
+  local avg_ms
+  avg_ms=$(python3 -c "print(round(($total / $n)*1000, 1))")
+  row "$label" "${avg_ms}ms  avg  (warm/cached, n=$n)"
+}
+
+# ─── 1. Cold render by format ────────────────────────────────────────────────
+
+header "1 / Cold Render by Format  (cache-busted, n=$BENCH_N per format)"
+if [ -n "$BOOK_CBZ" ]; then
+  measure_cold "CBZ  page 1" "$BOOK_CBZ"
+else
+  warn "skip CBZ"
+fi
+if [ -n "$BOOK_CBR" ]; then
+  measure_cold "CBR  page 1" "$BOOK_CBR"
+else
+  warn "skip CBR"
+fi
+if [ -n "$BOOK_PDF" ]; then
+  measure_cold "PDF  page 1" "$BOOK_PDF"
+else
+  warn "skip PDF"
+fi
+
+# ─── 2. Warm render ──────────────────────────────────────────────────────────
+
+header "2 / Warm Render  (disk cache, n=$BENCH_N)"
+# Warm measurement on the default book; the label carries its format.
+WARM_LABEL="Default book  page 1  ($FIRST_BOOK_FORMAT)"
+measure_warm "$WARM_LABEL" "$FIRST_BOOK_ID"
+
+# ─── 3. Sequential pages ─────────────────────────────────────────────────────
+
+header "3 / Sequential Pages  (pages 1–10, default book, cold widths)"
+echo "  book: $FIRST_BOOK_ID  (format: $FIRST_BOOK_FORMAT)"
+SEQ_TOTAL=0
+for PAGE in $(seq 1 10); do
+  T=$(curl -s -o /dev/null -w '%{time_total}' \
+    -H "Authorization: Bearer $TOKEN" \
+    "$BASE_API/books/$FIRST_BOOK_ID/pages/$PAGE?format=webp&quality=80&width=$((500 + PAGE * 3))")
+  MS=$(python3 -c "print(round($T*1000, 1))")
+  SEQ_TOTAL=$(python3 -c "print($SEQ_TOTAL + $T)")
+  row "  page $PAGE" "${MS}ms"
+done
+SEQ_AVG=$(python3 -c "print(round($SEQ_TOTAL / 10 * 1000, 1))")
+echo "  ──────────────────────────────────────────────────"
+row "  avg (10 pages)" "${SEQ_AVG}ms"
+
+# ─── 4. Concurrent throughput ────────────────────────────────────────────────
+
+header "4 / Concurrent Throughput  ($CONC_N simultaneous requests)"
+echo "  book: $FIRST_BOOK_ID  (format: $FIRST_BOOK_FORMAT)"
+T_START=$(now_ms)
+PIDS=()
+for i in $(seq 1 "$CONC_N"); do
+  curl -s -o /dev/null \
+    -H "Authorization: Bearer $TOKEN" \
+    "$BASE_API/books/$FIRST_BOOK_ID/pages/$i?format=webp&quality=80&width=$((550 + i * 3))" &
+  PIDS+=($!)
+done
+for PID in "${PIDS[@]}"; do wait "$PID" 2>/dev/null || true; done
+T_END=$(now_ms)
+CONC_MS=$((T_END - T_START))
+CONC_PER=$(python3 -c "print(round($CONC_MS / $CONC_N, 1))")
+CONC_TPS=$(python3 -c "print(round($CONC_N / ($CONC_MS / 1000), 1))")
+
+row "  wall time  ($CONC_N pages in parallel)" "${CONC_MS}ms"
+row "  avg per page" "${CONC_PER}ms"
+row "  throughput" "${CONC_TPS} pages/s"
+
+# ─── 5. Format cross-check ───────────────────────────────────────────────────
+
+# Runs only when a sample book exists for all three formats: averages the cold
+# render time of pages 1-5 per format. A failed request is reported and left
+# out of the average instead of being counted as a render time.
+if [ -n "$BOOK_CBZ" ] && [ -n "$BOOK_CBR" ] && [ -n "$BOOK_PDF" ]; then
+  header "5 / Format Cross-Check  (5 pages each, cold)"
+  for PAIR in "CBZ:$BOOK_CBZ" "CBR:$BOOK_CBR" "PDF:$BOOK_PDF"; do
+    FMT="${PAIR%%:*}"
+    BID="${PAIR##*:}"
+    FMT_TOTAL=0
+    FMT_OK=0
+    for PAGE in 1 2 3 4 5; do
+      # -f makes curl exit non-zero on HTTP errors.
+      if T=$(curl -fsS -o /dev/null -w '%{time_total}' \
+        -H "Authorization: Bearer $TOKEN" \
+        "$BASE_API/books/$BID/pages/$PAGE?format=webp&quality=80&width=$((490 + PAGE * 7))"); then
+        FMT_TOTAL=$(python3 -c "print($FMT_TOTAL + $T)")
+        FMT_OK=$((FMT_OK + 1))
+      else
+        warn "$FMT page $PAGE: request failed — excluded from average"
+      fi
+    done
+    if [ "$FMT_OK" -gt 0 ]; then
+      # Average over the pages that actually rendered (seconds -> milliseconds).
+      AVG=$(python3 -c "print(round($FMT_TOTAL / $FMT_OK * 1000, 1))")
+      FMT_LABEL="$FMT  avg pages 1-5  (cold)"
+      if [ "$FMT_OK" -lt 5 ]; then
+        FMT_LABEL="$FMT  avg $FMT_OK of 5 pages  (cold)"
+      fi
+      row "$FMT_LABEL" "${AVG}ms"
+    else
+      warn "$FMT: no page rendered successfully — no average"
+    fi
+  done
+fi
+
+# ─── done ────────────────────────────────────────────────────────────────────
+
+header "Done"
+# Closing hint, followed by one blank line.
+printf '%s\n\n' "  Run again after the parsers refactoring to compare."