diff --git a/apps/api/src/books.rs b/apps/api/src/books.rs
index 76d1572..9fce20c 100644
--- a/apps/api/src/books.rs
+++ b/apps/api/src/books.rs
@@ -13,6 +13,8 @@ pub struct ListBooksQuery {
     pub library_id: Option<Uuid>,
     #[schema(value_type = Option<String>)]
     pub kind: Option<String>,
+    #[schema(value_type = Option<String>, example = "cbz")]
+    pub format: Option<String>,
     #[schema(value_type = Option<String>)]
     pub series: Option<String>,
     #[schema(value_type = Option<String>, example = "unread,reading")]
@@ -30,6 +32,7 @@ pub struct BookItem {
     #[schema(value_type = String)]
     pub library_id: Uuid,
     pub kind: String,
+    pub format: Option<String>,
     pub title: String,
     pub author: Option<String>,
     pub series: Option<String>,
@@ -110,8 +113,8 @@ pub async fn list_books(
         s.split(',').map(|v| v.trim().to_string()).filter(|v| !v.is_empty()).collect()
     });
 
-    // Conditions partagées COUNT et DATA — $1=library_id $2=kind, puis optionnels
-    let mut p: usize = 2;
+    // Conditions partagées COUNT et DATA — $1=library_id $2=kind $3=format, puis optionnels
+    let mut p: usize = 3;
     let series_cond = match query.series.as_deref() {
         Some("unclassified") => "AND (b.series IS NULL OR b.series = '')".to_string(),
         Some(_) => { p += 1; format!("AND b.series = ${p}") }
@@ -126,6 +129,7 @@
         LEFT JOIN book_reading_progress brp ON brp.book_id = b.id
         WHERE ($1::uuid IS NULL OR b.library_id = $1)
           AND ($2::text IS NULL OR b.kind = $2)
+          AND ($3::text IS NULL OR b.format = $3)
         {series_cond}
         {rs_cond}"#
     );
@@ -135,7 +139,7 @@
     let offset_p = p + 2;
     let data_sql = format!(
         r#"
-        SELECT b.id, b.library_id, b.kind, b.title, b.author, b.series, b.volume, b.language, b.page_count, b.thumbnail_path, b.updated_at,
+        SELECT b.id, b.library_id, b.kind, b.format, b.title, b.author, b.series, b.volume, b.language, b.page_count, b.thumbnail_path, b.updated_at,
                COALESCE(brp.status, 'unread') AS reading_status,
                brp.current_page AS reading_current_page,
                brp.last_read_at AS reading_last_read_at
@@ -143,6 +147,7 @@
         LEFT JOIN book_reading_progress brp ON brp.book_id = b.id
         WHERE ($1::uuid IS NULL OR b.library_id = $1)
           AND ($2::text IS NULL OR b.kind = $2)
+          AND ($3::text IS NULL OR b.format = $3)
         {series_cond}
         {rs_cond}
         ORDER BY
@@ -158,10 +163,12 @@
     let mut count_builder = sqlx::query(&count_sql)
         .bind(query.library_id)
-        .bind(query.kind.as_deref());
+        .bind(query.kind.as_deref())
+        .bind(query.format.as_deref());
 
     let mut data_builder = sqlx::query(&data_sql)
         .bind(query.library_id)
-        .bind(query.kind.as_deref());
+        .bind(query.kind.as_deref())
+        .bind(query.format.as_deref());
 
     if let Some(s) = query.series.as_deref() {
         if s != "unclassified" {
@@ -190,6 +197,7 @@
             id: row.get("id"),
             library_id: row.get("library_id"),
             kind: row.get("kind"),
+            format: row.get("format"),
             title: row.get("title"),
             author: row.get("author"),
             series: row.get("series"),
diff --git a/apps/indexer/src/batch.rs b/apps/indexer/src/batch.rs
index 0015aef..7da22ee 100644
--- a/apps/indexer/src/batch.rs
+++ b/apps/indexer/src/batch.rs
@@ -8,6 +8,7 @@ pub struct BookUpdate {
     pub book_id: Uuid,
     pub title: String,
     pub kind: String,
+    pub format: String,
     pub series: Option<String>,
     pub volume: Option<i32>,
     pub page_count: Option<i32>,
@@ -25,6 +26,7 @@ pub struct BookInsert {
     pub book_id: Uuid,
     pub library_id: Uuid,
     pub kind: String,
+    pub format: String,
     pub title: String,
     pub series: Option<String>,
     pub volume: Option<i32>,
@@ -70,22 +72,24 @@ pub async fn flush_all_batches(
     let book_ids: Vec<Uuid> = books_update.iter().map(|b| b.book_id).collect();
     let titles: Vec<String> = books_update.iter().map(|b| b.title.clone()).collect();
     let kinds: Vec<String> = books_update.iter().map(|b| b.kind.clone()).collect();
+    let formats: Vec<String> = books_update.iter().map(|b| b.format.clone()).collect();
     let series: Vec<Option<String>> = books_update.iter().map(|b| b.series.clone()).collect();
     let volumes: Vec<Option<i32>> = books_update.iter().map(|b| b.volume).collect();
     let page_counts: Vec<Option<i32>> = books_update.iter().map(|b| b.page_count).collect();
-    
+
     sqlx::query(
         r#"
-        UPDATE books SET 
+        UPDATE books SET
             title = data.title,
             kind = data.kind,
+            format = data.format,
             series = data.series,
             volume = data.volume,
             page_count = data.page_count,
             updated_at = NOW()
         FROM (
-            SELECT * FROM UNNEST($1::uuid[], $2::text[], $3::text[], $4::text[], $5::int[], $6::int[])
-            AS t(book_id, title, kind, series, volume, page_count)
+            SELECT * FROM UNNEST($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::int[], $7::int[])
+            AS t(book_id, title, kind, format, series, volume, page_count)
         ) AS data
         WHERE books.id = data.book_id
         "#
@@ -93,6 +97,7 @@ pub async fn flush_all_batches(
     .bind(&book_ids)
     .bind(&titles)
     .bind(&kinds)
+    .bind(&formats)
    .bind(&series)
    .bind(&volumes)
    .bind(&page_counts)
@@ -143,22 +148,24 @@ pub async fn flush_all_batches(
     let book_ids: Vec<Uuid> = books_insert.iter().map(|b| b.book_id).collect();
     let library_ids: Vec<Uuid> = books_insert.iter().map(|b| b.library_id).collect();
     let kinds: Vec<String> = books_insert.iter().map(|b| b.kind.clone()).collect();
+    let formats: Vec<String> = books_insert.iter().map(|b| b.format.clone()).collect();
     let titles: Vec<String> = books_insert.iter().map(|b| b.title.clone()).collect();
     let series: Vec<Option<String>> = books_insert.iter().map(|b| b.series.clone()).collect();
     let volumes: Vec<Option<i32>> = books_insert.iter().map(|b| b.volume).collect();
     let page_counts: Vec<Option<i32>> = books_insert.iter().map(|b| b.page_count).collect();
     let thumbnail_paths: Vec<Option<String>> = books_insert.iter().map(|b| b.thumbnail_path.clone()).collect();
-    
+
     sqlx::query(
         r#"
-        INSERT INTO books (id, library_id, kind, title, series, volume, page_count, thumbnail_path)
-        SELECT * FROM UNNEST($1::uuid[], $2::uuid[], $3::text[], $4::text[], $5::text[], $6::int[], $7::int[], $8::text[])
-        AS t(id, library_id, kind, title, series, volume, page_count, thumbnail_path)
+        INSERT INTO books (id, library_id, kind, format, title, series, volume, page_count, thumbnail_path)
+        SELECT * FROM UNNEST($1::uuid[], $2::uuid[], $3::text[], $4::text[], $5::text[], $6::text[], $7::int[], $8::int[], $9::text[])
+        AS t(id, library_id, kind, format, title, series, volume, page_count, thumbnail_path)
         "#
     )
     .bind(&book_ids)
     .bind(&library_ids)
     .bind(&kinds)
+    .bind(&formats)
     .bind(&titles)
     .bind(&series)
     .bind(&volumes)
diff --git a/apps/indexer/src/scanner.rs b/apps/indexer/src/scanner.rs
index 30deccf..3c15735 100644
--- a/apps/indexer/src/scanner.rs
+++ b/apps/indexer/src/scanner.rs
@@ -281,6 +281,7 @@ pub async fn scan_library_discovery(
                 book_id,
                 title: parsed.title,
                 kind: utils::kind_from_format(format).to_string(),
+                format: format.as_str().to_string(),
                 series: parsed.series,
                 volume: parsed.volume,
                 // Reset page_count so analyzer re-processes this book
@@ -335,6 +336,7 @@ pub async fn scan_library_discovery(
                 book_id,
                 library_id,
                 kind: utils::kind_from_format(format).to_string(),
+                format: format.as_str().to_string(),
                 title: parsed.title,
                 series: parsed.series,
                 volume: parsed.volume,
diff --git a/infra/migrations/0020_add_format_to_books.sql b/infra/migrations/0020_add_format_to_books.sql
new file mode 100644
index 0000000..3f888f6
--- /dev/null
+++ b/infra/migrations/0020_add_format_to_books.sql
@@ -0,0 +1,13 @@
+-- Add format column to books table (denormalized from book_files for easy API access)
+ALTER TABLE books ADD COLUMN IF NOT EXISTS format TEXT CHECK (format IN ('pdf', 'cbz', 'cbr'));
+
+-- Backfill from book_files (take the format of the most recent file per book)
+UPDATE books b
+SET format = bf.format
+FROM (
+    SELECT DISTINCT ON (book_id) book_id, format
+    FROM book_files
+    ORDER BY book_id, updated_at DESC
+) bf
+WHERE b.id = bf.book_id
+  AND b.format IS NULL;
diff --git a/infra/perf_pages.sh b/infra/perf_pages.sh
new file mode 100644
index 0000000..af84a75
--- /dev/null
+++ b/infra/perf_pages.sh
@@ -0,0 +1,193 @@
+#!/usr/bin/env bash
+# perf_pages.sh — Page render performance benchmark
+#
+# Tests archive reading performance:
+#   - Cold render latency per format (CBZ/CBR/PDF, cache-busted widths)
+#   - Warm render latency (disk cache hit)
+#   - Sequential pages 1-10 (archive open/close overhead)
+#   - Concurrent rendering throughput (N simultaneous requests)
+#
+# Usage:
+#   BASE_API=http://localhost:7080 API_TOKEN=my-token bash infra/perf_pages.sh
+#
+# Optional:
+#   BENCH_N   requests per latency measurement (default 10)
+#   CONC_N    concurrent requests for throughput test (default 10)
+#
+# Requires migration 0020 (format column on books table) for per-format tests.
+
+set -euo pipefail
+
+BASE_API="${BASE_API:-http://127.0.0.1:7080}"
+TOKEN="${API_TOKEN:-stripstream-dev-bootstrap-token}"
+BENCH_N="${BENCH_N:-10}"
+CONC_N="${CONC_N:-10}"
+export BASE_API TOKEN
+
+BOLD="\033[1m"; RESET="\033[0m"; GREEN="\033[32m"; YELLOW="\033[33m"; CYAN="\033[36m"
+header() { echo -e "\n${BOLD}${CYAN}▶ $*${RESET}"; }
+ok()     { echo -e "  ${GREEN}✓${RESET} $*"; }
+warn()   { echo -e "  ${YELLOW}⚠${RESET} $*"; }
+row()    { printf "  %-44s %s\n" "$1" "$2"; }
+
+auth() { curl -fsS -H "Authorization: Bearer $TOKEN" "$@"; }
+now_ms() { python3 -c "import time; print(int(time.time()*1000))"; }
+
+# ─── health ──────────────────────────────────────────────────────────────────
+
+header "Health"
+curl -fsS "$BASE_API/health" >/dev/null && ok "API reachable"
+
+BOOKS_JSON="$(auth "$BASE_API/books?limit=100")"
+BOOK_COUNT="$(echo "$BOOKS_JSON" | python3 -c "import sys,json; print(json.load(sys.stdin).get('total',0))")"
+ok "Books in index: $BOOK_COUNT"
+
+if [ "$BOOK_COUNT" -eq 0 ]; then
+  echo "No books found — aborting"; exit 1
+fi
+
+# Default benchmark target: first book
+FIRST_BOOK_ID="$(echo "$BOOKS_JSON" | python3 -c "
+import sys,json; items=json.load(sys.stdin).get('items',[]); print(items[0]['id'] if items else '')
+")"
+FIRST_BOOK_FORMAT="$(echo "$BOOKS_JSON" | python3 -c "
+import sys,json; items=json.load(sys.stdin).get('items',[]); print(items[0].get('format') or '?' if items else '?')
+")"
+ok "Default target: $FIRST_BOOK_ID (format: $FIRST_BOOK_FORMAT)"
+
+# One book per format — uses ?format= filter (requires migration 0020)
+find_book() {
+  local fmt="$1"
+  auth "$BASE_API/books?format=$fmt&limit=1" 2>/dev/null \
+    | python3 -c "import sys,json; items=json.load(sys.stdin).get('items',[]); print(items[0]['id'] if items else '')" \
+    2>/dev/null || echo ""
+}
+BOOK_CBZ=$(find_book cbz)
+BOOK_CBR=$(find_book cbr)
+BOOK_PDF=$(find_book pdf)
+
+[ -n "$BOOK_CBZ" ] && ok "CBZ sample: $BOOK_CBZ" || warn "No CBZ (run migration 0020 + rebuild?)"
+[ -n "$BOOK_CBR" ] && ok "CBR sample: $BOOK_CBR" || warn "No CBR"
+[ -n "$BOOK_PDF" ] && ok "PDF sample: $BOOK_PDF" || warn "No PDF"
+
+# ─── helpers ─────────────────────────────────────────────────────────────────
+
+# Cold render: cycle widths to bypass disk cache
+measure_cold() {
+  local label="$1" book_id="$2" page="${3:-1}" n="${4:-$BENCH_N}"
+  local total=0 i
+  for i in $(seq 1 "$n"); do
+    local w=$((480 + i))
+    local t
+    t=$(curl -s -o /dev/null -w '%{time_total}' \
+      -H "Authorization: Bearer $TOKEN" \
+      "$BASE_API/books/$book_id/pages/$page?format=webp&quality=80&width=$w")
+    total=$(python3 -c "print($total + $t)")
+  done
+  local avg_ms
+  avg_ms=$(python3 -c "print(round(($total / $n)*1000, 1))")
+  row "$label" "${avg_ms}ms avg (cold, n=$n)"
+}
+
+# Warm render: prime cache then measure
+measure_warm() {
+  local label="$1" book_id="$2" n="${3:-$BENCH_N}"
+  local url="$BASE_API/books/$book_id/pages/1?format=webp&quality=80&width=600"
+  curl -s -o /dev/null -H "Authorization: Bearer $TOKEN" "$url" >/dev/null
+  local total=0 i
+  for i in $(seq 1 "$n"); do
+    local t
+    t=$(curl -s -o /dev/null -w '%{time_total}' -H "Authorization: Bearer $TOKEN" "$url")
+    total=$(python3 -c "print($total + $t)")
+  done
+  local avg_ms
+  avg_ms=$(python3 -c "print(round(($total / $n)*1000, 1))")
+  row "$label" "${avg_ms}ms avg (warm/cached, n=$n)"
+}
+
+# ─── 1. Cold render by format ────────────────────────────────────────────────
+
+header "1 / Cold Render by Format (cache-busted, n=$BENCH_N per format)"
+if [ -n "$BOOK_CBZ" ]; then
+  measure_cold "CBZ page 1" "$BOOK_CBZ"
+else
+  warn "skip CBZ"
+fi
+if [ -n "$BOOK_CBR" ]; then
+  measure_cold "CBR page 1" "$BOOK_CBR"
+else
+  warn "skip CBR"
+fi
+if [ -n "$BOOK_PDF" ]; then
+  measure_cold "PDF page 1" "$BOOK_PDF"
+else
+  warn "skip PDF"
+fi
+
+# ─── 2. Warm render ──────────────────────────────────────────────────────────
+
+header "2 / Warm Render (disk cache, n=$BENCH_N)"
+measure_warm "Default book page 1 ($FIRST_BOOK_FORMAT)" "$FIRST_BOOK_ID"
+
+# ─── 3. Sequential pages ─────────────────────────────────────────────────────
+
+header "3 / Sequential Pages (pages 1–10, default book, cold widths)"
+echo "  book: $FIRST_BOOK_ID (format: $FIRST_BOOK_FORMAT)"
+SEQ_TOTAL=0
+for PAGE in $(seq 1 10); do
+  T=$(curl -s -o /dev/null -w '%{time_total}' \
+    -H "Authorization: Bearer $TOKEN" \
+    "$BASE_API/books/$FIRST_BOOK_ID/pages/$PAGE?format=webp&quality=80&width=$((500 + PAGE * 3))")
+  MS=$(python3 -c "print(round($T*1000, 1))")
+  SEQ_TOTAL=$(python3 -c "print($SEQ_TOTAL + $T)")
+  row "  page $PAGE" "${MS}ms"
+done
+SEQ_AVG=$(python3 -c "print(round($SEQ_TOTAL / 10 * 1000, 1))")
+echo "  ──────────────────────────────────────────────────"
+row "  avg (10 pages)" "${SEQ_AVG}ms"
+
+# ─── 4. Concurrent throughput ────────────────────────────────────────────────
+
+header "4 / Concurrent Throughput ($CONC_N simultaneous requests)"
+echo "  book: $FIRST_BOOK_ID (format: $FIRST_BOOK_FORMAT)"
+T_START=$(now_ms)
+PIDS=()
+for i in $(seq 1 "$CONC_N"); do
+  curl -s -o /dev/null \
+    -H "Authorization: Bearer $TOKEN" \
+    "$BASE_API/books/$FIRST_BOOK_ID/pages/$i?format=webp&quality=80&width=$((550 + i * 3))" &
+  PIDS+=($!)
+done
+for PID in "${PIDS[@]}"; do wait "$PID" 2>/dev/null || true; done
+T_END=$(now_ms)
+CONC_MS=$((T_END - T_START))
+CONC_PER=$(python3 -c "print(round($CONC_MS / $CONC_N, 1))")
+CONC_TPS=$(python3 -c "print(round($CONC_N / ($CONC_MS / 1000), 1))")
+
+row "  wall time ($CONC_N pages in parallel)" "${CONC_MS}ms"
+row "  avg per page" "${CONC_PER}ms"
+row "  throughput" "${CONC_TPS} pages/s"
+
+# ─── 5. Format cross-check ───────────────────────────────────────────────────
+
+if [ -n "$BOOK_CBZ" ] && [ -n "$BOOK_CBR" ] && [ -n "$BOOK_PDF" ]; then
+  header "5 / Format Cross-Check (5 pages each, cold)"
+  for PAIR in "CBZ:$BOOK_CBZ" "CBR:$BOOK_CBR" "PDF:$BOOK_PDF"; do
+    FMT="${PAIR%%:*}"
+    BID="${PAIR##*:}"
+    FMT_TOTAL=0
+    for PAGE in 1 2 3 4 5; do
+      T=$(curl -s -o /dev/null -w '%{time_total}' \
+        -H "Authorization: Bearer $TOKEN" \
+        "$BASE_API/books/$BID/pages/$PAGE?format=webp&quality=80&width=$((490 + PAGE * 7))")
+      FMT_TOTAL=$(python3 -c "print($FMT_TOTAL + $T)")
+    done
+    AVG=$(python3 -c "print(round($FMT_TOTAL / 5 * 1000, 1))")
+    row "$FMT avg pages 1-5 (cold)" "${AVG}ms"
+  done
+fi
+
+# ─── done ────────────────────────────────────────────────────────────────────
+
+header "Done"
+echo -e "  Run again after the parsers refactoring to compare.\n"