diff --git a/apps/backoffice/app/api/libraries/[id]/reading-status-provider/route.ts b/apps/backoffice/app/api/libraries/[id]/reading-status-provider/route.ts
new file mode 100644
index 0000000..58138bb
--- /dev/null
+++ b/apps/backoffice/app/api/libraries/[id]/reading-status-provider/route.ts
@@ -0,0 +1,21 @@
+import { NextResponse } from "next/server";
+import { apiFetch } from "@/lib/api";
+
+export async function PATCH(
+  request: Request,
+  { params }: { params: Promise<{ id: string }> }
+) {
+  const { id } = await params;
+  try {
+    const body = await request.json();
+    const data = await apiFetch(`/libraries/${id}/reading-status-provider`, {
+      method: "PATCH",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(body),
+    });
+    return NextResponse.json(data);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : "Failed to update reading status provider";
+    return NextResponse.json({ error: message }, { status: 500 });
+  }
+}
diff --git a/infra/migrations/0050_add_users.sql b/infra/migrations/0050_add_users.sql
new file mode 100644
index 0000000..d4c1dc2
--- /dev/null
+++ b/infra/migrations/0050_add_users.sql
@@ -0,0 +1,36 @@
+CREATE TABLE users (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    username TEXT NOT NULL UNIQUE,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- Read-scoped tokens must carry a user_id; admin tokens keep it NULL
+ALTER TABLE api_tokens ADD COLUMN user_id UUID REFERENCES users(id) ON DELETE CASCADE;
+
+-- Make book_reading_progress per-user
+ALTER TABLE book_reading_progress DROP CONSTRAINT book_reading_progress_pkey;
+ALTER TABLE book_reading_progress ADD COLUMN user_id UUID REFERENCES users(id) ON DELETE CASCADE;
+
+-- Create a default user if existing data needs to be migrated
+INSERT INTO users (id, username)
+    SELECT '00000000-0000-0000-0000-000000000001', 'default'
+    WHERE EXISTS (
+        SELECT 1 FROM book_reading_progress WHERE user_id IS NULL
+        UNION ALL
+
SELECT 1 FROM api_tokens WHERE scope = 'read' AND user_id IS NULL
+    );
+
+-- Attach pre-existing reading-progress rows to the default user
+UPDATE book_reading_progress
+    SET user_id = '00000000-0000-0000-0000-000000000001'
+    WHERE user_id IS NULL;
+
+-- Attach pre-existing read tokens to the default user
+UPDATE api_tokens
+    SET user_id = '00000000-0000-0000-0000-000000000001'
+    WHERE scope = 'read' AND user_id IS NULL;
+
+ALTER TABLE book_reading_progress ALTER COLUMN user_id SET NOT NULL;
+ALTER TABLE book_reading_progress ADD PRIMARY KEY (book_id, user_id);
+DROP INDEX IF EXISTS idx_book_reading_progress_status;
+CREATE INDEX idx_book_reading_progress_status ON book_reading_progress(status, user_id);
diff --git a/infra/migrations/0052_add_anilist_integration.sql b/infra/migrations/0052_add_anilist_integration.sql
new file mode 100644
index 0000000..a4a2023
--- /dev/null
+++ b/infra/migrations/0052_add_anilist_integration.sql
@@ -0,0 +1,16 @@
+-- Add AniList sync support
+ALTER TABLE libraries ADD COLUMN anilist_enabled BOOLEAN NOT NULL DEFAULT FALSE;
+
+CREATE TABLE anilist_series_links (
+    library_id UUID NOT NULL REFERENCES libraries(id) ON DELETE CASCADE,
+    series_name TEXT NOT NULL,
+    anilist_id INTEGER NOT NULL,
+    anilist_title TEXT,
+    anilist_url TEXT,
+    status TEXT NOT NULL DEFAULT 'linked', -- 'linked' | 'synced' | 'error'
+    linked_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    synced_at TIMESTAMPTZ,
+    PRIMARY KEY (library_id, series_name)
+);
+
+CREATE INDEX idx_anilist_series_links_library ON anilist_series_links(library_id);
diff --git a/infra/migrations/0053_add_reading_status_provider.sql b/infra/migrations/0053_add_reading_status_provider.sql
new file mode 100644
index 0000000..9159ce7
--- /dev/null
+++ b/infra/migrations/0053_add_reading_status_provider.sql
@@ -0,0 +1,10 @@
+-- Replace anilist_enabled boolean with generic reading_status_provider
+ALTER TABLE libraries ADD COLUMN reading_status_provider TEXT;
+UPDATE libraries SET
reading_status_provider = 'anilist' WHERE anilist_enabled = TRUE; +ALTER TABLE libraries DROP COLUMN anilist_enabled; + +-- Add provider column to anilist_series_links for future multi-provider support +ALTER TABLE anilist_series_links ADD COLUMN provider TEXT NOT NULL DEFAULT 'anilist'; +-- Update the primary key to include provider +ALTER TABLE anilist_series_links DROP CONSTRAINT anilist_series_links_pkey; +ALTER TABLE anilist_series_links ADD PRIMARY KEY (library_id, series_name, provider); diff --git a/infra/perf.sh b/infra/perf.sh new file mode 100755 index 0000000..451b371 --- /dev/null +++ b/infra/perf.sh @@ -0,0 +1,383 @@ +#!/usr/bin/env bash +# perf.sh — Performance benchmarks for Stripstream Librarian +# +# Measures: +# - Indexer: full rebuild phase durations (discovery / extracting_pages / generating_thumbnails) +# - Indexer: incremental rebuild speed (should skip unchanged dirs via mtime cache) +# - Indexer: thumbnail rebuild (generate missing) and regenerate (force all) +# - API: page render latency (cold + warm/cached), thumbnail fetch, books list, search +# +# Usage: +# BASE_API=http://localhost:7080 API_TOKEN=my-token bash infra/perf.sh +# +# Optional env: +# JOB_TIMEOUT seconds to wait for a job to complete (default 600) +# BENCH_N number of API requests per endpoint for latency measurement (default 10) +# LIBRARY_ID restrict rebuild jobs to a specific library UUID + +set -euo pipefail + +BASE_API="${BASE_API:-http://127.0.0.1:7080}" +TOKEN="${API_TOKEN:-stripstream-dev-bootstrap-token}" +JOB_TIMEOUT="${JOB_TIMEOUT:-600}" +BENCH_N="${BENCH_N:-10}" +LIBRARY_ID="${LIBRARY_ID:-}" +export BASE_API TOKEN + +# ─── colours ──────────────────────────────────────────────────────────────── + +BOLD="\033[1m"; RESET="\033[0m"; GREEN="\033[32m"; YELLOW="\033[33m"; CYAN="\033[36m"; RED="\033[31m" +header() { echo -e "\n${BOLD}${CYAN}▶ $*${RESET}"; } +ok() { echo -e " ${GREEN}✓${RESET} $*"; } +warn() { echo -e " ${YELLOW}⚠${RESET} $*"; } +fail() { echo -e " 
${RED}✗${RESET} $*"; } +row() { printf " %-40s %s\n" "$1" "$2"; } + +# ─── helpers ──────────────────────────────────────────────────────────────── + +auth() { curl -fsS -H "Authorization: Bearer $TOKEN" "$@"; } + +# Wait for job to finish; print a dot every 2s. +wait_job() { + local job_id="$1" label="${2:-job}" waited=0 status + printf " waiting for %s ." "$label" + while true; do + status="$(auth "$BASE_API/index/jobs/$job_id" \ + | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))")" + case "$status" in + success) echo " done"; return 0 ;; + failed) echo " FAILED"; fail "$label failed"; return 1 ;; + cancelled) echo " cancelled"; fail "$label was cancelled"; return 1 ;; + esac + if [ "$waited" -ge "$JOB_TIMEOUT" ]; then + echo " timeout"; fail "$label timed out after ${JOB_TIMEOUT}s (last: $status)"; return 1 + fi + printf "."; sleep 2; waited=$((waited + 2)) + done +} + +# Fetch /index/jobs/:id/details and pretty-print phase durations + throughput. +report_job() { + local job_id="$1" label="$2" + local details + details="$(auth "$BASE_API/index/jobs/$job_id")" + export PERF_DETAILS="$details" PERF_LABEL="$label" + python3 - <<'PY' +import json, os +from datetime import datetime, timezone + +def parse(s): + if not s: return None + # Handle both with and without microseconds + for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S.%f+00:00", "%Y-%m-%dT%H:%M:%S+00:00"): + try: return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc) + except ValueError: pass + return None + +d = json.loads(os.environ["PERF_DETAILS"]) +label = os.environ["PERF_LABEL"] + +started = parse(d.get("started_at")) +phase2 = parse(d.get("phase2_started_at")) +thumbs = parse(d.get("generating_thumbnails_started_at")) +finished = parse(d.get("finished_at")) +stats = d.get("stats_json") or {} +total_files = d.get("total_files") or 0 + +def secs(a, b): + if a and b: return (b - a).total_seconds() + return None + +def fmt(s): + if s is 
None: return "n/a" + if s < 1: return f"{s*1000:.0f}ms" + return f"{s:.1f}s" + +def tps(n, s): + if n and s and s > 0: return f"{n/s:.1f}/s" + return "n/a" + +t_total = secs(started, finished) +t_discover = secs(started, phase2) +t_extract = secs(phase2, thumbs) +t_thumbs = secs(thumbs, finished) +indexed = stats.get("indexed_files", 0) + +print(f" {'Total':38s} {fmt(t_total)}") +if t_discover is not None: + print(f" {' Phase 1 – discovery':38s} {fmt(t_discover)} ({tps(indexed, t_discover)} books indexed)") +if t_extract is not None: + print(f" {' Phase 2A – extracting_pages':38s} {fmt(t_extract)} ({tps(total_files, t_extract)} books/s)") +if t_thumbs is not None: + print(f" {' Phase 2B – generating_thumbnails':38s} {fmt(t_thumbs)} ({tps(total_files, t_thumbs)} thumbs/s)") +print(f" {' Files indexed':38s} {indexed} / {total_files}") +if stats.get("errors"): + print(f" {' Errors':38s} {stats['errors']}") +PY +} + +# Measure avg latency of a GET endpoint over N requests. +measure_latency() { + local label="$1" url="$2" n="${3:-$BENCH_N}" + local total=0 i + for i in $(seq 1 "$n"); do + local t + t=$(curl -s -o /dev/null -w '%{time_total}' -H "Authorization: Bearer $TOKEN" "$url") + total=$(python3 -c "print($total + $t)") + done + local avg_ms + avg_ms=$(python3 -c "print(round(($total / $n)*1000, 1))") + row "$label" "${avg_ms}ms (n=$n)" +} + +# Build optional library_id JSON fragment +lib_json() { + if [ -n "$LIBRARY_ID" ]; then echo "\"library_id\":\"$LIBRARY_ID\","; else echo ""; fi +} + +enqueue_rebuild() { + local full="${1:-false}" + auth -X POST -H "Content-Type: application/json" \ + -d "{$(lib_json)\"full\":$full}" \ + "$BASE_API/index/rebuild" \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])" +} + +enqueue_thumb_rebuild() { + auth -X POST -H "Content-Type: application/json" \ + -d "{$(lib_json | sed 's/,$//')}" \ + "$BASE_API/index/thumbnails/rebuild" \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])" +} + 
+enqueue_thumb_regen() { + auth -X POST -H "Content-Type: application/json" \ + -d "{$(lib_json | sed 's/,$//')}" \ + "$BASE_API/index/thumbnails/regenerate" \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])" +} + +# ─── health check ──────────────────────────────────────────────────────────── + +header "Health" +curl -fsS "$BASE_API/health" >/dev/null && ok "API healthy" + +BOOKS_JSON="$(auth "$BASE_API/books")" +BOOK_COUNT="$(echo "$BOOKS_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('total',0))")" +FIRST_BOOK_ID="$(echo "$BOOKS_JSON" | python3 -c "import sys,json; items=json.load(sys.stdin).get('items',[]); print(items[0]['id'] if items else '')")" +ok "Books in index: $BOOK_COUNT" +if [ -n "$LIBRARY_ID" ]; then ok "Scoped to library: $LIBRARY_ID"; fi + +# ─── 1. full rebuild ───────────────────────────────────────────────────────── + +header "1 / Full Rebuild" +JOB_FULL="$(enqueue_rebuild true)" +ok "job $JOB_FULL" +wait_job "$JOB_FULL" "full rebuild" +report_job "$JOB_FULL" "full rebuild" + +# ─── 2. 
incremental rebuild (dirs unchanged → mtime skip) ───────────────────
+
+header "2 / Incremental Rebuild (should be fast — mtime cache)"
+JOB_INCR="$(enqueue_rebuild false)"
+ok "job $JOB_INCR"
+wait_job "$JOB_INCR" "incremental rebuild"
+report_job "$JOB_INCR" "incremental rebuild"
+
+python3 - <<'PY' # NOTE(review): this entire heredoc is dead code — see notes inside
+import json, os
+from datetime import datetime, timezone
+
+def parse(s):
+    if not s: return None
+    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ",
+                "%Y-%m-%dT%H:%M:%S.%f+00:00", "%Y-%m-%dT%H:%M:%S+00:00"):
+        try: return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc)
+        except ValueError: pass
+    return None
+
+full_id = os.environ.get("PERF_FULL_ID", "")  # always "": the export happens only AFTER this heredoc
+incr_id = os.environ.get("PERF_INCR_ID", "")  # always "" for the same reason
+if not full_id or not incr_id:
+    exit(0)  # NOTE(review): so this block unconditionally exits without output — safe to delete
+PY
+# Speedup ratio via env export
+export PERF_FULL_ID="$JOB_FULL" PERF_INCR_ID="$JOB_INCR"
+python3 - <<'PY'
+import json, os, subprocess
+from datetime import datetime, timezone
+
+def parse(s):
+    if not s: return None
+    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ",
+                "%Y-%m-%dT%H:%M:%S.%f+00:00", "%Y-%m-%dT%H:%M:%S+00:00"):
+        try: return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc)
+        except ValueError: pass
+    return None
+
+base = os.environ.get("BASE_API", "http://127.0.0.1:7080")
+token = os.environ.get("TOKEN", "")
+
+import urllib.request
+
+def fetch(url):
+    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
+    with urllib.request.urlopen(req) as r:
+        return json.loads(r.read())
+
+def duration(job_id):
+    d = fetch(f"{base}/index/jobs/{job_id}")
+    s = parse(d.get("started_at"))
+    f = parse(d.get("finished_at"))
+    if s and f: return (f - s).total_seconds()
+    return None
+
+t_full = duration(os.environ["PERF_FULL_ID"])
+t_incr = duration(os.environ["PERF_INCR_ID"])
+
+if t_full and t_incr:
+    ratio = t_full / t_incr if t_incr > 0 else 0
+    print(f" {'Speedup (full vs incremental)':38s} {ratio:.1f}x ({t_full:.1f}s → {t_incr:.1f}s)")
+PY
+
+# ─── 3. 
thumbnail rebuild (generate missing) ───────────────────────────────── + +header "3 / Thumbnail Rebuild (generate missing only)" +JOB_TREB="$(enqueue_thumb_rebuild)" +ok "job $JOB_TREB" +wait_job "$JOB_TREB" "thumbnail rebuild" +report_job "$JOB_TREB" "thumbnail rebuild" + +# ─── 4. thumbnail regenerate (force all) ───────────────────────────────────── + +header "4 / Thumbnail Regenerate (force all)" +JOB_TREG="$(enqueue_thumb_regen)" +ok "job $JOB_TREG" +wait_job "$JOB_TREG" "thumbnail regenerate" +report_job "$JOB_TREG" "thumbnail regenerate" + +# ─── 5. API latency ────────────────────────────────────────────────────────── + +header "5 / API Latency (n=$BENCH_N requests each)" + +measure_latency "books list" "$BASE_API/books" +measure_latency "search (query)" "$BASE_API/search?q=marvel" + +if [ -n "$FIRST_BOOK_ID" ]; then + # Cold page render: clear cache between runs by using different params + measure_latency "page render (width=1080, webp)" \ + "$BASE_API/books/$FIRST_BOOK_ID/pages/1?format=webp&quality=80&width=1080" + + # Warm render: same URL repeated → should hit LRU cache + measure_latency "page render (warm/cached)" \ + "$BASE_API/books/$FIRST_BOOK_ID/pages/1?format=webp&quality=80&width=1080" + + measure_latency "thumbnail fetch" \ + "$BASE_API/books/$FIRST_BOOK_ID/thumbnail" +else + warn "No books found — skipping page/thumbnail latency tests" +fi + +# ─── 6. Page render deep-dive ──────────────────────────────────────────────── +# +# Tests what the refactoring touches: archive reading for each format. +# Uses width-cycling to bypass disk cache and measure real decode cost. +# Tests: per-format cold render, sequential pages, concurrent throughput. 
+ +header "6 / Page Render Deep-Dive" + +if [ -z "$FIRST_BOOK_ID" ]; then + warn "No books found — skipping deep-dive" +else + + # Resolve one book per format (API may not support ?format= filter; graceful fallback) + resolve_book_by_format() { + local fmt="$1" + local id + id=$(auth "$BASE_API/books?format=$fmt&limit=1" 2>/dev/null \ + | python3 -c "import sys,json; items=json.load(sys.stdin).get('items',[]); print(items[0]['id'] if items else '')" 2>/dev/null || echo "") + echo "$id" + } + BOOK_CBZ=$(resolve_book_by_format cbz) + BOOK_CBR=$(resolve_book_by_format cbr) + BOOK_PDF=$(resolve_book_by_format pdf) + + # Cold render: cycle widths (480..487) across N requests so each misses disk cache + measure_latency_cold() { + local label="$1" book_id="$2" n="${3:-$BENCH_N}" + local total=0 i + for i in $(seq 1 "$n"); do + local w=$((480 + i)) # unique width → unique cache key + local t + t=$(curl -s -o /dev/null -w '%{time_total}' \ + -H "Authorization: Bearer $TOKEN" \ + "$BASE_API/books/$book_id/pages/1?format=webp&quality=80&width=$w") + total=$(python3 -c "print($total + $t)") + done + local avg_ms + avg_ms=$(python3 -c "print(round(($total / $n)*1000, 1))") + row "$label" "${avg_ms}ms (cold, n=$n)" + } + + echo "" + echo " Cold render latency by format (cache-busted widths):" + [ -n "$BOOK_CBZ" ] && measure_latency_cold "CBZ page 1 (cold)" "$BOOK_CBZ" \ + || warn "No CBZ book found" + [ -n "$BOOK_CBR" ] && measure_latency_cold "CBR page 1 (cold)" "$BOOK_CBR" \ + || warn "No CBR book found" + [ -n "$BOOK_PDF" ] && measure_latency_cold "PDF page 1 (cold)" "$BOOK_PDF" \ + || warn "No PDF book found" + + # Warm render: same URL repeated → LRU / disk cache + echo "" + echo " Warm render (disk cache, same URL):" + # One cold request first, then N warm + curl -s -o /dev/null -H "Authorization: Bearer $TOKEN" \ + "$BASE_API/books/$FIRST_BOOK_ID/pages/1?format=webp&quality=80&width=600" >/dev/null + measure_latency "page render (warm/disk-cached)" \ + 
"$BASE_API/books/$FIRST_BOOK_ID/pages/1?format=webp&quality=80&width=600" + + # Sequential pages: measures archive open+close overhead across consecutive pages + echo "" + echo " Sequential pages (pages 1–10, same book, cold widths):" + SEQ_TOTAL=0 + for PAGE in $(seq 1 10); do + local_t=$(curl -s -o /dev/null -w '%{time_total}' \ + -H "Authorization: Bearer $TOKEN" \ + "$BASE_API/books/$FIRST_BOOK_ID/pages/$PAGE?format=webp&quality=80&width=$((500 + PAGE))") + local_ms=$(python3 -c "print(round($local_t*1000, 1))") + SEQ_TOTAL=$(python3 -c "print($SEQ_TOTAL + $local_t)") + row " page $PAGE" "${local_ms}ms" + done + SEQ_AVG=$(python3 -c "print(round($SEQ_TOTAL / 10 * 1000, 1))") + row " avg (10 pages)" "${SEQ_AVG}ms" + + # Concurrent throughput: N requests in parallel → measures semaphore + CPU saturation + CONC_N="${CONC_N:-10}" + echo "" + echo " Concurrent rendering ($CONC_N simultaneous requests, cold widths):" + CONC_START=$(date +%s%3N) + PIDS=() + for i in $(seq 1 "$CONC_N"); do + curl -s -o /dev/null \ + -H "Authorization: Bearer $TOKEN" \ + "$BASE_API/books/$FIRST_BOOK_ID/pages/$i?format=webp&quality=80&width=$((550 + i))" & + PIDS+=($!) + done + for PID in "${PIDS[@]}"; do wait "$PID" 2>/dev/null || true; done + CONC_END=$(date +%s%3N) + CONC_MS=$((CONC_END - CONC_START)) + CONC_PER=$(python3 -c "print(round($CONC_MS / $CONC_N, 1))") + row " wall time (${CONC_N} pages in parallel)" "${CONC_MS}ms (~${CONC_PER}ms/page)" + +fi + +# ─── summary ───────────────────────────────────────────────────────────────── + +header "Summary" +ok "Full rebuild job: $JOB_FULL" +ok "Incremental rebuild job: $JOB_INCR" +ok "Thumbnail rebuild job: $JOB_TREB" +ok "Thumbnail regenerate job: $JOB_TREG" +echo -e "\n${BOLD}perf done${RESET}"