Files
stripstream-librarian/infra/perf.sh
Froidefond Julien fd0f57824d
All checks were successful
Deploy with Docker Compose / deploy (push) Successful in 6s
chore: add missing migrations and routes
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 17:35:49 +01:00

384 lines
15 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# perf.sh — Performance benchmarks for Stripstream Librarian
#
# Measures:
# - Indexer: full rebuild phase durations (discovery / extracting_pages / generating_thumbnails)
# - Indexer: incremental rebuild speed (should skip unchanged dirs via mtime cache)
# - Indexer: thumbnail rebuild (generate missing) and regenerate (force all)
# - API: page render latency (cold + warm/cached), thumbnail fetch, books list, search
#
# Usage:
# BASE_API=http://localhost:7080 API_TOKEN=my-token bash infra/perf.sh
#
# Optional env:
# JOB_TIMEOUT seconds to wait for a job to complete (default 600)
# BENCH_N number of API requests per endpoint for latency measurement (default 10)
# LIBRARY_ID restrict rebuild jobs to a specific library UUID
# Strict mode: abort on errors, on unset variables, and on any failing
# stage of a pipeline.
set -euo pipefail
# Configuration — all overridable via the environment (see usage block above).
BASE_API="${BASE_API:-http://127.0.0.1:7080}"          # API base URL
TOKEN="${API_TOKEN:-stripstream-dev-bootstrap-token}"  # bearer token (set via API_TOKEN)
JOB_TIMEOUT="${JOB_TIMEOUT:-600}"                      # max seconds to wait per job
BENCH_N="${BENCH_N:-10}"                               # requests per latency sample
LIBRARY_ID="${LIBRARY_ID:-}"                           # optional library UUID scope
# Exported so the python heredocs below can read them via os.environ.
export BASE_API TOKEN
# ─── colours ────────────────────────────────────────────────────────────────
BOLD="\033[1m"; RESET="\033[0m"; GREEN="\033[32m"; YELLOW="\033[33m"; CYAN="\033[36m"; RED="\033[31m"
# Print helpers.
# NOTE(review): the status glyphs between the colour codes were lost in
# transit (the colour spans were empty); restored to conventional marks.
header() { echo -e "\n${BOLD}${CYAN}$*${RESET}"; }   # section title
ok() { echo -e " ${GREEN}✓${RESET} $*"; }            # success line
warn() { echo -e " ${YELLOW}⚠${RESET} $*"; }         # warning line
fail() { echo -e " ${RED}✗${RESET} $*"; }            # failure line
row() { printf " %-40s %s\n" "$1" "$2"; }            # aligned key/value row
# ─── helpers ────────────────────────────────────────────────────────────────
# auth — curl wrapper that injects the bearer token; forwards all extra args.
auth() {
  local -a base_opts=(-fsS -H "Authorization: Bearer $TOKEN")
  curl "${base_opts[@]}" "$@"
}
# Wait for job to finish; print a dot every 2s.
# Arguments: $1 job id, $2 human-readable label (default "job")
# Returns:   0 on success, 1 on failed / cancelled / timeout.
wait_job() {
  local job_id="$1" label="${2:-job}" waited=0 status
  printf " waiting for %s ." "$label"
  while true; do
    # Tolerate transient API or JSON-parse failures: without `|| true` a
    # single failed poll would abort the whole script under `set -e`.
    # An empty status simply means "keep polling until JOB_TIMEOUT".
    status="$(auth "$BASE_API/index/jobs/$job_id" 2>/dev/null \
      | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || true)"
    case "$status" in
      success) echo " done"; return 0 ;;
      failed) echo " FAILED"; fail "$label failed"; return 1 ;;
      cancelled) echo " cancelled"; fail "$label was cancelled"; return 1 ;;
    esac
    if [ "$waited" -ge "$JOB_TIMEOUT" ]; then
      echo " timeout"; fail "$label timed out after ${JOB_TIMEOUT}s (last: $status)"; return 1
    fi
    printf "."; sleep 2; waited=$((waited + 2))
  done
}
# Pretty-print phase durations + throughput for a finished job.
# NOTE(review): previous comment said /index/jobs/:id/details — the request
# below actually hits /index/jobs/:id.
report_job() {
  local job_id="$1" label="$2"
  local details
  # Fetch the job document once; the python below reads it from the env.
  details="$(auth "$BASE_API/index/jobs/$job_id")"
  # Pass via environment rather than argv to avoid shell-quoting the JSON.
  export PERF_DETAILS="$details" PERF_LABEL="$label"
  # Quoted heredoc delimiter ('PY') → python source taken literally, no
  # shell expansion inside.
  python3 - <<'PY'
import json, os
from datetime import datetime, timezone

def parse(s):
    # Returns an aware UTC datetime, or None on empty/unparseable input.
    if not s: return None
    # Handle both with and without microseconds
    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ",
                "%Y-%m-%dT%H:%M:%S.%f+00:00", "%Y-%m-%dT%H:%M:%S+00:00"):
        try: return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc)
        except ValueError: pass
    return None

d = json.loads(os.environ["PERF_DETAILS"])
label = os.environ["PERF_LABEL"]
# Phase boundaries: started -> phase2 -> thumbnails -> finished.
started = parse(d.get("started_at"))
phase2 = parse(d.get("phase2_started_at"))
thumbs = parse(d.get("generating_thumbnails_started_at"))
finished = parse(d.get("finished_at"))
stats = d.get("stats_json") or {}
total_files = d.get("total_files") or 0

def secs(a, b):
    # Duration in seconds, or None when either boundary is missing.
    if a and b: return (b - a).total_seconds()
    return None

def fmt(s):
    # Human-friendly duration: sub-second as ms, else one-decimal seconds.
    if s is None: return "n/a"
    if s < 1: return f"{s*1000:.0f}ms"
    return f"{s:.1f}s"

def tps(n, s):
    # Throughput "items per second", guarding zero/None durations.
    if n and s and s > 0: return f"{n/s:.1f}/s"
    return "n/a"

t_total = secs(started, finished)
t_discover = secs(started, phase2)
t_extract = secs(phase2, thumbs)
t_thumbs = secs(thumbs, finished)
indexed = stats.get("indexed_files", 0)
print(f" {'Total':38s} {fmt(t_total)}")
if t_discover is not None:
    print(f" {' Phase 1 discovery':38s} {fmt(t_discover)} ({tps(indexed, t_discover)} books indexed)")
if t_extract is not None:
    print(f" {' Phase 2A extracting_pages':38s} {fmt(t_extract)} ({tps(total_files, t_extract)} books/s)")
if t_thumbs is not None:
    print(f" {' Phase 2B generating_thumbnails':38s} {fmt(t_thumbs)} ({tps(total_files, t_thumbs)} thumbs/s)")
print(f" {' Files indexed':38s} {indexed} / {total_files}")
if stats.get("errors"):
    print(f" {' Errors':38s} {stats['errors']}")
PY
}
# Measure avg latency of a GET endpoint over N requests.
# Arguments: $1 label, $2 url, $3 request count (default BENCH_N)
# Outputs:   one aligned row via row().
measure_latency() {
  local label="$1" url="$2" n="${3:-$BENCH_N}"
  local -a samples=()
  local i t
  for i in $(seq 1 "$n"); do
    t=$(curl -s -o /dev/null -w '%{time_total}' -H "Authorization: Bearer $TOKEN" "$url")
    samples+=("$t")
  done
  # Average once at the end — a single python3 spawn instead of one (plus
  # one for the final average) per request.
  local avg_ms
  avg_ms=$(python3 -c "import sys; v=[float(x) for x in sys.argv[1:]]; print(round(sum(v)/len(v)*1000, 1))" "${samples[@]}")
  row "$label" "${avg_ms}ms (n=$n)"
}
# Build optional library_id JSON fragment.
# Prints `"library_id":"<uuid>",` (trailing comma included) when LIBRARY_ID
# is set, otherwise an empty line.
lib_json() {
  if [ -z "$LIBRARY_ID" ]; then
    printf '\n'
  else
    printf '"library_id":"%s",\n' "$LIBRARY_ID"
  fi
}
# Enqueue an index rebuild job and print the new job id.
# Arguments: $1 — "true" for a full rebuild; absent/other → incremental.
enqueue_rebuild() {
  local full="${1:-false}"
  local payload="{$(lib_json)\"full\":$full}"
  auth -X POST -H "Content-Type: application/json" \
    -d "$payload" \
    "$BASE_API/index/rebuild" \
    | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])"
}
# The two thumbnail enqueuers were byte-for-byte duplicates except for the
# endpoint; share one private worker.
# _post_thumb_job — POST the (optionally library-scoped) JSON body to
# /index/thumbnails/$1 and print the new job id.
_post_thumb_job() {
  local endpoint="$1"
  # lib_json ends with a trailing comma; strip it since no field follows.
  auth -X POST -H "Content-Type: application/json" \
    -d "{$(lib_json | sed 's/,$//')}" \
    "$BASE_API/index/thumbnails/$endpoint" \
    | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])"
}
# Generate only the missing thumbnails.
enqueue_thumb_rebuild() { _post_thumb_job rebuild; }
# Force-regenerate every thumbnail.
enqueue_thumb_regen() { _post_thumb_job regenerate; }
# ─── health check ────────────────────────────────────────────────────────────
header "Health"
# Under `set -e`, a failing health check aborts the script right here —
# nothing below is meaningful without a reachable API.
curl -fsS "$BASE_API/health" >/dev/null && ok "API healthy"
# One /books fetch feeds both the count and the sample book id used by the
# latency sections later on.
BOOKS_JSON="$(auth "$BASE_API/books")"
BOOK_COUNT="$(echo "$BOOKS_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('total',0))")"
FIRST_BOOK_ID="$(echo "$BOOKS_JSON" | python3 -c "import sys,json; items=json.load(sys.stdin).get('items',[]); print(items[0]['id'] if items else '')")"
ok "Books in index: $BOOK_COUNT"
if [ -n "$LIBRARY_ID" ]; then ok "Scoped to library: $LIBRARY_ID"; fi
# ─── 1. full rebuild ─────────────────────────────────────────────────────────
header "1 / Full Rebuild"
JOB_FULL="$(enqueue_rebuild true)"
ok "job $JOB_FULL"
wait_job "$JOB_FULL" "full rebuild"
report_job "$JOB_FULL" "full rebuild"
# ─── 2. incremental rebuild (dirs unchanged → mtime skip) ───────────────────
# Runs right after the full rebuild, so every directory's mtime cache entry
# should be fresh and the indexer should skip almost everything.
header "2 / Incremental Rebuild (should be fast — mtime cache)"
JOB_INCR="$(enqueue_rebuild false)"
ok "job $JOB_INCR"
wait_job "$JOB_INCR" "incremental rebuild"
report_job "$JOB_INCR" "incremental rebuild"
# NOTE(review): a leftover `python3 - <<'PY' … PY` heredoc was removed here.
# It read PERF_FULL_ID / PERF_INCR_ID from the environment, but those
# variables are only exported a few lines below — so the heredoc always hit
# its empty-check and exited without doing anything. The live speedup
# computation follows.
# Speedup ratio (full vs incremental) — job ids passed via env export so the
# python heredoc can fetch both job documents itself.
export PERF_FULL_ID="$JOB_FULL" PERF_INCR_ID="$JOB_INCR"
python3 - <<'PY'
# NOTE(review): dropped an unused `import subprocess` and hoisted the
# mid-script urllib import to the top.
import json, os
import urllib.request
from datetime import datetime, timezone

def parse(s):
    # Aware UTC datetime, or None; tolerates missing microseconds.
    if not s: return None
    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ",
                "%Y-%m-%dT%H:%M:%S.%f+00:00", "%Y-%m-%dT%H:%M:%S+00:00"):
        try: return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc)
        except ValueError: pass
    return None

base = os.environ.get("BASE_API", "http://127.0.0.1:7080")
token = os.environ.get("TOKEN", "")

def fetch(url):
    # Authenticated GET returning parsed JSON.
    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
    with urllib.request.urlopen(req) as r:
        return json.loads(r.read())

def duration(job_id):
    # Wall-clock seconds between started_at and finished_at, or None.
    d = fetch(f"{base}/index/jobs/{job_id}")
    s = parse(d.get("started_at"))
    f = parse(d.get("finished_at"))
    if s and f: return (f - s).total_seconds()
    return None

t_full = duration(os.environ["PERF_FULL_ID"])
t_incr = duration(os.environ["PERF_INCR_ID"])
if t_full and t_incr:
    ratio = t_full / t_incr if t_incr > 0 else 0
    print(f" {'Speedup (full vs incremental)':38s} {ratio:.1f}x ({t_full:.1f}s → {t_incr:.1f}s)")
PY
# ─── 3. thumbnail rebuild (generate missing) ─────────────────────────────────
header "3 / Thumbnail Rebuild (generate missing only)"
JOB_TREB="$(enqueue_thumb_rebuild)"
ok "job $JOB_TREB"
wait_job "$JOB_TREB" "thumbnail rebuild"
report_job "$JOB_TREB" "thumbnail rebuild"
# ─── 4. thumbnail regenerate (force all) ─────────────────────────────────────
header "4 / Thumbnail Regenerate (force all)"
JOB_TREG="$(enqueue_thumb_regen)"
ok "job $JOB_TREG"
wait_job "$JOB_TREG" "thumbnail regenerate"
report_job "$JOB_TREG" "thumbnail regenerate"
# ─── 5. API latency ──────────────────────────────────────────────────────────
header "5 / API Latency (n=$BENCH_N requests each)"
measure_latency "books list" "$BASE_API/books"
measure_latency "search (query)" "$BASE_API/search?q=marvel"
if [ -n "$FIRST_BOOK_ID" ]; then
  # Cold page render: clear cache between runs by using different params
  # NOTE(review): "cold" here is best-effort — if this exact URL was rendered
  # before this script ran, the first request may already be cached.
  measure_latency "page render (width=1080, webp)" \
    "$BASE_API/books/$FIRST_BOOK_ID/pages/1?format=webp&quality=80&width=1080"
  # Warm render: same URL repeated → should hit LRU cache
  measure_latency "page render (warm/cached)" \
    "$BASE_API/books/$FIRST_BOOK_ID/pages/1?format=webp&quality=80&width=1080"
  measure_latency "thumbnail fetch" \
    "$BASE_API/books/$FIRST_BOOK_ID/thumbnail"
else
  warn "No books found — skipping page/thumbnail latency tests"
fi
# ─── 6. Page render deep-dive ────────────────────────────────────────────────
#
# Tests what the refactoring touches: archive reading for each format.
# Uses width-cycling to bypass disk cache and measure real decode cost.
# Tests: per-format cold render, sequential pages, concurrent throughput.
header "6 / Page Render Deep-Dive"
if [ -z "$FIRST_BOOK_ID" ]; then
  warn "No books found — skipping deep-dive"
else
  # Resolve one book per format (API may not support ?format= filter; graceful fallback)
  resolve_book_by_format() {
    local fmt="$1"
    local id
    id=$(auth "$BASE_API/books?format=$fmt&limit=1" 2>/dev/null \
      | python3 -c "import sys,json; items=json.load(sys.stdin).get('items',[]); print(items[0]['id'] if items else '')" 2>/dev/null || echo "")
    echo "$id"
  }
  BOOK_CBZ=$(resolve_book_by_format cbz)
  BOOK_CBR=$(resolve_book_by_format cbr)
  BOOK_PDF=$(resolve_book_by_format pdf)
  # Cold render: cycle widths (481..480+n) across N requests so each misses
  # the disk cache. (Comment previously said 480..487 — i starts at 1.)
  measure_latency_cold() {
    local label="$1" book_id="$2" n="${3:-$BENCH_N}"
    local total=0 i
    for i in $(seq 1 "$n"); do
      local w=$((480 + i)) # unique width → unique cache key
      local t
      t=$(curl -s -o /dev/null -w '%{time_total}' \
        -H "Authorization: Bearer $TOKEN" \
        "$BASE_API/books/$book_id/pages/1?format=webp&quality=80&width=$w")
      total=$(python3 -c "print($total + $t)")
    done
    local avg_ms
    avg_ms=$(python3 -c "print(round(($total / $n)*1000, 1))")
    row "$label" "${avg_ms}ms (cold, n=$n)"
  }
  echo ""
  echo " Cold render latency by format (cache-busted widths):"
  # Proper if/else instead of `&& … || …`: with the short-circuit form the
  # warn branch would also fire if the measurement itself returned non-zero.
  if [ -n "$BOOK_CBZ" ]; then measure_latency_cold "CBZ page 1 (cold)" "$BOOK_CBZ"; else warn "No CBZ book found"; fi
  if [ -n "$BOOK_CBR" ]; then measure_latency_cold "CBR page 1 (cold)" "$BOOK_CBR"; else warn "No CBR book found"; fi
  if [ -n "$BOOK_PDF" ]; then measure_latency_cold "PDF page 1 (cold)" "$BOOK_PDF"; else warn "No PDF book found"; fi
  # Warm render: same URL repeated → LRU / disk cache
  echo ""
  echo " Warm render (disk cache, same URL):"
  # One cold request first, then N warm
  curl -s -o /dev/null -H "Authorization: Bearer $TOKEN" \
    "$BASE_API/books/$FIRST_BOOK_ID/pages/1?format=webp&quality=80&width=600" >/dev/null
  measure_latency "page render (warm/disk-cached)" \
    "$BASE_API/books/$FIRST_BOOK_ID/pages/1?format=webp&quality=80&width=600"
  # Sequential pages: measures archive open+close overhead across consecutive pages
  echo ""
  echo " Sequential pages (pages 1-10, same book, cold widths):"
  SEQ_TOTAL=0
  for PAGE in $(seq 1 10); do
    # NOTE(review): renamed from local_t/local_ms — these run at top level,
    # not inside a function, so `local` would not apply anyway.
    seq_t=$(curl -s -o /dev/null -w '%{time_total}' \
      -H "Authorization: Bearer $TOKEN" \
      "$BASE_API/books/$FIRST_BOOK_ID/pages/$PAGE?format=webp&quality=80&width=$((500 + PAGE))")
    seq_ms=$(python3 -c "print(round($seq_t*1000, 1))")
    SEQ_TOTAL=$(python3 -c "print($SEQ_TOTAL + $seq_t)")
    row " page $PAGE" "${seq_ms}ms"
  done
  SEQ_AVG=$(python3 -c "print(round($SEQ_TOTAL / 10 * 1000, 1))")
  row " avg (10 pages)" "${SEQ_AVG}ms"
  # Concurrent throughput: N requests in parallel → measures semaphore + CPU saturation
  CONC_N="${CONC_N:-10}"
  echo ""
  echo " Concurrent rendering ($CONC_N simultaneous requests, cold widths):"
  # NOTE(review): `date +%s%3N` (millisecond epoch) is GNU date only; on
  # BSD/macOS it prints a literal "3N" — confirm this script is Linux-only.
  CONC_START=$(date +%s%3N)
  PIDS=()
  for i in $(seq 1 "$CONC_N"); do
    curl -s -o /dev/null \
      -H "Authorization: Bearer $TOKEN" \
      "$BASE_API/books/$FIRST_BOOK_ID/pages/$i?format=webp&quality=80&width=$((550 + i))" &
    PIDS+=($!)
  done
  # Reap every worker; ignore individual failures so one bad render does not
  # abort the benchmark under `set -e`.
  for PID in "${PIDS[@]}"; do wait "$PID" 2>/dev/null || true; done
  CONC_END=$(date +%s%3N)
  CONC_MS=$((CONC_END - CONC_START))
  CONC_PER=$(python3 -c "print(round($CONC_MS / $CONC_N, 1))")
  row " wall time (${CONC_N} pages in parallel)" "${CONC_MS}ms (~${CONC_PER}ms/page)"
fi
# ─── summary ─────────────────────────────────────────────────────────────────
# Recap the four job ids so a reader can re-query /index/jobs/:id afterwards.
header "Summary"
ok "Full rebuild job: $JOB_FULL"
ok "Incremental rebuild job: $JOB_INCR"
ok "Thumbnail rebuild job: $JOB_TREB"
ok "Thumbnail regenerate job: $JOB_TREG"
echo -e "\n${BOLD}perf done${RESET}"