add indexing jobs, parsers, and search APIs

This commit is contained in:
2026-03-05 15:05:34 +01:00
parent 88db9805b5
commit 6eaf2ba5dc
17 changed files with 1548 additions and 46 deletions

View File

@@ -1,6 +1,7 @@
API_LISTEN_ADDR=0.0.0.0:8080
ADMIN_UI_LISTEN_ADDR=0.0.0.0:8082
INDEXER_LISTEN_ADDR=0.0.0.0:8081
INDEXER_SCAN_INTERVAL_SECONDS=5
DATABASE_URL=postgres://stripstream:stripstream@postgres:5432/stripstream
MEILI_URL=http://meilisearch:7700
MEILI_MASTER_KEY=change-me

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@ target/
.env
.DS_Store
tmp/
libraries/

655
Cargo.lock generated
View File

@@ -2,6 +2,12 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "admin-ui"
version = "0.1.0"
@@ -14,6 +20,17 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "aes"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
]
[[package]]
name = "aho-corasick"
version = "1.1.4"
@@ -53,7 +70,8 @@ dependencies = [
"axum",
"base64",
"chrono",
"rand",
"rand 0.8.5",
"reqwest",
"serde",
"serde_json",
"sqlx",
@@ -65,6 +83,15 @@ dependencies = [
"uuid",
]
[[package]]
name = "arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "argon2"
version = "0.5.3"
@@ -203,12 +230,27 @@ dependencies = [
"generic-array",
]
[[package]]
name = "block-padding"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93"
dependencies = [
"generic-array",
]
[[package]]
name = "bumpalo"
version = "3.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
[[package]]
name = "bytecount"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e"
[[package]]
name = "byteorder"
version = "1.5.0"
@@ -221,6 +263,15 @@ version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
[[package]]
name = "cbc"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6"
dependencies = [
"cipher",
]
[[package]]
name = "cc"
version = "1.2.56"
@@ -237,6 +288,12 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "cfg_aliases"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
version = "0.4.44"
@@ -251,6 +308,16 @@ dependencies = [
"windows-link",
]
[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
"crypto-common",
"inout",
]
[[package]]
name = "concurrent-queue"
version = "2.5.0"
@@ -296,6 +363,34 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-queue"
version = "0.3.12"
@@ -332,6 +427,26 @@ dependencies = [
"zeroize",
]
[[package]]
name = "deranged"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c"
dependencies = [
"powerfmt",
]
[[package]]
name = "derive_arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "digest"
version = "0.10.7"
@@ -370,6 +485,15 @@ dependencies = [
"serde",
]
[[package]]
name = "encoding_rs"
version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if",
]
[[package]]
name = "equivalent"
version = "1.0.2"
@@ -414,6 +538,16 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
[[package]]
name = "flate2"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "flume"
version = "0.11.1"
@@ -528,8 +662,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi",
"wasm-bindgen",
]
[[package]]
name = "getrandom"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"r-efi 5.3.0",
"wasip2",
"wasm-bindgen",
]
[[package]]
@@ -540,7 +690,7 @@ checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"r-efi 6.0.0",
"wasip2",
"wasip3",
]
@@ -674,6 +824,24 @@ dependencies = [
"pin-utils",
"smallvec",
"tokio",
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.27.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
dependencies = [
"http",
"hyper",
"hyper-util",
"rustls",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
"webpki-roots 1.0.6",
]
[[package]]
@@ -682,13 +850,21 @@ version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
dependencies = [
"base64",
"bytes",
"futures-channel",
"futures-util",
"http",
"http-body",
"hyper",
"ipnet",
"libc",
"percent-encoding",
"pin-project-lite",
"socket2",
"tokio",
"tower-service",
"tracing",
]
[[package]]
@@ -829,10 +1005,19 @@ version = "0.1.0"
dependencies = [
"anyhow",
"axum",
"chrono",
"parsers",
"reqwest",
"serde",
"serde_json",
"sha2",
"sqlx",
"stripstream-core",
"tokio",
"tracing",
"tracing-subscriber",
"uuid",
"walkdir",
]
[[package]]
@@ -847,6 +1032,32 @@ dependencies = [
"serde_core",
]
[[package]]
name = "inout"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01"
dependencies = [
"block-padding",
"generic-array",
]
[[package]]
name = "ipnet"
version = "2.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
[[package]]
name = "iri-string"
version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "itoa"
version = "1.0.17"
@@ -933,6 +1144,36 @@ version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "lopdf"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7c1d3350d071cb86987a6bcb205c7019a0eb70dcad92b454fec722cca8d68b"
dependencies = [
"aes",
"cbc",
"chrono",
"encoding_rs",
"flate2",
"indexmap",
"itoa",
"log",
"md-5",
"nom",
"nom_locate",
"rangemap",
"rayon",
"thiserror",
"time",
"weezl",
]
[[package]]
name = "lru-slab"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "matchers"
version = "0.2.0"
@@ -970,6 +1211,22 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "mio"
version = "1.1.1"
@@ -981,6 +1238,27 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "nom_locate"
version = "4.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3"
dependencies = [
"bytecount",
"memchr",
"nom",
]
[[package]]
name = "nu-ansi-term"
version = "0.50.3"
@@ -1001,11 +1279,17 @@ dependencies = [
"num-integer",
"num-iter",
"num-traits",
"rand",
"rand 0.8.5",
"smallvec",
"zeroize",
]
[[package]]
name = "num-conv"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
[[package]]
name = "num-integer"
version = "0.1.46"
@@ -1076,6 +1360,8 @@ name = "parsers"
version = "0.1.0"
dependencies = [
"anyhow",
"lopdf",
"zip",
]
[[package]]
@@ -1085,7 +1371,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166"
dependencies = [
"base64ct",
"rand_core",
"rand_core 0.6.4",
"subtle",
]
@@ -1158,6 +1444,12 @@ dependencies = [
"zerovec",
]
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
@@ -1186,6 +1478,61 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "quinn"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
dependencies = [
"bytes",
"cfg_aliases",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2",
"thiserror",
"tokio",
"tracing",
"web-time",
]
[[package]]
name = "quinn-proto"
version = "0.11.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
dependencies = [
"bytes",
"getrandom 0.3.4",
"lru-slab",
"rand 0.9.2",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2",
"tracing",
"windows-sys 0.60.2",
]
[[package]]
name = "quote"
version = "1.0.45"
@@ -1195,6 +1542,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "r-efi"
version = "6.0.0"
@@ -1208,8 +1561,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_chacha 0.3.1",
"rand_core 0.6.4",
]
[[package]]
name = "rand"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.5",
]
[[package]]
@@ -1219,7 +1582,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
"rand_core 0.6.4",
]
[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core 0.9.5",
]
[[package]]
@@ -1231,6 +1604,41 @@ dependencies = [
"getrandom 0.2.17",
]
[[package]]
name = "rand_core"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
dependencies = [
"getrandom 0.3.4",
]
[[package]]
name = "rangemap"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68"
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.18"
@@ -1266,6 +1674,44 @@ version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "reqwest"
version = "0.12.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
dependencies = [
"base64",
"bytes",
"futures-core",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-util",
"js-sys",
"log",
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls",
"tower",
"tower-http",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"webpki-roots 1.0.6",
]
[[package]]
name = "ring"
version = "0.17.14"
@@ -1293,13 +1739,19 @@ dependencies = [
"num-traits",
"pkcs1",
"pkcs8",
"rand_core",
"rand_core 0.6.4",
"signature",
"spki",
"subtle",
"zeroize",
]
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "rustls"
version = "0.23.37"
@@ -1320,6 +1772,7 @@ version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
dependencies = [
"web-time",
"zeroize",
]
@@ -1346,6 +1799,15 @@ version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
@@ -1478,9 +1940,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [
"digest",
"rand_core",
"rand_core 0.6.4",
]
[[package]]
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "slab"
version = "0.4.12"
@@ -1644,7 +2112,7 @@ dependencies = [
"memchr",
"once_cell",
"percent-encoding",
"rand",
"rand 0.8.5",
"rsa",
"serde",
"sha1",
@@ -1684,7 +2152,7 @@ dependencies = [
"md-5",
"memchr",
"once_cell",
"rand",
"rand 0.8.5",
"serde",
"serde_json",
"sha2",
@@ -1770,6 +2238,9 @@ name = "sync_wrapper"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
dependencies = [
"futures-core",
]
[[package]]
name = "synstructure"
@@ -1811,6 +2282,37 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "time"
version = "0.3.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde_core",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
[[package]]
name = "time-macros"
version = "0.2.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "tinystr"
version = "0.8.2"
@@ -1863,6 +2365,16 @@ dependencies = [
"syn",
]
[[package]]
name = "tokio-rustls"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
dependencies = [
"rustls",
"tokio",
]
[[package]]
name = "tokio-stream"
version = "0.1.18"
@@ -1890,6 +2402,24 @@ dependencies = [
"tracing",
]
[[package]]
name = "tower-http"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [
"bitflags",
"bytes",
"futures-util",
"http",
"http-body",
"iri-string",
"pin-project-lite",
"tower",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower-layer"
version = "0.3.3"
@@ -1964,6 +2494,12 @@ dependencies = [
"tracing-log",
]
[[package]]
name = "try-lock"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "typenum"
version = "1.19.0"
@@ -2057,6 +2593,25 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "want"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
dependencies = [
"try-lock",
]
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
@@ -2100,6 +2655,20 @@ dependencies = [
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8"
dependencies = [
"cfg-if",
"futures-util",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.114"
@@ -2166,6 +2735,26 @@ dependencies = [
"semver",
]
[[package]]
name = "web-sys"
version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "web-time"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "webpki-roots"
version = "0.26.11"
@@ -2184,6 +2773,12 @@ dependencies = [
"rustls-pki-types",
]
[[package]]
name = "weezl"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88"
[[package]]
name = "whoami"
version = "1.6.1"
@@ -2194,6 +2789,15 @@ dependencies = [
"wasite",
]
[[package]]
name = "winapi-util"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "windows-core"
version = "0.62.2"
@@ -2672,8 +3276,37 @@ dependencies = [
"syn",
]
[[package]]
name = "zip"
version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
dependencies = [
"arbitrary",
"crc32fast",
"crossbeam-utils",
"displaydoc",
"flate2",
"indexmap",
"memchr",
"thiserror",
"zopfli",
]
[[package]]
name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
[[package]]
name = "zopfli"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249"
dependencies = [
"bumpalo",
"crc32fast",
"log",
"simd-adler32",
]

View File

@@ -19,12 +19,15 @@ argon2 = "0.5"
axum = "0.7"
base64 = "0.22"
chrono = { version = "0.4", features = ["serde"] }
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
rand = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sha2 = "0.10"
sqlx = { version = "0.8", features = ["runtime-tokio-rustls", "postgres", "uuid", "chrono"] }
tokio = { version = "1.43", features = ["macros", "rt-multi-thread", "signal"] }
tower = "0.5"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
uuid = { version = "1.12", features = ["serde", "v4"] }
walkdir = "2.5"

51
PLAN.md
View File

@@ -69,53 +69,53 @@ Construire un serveur ultra performant pour indexer et servir des bibliotheques
**DoD:** Gestion librairies robuste et securisee.
### T7 - Jobs d'indexation
- [ ] Orchestration scan/rebuild
- [ ] `POST /index/rebuild`
- [ ] `GET /index/status`
- [ ] Persist statuts/erreurs dans `index_jobs`
- [x] Orchestration scan/rebuild
- [x] `POST /index/rebuild`
- [x] `GET /index/status`
- [x] Persist statuts/erreurs dans `index_jobs`
**DoD:** Jobs tracables et relancables.
### T8 - Scan incremental
- [ ] Decouverte fichiers supportes
- [ ] Fingerprint (`size`, `mtime`, hash partiel)
- [ ] Upsert `book_files`
- [ ] Detection suppressions/renommages
- [x] Decouverte fichiers supportes
- [x] Fingerprint (`size`, `mtime`, hash partiel)
- [x] Upsert `book_files`
- [x] Detection suppressions/renommages
**DoD:** Rescan = traitement des deltas uniquement.
### T9 - Parsing CBZ
- [ ] Lecture zip + ordre pages naturel
- [ ] Metadonnees de base + cover
- [ ] Gestion erreurs archive
- [x] Lecture zip + ordre pages naturel
- [x] Metadonnees de base + cover
- [x] Gestion erreurs archive
**DoD:** Livres CBZ indexes correctement.
### T10 - Parsing CBR (temp disk)
- [ ] Extraction temp via `unrar`
- [ ] Ordonnancement pages
- [ ] Cleanup garanti (meme en erreur)
- [x] Extraction temp via `unrar`
- [x] Ordonnancement pages
- [x] Cleanup garanti (meme en erreur)
**DoD:** Pas de fuite temp, parsing stable.
### T11 - Parsing PDF
- [ ] Metadonnees (titre/auteur/pages)
- [ ] Preparation rendu page a la volee
- [ ] Gestion fichiers corrompus
- [x] Metadonnees (titre/auteur/pages)
- [x] Preparation rendu page a la volee
- [x] Gestion fichiers corrompus
**DoD:** PDF indexes et lisibles.
### T12 - API livres
- [ ] `GET /books` (filtres + curseur)
- [ ] `GET /books/:id`
- [ ] Contrat JSON propre et stable
- [x] `GET /books` (filtres + curseur)
- [x] `GET /books/:id`
- [x] Contrat JSON propre et stable
**DoD:** Pagination/filtres fonctionnels.
### T13 - Recherche
- [ ] Projection vers Meilisearch
- [ ] `GET /search?q=...&library_id=...&type=...`
- [ ] Fuzzy + filtres
- [x] Projection vers Meilisearch
- [x] `GET /search?q=...&library_id=...&type=...`
- [x] Fuzzy + filtres
**DoD:** Recherche rapide et pertinente.
@@ -190,7 +190,7 @@ Construire un serveur ultra performant pour indexer et servir des bibliotheques
## Suivi d'avancement
- [x] Lot 1: Fondations (T1 -> T6)
- [ ] Lot 2: Ingestion + Search (T7 -> T13)
- [x] Lot 2: Ingestion + Search (T7 -> T13)
- [ ] Lot 3: Lecture + UI + Hardening (T14 -> T18)
## Notes
@@ -200,3 +200,6 @@ Construire un serveur ultra performant pour indexer et servir des bibliotheques
## Journal
- 2026-03-05: `docker compose up -d --build` valide, stack complete en healthy (`postgres`, `meilisearch`, `api`, `indexer`, `admin-ui`).
- 2026-03-05: ajustements infra appliques pour demarrage stable (`unrar` -> `unrar-free`, image `rust:1-bookworm`, healthchecks `127.0.0.1`).
- 2026-03-05: ajout d'un service `migrate` dans Compose pour executer automatiquement `infra/migrations/0001_init.sql` au demarrage.
- 2026-03-05: Lot 2 termine (jobs, scan incremental, parsers `cbz/cbr/pdf`, API livres, sync + recherche Meilisearch).
- 2026-03-05: verification de bout en bout OK sur une librairie de test (`/libraries/demo`) avec indexation, listing `/books` et recherche `/search` (1 CBZ detecte).

View File

@@ -12,6 +12,7 @@ base64.workspace = true
chrono.workspace = true
stripstream-core = { path = "../../crates/core" }
rand.workspace = true
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
sqlx.workspace = true

View File

@@ -32,6 +32,18 @@ pub async fn require_admin(
Ok(next.run(req).await)
}
pub async fn require_read(
State(state): State<AppState>,
mut req: Request,
next: Next,
) -> Result<Response, ApiError> {
let token = bearer_token(&req).ok_or_else(|| ApiError::unauthorized("missing bearer token"))?;
let scope = authenticate(&state, token).await?;
req.extensions_mut().insert(scope);
Ok(next.run(req).await)
}
fn bearer_token(req: &Request) -> Option<&str> {
req.headers()
.get(AUTHORIZATION)

143
apps/api/src/books.rs Normal file
View File

@@ -0,0 +1,143 @@
use axum::{extract::{Path, Query, State}, Json};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sqlx::Row;
use uuid::Uuid;
use crate::{error::ApiError, AppState};
#[derive(Deserialize)]
pub struct ListBooksQuery {
pub library_id: Option<Uuid>,
pub kind: Option<String>,
pub cursor: Option<Uuid>,
pub limit: Option<i64>,
}
#[derive(Serialize)]
pub struct BookItem {
pub id: Uuid,
pub library_id: Uuid,
pub kind: String,
pub title: String,
pub author: Option<String>,
pub series: Option<String>,
pub volume: Option<String>,
pub language: Option<String>,
pub page_count: Option<i32>,
pub updated_at: DateTime<Utc>,
}
#[derive(Serialize)]
pub struct BooksPage {
pub items: Vec<BookItem>,
pub next_cursor: Option<Uuid>,
}
#[derive(Serialize)]
pub struct BookDetails {
pub id: Uuid,
pub library_id: Uuid,
pub kind: String,
pub title: String,
pub author: Option<String>,
pub series: Option<String>,
pub volume: Option<String>,
pub language: Option<String>,
pub page_count: Option<i32>,
pub file_path: Option<String>,
pub file_format: Option<String>,
pub file_parse_status: Option<String>,
}
pub async fn list_books(
State(state): State<AppState>,
Query(query): Query<ListBooksQuery>,
) -> Result<Json<BooksPage>, ApiError> {
let limit = query.limit.unwrap_or(50).clamp(1, 200);
let rows = sqlx::query(
r#"
SELECT id, library_id, kind, title, author, series, volume, language, page_count, updated_at
FROM books
WHERE ($1::uuid IS NULL OR library_id = $1)
AND ($2::text IS NULL OR kind = $2)
AND ($3::uuid IS NULL OR id > $3)
ORDER BY id ASC
LIMIT $4
"#,
)
.bind(query.library_id)
.bind(query.kind.as_deref())
.bind(query.cursor)
.bind(limit + 1)
.fetch_all(&state.pool)
.await?;
let mut items: Vec<BookItem> = rows
.iter()
.take(limit as usize)
.map(|row| BookItem {
id: row.get("id"),
library_id: row.get("library_id"),
kind: row.get("kind"),
title: row.get("title"),
author: row.get("author"),
series: row.get("series"),
volume: row.get("volume"),
language: row.get("language"),
page_count: row.get("page_count"),
updated_at: row.get("updated_at"),
})
.collect();
let next_cursor = if rows.len() > limit as usize {
items.last().map(|b| b.id)
} else {
None
};
Ok(Json(BooksPage {
items: std::mem::take(&mut items),
next_cursor,
}))
}
pub async fn get_book(
State(state): State<AppState>,
Path(id): Path<Uuid>,
) -> Result<Json<BookDetails>, ApiError> {
let row = sqlx::query(
r#"
SELECT b.id, b.library_id, b.kind, b.title, b.author, b.series, b.volume, b.language, b.page_count,
bf.abs_path, bf.format, bf.parse_status
FROM books b
LEFT JOIN LATERAL (
SELECT abs_path, format, parse_status
FROM book_files
WHERE book_id = b.id
ORDER BY updated_at DESC
LIMIT 1
) bf ON TRUE
WHERE b.id = $1
"#,
)
.bind(id)
.fetch_optional(&state.pool)
.await?;
let row = row.ok_or_else(|| ApiError::not_found("book not found"))?;
Ok(Json(BookDetails {
id: row.get("id"),
library_id: row.get("library_id"),
kind: row.get("kind"),
title: row.get("title"),
author: row.get("author"),
series: row.get("series"),
volume: row.get("volume"),
language: row.get("language"),
page_count: row.get("page_count"),
file_path: row.get("abs_path"),
file_format: row.get("format"),
file_parse_status: row.get("parse_status"),
}))
}

View File

@@ -0,0 +1,74 @@
use axum::{extract::State, Json};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sqlx::Row;
use uuid::Uuid;
use crate::{error::ApiError, AppState};
#[derive(Deserialize)]
pub struct RebuildRequest {
pub library_id: Option<Uuid>,
}
#[derive(Serialize)]
pub struct IndexJobItem {
pub id: Uuid,
pub library_id: Option<Uuid>,
pub r#type: String,
pub status: String,
pub started_at: Option<DateTime<Utc>>,
pub finished_at: Option<DateTime<Utc>>,
pub stats_json: Option<serde_json::Value>,
pub error_opt: Option<String>,
pub created_at: DateTime<Utc>,
}
pub async fn enqueue_rebuild(
State(state): State<AppState>,
payload: Option<Json<RebuildRequest>>,
) -> Result<Json<IndexJobItem>, ApiError> {
let library_id = payload.and_then(|p| p.0.library_id);
let id = Uuid::new_v4();
sqlx::query(
"INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, 'rebuild', 'pending')",
)
.bind(id)
.bind(library_id)
.execute(&state.pool)
.await?;
let row = sqlx::query(
"SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at FROM index_jobs WHERE id = $1",
)
.bind(id)
.fetch_one(&state.pool)
.await?;
Ok(Json(map_row(row)))
}
pub async fn list_index_jobs(State(state): State<AppState>) -> Result<Json<Vec<IndexJobItem>>, ApiError> {
let rows = sqlx::query(
"SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at FROM index_jobs ORDER BY created_at DESC LIMIT 100",
)
.fetch_all(&state.pool)
.await?;
Ok(Json(rows.into_iter().map(map_row).collect()))
}
fn map_row(row: sqlx::postgres::PgRow) -> IndexJobItem {
IndexJobItem {
id: row.get("id"),
library_id: row.get("library_id"),
r#type: row.get("type"),
status: row.get("status"),
started_at: row.get("started_at"),
finished_at: row.get("finished_at"),
stats_json: row.get("stats_json"),
error_opt: row.get("error_opt"),
created_at: row.get("created_at"),
}
}

View File

@@ -1,6 +1,9 @@
mod auth;
mod books;
mod error;
mod index_jobs;
mod libraries;
mod search;
mod tokens;
use std::sync::Arc;
@@ -14,6 +17,8 @@ use tracing::info;
struct AppState {
pool: sqlx::PgPool,
bootstrap_token: Arc<str>,
meili_url: Arc<str>,
meili_master_key: Arc<str>,
}
#[tokio::main]
@@ -33,18 +38,29 @@ async fn main() -> anyhow::Result<()> {
let state = AppState {
pool,
bootstrap_token: Arc::from(config.api_bootstrap_token),
meili_url: Arc::from(config.meili_url),
meili_master_key: Arc::from(config.meili_master_key),
};
let protected = Router::new()
let admin_routes = Router::new()
.route("/libraries", get(libraries::list_libraries).post(libraries::create_library))
.route("/libraries/:id", delete(libraries::delete_library))
.route("/index/rebuild", axum::routing::post(index_jobs::enqueue_rebuild))
.route("/index/status", get(index_jobs::list_index_jobs))
.route("/admin/tokens", get(tokens::list_tokens).post(tokens::create_token))
.route("/admin/tokens/:id", delete(tokens::revoke_token))
.layer(middleware::from_fn_with_state(state.clone(), auth::require_admin));
let read_routes = Router::new()
.route("/books", get(books::list_books))
.route("/books/:id", get(books::get_book))
.route("/search", get(search::search_books))
.layer(middleware::from_fn_with_state(state.clone(), auth::require_read));
let app = Router::new()
.route("/health", get(health))
.merge(protected)
.merge(admin_routes)
.merge(read_routes)
.with_state(state);
let listener = tokio::net::TcpListener::bind(&config.listen_addr).await?;

77
apps/api/src/search.rs Normal file
View File

@@ -0,0 +1,77 @@
use axum::{extract::{Query, State}, Json};
use serde::{Deserialize, Serialize};
use crate::{error::ApiError, AppState};
#[derive(Deserialize)]
pub struct SearchQuery {
pub q: String,
pub library_id: Option<String>,
pub r#type: Option<String>,
pub kind: Option<String>,
pub limit: Option<usize>,
}
#[derive(Serialize)]
pub struct SearchResponse {
pub hits: serde_json::Value,
pub estimated_total_hits: Option<u64>,
pub processing_time_ms: Option<u64>,
}
pub async fn search_books(
State(state): State<AppState>,
Query(query): Query<SearchQuery>,
) -> Result<Json<SearchResponse>, ApiError> {
if query.q.trim().is_empty() {
return Err(ApiError::bad_request("q is required"));
}
let mut filters: Vec<String> = Vec::new();
if let Some(library_id) = query.library_id.as_deref() {
filters.push(format!("library_id = '{}'", library_id.replace('"', "")));
}
let kind_filter = query.r#type.as_deref().or(query.kind.as_deref());
if let Some(kind) = kind_filter {
filters.push(format!("kind = '{}'", kind.replace('"', "")));
}
let body = serde_json::json!({
"q": query.q,
"limit": query.limit.unwrap_or(20).clamp(1, 100),
"filter": if filters.is_empty() { serde_json::Value::Null } else { serde_json::Value::String(filters.join(" AND ")) }
});
let client = reqwest::Client::new();
let url = format!("{}/indexes/books/search", state.meili_url.trim_end_matches('/'));
let response = client
.post(url)
.header("Authorization", format!("Bearer {}", state.meili_master_key))
.json(&body)
.send()
.await
.map_err(|e| ApiError::internal(format!("meili request failed: {e}")))?;
if !response.status().is_success() {
let body = response.text().await.unwrap_or_else(|_| "unknown meili error".to_string());
if body.contains("index_not_found") {
return Ok(Json(SearchResponse {
hits: serde_json::json!([]),
estimated_total_hits: Some(0),
processing_time_ms: Some(0),
}));
}
return Err(ApiError::internal(format!("meili error: {body}")));
}
let payload: serde_json::Value = response
.json()
.await
.map_err(|e| ApiError::internal(format!("invalid meili response: {e}")))?;
Ok(Json(SearchResponse {
hits: payload.get("hits").cloned().unwrap_or_else(|| serde_json::json!([])),
estimated_total_hits: payload.get("estimatedTotalHits").and_then(|v| v.as_u64()),
processing_time_ms: payload.get("processingTimeMs").and_then(|v| v.as_u64()),
}))
}

View File

@@ -7,7 +7,16 @@ license.workspace = true
[dependencies]
anyhow.workspace = true
axum.workspace = true
chrono.workspace = true
parsers = { path = "../../crates/parsers" }
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
sha2.workspace = true
sqlx.workspace = true
stripstream-core = { path = "../../crates/core" }
tokio.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
uuid.workspace = true
walkdir.workspace = true

View File

@@ -1,6 +1,30 @@
use anyhow::Context;
use axum::{routing::get, Router};
use chrono::{DateTime, Utc};
use parsers::{detect_format, parse_metadata, BookFormat};
use serde::Serialize;
use sha2::{Digest, Sha256};
use sqlx::{postgres::PgPoolOptions, Row};
use std::{collections::HashMap, path::Path, time::Duration};
use stripstream_core::config::IndexerConfig;
use tracing::info;
use tracing::{error, info};
use uuid::Uuid;
use walkdir::WalkDir;
#[derive(Clone)]
struct AppState {
pool: sqlx::PgPool,
meili_url: String,
meili_master_key: String,
}
#[derive(Serialize)]
struct JobStats {
scanned_files: usize,
indexed_files: usize,
removed_files: usize,
errors: usize,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
@@ -10,7 +34,20 @@ async fn main() -> anyhow::Result<()> {
)
.init();
let config = IndexerConfig::from_env();
let config = IndexerConfig::from_env()?;
let pool = PgPoolOptions::new()
.max_connections(5)
.connect(&config.database_url)
.await?;
let state = AppState {
pool,
meili_url: config.meili_url.clone(),
meili_master_key: config.meili_master_key.clone(),
};
tokio::spawn(run_worker(state.clone(), config.scan_interval_seconds));
let app = Router::new().route("/health", get(health));
let listener = tokio::net::TcpListener::bind(&config.listen_addr).await?;
@@ -22,3 +59,363 @@ async fn main() -> anyhow::Result<()> {
async fn health() -> &'static str {
"ok"
}
async fn run_worker(state: AppState, interval_seconds: u64) {
let wait = Duration::from_secs(interval_seconds.max(1));
loop {
match claim_next_job(&state.pool).await {
Ok(Some((job_id, library_id))) => {
if let Err(err) = process_job(&state, job_id, library_id).await {
error!(job_id = %job_id, error = %err, "index job failed");
let _ = fail_job(&state.pool, job_id, &err.to_string()).await;
}
}
Ok(None) => tokio::time::sleep(wait).await,
Err(err) => {
error!(error = %err, "worker loop error");
tokio::time::sleep(wait).await;
}
}
}
}
async fn claim_next_job(pool: &sqlx::PgPool) -> anyhow::Result<Option<(Uuid, Option<Uuid>)>> {
let mut tx = pool.begin().await?;
let row = sqlx::query(
r#"
SELECT id, library_id
FROM index_jobs
WHERE status = 'pending'
ORDER BY created_at ASC
FOR UPDATE SKIP LOCKED
LIMIT 1
"#,
)
.fetch_optional(&mut *tx)
.await?;
let Some(row) = row else {
tx.commit().await?;
return Ok(None);
};
let id: Uuid = row.get("id");
let library_id: Option<Uuid> = row.get("library_id");
sqlx::query("UPDATE index_jobs SET status = 'running', started_at = NOW(), error_opt = NULL WHERE id = $1")
.bind(id)
.execute(&mut *tx)
.await?;
tx.commit().await?;
Ok(Some((id, library_id)))
}
async fn process_job(state: &AppState, job_id: Uuid, target_library_id: Option<Uuid>) -> anyhow::Result<()> {
let libraries = if let Some(library_id) = target_library_id {
sqlx::query("SELECT id, root_path FROM libraries WHERE id = $1 AND enabled = TRUE")
.bind(library_id)
.fetch_all(&state.pool)
.await?
} else {
sqlx::query("SELECT id, root_path FROM libraries WHERE enabled = TRUE")
.fetch_all(&state.pool)
.await?
};
let mut stats = JobStats {
scanned_files: 0,
indexed_files: 0,
removed_files: 0,
errors: 0,
};
for library in libraries {
let library_id: Uuid = library.get("id");
let root_path: String = library.get("root_path");
match scan_library(state, library_id, Path::new(&root_path), &mut stats).await {
Ok(()) => {}
Err(err) => {
stats.errors += 1;
error!(library_id = %library_id, error = %err, "library scan failed");
}
}
}
sync_meili(&state.pool, &state.meili_url, &state.meili_master_key).await?;
sqlx::query("UPDATE index_jobs SET status = 'success', finished_at = NOW(), stats_json = $2 WHERE id = $1")
.bind(job_id)
.bind(serde_json::to_value(&stats)?)
.execute(&state.pool)
.await?;
Ok(())
}
async fn fail_job(pool: &sqlx::PgPool, job_id: Uuid, error_message: &str) -> anyhow::Result<()> {
sqlx::query("UPDATE index_jobs SET status = 'failed', finished_at = NOW(), error_opt = $2 WHERE id = $1")
.bind(job_id)
.bind(error_message)
.execute(pool)
.await?;
Ok(())
}
async fn scan_library(
state: &AppState,
library_id: Uuid,
root: &Path,
stats: &mut JobStats,
) -> anyhow::Result<()> {
let existing_rows = sqlx::query(
r#"
SELECT bf.id AS file_id, bf.book_id, bf.abs_path, bf.fingerprint
FROM book_files bf
JOIN books b ON b.id = bf.book_id
WHERE b.library_id = $1
"#,
)
.bind(library_id)
.fetch_all(&state.pool)
.await?;
let mut existing: HashMap<String, (Uuid, Uuid, String)> = HashMap::new();
for row in existing_rows {
existing.insert(
row.get("abs_path"),
(row.get("file_id"), row.get("book_id"), row.get("fingerprint")),
);
}
let mut seen: HashMap<String, bool> = HashMap::new();
for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
if !entry.file_type().is_file() {
continue;
}
let path = entry.path();
let Some(format) = detect_format(path) else {
continue;
};
stats.scanned_files += 1;
let abs_path = path.to_string_lossy().to_string();
seen.insert(abs_path.clone(), true);
let metadata = std::fs::metadata(path)
.with_context(|| format!("cannot stat {}", path.display()))?;
let mtime: DateTime<Utc> = metadata
.modified()
.map(DateTime::<Utc>::from)
.unwrap_or_else(|_| Utc::now());
let fingerprint = compute_fingerprint(path, metadata.len(), &mtime)?;
if let Some((file_id, book_id, old_fingerprint)) = existing.get(&abs_path).cloned() {
if old_fingerprint == fingerprint {
continue;
}
match parse_metadata(path, format) {
Ok(parsed) => {
sqlx::query(
"UPDATE books SET title = $2, kind = $3, page_count = $4, updated_at = NOW() WHERE id = $1",
)
.bind(book_id)
.bind(parsed.title)
.bind(kind_from_format(format))
.bind(parsed.page_count)
.execute(&state.pool)
.await?;
sqlx::query(
"UPDATE book_files SET format = $2, size_bytes = $3, mtime = $4, fingerprint = $5, parse_status = 'ok', parse_error_opt = NULL, updated_at = NOW() WHERE id = $1",
)
.bind(file_id)
.bind(format.as_str())
.bind(metadata.len() as i64)
.bind(mtime)
.bind(fingerprint)
.execute(&state.pool)
.await?;
stats.indexed_files += 1;
}
Err(err) => {
stats.errors += 1;
sqlx::query(
"UPDATE book_files SET parse_status = 'error', parse_error_opt = $2, updated_at = NOW() WHERE id = $1",
)
.bind(file_id)
.bind(err.to_string())
.execute(&state.pool)
.await?;
}
}
continue;
}
match parse_metadata(path, format) {
Ok(parsed) => {
let book_id = Uuid::new_v4();
let file_id = Uuid::new_v4();
sqlx::query(
"INSERT INTO books (id, library_id, kind, title, page_count) VALUES ($1, $2, $3, $4, $5)",
)
.bind(book_id)
.bind(library_id)
.bind(kind_from_format(format))
.bind(parsed.title)
.bind(parsed.page_count)
.execute(&state.pool)
.await?;
sqlx::query(
"INSERT INTO book_files (id, book_id, format, abs_path, size_bytes, mtime, fingerprint, parse_status) VALUES ($1, $2, $3, $4, $5, $6, $7, 'ok')",
)
.bind(file_id)
.bind(book_id)
.bind(format.as_str())
.bind(&abs_path)
.bind(metadata.len() as i64)
.bind(mtime)
.bind(fingerprint)
.execute(&state.pool)
.await?;
stats.indexed_files += 1;
}
Err(err) => {
stats.errors += 1;
let book_id = Uuid::new_v4();
let file_id = Uuid::new_v4();
sqlx::query(
"INSERT INTO books (id, library_id, kind, title, page_count) VALUES ($1, $2, $3, $4, NULL)",
)
.bind(book_id)
.bind(library_id)
.bind(kind_from_format(format))
.bind(file_display_name(path))
.execute(&state.pool)
.await?;
sqlx::query(
"INSERT INTO book_files (id, book_id, format, abs_path, size_bytes, mtime, fingerprint, parse_status, parse_error_opt) VALUES ($1, $2, $3, $4, $5, $6, $7, 'error', $8)",
)
.bind(file_id)
.bind(book_id)
.bind(format.as_str())
.bind(&abs_path)
.bind(metadata.len() as i64)
.bind(mtime)
.bind(fingerprint)
.bind(err.to_string())
.execute(&state.pool)
.await?;
}
}
}
for (abs_path, (file_id, book_id, _)) in existing {
if seen.contains_key(&abs_path) {
continue;
}
sqlx::query("DELETE FROM book_files WHERE id = $1")
.bind(file_id)
.execute(&state.pool)
.await?;
sqlx::query("DELETE FROM books WHERE id = $1 AND NOT EXISTS (SELECT 1 FROM book_files WHERE book_id = $1)")
.bind(book_id)
.execute(&state.pool)
.await?;
stats.removed_files += 1;
}
Ok(())
}
fn compute_fingerprint(path: &Path, size: u64, mtime: &DateTime<Utc>) -> anyhow::Result<String> {
let mut hasher = Sha256::new();
hasher.update(size.to_le_bytes());
hasher.update(mtime.timestamp().to_le_bytes());
let bytes = std::fs::read(path)?;
let take = bytes.len().min(65_536);
hasher.update(&bytes[..take]);
Ok(format!("{:x}", hasher.finalize()))
}
fn kind_from_format(format: BookFormat) -> &'static str {
match format {
BookFormat::Pdf => "ebook",
BookFormat::Cbz | BookFormat::Cbr => "comic",
}
}
fn file_display_name(path: &Path) -> String {
path.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_else(|| "Untitled".to_string())
}
#[derive(Serialize)]
struct SearchDoc {
id: String,
library_id: String,
kind: String,
title: String,
author: Option<String>,
series: Option<String>,
volume: Option<String>,
language: Option<String>,
}
async fn sync_meili(pool: &sqlx::PgPool, meili_url: &str, meili_master_key: &str) -> anyhow::Result<()> {
let client = reqwest::Client::new();
let base = meili_url.trim_end_matches('/');
let _ = client
.post(format!("{base}/indexes"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!({"uid": "books", "primaryKey": "id"}))
.send()
.await;
let _ = client
.patch(format!("{base}/indexes/books/settings/filterable-attributes"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&serde_json::json!(["library_id", "kind"]))
.send()
.await;
let rows = sqlx::query(
"SELECT id, library_id, kind, title, author, series, volume, language FROM books",
)
.fetch_all(pool)
.await?;
let docs: Vec<SearchDoc> = rows
.into_iter()
.map(|row| SearchDoc {
id: row.get::<Uuid, _>("id").to_string(),
library_id: row.get::<Uuid, _>("library_id").to_string(),
kind: row.get("kind"),
title: row.get("title"),
author: row.get("author"),
series: row.get("series"),
volume: row.get("volume"),
language: row.get("language"),
})
.collect();
client
.put(format!("{base}/indexes/books/documents?primaryKey=id"))
.header("Authorization", format!("Bearer {meili_master_key}"))
.json(&docs)
.send()
.await
.context("failed to push docs to meili")?;
Ok(())
}

View File

@@ -4,6 +4,8 @@ use anyhow::{Context, Result};
pub struct ApiConfig {
pub listen_addr: String,
pub database_url: String,
pub meili_url: String,
pub meili_master_key: String,
pub api_bootstrap_token: String,
}
@@ -12,6 +14,8 @@ impl ApiConfig {
Ok(Self {
listen_addr: std::env::var("API_LISTEN_ADDR").unwrap_or_else(|_| "0.0.0.0:8080".to_string()),
database_url: std::env::var("DATABASE_URL").context("DATABASE_URL is required")?,
meili_url: std::env::var("MEILI_URL").context("MEILI_URL is required")?,
meili_master_key: std::env::var("MEILI_MASTER_KEY").context("MEILI_MASTER_KEY is required")?,
api_bootstrap_token: std::env::var("API_BOOTSTRAP_TOKEN")
.context("API_BOOTSTRAP_TOKEN is required")?,
})
@@ -21,14 +25,25 @@ impl ApiConfig {
#[derive(Debug, Clone)]
pub struct IndexerConfig {
pub listen_addr: String,
pub database_url: String,
pub meili_url: String,
pub meili_master_key: String,
pub scan_interval_seconds: u64,
}
impl IndexerConfig {
pub fn from_env() -> Self {
Self {
pub fn from_env() -> Result<Self> {
Ok(Self {
listen_addr: std::env::var("INDEXER_LISTEN_ADDR")
.unwrap_or_else(|_| "0.0.0.0:8081".to_string()),
}
database_url: std::env::var("DATABASE_URL").context("DATABASE_URL is required")?,
meili_url: std::env::var("MEILI_URL").context("MEILI_URL is required")?,
meili_master_key: std::env::var("MEILI_MASTER_KEY").context("MEILI_MASTER_KEY is required")?,
scan_interval_seconds: std::env::var("INDEXER_SCAN_INTERVAL_SECONDS")
.ok()
.and_then(|v| v.parse::<u64>().ok())
.unwrap_or(5),
})
}
}

View File

@@ -6,3 +6,5 @@ license.workspace = true
[dependencies]
anyhow.workspace = true
lopdf = "0.35"
zip = { version = "2.2", default-features = false, features = ["deflate"] }

View File

@@ -1,3 +1,96 @@
pub fn supported_formats() -> &'static [&'static str] {
&["cbz", "cbr", "pdf"]
use anyhow::{Context, Result};
use std::path::Path;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BookFormat {
Cbz,
Cbr,
Pdf,
}
impl BookFormat {
pub fn as_str(self) -> &'static str {
match self {
Self::Cbz => "cbz",
Self::Cbr => "cbr",
Self::Pdf => "pdf",
}
}
}
#[derive(Debug, Clone)]
pub struct ParsedMetadata {
pub title: String,
pub page_count: Option<i32>,
}
pub fn detect_format(path: &Path) -> Option<BookFormat> {
let ext = path.extension()?.to_string_lossy().to_ascii_lowercase();
match ext.as_str() {
"cbz" => Some(BookFormat::Cbz),
"cbr" => Some(BookFormat::Cbr),
"pdf" => Some(BookFormat::Pdf),
_ => None,
}
}
pub fn parse_metadata(path: &Path, format: BookFormat) -> Result<ParsedMetadata> {
let title = path
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_else(|| "Untitled".to_string());
let page_count = match format {
BookFormat::Cbz => parse_cbz_page_count(path).ok(),
BookFormat::Cbr => parse_cbr_page_count(path).ok(),
BookFormat::Pdf => parse_pdf_page_count(path).ok(),
};
Ok(ParsedMetadata { title, page_count })
}
fn parse_cbz_page_count(path: &Path) -> Result<i32> {
let file = std::fs::File::open(path).with_context(|| format!("cannot open cbz: {}", path.display()))?;
let mut archive = zip::ZipArchive::new(file).context("invalid cbz archive")?;
let mut count: i32 = 0;
for i in 0..archive.len() {
let entry = archive.by_index(i).context("cannot read cbz entry")?;
let name = entry.name().to_ascii_lowercase();
if is_image_name(&name) {
count += 1;
}
}
Ok(count)
}
fn parse_cbr_page_count(path: &Path) -> Result<i32> {
let output = std::process::Command::new("unrar")
.arg("lb")
.arg(path)
.output()
.with_context(|| format!("failed to execute unrar for {}", path.display()))?;
if !output.status.success() {
return Err(anyhow::anyhow!("unrar failed for {}", path.display()));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let count = stdout
.lines()
.filter(|line| is_image_name(&line.to_ascii_lowercase()))
.count() as i32;
Ok(count)
}
fn parse_pdf_page_count(path: &Path) -> Result<i32> {
let doc = lopdf::Document::load(path).with_context(|| format!("cannot open pdf: {}", path.display()))?;
Ok(doc.get_pages().len() as i32)
}
fn is_image_name(name: &str) -> bool {
name.ends_with(".jpg")
|| name.ends_with(".jpeg")
|| name.ends_with(".png")
|| name.ends_with(".webp")
|| name.ends_with(".avif")
}

View File

@@ -17,8 +17,8 @@ services:
meilisearch:
image: getmeili/meilisearch:v1.12
environment:
MEILI_MASTER_KEY: ${MEILI_MASTER_KEY:-change-me}
env_file:
- ../.env
ports:
- "7700:7700"
volumes:
@@ -29,6 +29,20 @@ services:
timeout: 5s
retries: 5
migrate:
image: postgres:16-alpine
depends_on:
postgres:
condition: service_healthy
volumes:
- ./migrations:/migrations:ro
command:
[
"sh",
"-c",
"PGPASSWORD=stripstream psql -h postgres -U stripstream -d stripstream -f /migrations/0001_init.sql",
]
api:
build:
context: ..
@@ -37,7 +51,11 @@ services:
- ../.env
ports:
- "8080:8080"
volumes:
- ../libraries:/libraries
depends_on:
migrate:
condition: service_completed_successfully
postgres:
condition: service_healthy
meilisearch:
@@ -56,7 +74,11 @@ services:
- ../.env
ports:
- "8081:8081"
volumes:
- ../libraries:/libraries
depends_on:
migrate:
condition: service_completed_successfully
postgres:
condition: service_healthy
meilisearch: