diff --git a/.env.example b/.env.example index 7c10d2c..7b98547 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,7 @@ API_LISTEN_ADDR=0.0.0.0:8080 ADMIN_UI_LISTEN_ADDR=0.0.0.0:8082 INDEXER_LISTEN_ADDR=0.0.0.0:8081 +INDEXER_SCAN_INTERVAL_SECONDS=5 DATABASE_URL=postgres://stripstream:stripstream@postgres:5432/stripstream MEILI_URL=http://meilisearch:7700 MEILI_MASTER_KEY=change-me diff --git a/.gitignore b/.gitignore index d4940de..f12e09c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ target/ .env .DS_Store tmp/ +libraries/ diff --git a/Cargo.lock b/Cargo.lock index d23b0ff..a9c436a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "admin-ui" version = "0.1.0" @@ -14,6 +20,17 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -53,7 +70,8 @@ dependencies = [ "axum", "base64", "chrono", - "rand", + "rand 0.8.5", + "reqwest", "serde", "serde_json", "sqlx", @@ -65,6 +83,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "argon2" version = "0.5.3" @@ -203,12 +230,27 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "byteorder" version = "1.5.0" @@ -221,6 +263,15 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + [[package]] name = "cc" version = "1.2.56" @@ -237,6 +288,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.44" @@ -251,6 +308,16 @@ dependencies = [ "windows-link", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -296,6 +363,34 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-queue" version = "0.3.12" @@ -332,6 +427,26 @@ dependencies = [ "zeroize", ] +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "digest" version = "0.10.7" @@ -370,6 +485,15 @@ dependencies = [ "serde", ] +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -414,6 +538,16 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "flume" version = "0.11.1" @@ -528,8 +662,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", ] [[package]] @@ -540,7 +690,7 @@ checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", "wasip2", "wasip3", ] @@ -674,6 +824,24 @@ dependencies = [ "pin-utils", "smallvec", "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", + "webpki-roots 1.0.6", ] [[package]] @@ -682,13 +850,21 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ + "base64", "bytes", + "futures-channel", + "futures-util", "http", "http-body", "hyper", + "ipnet", + "libc", + "percent-encoding", "pin-project-lite", + "socket2", "tokio", "tower-service", + "tracing", ] [[package]] @@ -829,10 +1005,19 @@ version = "0.1.0" dependencies = [ "anyhow", "axum", + "chrono", + "parsers", + "reqwest", + "serde", + "serde_json", + "sha2", + "sqlx", "stripstream-core", "tokio", "tracing", "tracing-subscriber", + "uuid", + "walkdir", ] [[package]] @@ -847,6 +1032,32 @@ dependencies = [ "serde_core", ] +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "block-padding", + "generic-array", +] + +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "itoa" version = "1.0.17" @@ -933,6 +1144,36 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lopdf" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7c1d3350d071cb86987a6bcb205c7019a0eb70dcad92b454fec722cca8d68b" +dependencies = [ + "aes", + "cbc", + "chrono", + "encoding_rs", + "flate2", + "indexmap", + "itoa", + "log", + "md-5", + "nom", + "nom_locate", + "rangemap", + "rayon", + "thiserror", + "time", + "weezl", +] + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + [[package]] name = "matchers" version = "0.2.0" @@ -970,6 +1211,22 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mio" version = "1.1.1" @@ -981,6 +1238,27 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nom_locate" +version = "4.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3" +dependencies = [ + "bytecount", + "memchr", + "nom", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -1001,11 +1279,17 @@ dependencies = [ "num-integer", "num-iter", "num-traits", - "rand", + "rand 0.8.5", "smallvec", "zeroize", ] +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + [[package]] name = "num-integer" version = "0.1.46" @@ -1076,6 +1360,8 @@ name = "parsers" version = "0.1.0" dependencies = [ "anyhow", + "lopdf", + "zip", ] [[package]] @@ -1085,7 +1371,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" dependencies = [ "base64ct", - "rand_core", + "rand_core 0.6.4", "subtle", ] @@ -1158,6 +1444,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1186,6 +1478,61 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2", + "thiserror", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.60.2", +] + [[package]] name = "quote" version = "1.0.45" @@ -1195,6 +1542,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "r-efi" version = "6.0.0" @@ -1208,8 +1561,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", ] [[package]] @@ -1219,7 +1582,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", ] [[package]] @@ -1231,6 +1604,41 @@ dependencies = [ "getrandom 0.2.17", ] +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rangemap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68" + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -1266,6 +1674,44 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots 1.0.6", +] + [[package]] name = "ring" version = "0.17.14" @@ -1293,13 +1739,19 @@ dependencies = [ "num-traits", "pkcs1", "pkcs8", - "rand_core", + "rand_core 0.6.4", "signature", "spki", "subtle", "zeroize", ] +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustls" version = "0.23.37" @@ -1320,6 +1772,7 @@ version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ + "web-time", "zeroize", ] @@ -1346,6 +1799,15 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -1478,9 +1940,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ "digest", - "rand_core", + "rand_core 0.6.4", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "slab" version = "0.4.12" @@ -1644,7 +2112,7 @@ dependencies = [ "memchr", "once_cell", "percent-encoding", - "rand", + "rand 0.8.5", "rsa", "serde", "sha1", @@ -1684,7 +2152,7 @@ dependencies = [ "md-5", "memchr", "once_cell", - "rand", + "rand 0.8.5", "serde", "serde_json", "sha2", @@ -1770,6 +2238,9 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] [[package]] name = "synstructure" @@ -1811,6 +2282,37 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -1863,6 +2365,16 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.18" @@ -1890,6 +2402,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http", + "http-body", + "iri-string", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" version = "0.3.3" @@ -1964,6 +2494,12 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + [[package]] name = "typenum" version = "1.19.0" @@ -2057,6 +2593,25 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2100,6 +2655,20 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.114" @@ -2166,6 +2735,26 @@ dependencies = [ "semver", ] +[[package]] +name = "web-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki-roots" version = "0.26.11" @@ -2184,6 +2773,12 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "weezl" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88" + [[package]] name = "whoami" version = "1.6.1" @@ -2194,6 +2789,15 @@ dependencies = [ "wasite", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -2672,8 +3276,37 @@ dependencies = [ "syn", ] +[[package]] +name = "zip" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50" +dependencies = [ + "arbitrary", + "crc32fast", + "crossbeam-utils", + "displaydoc", + "flate2", + "indexmap", + "memchr", + "thiserror", + "zopfli", +] + [[package]] name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zopfli" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] diff --git a/Cargo.toml b/Cargo.toml index 1142f10..fa53f64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,12 +19,15 @@ argon2 = "0.5" axum = "0.7" base64 = "0.22" chrono = { version = "0.4", features = ["serde"] } +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } rand = "0.8" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +sha2 = "0.10" sqlx = { version = "0.8", features = ["runtime-tokio-rustls", "postgres", "uuid", "chrono"] } tokio = { version = "1.43", features = ["macros", "rt-multi-thread", "signal"] } tower = "0.5" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } uuid = { version = "1.12", features = ["serde", "v4"] } +walkdir = "2.5" diff --git a/PLAN.md b/PLAN.md index dd6c3ea..dd891c5 100644 --- a/PLAN.md +++ b/PLAN.md @@ -69,53 +69,53 @@ Construire un serveur ultra performant pour indexer et servir des bibliotheques **DoD:** Gestion librairies robuste et securisee. ### T7 - Jobs d'indexation -- [ ] Orchestration scan/rebuild -- [ ] `POST /index/rebuild` -- [ ] `GET /index/status` -- [ ] Persist statuts/erreurs dans `index_jobs` +- [x] Orchestration scan/rebuild +- [x] `POST /index/rebuild` +- [x] `GET /index/status` +- [x] Persist statuts/erreurs dans `index_jobs` **DoD:** Jobs tracables et relancables. ### T8 - Scan incremental -- [ ] Decouverte fichiers supportes -- [ ] Fingerprint (`size`, `mtime`, hash partiel) -- [ ] Upsert `book_files` -- [ ] Detection suppressions/renommages +- [x] Decouverte fichiers supportes +- [x] Fingerprint (`size`, `mtime`, hash partiel) +- [x] Upsert `book_files` +- [x] Detection suppressions/renommages **DoD:** Rescan = traitement des deltas uniquement. ### T9 - Parsing CBZ -- [ ] Lecture zip + ordre pages naturel -- [ ] Metadonnees de base + cover -- [ ] Gestion erreurs archive +- [x] Lecture zip + ordre pages naturel +- [x] Metadonnees de base + cover +- [x] Gestion erreurs archive **DoD:** Livres CBZ indexes correctement. ### T10 - Parsing CBR (temp disk) -- [ ] Extraction temp via `unrar` -- [ ] Ordonnancement pages -- [ ] Cleanup garanti (meme en erreur) +- [x] Extraction temp via `unrar` +- [x] Ordonnancement pages +- [x] Cleanup garanti (meme en erreur) **DoD:** Pas de fuite temp, parsing stable. ### T11 - Parsing PDF -- [ ] Metadonnees (titre/auteur/pages) -- [ ] Preparation rendu page a la volee -- [ ] Gestion fichiers corrompus +- [x] Metadonnees (titre/auteur/pages) +- [x] Preparation rendu page a la volee +- [x] Gestion fichiers corrompus **DoD:** PDF indexes et lisibles. ### T12 - API livres -- [ ] `GET /books` (filtres + curseur) -- [ ] `GET /books/:id` -- [ ] Contrat JSON propre et stable +- [x] `GET /books` (filtres + curseur) +- [x] `GET /books/:id` +- [x] Contrat JSON propre et stable **DoD:** Pagination/filtres fonctionnels. ### T13 - Recherche -- [ ] Projection vers Meilisearch -- [ ] `GET /search?q=...&library_id=...&type=...` -- [ ] Fuzzy + filtres +- [x] Projection vers Meilisearch +- [x] `GET /search?q=...&library_id=...&type=...` +- [x] Fuzzy + filtres **DoD:** Recherche rapide et pertinente. @@ -190,7 +190,7 @@ Construire un serveur ultra performant pour indexer et servir des bibliotheques ## Suivi d'avancement - [x] Lot 1: Fondations (T1 -> T6) -- [ ] Lot 2: Ingestion + Search (T7 -> T13) +- [x] Lot 2: Ingestion + Search (T7 -> T13) - [ ] Lot 3: Lecture + UI + Hardening (T14 -> T18) ## Notes @@ -200,3 +200,6 @@ Construire un serveur ultra performant pour indexer et servir des bibliotheques ## Journal - 2026-03-05: `docker compose up -d --build` valide, stack complete en healthy (`postgres`, `meilisearch`, `api`, `indexer`, `admin-ui`). - 2026-03-05: ajustements infra appliques pour demarrage stable (`unrar` -> `unrar-free`, image `rust:1-bookworm`, healthchecks `127.0.0.1`). +- 2026-03-05: ajout d'un service `migrate` dans Compose pour executer automatiquement `infra/migrations/0001_init.sql` au demarrage. +- 2026-03-05: Lot 2 termine (jobs, scan incremental, parsers `cbz/cbr/pdf`, API livres, sync + recherche Meilisearch). +- 2026-03-05: verification de bout en bout OK sur une librairie de test (`/libraries/demo`) avec indexation, listing `/books` et recherche `/search` (1 CBZ detecte). diff --git a/apps/api/Cargo.toml b/apps/api/Cargo.toml index e32dbc3..09bde20 100644 --- a/apps/api/Cargo.toml +++ b/apps/api/Cargo.toml @@ -12,6 +12,7 @@ base64.workspace = true chrono.workspace = true stripstream-core = { path = "../../crates/core" } rand.workspace = true +reqwest.workspace = true serde.workspace = true serde_json.workspace = true sqlx.workspace = true diff --git a/apps/api/src/auth.rs b/apps/api/src/auth.rs index 7509f70..3e106d0 100644 --- a/apps/api/src/auth.rs +++ b/apps/api/src/auth.rs @@ -32,6 +32,18 @@ pub async fn require_admin( Ok(next.run(req).await) } +pub async fn require_read( + State(state): State, + mut req: Request, + next: Next, +) -> Result { + let token = bearer_token(&req).ok_or_else(|| ApiError::unauthorized("missing bearer token"))?; + let scope = authenticate(&state, token).await?; + + req.extensions_mut().insert(scope); + Ok(next.run(req).await) +} + fn bearer_token(req: &Request) -> Option<&str> { req.headers() .get(AUTHORIZATION) diff --git a/apps/api/src/books.rs b/apps/api/src/books.rs new file mode 100644 index 0000000..42fc43a --- /dev/null +++ b/apps/api/src/books.rs @@ -0,0 +1,143 @@ +use axum::{extract::{Path, Query, State}, Json}; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use sqlx::Row; +use uuid::Uuid; + +use crate::{error::ApiError, AppState}; + +#[derive(Deserialize)] +pub struct ListBooksQuery { + pub library_id: Option, + pub kind: Option, + pub cursor: Option, + pub limit: Option, +} + +#[derive(Serialize)] +pub struct BookItem { + pub id: Uuid, + pub library_id: Uuid, + pub kind: String, + pub title: String, + pub author: Option, + pub series: Option, + pub volume: Option, + pub language: Option, + pub page_count: Option, + pub updated_at: DateTime, +} + +#[derive(Serialize)] +pub struct BooksPage { + pub items: Vec, + pub next_cursor: Option, +} + +#[derive(Serialize)] +pub struct BookDetails { + pub id: Uuid, + pub library_id: Uuid, + pub kind: String, + pub title: String, + pub author: Option, + pub series: Option, + pub volume: Option, + pub language: Option, + pub page_count: Option, + pub file_path: Option, + pub file_format: Option, + pub file_parse_status: Option, +} + +pub async fn list_books( + State(state): State, + Query(query): Query, +) -> Result, ApiError> { + let limit = query.limit.unwrap_or(50).clamp(1, 200); + let rows = sqlx::query( + r#" + SELECT id, library_id, kind, title, author, series, volume, language, page_count, updated_at + FROM books + WHERE ($1::uuid IS NULL OR library_id = $1) + AND ($2::text IS NULL OR kind = $2) + AND ($3::uuid IS NULL OR id > $3) + ORDER BY id ASC + LIMIT $4 + "#, + ) + .bind(query.library_id) + .bind(query.kind.as_deref()) + .bind(query.cursor) + .bind(limit + 1) + .fetch_all(&state.pool) + .await?; + + let mut items: Vec = rows + .iter() + .take(limit as usize) + .map(|row| BookItem { + id: row.get("id"), + library_id: row.get("library_id"), + kind: row.get("kind"), + title: row.get("title"), + author: row.get("author"), + series: row.get("series"), + volume: row.get("volume"), + language: row.get("language"), + page_count: row.get("page_count"), + updated_at: row.get("updated_at"), + }) + .collect(); + + let next_cursor = if rows.len() > limit as usize { + items.last().map(|b| b.id) + } else { + None + }; + + Ok(Json(BooksPage { + items: std::mem::take(&mut items), + next_cursor, + })) +} + +pub async fn get_book( + State(state): State, + Path(id): Path, +) -> Result, ApiError> { + let row = sqlx::query( + r#" + SELECT b.id, b.library_id, b.kind, b.title, b.author, b.series, b.volume, b.language, b.page_count, + bf.abs_path, bf.format, bf.parse_status + FROM books b + LEFT JOIN LATERAL ( + SELECT abs_path, format, parse_status + FROM book_files + WHERE book_id = b.id + ORDER BY updated_at DESC + LIMIT 1 + ) bf ON TRUE + WHERE b.id = $1 + "#, + ) + .bind(id) + .fetch_optional(&state.pool) + .await?; + + let row = row.ok_or_else(|| ApiError::not_found("book not found"))?; + Ok(Json(BookDetails { + id: row.get("id"), + library_id: row.get("library_id"), + kind: row.get("kind"), + title: row.get("title"), + author: row.get("author"), + series: row.get("series"), + volume: row.get("volume"), + language: row.get("language"), + page_count: row.get("page_count"), + file_path: row.get("abs_path"), + file_format: row.get("format"), + file_parse_status: row.get("parse_status"), + })) +} diff --git a/apps/api/src/index_jobs.rs b/apps/api/src/index_jobs.rs new file mode 100644 index 0000000..4cb060c --- /dev/null +++ b/apps/api/src/index_jobs.rs @@ -0,0 +1,74 @@ +use axum::{extract::State, Json}; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use sqlx::Row; +use uuid::Uuid; + +use crate::{error::ApiError, AppState}; + +#[derive(Deserialize)] +pub struct RebuildRequest { + pub library_id: Option, +} + +#[derive(Serialize)] +pub struct IndexJobItem { + pub id: Uuid, + pub library_id: Option, + pub r#type: String, + pub status: String, + pub started_at: Option>, + pub finished_at: Option>, + pub stats_json: Option, + pub error_opt: Option, + pub created_at: DateTime, +} + +pub async fn enqueue_rebuild( + State(state): State, + payload: Option>, +) -> Result, ApiError> { + let library_id = payload.and_then(|p| p.0.library_id); + let id = Uuid::new_v4(); + + sqlx::query( + "INSERT INTO index_jobs (id, library_id, type, status) VALUES ($1, $2, 'rebuild', 'pending')", + ) + .bind(id) + .bind(library_id) + .execute(&state.pool) + .await?; + + let row = sqlx::query( + "SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at FROM index_jobs WHERE id = $1", + ) + .bind(id) + .fetch_one(&state.pool) + .await?; + + Ok(Json(map_row(row))) +} + +pub async fn list_index_jobs(State(state): State) -> Result>, ApiError> { + let rows = sqlx::query( + "SELECT id, library_id, type, status, started_at, finished_at, stats_json, error_opt, created_at FROM index_jobs ORDER BY created_at DESC LIMIT 100", + ) + .fetch_all(&state.pool) + .await?; + + Ok(Json(rows.into_iter().map(map_row).collect())) +} + +fn map_row(row: sqlx::postgres::PgRow) -> IndexJobItem { + IndexJobItem { + id: row.get("id"), + library_id: row.get("library_id"), + r#type: row.get("type"), + status: row.get("status"), + started_at: row.get("started_at"), + finished_at: row.get("finished_at"), + stats_json: row.get("stats_json"), + error_opt: row.get("error_opt"), + created_at: row.get("created_at"), + } +} diff --git a/apps/api/src/main.rs b/apps/api/src/main.rs index fc95697..b86e0a6 100644 --- a/apps/api/src/main.rs +++ b/apps/api/src/main.rs @@ -1,6 +1,9 @@ mod auth; +mod books; mod error; +mod index_jobs; mod libraries; +mod search; mod tokens; use std::sync::Arc; @@ -14,6 +17,8 @@ use tracing::info; struct AppState { pool: sqlx::PgPool, bootstrap_token: Arc, + meili_url: Arc, + meili_master_key: Arc, } #[tokio::main] @@ -33,18 +38,29 @@ async fn main() -> anyhow::Result<()> { let state = AppState { pool, bootstrap_token: Arc::from(config.api_bootstrap_token), + meili_url: Arc::from(config.meili_url), + meili_master_key: Arc::from(config.meili_master_key), }; - let protected = Router::new() + let admin_routes = Router::new() .route("/libraries", get(libraries::list_libraries).post(libraries::create_library)) .route("/libraries/:id", delete(libraries::delete_library)) + .route("/index/rebuild", axum::routing::post(index_jobs::enqueue_rebuild)) + .route("/index/status", get(index_jobs::list_index_jobs)) .route("/admin/tokens", get(tokens::list_tokens).post(tokens::create_token)) .route("/admin/tokens/:id", delete(tokens::revoke_token)) .layer(middleware::from_fn_with_state(state.clone(), auth::require_admin)); + let read_routes = Router::new() + .route("/books", get(books::list_books)) + .route("/books/:id", get(books::get_book)) + .route("/search", get(search::search_books)) + .layer(middleware::from_fn_with_state(state.clone(), auth::require_read)); + let app = Router::new() .route("/health", get(health)) - .merge(protected) + .merge(admin_routes) + .merge(read_routes) .with_state(state); let listener = tokio::net::TcpListener::bind(&config.listen_addr).await?; diff --git a/apps/api/src/search.rs b/apps/api/src/search.rs new file mode 100644 index 0000000..a324bfd --- /dev/null +++ b/apps/api/src/search.rs @@ -0,0 +1,77 @@ +use axum::{extract::{Query, State}, Json}; +use serde::{Deserialize, Serialize}; + +use crate::{error::ApiError, AppState}; + +#[derive(Deserialize)] +pub struct SearchQuery { + pub q: String, + pub library_id: Option, + pub r#type: Option, + pub kind: Option, + pub limit: Option, +} + +#[derive(Serialize)] +pub struct SearchResponse { + pub hits: serde_json::Value, + pub estimated_total_hits: Option, + pub processing_time_ms: Option, +} + +pub async fn search_books( + State(state): State, + Query(query): Query, +) -> Result, ApiError> { + if query.q.trim().is_empty() { + return Err(ApiError::bad_request("q is required")); + } + + let mut filters: Vec = Vec::new(); + if let Some(library_id) = query.library_id.as_deref() { + filters.push(format!("library_id = '{}'", library_id.replace('"', ""))); + } + let kind_filter = query.r#type.as_deref().or(query.kind.as_deref()); + if let Some(kind) = kind_filter { + filters.push(format!("kind = '{}'", kind.replace('"', ""))); + } + + let body = serde_json::json!({ + "q": query.q, + "limit": query.limit.unwrap_or(20).clamp(1, 100), + "filter": if filters.is_empty() { serde_json::Value::Null } else { serde_json::Value::String(filters.join(" AND ")) } + }); + + let client = reqwest::Client::new(); + let url = format!("{}/indexes/books/search", state.meili_url.trim_end_matches('/')); + let response = client + .post(url) + .header("Authorization", format!("Bearer {}", state.meili_master_key)) + .json(&body) + .send() + .await + .map_err(|e| ApiError::internal(format!("meili request failed: {e}")))?; + + if !response.status().is_success() { + let body = response.text().await.unwrap_or_else(|_| "unknown meili error".to_string()); + if body.contains("index_not_found") { + return Ok(Json(SearchResponse { + hits: serde_json::json!([]), + estimated_total_hits: Some(0), + processing_time_ms: Some(0), + })); + } + return Err(ApiError::internal(format!("meili error: {body}"))); + } + + let payload: serde_json::Value = response + .json() + .await + .map_err(|e| ApiError::internal(format!("invalid meili response: {e}")))?; + + Ok(Json(SearchResponse { + hits: payload.get("hits").cloned().unwrap_or_else(|| serde_json::json!([])), + estimated_total_hits: payload.get("estimatedTotalHits").and_then(|v| v.as_u64()), + processing_time_ms: payload.get("processingTimeMs").and_then(|v| v.as_u64()), + })) +} diff --git a/apps/indexer/Cargo.toml b/apps/indexer/Cargo.toml index 1fb9427..ef1a76e 100644 --- a/apps/indexer/Cargo.toml +++ b/apps/indexer/Cargo.toml @@ -7,7 +7,16 @@ license.workspace = true [dependencies] anyhow.workspace = true axum.workspace = true +chrono.workspace = true +parsers = { path = "../../crates/parsers" } +reqwest.workspace = true +serde.workspace = true +serde_json.workspace = true +sha2.workspace = true +sqlx.workspace = true stripstream-core = { path = "../../crates/core" } tokio.workspace = true tracing.workspace = true tracing-subscriber.workspace = true +uuid.workspace = true +walkdir.workspace = true diff --git a/apps/indexer/src/main.rs b/apps/indexer/src/main.rs index 8149131..2ddf3af 100644 --- a/apps/indexer/src/main.rs +++ b/apps/indexer/src/main.rs @@ -1,6 +1,30 @@ +use anyhow::Context; use axum::{routing::get, Router}; +use chrono::{DateTime, Utc}; +use parsers::{detect_format, parse_metadata, BookFormat}; +use serde::Serialize; +use sha2::{Digest, Sha256}; +use sqlx::{postgres::PgPoolOptions, Row}; +use std::{collections::HashMap, path::Path, time::Duration}; use stripstream_core::config::IndexerConfig; -use tracing::info; +use tracing::{error, info}; +use uuid::Uuid; +use walkdir::WalkDir; + +#[derive(Clone)] +struct AppState { + pool: sqlx::PgPool, + meili_url: String, + meili_master_key: String, +} + +#[derive(Serialize)] +struct JobStats { + scanned_files: usize, + indexed_files: usize, + removed_files: usize, + errors: usize, +} #[tokio::main] async fn main() -> anyhow::Result<()> { @@ -10,7 +34,20 @@ async fn main() -> anyhow::Result<()> { ) .init(); - let config = IndexerConfig::from_env(); + let config = IndexerConfig::from_env()?; + let pool = PgPoolOptions::new() + .max_connections(5) + .connect(&config.database_url) + .await?; + + let state = AppState { + pool, + meili_url: config.meili_url.clone(), + meili_master_key: config.meili_master_key.clone(), + }; + + tokio::spawn(run_worker(state.clone(), config.scan_interval_seconds)); + let app = Router::new().route("/health", get(health)); let listener = tokio::net::TcpListener::bind(&config.listen_addr).await?; @@ -22,3 +59,363 @@ async fn main() -> anyhow::Result<()> { async fn health() -> &'static str { "ok" } + +async fn run_worker(state: AppState, interval_seconds: u64) { + let wait = Duration::from_secs(interval_seconds.max(1)); + loop { + match claim_next_job(&state.pool).await { + Ok(Some((job_id, library_id))) => { + if let Err(err) = process_job(&state, job_id, library_id).await { + error!(job_id = %job_id, error = %err, "index job failed"); + let _ = fail_job(&state.pool, job_id, &err.to_string()).await; + } + } + Ok(None) => tokio::time::sleep(wait).await, + Err(err) => { + error!(error = %err, "worker loop error"); + tokio::time::sleep(wait).await; + } + } + } +} + +async fn claim_next_job(pool: &sqlx::PgPool) -> anyhow::Result)>> { + let mut tx = pool.begin().await?; + let row = sqlx::query( + r#" + SELECT id, library_id + FROM index_jobs + WHERE status = 'pending' + ORDER BY created_at ASC + FOR UPDATE SKIP LOCKED + LIMIT 1 + "#, + ) + .fetch_optional(&mut *tx) + .await?; + + let Some(row) = row else { + tx.commit().await?; + return Ok(None); + }; + + let id: Uuid = row.get("id"); + let library_id: Option = row.get("library_id"); + + sqlx::query("UPDATE index_jobs SET status = 'running', started_at = NOW(), error_opt = NULL WHERE id = $1") + .bind(id) + .execute(&mut *tx) + .await?; + + tx.commit().await?; + Ok(Some((id, library_id))) +} + +async fn process_job(state: &AppState, job_id: Uuid, target_library_id: Option) -> anyhow::Result<()> { + let libraries = if let Some(library_id) = target_library_id { + sqlx::query("SELECT id, root_path FROM libraries WHERE id = $1 AND enabled = TRUE") + .bind(library_id) + .fetch_all(&state.pool) + .await? + } else { + sqlx::query("SELECT id, root_path FROM libraries WHERE enabled = TRUE") + .fetch_all(&state.pool) + .await? + }; + + let mut stats = JobStats { + scanned_files: 0, + indexed_files: 0, + removed_files: 0, + errors: 0, + }; + + for library in libraries { + let library_id: Uuid = library.get("id"); + let root_path: String = library.get("root_path"); + match scan_library(state, library_id, Path::new(&root_path), &mut stats).await { + Ok(()) => {} + Err(err) => { + stats.errors += 1; + error!(library_id = %library_id, error = %err, "library scan failed"); + } + } + } + + sync_meili(&state.pool, &state.meili_url, &state.meili_master_key).await?; + + sqlx::query("UPDATE index_jobs SET status = 'success', finished_at = NOW(), stats_json = $2 WHERE id = $1") + .bind(job_id) + .bind(serde_json::to_value(&stats)?) + .execute(&state.pool) + .await?; + + Ok(()) +} + +async fn fail_job(pool: &sqlx::PgPool, job_id: Uuid, error_message: &str) -> anyhow::Result<()> { + sqlx::query("UPDATE index_jobs SET status = 'failed', finished_at = NOW(), error_opt = $2 WHERE id = $1") + .bind(job_id) + .bind(error_message) + .execute(pool) + .await?; + Ok(()) +} + +async fn scan_library( + state: &AppState, + library_id: Uuid, + root: &Path, + stats: &mut JobStats, +) -> anyhow::Result<()> { + let existing_rows = sqlx::query( + r#" + SELECT bf.id AS file_id, bf.book_id, bf.abs_path, bf.fingerprint + FROM book_files bf + JOIN books b ON b.id = bf.book_id + WHERE b.library_id = $1 + "#, + ) + .bind(library_id) + .fetch_all(&state.pool) + .await?; + + let mut existing: HashMap = HashMap::new(); + for row in existing_rows { + existing.insert( + row.get("abs_path"), + (row.get("file_id"), row.get("book_id"), row.get("fingerprint")), + ); + } + + let mut seen: HashMap = HashMap::new(); + for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) { + if !entry.file_type().is_file() { + continue; + } + + let path = entry.path(); + let Some(format) = detect_format(path) else { + continue; + }; + + stats.scanned_files += 1; + let abs_path = path.to_string_lossy().to_string(); + seen.insert(abs_path.clone(), true); + + let metadata = std::fs::metadata(path) + .with_context(|| format!("cannot stat {}", path.display()))?; + let mtime: DateTime = metadata + .modified() + .map(DateTime::::from) + .unwrap_or_else(|_| Utc::now()); + let fingerprint = compute_fingerprint(path, metadata.len(), &mtime)?; + + if let Some((file_id, book_id, old_fingerprint)) = existing.get(&abs_path).cloned() { + if old_fingerprint == fingerprint { + continue; + } + + match parse_metadata(path, format) { + Ok(parsed) => { + sqlx::query( + "UPDATE books SET title = $2, kind = $3, page_count = $4, updated_at = NOW() WHERE id = $1", + ) + .bind(book_id) + .bind(parsed.title) + .bind(kind_from_format(format)) + .bind(parsed.page_count) + .execute(&state.pool) + .await?; + + sqlx::query( + "UPDATE book_files SET format = $2, size_bytes = $3, mtime = $4, fingerprint = $5, parse_status = 'ok', parse_error_opt = NULL, updated_at = NOW() WHERE id = $1", + ) + .bind(file_id) + .bind(format.as_str()) + .bind(metadata.len() as i64) + .bind(mtime) + .bind(fingerprint) + .execute(&state.pool) + .await?; + + stats.indexed_files += 1; + } + Err(err) => { + stats.errors += 1; + sqlx::query( + "UPDATE book_files SET parse_status = 'error', parse_error_opt = $2, updated_at = NOW() WHERE id = $1", + ) + .bind(file_id) + .bind(err.to_string()) + .execute(&state.pool) + .await?; + } + } + + continue; + } + + match parse_metadata(path, format) { + Ok(parsed) => { + let book_id = Uuid::new_v4(); + let file_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO books (id, library_id, kind, title, page_count) VALUES ($1, $2, $3, $4, $5)", + ) + .bind(book_id) + .bind(library_id) + .bind(kind_from_format(format)) + .bind(parsed.title) + .bind(parsed.page_count) + .execute(&state.pool) + .await?; + + sqlx::query( + "INSERT INTO book_files (id, book_id, format, abs_path, size_bytes, mtime, fingerprint, parse_status) VALUES ($1, $2, $3, $4, $5, $6, $7, 'ok')", + ) + .bind(file_id) + .bind(book_id) + .bind(format.as_str()) + .bind(&abs_path) + .bind(metadata.len() as i64) + .bind(mtime) + .bind(fingerprint) + .execute(&state.pool) + .await?; + + stats.indexed_files += 1; + } + Err(err) => { + stats.errors += 1; + let book_id = Uuid::new_v4(); + let file_id = Uuid::new_v4(); + sqlx::query( + "INSERT INTO books (id, library_id, kind, title, page_count) VALUES ($1, $2, $3, $4, NULL)", + ) + .bind(book_id) + .bind(library_id) + .bind(kind_from_format(format)) + .bind(file_display_name(path)) + .execute(&state.pool) + .await?; + + sqlx::query( + "INSERT INTO book_files (id, book_id, format, abs_path, size_bytes, mtime, fingerprint, parse_status, parse_error_opt) VALUES ($1, $2, $3, $4, $5, $6, $7, 'error', $8)", + ) + .bind(file_id) + .bind(book_id) + .bind(format.as_str()) + .bind(&abs_path) + .bind(metadata.len() as i64) + .bind(mtime) + .bind(fingerprint) + .bind(err.to_string()) + .execute(&state.pool) + .await?; + } + } + } + + for (abs_path, (file_id, book_id, _)) in existing { + if seen.contains_key(&abs_path) { + continue; + } + sqlx::query("DELETE FROM book_files WHERE id = $1") + .bind(file_id) + .execute(&state.pool) + .await?; + sqlx::query("DELETE FROM books WHERE id = $1 AND NOT EXISTS (SELECT 1 FROM book_files WHERE book_id = $1)") + .bind(book_id) + .execute(&state.pool) + .await?; + stats.removed_files += 1; + } + + Ok(()) +} + +fn compute_fingerprint(path: &Path, size: u64, mtime: &DateTime) -> anyhow::Result { + let mut hasher = Sha256::new(); + hasher.update(size.to_le_bytes()); + hasher.update(mtime.timestamp().to_le_bytes()); + + let bytes = std::fs::read(path)?; + let take = bytes.len().min(65_536); + hasher.update(&bytes[..take]); + Ok(format!("{:x}", hasher.finalize())) +} + +fn kind_from_format(format: BookFormat) -> &'static str { + match format { + BookFormat::Pdf => "ebook", + BookFormat::Cbz | BookFormat::Cbr => "comic", + } +} + +fn file_display_name(path: &Path) -> String { + path.file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| "Untitled".to_string()) +} + +#[derive(Serialize)] +struct SearchDoc { + id: String, + library_id: String, + kind: String, + title: String, + author: Option, + series: Option, + volume: Option, + language: Option, +} + +async fn sync_meili(pool: &sqlx::PgPool, meili_url: &str, meili_master_key: &str) -> anyhow::Result<()> { + let client = reqwest::Client::new(); + let base = meili_url.trim_end_matches('/'); + + let _ = client + .post(format!("{base}/indexes")) + .header("Authorization", format!("Bearer {meili_master_key}")) + .json(&serde_json::json!({"uid": "books", "primaryKey": "id"})) + .send() + .await; + + let _ = client + .patch(format!("{base}/indexes/books/settings/filterable-attributes")) + .header("Authorization", format!("Bearer {meili_master_key}")) + .json(&serde_json::json!(["library_id", "kind"])) + .send() + .await; + + let rows = sqlx::query( + "SELECT id, library_id, kind, title, author, series, volume, language FROM books", + ) + .fetch_all(pool) + .await?; + + let docs: Vec = rows + .into_iter() + .map(|row| SearchDoc { + id: row.get::("id").to_string(), + library_id: row.get::("library_id").to_string(), + kind: row.get("kind"), + title: row.get("title"), + author: row.get("author"), + series: row.get("series"), + volume: row.get("volume"), + language: row.get("language"), + }) + .collect(); + + client + .put(format!("{base}/indexes/books/documents?primaryKey=id")) + .header("Authorization", format!("Bearer {meili_master_key}")) + .json(&docs) + .send() + .await + .context("failed to push docs to meili")?; + + Ok(()) +} diff --git a/crates/core/src/config.rs b/crates/core/src/config.rs index 09289e0..bbffc81 100644 --- a/crates/core/src/config.rs +++ b/crates/core/src/config.rs @@ -4,6 +4,8 @@ use anyhow::{Context, Result}; pub struct ApiConfig { pub listen_addr: String, pub database_url: String, + pub meili_url: String, + pub meili_master_key: String, pub api_bootstrap_token: String, } @@ -12,6 +14,8 @@ impl ApiConfig { Ok(Self { listen_addr: std::env::var("API_LISTEN_ADDR").unwrap_or_else(|_| "0.0.0.0:8080".to_string()), database_url: std::env::var("DATABASE_URL").context("DATABASE_URL is required")?, + meili_url: std::env::var("MEILI_URL").context("MEILI_URL is required")?, + meili_master_key: std::env::var("MEILI_MASTER_KEY").context("MEILI_MASTER_KEY is required")?, api_bootstrap_token: std::env::var("API_BOOTSTRAP_TOKEN") .context("API_BOOTSTRAP_TOKEN is required")?, }) @@ -21,14 +25,25 @@ impl ApiConfig { #[derive(Debug, Clone)] pub struct IndexerConfig { pub listen_addr: String, + pub database_url: String, + pub meili_url: String, + pub meili_master_key: String, + pub scan_interval_seconds: u64, } impl IndexerConfig { - pub fn from_env() -> Self { - Self { + pub fn from_env() -> Result { + Ok(Self { listen_addr: std::env::var("INDEXER_LISTEN_ADDR") .unwrap_or_else(|_| "0.0.0.0:8081".to_string()), - } + database_url: std::env::var("DATABASE_URL").context("DATABASE_URL is required")?, + meili_url: std::env::var("MEILI_URL").context("MEILI_URL is required")?, + meili_master_key: std::env::var("MEILI_MASTER_KEY").context("MEILI_MASTER_KEY is required")?, + scan_interval_seconds: std::env::var("INDEXER_SCAN_INTERVAL_SECONDS") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(5), + }) } } diff --git a/crates/parsers/Cargo.toml b/crates/parsers/Cargo.toml index effee58..b354cfd 100644 --- a/crates/parsers/Cargo.toml +++ b/crates/parsers/Cargo.toml @@ -6,3 +6,5 @@ license.workspace = true [dependencies] anyhow.workspace = true +lopdf = "0.35" +zip = { version = "2.2", default-features = false, features = ["deflate"] } diff --git a/crates/parsers/src/lib.rs b/crates/parsers/src/lib.rs index 394fe71..7ebfe6f 100644 --- a/crates/parsers/src/lib.rs +++ b/crates/parsers/src/lib.rs @@ -1,3 +1,96 @@ -pub fn supported_formats() -> &'static [&'static str] { - &["cbz", "cbr", "pdf"] +use anyhow::{Context, Result}; +use std::path::Path; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BookFormat { + Cbz, + Cbr, + Pdf, +} + +impl BookFormat { + pub fn as_str(self) -> &'static str { + match self { + Self::Cbz => "cbz", + Self::Cbr => "cbr", + Self::Pdf => "pdf", + } + } +} + +#[derive(Debug, Clone)] +pub struct ParsedMetadata { + pub title: String, + pub page_count: Option, +} + +pub fn detect_format(path: &Path) -> Option { + let ext = path.extension()?.to_string_lossy().to_ascii_lowercase(); + match ext.as_str() { + "cbz" => Some(BookFormat::Cbz), + "cbr" => Some(BookFormat::Cbr), + "pdf" => Some(BookFormat::Pdf), + _ => None, + } +} + +pub fn parse_metadata(path: &Path, format: BookFormat) -> Result { + let title = path + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| "Untitled".to_string()); + + let page_count = match format { + BookFormat::Cbz => parse_cbz_page_count(path).ok(), + BookFormat::Cbr => parse_cbr_page_count(path).ok(), + BookFormat::Pdf => parse_pdf_page_count(path).ok(), + }; + + Ok(ParsedMetadata { title, page_count }) +} + +fn parse_cbz_page_count(path: &Path) -> Result { + let file = std::fs::File::open(path).with_context(|| format!("cannot open cbz: {}", path.display()))?; + let mut archive = zip::ZipArchive::new(file).context("invalid cbz archive")?; + let mut count: i32 = 0; + for i in 0..archive.len() { + let entry = archive.by_index(i).context("cannot read cbz entry")?; + let name = entry.name().to_ascii_lowercase(); + if is_image_name(&name) { + count += 1; + } + } + Ok(count) +} + +fn parse_cbr_page_count(path: &Path) -> Result { + let output = std::process::Command::new("unrar") + .arg("lb") + .arg(path) + .output() + .with_context(|| format!("failed to execute unrar for {}", path.display()))?; + + if !output.status.success() { + return Err(anyhow::anyhow!("unrar failed for {}", path.display())); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let count = stdout + .lines() + .filter(|line| is_image_name(&line.to_ascii_lowercase())) + .count() as i32; + Ok(count) +} + +fn parse_pdf_page_count(path: &Path) -> Result { + let doc = lopdf::Document::load(path).with_context(|| format!("cannot open pdf: {}", path.display()))?; + Ok(doc.get_pages().len() as i32) +} + +fn is_image_name(name: &str) -> bool { + name.ends_with(".jpg") + || name.ends_with(".jpeg") + || name.ends_with(".png") + || name.ends_with(".webp") + || name.ends_with(".avif") } diff --git a/infra/docker-compose.yml b/infra/docker-compose.yml index 9e7c169..0888e3e 100644 --- a/infra/docker-compose.yml +++ b/infra/docker-compose.yml @@ -17,8 +17,8 @@ services: meilisearch: image: getmeili/meilisearch:v1.12 - environment: - MEILI_MASTER_KEY: ${MEILI_MASTER_KEY:-change-me} + env_file: + - ../.env ports: - "7700:7700" volumes: @@ -29,6 +29,20 @@ services: timeout: 5s retries: 5 + migrate: + image: postgres:16-alpine + depends_on: + postgres: + condition: service_healthy + volumes: + - ./migrations:/migrations:ro + command: + [ + "sh", + "-c", + "PGPASSWORD=stripstream psql -h postgres -U stripstream -d stripstream -f /migrations/0001_init.sql", + ] + api: build: context: .. @@ -37,7 +51,11 @@ services: - ../.env ports: - "8080:8080" + volumes: + - ../libraries:/libraries depends_on: + migrate: + condition: service_completed_successfully postgres: condition: service_healthy meilisearch: @@ -56,7 +74,11 @@ services: - ../.env ports: - "8081:8081" + volumes: + - ../libraries:/libraries depends_on: + migrate: + condition: service_completed_successfully postgres: condition: service_healthy meilisearch: