feat(knowledge): Phase B+C 文档提取器 + multipart 文件上传
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
- PDF 提取 (pdf-extract) + DOCX 提取 (zip+quick-xml) + Excel 解析 (calamine) - 统一格式路由 detect_format() → RAG 通道或结构化通道 - POST /api/v1/knowledge/upload multipart 文件上传 - PDF/DOCX/Markdown → RAG 管线,Excel → structured_rows JSONB - 结构化数据源 CRUD API (GET/DELETE /api/v1/structured/sources) - POST /api/v1/structured/query JSONB 关键词查询 - 修复 industry/service.rs SaasError::Database 类型不匹配
This commit is contained in:
264
Cargo.lock
generated
264
Cargo.lock
generated
@@ -17,6 +17,15 @@ version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
|
||||
|
||||
[[package]]
|
||||
name = "adobe-cmap-parser"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae8abfa9a4688de8fc9f42b3f013b6fffec18ed8a554f5f113577e0b9b3212a3"
|
||||
dependencies = [
|
||||
"pom 1.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aead"
|
||||
version = "0.5.2"
|
||||
@@ -381,6 +390,7 @@ dependencies = [
|
||||
"matchit",
|
||||
"memchr",
|
||||
"mime",
|
||||
"multer",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
@@ -621,6 +631,25 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bzip2"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47"
|
||||
dependencies = [
|
||||
"bzip2-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bzip2-sys"
|
||||
version = "0.1.13+1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cairo-rs"
|
||||
version = "0.18.5"
|
||||
@@ -646,6 +675,21 @@ dependencies = [
|
||||
"system-deps",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "calamine"
|
||||
version = "0.26.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "138646b9af2c5d7f1804ea4bf93afc597737d2bd4f7341d67c48b03316976eb1"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"codepage",
|
||||
"encoding_rs",
|
||||
"log",
|
||||
"quick-xml 0.31.0",
|
||||
"serde",
|
||||
"zip 2.4.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "camino"
|
||||
version = "1.2.2"
|
||||
@@ -779,6 +823,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423"
|
||||
dependencies = [
|
||||
"find-msvc-tools",
|
||||
"jobserver",
|
||||
"libc",
|
||||
"shlex",
|
||||
]
|
||||
|
||||
@@ -906,6 +952,15 @@ dependencies = [
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codepage"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48f68d061bc2828ae826206326e61251aca94c1e4a5305cf52d9138639c918b4"
|
||||
dependencies = [
|
||||
"encoding_rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "color_quant"
|
||||
version = "1.1.0"
|
||||
@@ -1458,6 +1513,12 @@ dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deflate64"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac6b926516df9c60bfa16e107b21086399f8285a44ca9711344b9e553c5146e2"
|
||||
|
||||
[[package]]
|
||||
name = "der"
|
||||
version = "0.7.10"
|
||||
@@ -1904,6 +1965,15 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "euclid"
|
||||
version = "0.20.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "event-listener"
|
||||
version = "2.5.3"
|
||||
@@ -2371,7 +2441,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1c422344482708cb32db843cf3f55f27918cd24fec7b505bde895a1e8702c34"
|
||||
dependencies = [
|
||||
"derive_more 0.99.20",
|
||||
"lopdf",
|
||||
"lopdf 0.26.0",
|
||||
"printpdf",
|
||||
"rusttype",
|
||||
]
|
||||
@@ -3289,6 +3359,16 @@ dependencies = [
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
|
||||
dependencies = [
|
||||
"getrandom 0.3.4",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jpeg-decoder"
|
||||
version = "0.3.2"
|
||||
@@ -3537,16 +3617,55 @@ dependencies = [
|
||||
"linked-hash-map",
|
||||
"log",
|
||||
"lzw",
|
||||
"pom",
|
||||
"pom 3.4.0",
|
||||
"time 0.2.27",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lopdf"
|
||||
version = "0.34.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c5c8ecfc6c72051981c0459f75ccc585e7ff67c70829560cda8e647882a9abff"
|
||||
dependencies = [
|
||||
"encoding_rs",
|
||||
"flate2",
|
||||
"indexmap 2.13.0",
|
||||
"itoa 1.0.18",
|
||||
"log",
|
||||
"md-5",
|
||||
"nom",
|
||||
"rangemap",
|
||||
"time 0.3.47",
|
||||
"weezl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lru-slab"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
|
||||
|
||||
[[package]]
|
||||
name = "lzma-rs"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"crc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lzma-sys"
|
||||
version = "0.1.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lzw"
|
||||
version = "0.10.0"
|
||||
@@ -4251,6 +4370,31 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
|
||||
|
||||
[[package]]
|
||||
name = "pbkdf2"
|
||||
version = "0.12.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2"
|
||||
dependencies = [
|
||||
"digest",
|
||||
"hmac",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pdf-extract"
|
||||
version = "0.7.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cbb3a5387b94b9053c1e69d8abfd4dd6dae7afda65a5c5279bc1f42ab39df575"
|
||||
dependencies = [
|
||||
"adobe-cmap-parser",
|
||||
"encoding_rs",
|
||||
"euclid",
|
||||
"lopdf 0.34.0",
|
||||
"postscript",
|
||||
"type1-encoding-parser",
|
||||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pem"
|
||||
version = "3.0.6"
|
||||
@@ -4628,6 +4772,12 @@ dependencies = [
|
||||
"universal-hash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pom"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6"
|
||||
|
||||
[[package]]
|
||||
name = "pom"
|
||||
version = "3.4.0"
|
||||
@@ -4649,6 +4799,12 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postscript"
|
||||
version = "0.14.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78451badbdaebaf17f053fd9152b3ffb33b516104eacb45e7864aaa9c712f306"
|
||||
|
||||
[[package]]
|
||||
name = "potential_utf"
|
||||
version = "0.1.4"
|
||||
@@ -4696,7 +4852,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a2472a184bcb128d0e3db65b59ebd11d010259a5e14fd9d048cba8f2c9302d4"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"lopdf",
|
||||
"lopdf 0.26.0",
|
||||
"rusttype",
|
||||
"time 0.2.27",
|
||||
]
|
||||
@@ -4810,6 +4966,25 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
|
||||
dependencies = [
|
||||
"encoding_rs",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.37.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.38.4"
|
||||
@@ -5005,6 +5180,12 @@ dependencies = [
|
||||
"rand_core 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rangemap"
|
||||
version = "1.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68"
|
||||
|
||||
[[package]]
|
||||
name = "raw-window-handle"
|
||||
version = "0.6.2"
|
||||
@@ -7531,6 +7712,15 @@ version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
||||
|
||||
[[package]]
|
||||
name = "type1-encoding-parser"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa10c302f5a53b7ad27fd42a3996e23d096ba39b5b8dd6d9e683a05b01bee749"
|
||||
dependencies = [
|
||||
"pom 1.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typeid"
|
||||
version = "1.0.3"
|
||||
@@ -9335,6 +9525,15 @@ dependencies = [
|
||||
"quick-xml 0.30.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xz2"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
|
||||
dependencies = [
|
||||
"lzma-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yoke"
|
||||
version = "0.8.1"
|
||||
@@ -9590,6 +9789,7 @@ dependencies = [
|
||||
"axum-extra",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"calamine",
|
||||
"chrono",
|
||||
"dashmap",
|
||||
"data-encoding",
|
||||
@@ -9597,7 +9797,9 @@ dependencies = [
|
||||
"genpdf",
|
||||
"hex",
|
||||
"jsonwebtoken",
|
||||
"pdf-extract",
|
||||
"pgvector",
|
||||
"quick-xml 0.37.5",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"reqwest 0.12.28",
|
||||
@@ -9623,6 +9825,7 @@ dependencies = [
|
||||
"urlencoding",
|
||||
"uuid",
|
||||
"zclaw-types",
|
||||
"zip 2.4.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -9700,6 +9903,20 @@ name = "zeroize"
|
||||
version = "1.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
|
||||
dependencies = [
|
||||
"zeroize_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zeroize_derive"
|
||||
version = "1.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerotrie"
|
||||
@@ -9740,15 +9957,28 @@ version = "2.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
|
||||
dependencies = [
|
||||
"aes",
|
||||
"arbitrary",
|
||||
"bzip2",
|
||||
"constant_time_eq",
|
||||
"crc32fast",
|
||||
"crossbeam-utils",
|
||||
"deflate64",
|
||||
"displaydoc",
|
||||
"flate2",
|
||||
"getrandom 0.3.4",
|
||||
"hmac",
|
||||
"indexmap 2.13.0",
|
||||
"lzma-rs",
|
||||
"memchr",
|
||||
"pbkdf2",
|
||||
"sha1 0.10.6",
|
||||
"thiserror 2.0.18",
|
||||
"time 0.3.47",
|
||||
"xz2",
|
||||
"zeroize",
|
||||
"zopfli",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -9781,6 +10011,34 @@ dependencies = [
|
||||
"simd-adler32",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.13.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "7.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
|
||||
dependencies = [
|
||||
"zstd-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "2.0.16+zstd.1.5.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zune-inflate"
|
||||
version = "0.2.54"
|
||||
|
||||
Reference in New Issue
Block a user