diff --git a/Cargo.lock b/Cargo.lock index 4318682..7d7cbbb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,32 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "adobe-cmap-parser" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8abfa9a4688de8fc9f42b3f013b6fffec18ed8a554f5f113577e0b9b3212a3" +dependencies = [ + "pom", +] + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -130,30 +156,81 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "bytemuck" version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + [[package]] name = "cc" version = "1.2.56" @@ -164,6 +241,23 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfb" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" +dependencies = [ + "byteorder", + "fnv", + "uuid", +] + +[[package]] +name = "cff-parser" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31f5b6e9141c036f3ff4ce7b2f7e432b0f00dee416ddcd4f17741d189ddc2e9d" + [[package]] name = "cfg-if" version = "1.0.4" @@ -190,6 +284,22 @@ dependencies = [ "windows-link", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + [[package]] name = "colored" version = "3.1.1" @@ -225,6 +335,24 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "croner" version = "2.2.0" @@ -234,6 +362,47 @@ dependencies = [ "chrono", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "csv" version = "1.4.0" @@ -355,6 +524,16 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -366,6 +545,21 @@ dependencies = [ "syn", ] +[[package]] +name = "docx-rs" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70395eb132dcc1761533e62c54878a9deb2b637863ecb52e9c5f66148616398e" +dependencies = [ + "base64", + "image", + "serde", + "serde_json", + "thiserror", + "xml-rs", + "zip", +] + [[package]] name = "dptree" version = "0.5.1" @@ -382,6 +576,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "ecb" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a8bfa975b1aec2145850fcaa1c6fe269a16578c44705a532ae3edc92b8881c7" +dependencies = [ + "cipher", +] + [[package]] name = "either" version = "1.15.0" @@ -423,6 +626,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "euclid" +version = "0.20.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad" +dependencies = [ + "num-traits", +] + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -441,12 +653,61 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "fax" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05de7d48f37cd6730705cbca900770cab77a89f413d23e100ad7fad7795a0ab" +dependencies = [ + "fax_derive", +] + +[[package]] +name = "fax_derive" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0aca10fb742cb43f9e7bb8467c91aa9bcb8e3ffbc6a6f7389bb93ffc920577d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "flatbuffers" +version = "24.12.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -571,6 +832,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -582,6 +853,18 @@ dependencies = [ "wasi", ] +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + [[package]] name = "getrandom" version = "0.4.1" @@ -595,6 +878,16 @@ dependencies = [ "wasip3", ] +[[package]] +name = "gif" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5df2ba84018d80c213569363bdcd0c64e6933c67fe4c1d60ecf822971a3c35e" +dependencies = [ + "color_quant", + "weezl", +] + [[package]] name = "h2" version = "0.4.13" @@ -614,6 +907,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -650,6 +954,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -919,6 +1229,35 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "image" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104" +dependencies = [ + "bytemuck", + "byteorder-lite", + "color_quant", + "gif", + "image-webp", + "moxcms", + "num-traits", + "png", + "tiff", + "zune-core", + "zune-jpeg", +] + +[[package]] +name = "image-webp" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3" +dependencies = [ + "byteorder-lite", + "quick-error", +] + [[package]] name = "include_dir" version = "0.7.4" @@ -961,6 +1300,25 @@ dependencies = [ "serde_core", ] +[[package]] +name = "infer" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7" +dependencies = [ + "cfb", +] + +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "block-padding", + "generic-array", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -1082,6 +1440,34 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lopdf" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7184fdea2bc3cd272a1acec4030c321a8f9875e877b3f92a53f2f6033fdc289" +dependencies = [ + "aes", + "bitflags 2.11.0", + "cbc", + "ecb", + "encoding_rs", + "flate2", + "getrandom 0.3.4", + "indexmap 2.13.0", + "itoa", + "log", + "md-5", + "nom", + "nom_locate", + "rand", + "rangemap", + "sha2", + "stringprep", + "thiserror", + "ttf-parser", + "weezl", +] + [[package]] name = "matchers" version = "0.2.0" @@ -1097,6 +1483,16 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.8.0" @@ -1119,6 +1515,16 @@ dependencies = [ "unicase", ] +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mio" version = "1.1.1" @@ -1130,6 +1536,16 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "moxcms" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b" +dependencies = [ + "num-traits", + "pxfm", +] + [[package]] name = "native-tls" version = "0.2.16" @@ -1153,12 +1569,32 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66" dependencies = [ - "bitflags", + "bitflags 2.11.0", "cfg-if", "cfg_aliases", "libc", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + +[[package]] +name = "nom_locate" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b577e2d69827c4740cba2b52efaad1c4cc7c73042860b199710b3575c68438d" +dependencies = [ + "bytecount", + "memchr", + "nom", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -1194,6 +1630,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "object" version = "0.37.3" @@ -1203,6 +1649,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "ocrs" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb8d042b655f2b5cacec1539d55e36589c0459036541dce20072e9c6123bab2a" +dependencies = [ + "anyhow", + "rayon", + "rten", + "rten-imageproc", + "rten-tensor", + "thiserror", + "wasm-bindgen", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -1215,7 +1676,7 @@ version = "0.10.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ - "bitflags", + "bitflags 2.11.0", "cfg-if", "foreign-types", "libc", @@ -1282,6 +1743,23 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b867cad97c0791bbd3aaa6472142568c6c9e8f71937e98379f584cfb0cf35bec" +[[package]] +name = "pdf-extract" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28ba1758a3d3f361459645780e09570b573fc3c82637449e9963174c813a98" +dependencies = [ + "adobe-cmap-parser", + "cff-parser", + "encoding_rs", + "euclid", + "log", + "lopdf", + "postscript", + "type1-encoding-parser", + "unicode-normalization", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1326,6 +1804,25 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "png" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" +dependencies = [ + "bitflags 2.11.0", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + +[[package]] +name = "pom" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6" + [[package]] name = "portable-atomic" version = "1.13.1" @@ -1341,6 +1838,12 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "postscript" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78451badbdaebaf17f053fd9152b3ffb33b516104eacb45e7864aaa9c712f306" + [[package]] name = "potential_utf" version = "0.1.4" @@ -1356,6 +1859,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -1420,6 +1932,18 @@ dependencies = [ "cc", ] +[[package]] +name = "pxfm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a041e753da8b807c9255f28de81879c78c876392ff2469cde94799b2896b9d" + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + [[package]] name = "quote" version = "1.0.44" @@ -1435,6 +1959,61 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rangemap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68" + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rc-box" version = "1.3.0" @@ -1450,7 +2029,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags", + "bitflags 2.11.0", ] [[package]] @@ -1594,13 +2173,120 @@ dependencies = [ "syn", ] +[[package]] +name = "rten" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43c230fa4ade87c913f61dbd911b7eb0d49460ceff3f1e4fabc837fac191137c" +dependencies = [ + "flatbuffers", + "num_cpus", + "rayon", + "rten-base", + "rten-gemm", + "rten-model-file", + "rten-onnx", + "rten-shape-inference", + "rten-simd", + "rten-tensor", + "rten-vecmath", + "rustc-hash", + "smallvec", + "typeid", + "wasm-bindgen", +] + +[[package]] +name = "rten-base" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2738cf8bb4c27f828ac788d01ccf4e367e8e773cfec6851f81851b5211de6a79" +dependencies = [ + "rayon", +] + +[[package]] +name = "rten-gemm" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "330a81a0ca209fb5ce21bd17efa0bd287d5881c6cebfbff0b21c4294a1a14a9e" +dependencies = [ + "rayon", + "rten-base", + "rten-simd", + "rten-tensor", +] + +[[package]] +name = "rten-imageproc" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f148e7e941fb5727b9046a5fa1b45525543d5105f14b384fd9261df0ee49bc" +dependencies = [ + "rten-tensor", +] + +[[package]] +name = "rten-model-file" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2f8d270f07ab1bbfff47250c6039f6caa5da59d6da7d74f66aa48559aa6fea" +dependencies = [ + "flatbuffers", + "rten-base", +] + +[[package]] +name = "rten-onnx" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23086eef75bfb55278cb0b45cf9f5a877d466d914914aafebee4ffca9b24d20c" + +[[package]] +name = "rten-shape-inference" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e8a913c7ca40e2bfbb2a0cd447cce56b33ab19435f56693271a2ef37cf58984" +dependencies = [ + "rten-tensor", + "smallvec", +] + +[[package]] +name = "rten-simd" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b19a0032dfcb70dd20960c1c51a37674b237586cbc1ce586f45b46605d108e82" + +[[package]] +name = "rten-tensor" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05dc744a270aa32d154f1a3df8e48740ccc1be9dfbcf23295ada66d83aa98de6" +dependencies = [ + "rayon", + "rten-base", + "smallvec", + "typeid", +] + +[[package]] +name = "rten-vecmath" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9574ddebf5671bc08ceb76e2e1638fadc57fdeff318634eab2c29e9a803cff64" +dependencies = [ + "rten-base", + "rten-simd", +] + [[package]] name = "rusqlite" version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37e34486da88d8e051c7c0e23c3f15fd806ea8546260aa2fec247e97242ec143" dependencies = [ - "bitflags", + "bitflags 2.11.0", "chrono", "csv", "fallible-iterator", @@ -1615,6 +2301,21 @@ dependencies = [ "uuid", ] +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustfox" version = "0.1.0" @@ -1622,11 +2323,18 @@ dependencies = [ "anyhow", "async-trait", "axum", + "base64", "chrono", + "docx-rs", "futures", "futures-util", + "image", + "infer", + "ocrs", + "pdf-extract", "reqwest", "rmcp", + "rten", "rusqlite", "serde", "serde_json", @@ -1647,7 +2355,7 @@ version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ - "bitflags", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys", @@ -1758,7 +2466,7 @@ version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" dependencies = [ - "bitflags", + "bitflags 2.11.0", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -1898,6 +2606,17 @@ dependencies = [ "syn", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1923,6 +2642,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "slab" version = "0.4.12" @@ -1973,6 +2698,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + [[package]] name = "strsim" version = "0.11.1" @@ -2022,7 +2758,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags", + "bitflags 2.11.0", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -2081,7 +2817,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f7a34ca8e971fa892e633858c07547fe138ef4a02e4a4eaa1d35e517d6e0bc4" dependencies = [ - "bitflags", + "bitflags 2.11.0", "bytes", "chrono", "derive_more", @@ -2161,6 +2897,20 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tiff" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63feaf3343d35b6ca4d50483f94843803b0f51634937cc2ec519fc32232bc52" +dependencies = [ + "fax", + "flate2", + "half", + "quick-error", + "weezl", + "zune-jpeg", +] + [[package]] name = "time" version = "0.3.47" @@ -2202,6 +2952,21 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.49.0" @@ -2352,7 +3117,7 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags", + "bitflags 2.11.0", "bytes", "futures-util", "http", @@ -2444,18 +3209,66 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "ttf-parser" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2df906b07856748fa3f6e0ad0cbaa047052d4a7dd609e231c4f72cee8c36f31" + +[[package]] +name = "type1-encoding-parser" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa10c302f5a53b7ad27fd42a3996e23d096ba39b5b8dd6d9e683a05b01bee749" +dependencies = [ + "pom", +] + +[[package]] +name = "typeid" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + [[package]] name = "unicase" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + [[package]] name = "unicode-ident" version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -2510,6 +3323,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "want" version = "0.3.1" @@ -2643,7 +3462,7 @@ version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "bitflags", + "bitflags 2.11.0", "hashbrown 0.15.5", "indexmap 2.13.0", "semver", @@ -2659,6 +3478,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "weezl" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88" + [[package]] name = "windows" version = "0.62.2" @@ -3012,7 +3837,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", - "bitflags", + "bitflags 2.11.0", "indexmap 2.13.0", "log", "serde", @@ -3048,6 +3873,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "xml-rs" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae8337f8a065cfc972643663ea4279e04e7256de865aa66fe25cec5fb912d3f" + [[package]] name = "yoke" version = "0.8.1" @@ -3071,6 +3902,26 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.8.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zerofrom" version = "0.1.6" @@ -3131,8 +3982,35 @@ dependencies = [ "syn", ] +[[package]] +name = "zip" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" +dependencies = [ + "byteorder", + "crc32fast", + "crossbeam-utils", + "flate2", +] + [[package]] name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zune-core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" + +[[package]] +name = "zune-jpeg" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7a1c0af6e5d8d1363f4994b7a091ccf963d8b694f7da5b0b9cceb82da2c0a6" +dependencies = [ + "zune-core", +] diff --git a/Cargo.toml b/Cargo.toml index 20044dc..01b131b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,5 +55,22 @@ sqlite-vec = "0.1" # Setup wizard web server (used only by src/bin/setup.rs) axum = "0.8" +# OCR (pure Rust, neural-network based) +ocrs = "0.12" +rten = { version = "0.24", features = ["rten_format"] } + +# Image loading/processing +image = { version = "0.25", default-features = false, features = ["jpeg", "png", "gif", "webp"] } + +# Document processing +pdf-extract = "0.10" +docx-rs = "0.4" + +# MIME type detection +infer = "0.19" + +# Base64 for vision API content parts +base64 = "0.22" + [dev-dependencies] tempfile = "3" diff --git a/config.example.toml b/config.example.toml index fd25e26..06a2b30 100644 --- a/config.example.toml +++ b/config.example.toml @@ -16,6 +16,7 @@ model = "moonshotai/kimi-k2.5" base_url = "https://openrouter.ai/api/v1" # Maximum tokens in response max_tokens = 4096 +# supports_vision = false # Set to true if your model supports image inputs # System prompt for the AI assistant system_prompt = """You are a helpful AI assistant with access to tools. \ Use the available tools to help the user with their tasks. \ @@ -63,6 +64,12 @@ directory = "skills" # model = "qwen/qwen3-embedding-8b" # dimensions = 1536 +# OCR configuration (optional) +# Used to extract text from images when supports_vision = false. +# Models are downloaded automatically on first use. +# [ocr] +# model_dir = "~/.cache/ocrs" # Where OCR model files are cached (downloaded on first use) + # MCP Server Configurations # Each [[mcp_servers]] block defines an MCP server to connect to # The bot will discover and register tools from each server diff --git a/docs/plans/2026-03-25-telegram-file-image-support.md b/docs/plans/2026-03-25-telegram-file-image-support.md new file mode 100644 index 0000000..122b76e --- /dev/null +++ b/docs/plans/2026-03-25-telegram-file-image-support.md @@ -0,0 +1,654 @@ +# Telegram File & Image Support Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Handle Telegram photos and file attachments (PDF, DOCX, images), routing them through a vision/OCR/document extraction pipeline before injecting context into the LLM. + +**Architecture:** Five-layer change — (1) `Cargo.toml` deps, (2) platform data model (Attachment), (3) multi-modal LLM messages, (4) new `file_processor` module (image/PDF/DOCX → text/content), (5) telegram handler + agent integration. OCR uses `ocrs` (pure Rust, neural-network-based). Long documents (>6000 chars) are chunked and stored via the existing `knowledge` store + `sqlite-vec` vector DB, then RAG-retrieved per user query. + +**Tech Stack:** Rust 2021, Tokio, teloxide 0.17, ocrs 0.12 (OCR), rten 0.24 (model runtime), image 0.25 (image loading), pdf-extract 0.10 (PDF), docx-rs 0.4 (DOCX), infer 0.19 (MIME detection), base64 0.22 (vision encoding) + +--- + +## Reading List + +Read before touching any code: + +- `src/platform/mod.rs` — IncomingMessage struct (will add `attachments`) +- `src/llm.rs` lines 1–18 — ChatMessage struct (will change `content` type) +- `src/config.rs` lines 44–55 — OpenRouterConfig (will add `supports_vision`) +- `src/platform/telegram.rs` lines 81–100 — handle_message fn (will add photo/doc handling) +- `src/agent.rs` lines 125–215 — process_message (will add attachment processing) +- `src/memory/knowledge.rs` lines 19–78 — `remember` and `search_knowledge` (reused for long-doc RAG) + +--- + +## Task 1: Add Dependencies + +**Files:** +- Modify: `Cargo.toml` + +Add under `[dependencies]`: +```toml +# OCR (pure Rust, neural-network based) +ocrs = "0.12" +rten = { version = "0.24", features = ["rten_format"] } + +# Image loading/processing +image = { version = "0.25", default-features = false, features = ["jpeg", "png", "gif", "webp"] } + +# Document processing +pdf-extract = "0.10" +docx-rs = "0.4" + +# MIME type detection +infer = "0.19" + +# Base64 for vision API content parts +base64 = "0.22" +``` + +**Step 1:** Edit `Cargo.toml` + +**Step 2:** Run `cargo check` to verify deps resolve + +**Step 3:** Commit: `feat: add file processing dependencies` + +--- + +## Task 2: Platform Data Model — Attachment + +**Files:** +- Modify: `src/platform/mod.rs` + +Add `AttachmentKind` enum, `Attachment` struct, and `attachments` field to `IncomingMessage`: + +```rust +/// What kind of attachment was received +#[derive(Debug, Clone, PartialEq)] +pub enum AttachmentKind { + Image, + Pdf, + Docx, + Other, +} + +/// A file attachment received from a platform +#[derive(Debug, Clone)] +pub struct Attachment { + pub kind: AttachmentKind, + /// Absolute path to the downloaded temp file + pub path: std::path::PathBuf, + pub mime_type: String, + /// Original filename, if known + pub file_name: Option, +} + +/// A message received from any platform +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct IncomingMessage { + pub platform: String, + pub user_id: String, + pub chat_id: String, + pub user_name: String, + pub text: String, + /// Attached files, if any + #[serde(default)] + pub attachments: Vec, +} +``` + +**Step 1:** Edit `src/platform/mod.rs` + +**Step 2:** Fix any existing `IncomingMessage { ... }` construction sites that now need `attachments: vec![]` (check `src/agent.rs` and `src/platform/telegram.rs`). Grep: `IncomingMessage {` + +**Step 3:** Run `cargo check` + +**Step 4:** Commit: `feat: add Attachment type to IncomingMessage` + +--- + +## Task 3: Multi-Modal ChatMessage + +**Files:** +- Modify: `src/llm.rs` + +Change `ChatMessage.content` from `Option` to `MessageContent` which can be a plain string (for tool result messages) or a vec of content parts (for vision messages). Keep backward-compatible serialization. + +```rust +/// A single part in a multi-modal message +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ContentPart { + Text { text: String }, + ImageUrl { image_url: ImageUrlContent }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImageUrlContent { + /// "data:image/jpeg;base64,..." or a URL + pub url: String, +} + +/// Either a plain text string or a list of content parts (multi-modal) +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum MessageContent { + Text(String), + Parts(Vec), +} + +impl MessageContent { + /// Extract all text from the content (for logging, RAG, etc.) + pub fn as_text(&self) -> String { + match self { + Self::Text(s) => s.clone(), + Self::Parts(parts) => parts + .iter() + .filter_map(|p| if let ContentPart::Text { text } = p { Some(text.as_str()) } else { None }) + .collect::>() + .join(" "), + } + } + pub fn from_text(s: impl Into) -> Self { + Self::Text(s.into()) + } +} + +pub struct ChatMessage { + pub role: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option, + ... +} +``` + +**Note on backward compat:** All places that currently do `content: Some("...".to_string())` must change to `content: Some(MessageContent::from_text("..."))`. Places that read `.content` as string need `.content.as_ref().map(|c| c.as_text())` or `.content.as_deref()` (removed in favour of as_text). + +**Step 1:** Edit `src/llm.rs` — add types above `ChatMessage`, update `ChatMessage.content` + +**Step 2:** Update all construction/access sites in `llm.rs` and `agent.rs` (search: `.content.as_deref()`, `content: Some(`) + +**Step 3:** Update `src/memory/conversations.rs` if it constructs `ChatMessage` directly (grep: `ChatMessage {`) + +**Step 4:** Update `src/platform/telegram.rs` `IncomingMessage` construction (not ChatMessage, just ensure it compiles) + +**Step 5:** Run `cargo check` — fix all type errors + +**Step 6:** Run `cargo test` + +**Step 7:** Commit: `feat: multi-modal ChatMessage content type` + +--- + +## Task 4: Config — Vision Support + OCR Model Dir + +**Files:** +- Modify: `src/config.rs` +- Modify: `config.example.toml` + +Add to `OpenRouterConfig`: +```rust +#[serde(default)] +pub supports_vision: bool, +``` + +Add `OcrConfig`: +```rust +#[derive(Debug, Deserialize, Clone)] +pub struct OcrConfig { + /// Directory to cache OCR model files (downloaded on first use) + #[serde(default = "default_ocr_model_dir")] + pub model_dir: PathBuf, +} + +fn default_ocr_model_dir() -> PathBuf { + dirs_next::cache_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join("ocrs") +} +``` + +Add to `Config`: +```rust +#[serde(default = "default_ocr_config")] +pub ocr: OcrConfig, +``` + +Note: `dirs_next` is not a dependency — use `std::env::var("HOME")` fallback instead: +```rust +fn default_ocr_model_dir() -> PathBuf { + std::env::var("HOME") + .map(PathBuf::from) + .unwrap_or_else(|_| PathBuf::from(".")) + .join(".cache/ocrs") +} +``` + +**Step 1:** Edit `src/config.rs` +**Step 2:** Edit `config.example.toml` — document new fields +**Step 3:** Run `cargo check` +**Step 4:** Commit: `feat: add vision support and OCR config` + +--- + +## Task 5: File Processor Module + +**Files:** +- Create: `src/file_processor/mod.rs` +- Modify: `src/main.rs` (add `mod file_processor;`) + +This is the core new module. It exposes: +- `process_attachments(attachments, user_query, config, memory) -> ProcessedAttachments` +- `ProcessedAttachments { text_context: String, image_parts: Vec }` + +### Sub-task 5a: Image processing (vision or OCR) + +```rust +/// Returns a ContentPart::ImageUrl if vision-capable model, or extracted text via OCR. +pub async fn process_image_attachment( + path: &Path, + mime_type: &str, + supports_vision: bool, + ocr_model_dir: &Path, +) -> Result { + if supports_vision { + let bytes = std::fs::read(path)?; + let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes); + let data_url = format!("data:{};base64,{}", mime_type, encoded); + Ok(ImageResult::VisionPart(ContentPart::ImageUrl { + image_url: ImageUrlContent { url: data_url } + })) + } else { + let text = ocr_image(path, ocr_model_dir).await?; + Ok(ImageResult::OcrText(text)) + } +} +``` + +OCR using `ocrs`: +```rust +async fn ocr_image(path: &Path, model_dir: &Path) -> Result { + let det_path = model_dir.join("text-detection.rten"); + let rec_path = model_dir.join("text-recognition.rten"); + + // Download models if not cached + ensure_ocr_models(model_dir).await?; + + let detection_model = rten::Model::load_file(&det_path)?; + let recognition_model = rten::Model::load_file(&rec_path)?; + + let engine = ocrs::OcrEngine::new(ocrs::OcrEngineParams { + detection_model: Some(detection_model), + recognition_model: Some(recognition_model), + ..Default::default() + })?; + + let img = image::open(path)?.into_rgb8(); + let img_source = ocrs::ImageSource::from_bytes(img.as_raw(), img.dimensions())?; + let ocr_input = engine.prepare_input(img_source)?; + let text = engine.get_text(&ocr_input)?; + Ok(text) +} + +async fn ensure_ocr_models(model_dir: &Path) -> Result<()> { + std::fs::create_dir_all(model_dir)?; + let det = model_dir.join("text-detection.rten"); + let rec = model_dir.join("text-recognition.rten"); + + const DET_URL: &str = "https://ocrs-models.s3.us-east-1.amazonaws.com/text-detection.rten"; + const REC_URL: &str = "https://ocrs-models.s3.us-east-1.amazonaws.com/text-recognition.rten"; + + if !det.exists() { + download_file(DET_URL, &det).await?; + } + if !rec.exists() { + download_file(REC_URL, &rec).await?; + } + Ok(()) +} +``` + +### Sub-task 5b: PDF processing + +```rust +pub fn extract_pdf_text(path: &Path) -> Result { + let bytes = std::fs::read(path)?; + let text = pdf_extract::extract_text_from_mem(&bytes) + .unwrap_or_default(); + Ok(text) +} +``` + +Note: `pdf-extract` does not expose easy image extraction API. We extract text only from PDFs for now. + +### Sub-task 5c: DOCX processing + +```rust +pub fn extract_docx_text(path: &Path) -> Result { + let bytes = std::fs::read(path)?; + let docx = docx_rs::read_docx(&bytes)?; + let mut text = String::new(); + for child in docx.document.children { + if let docx_rs::DocumentChild::Paragraph(para) = child { + for run in para.children { + if let docx_rs::ParagraphChild::Run(run) = run { + for rc in run.children { + if let docx_rs::RunChild::Text(t) = rc { + text.push_str(&t.text); + } + } + text.push('\n'); + } + } + } + } + Ok(text) +} +``` + +### Sub-task 5d: Long-context chunking + +```rust +const LONG_CONTEXT_THRESHOLD: usize = 6000; +const CHUNK_SIZE: usize = 1000; +const CHUNK_OVERLAP: usize = 100; + +/// If text is long, store as knowledge chunks and RAG-retrieve relevant ones. +/// Returns a context block appropriate for injection. +pub async fn handle_long_context( + text: &str, + filename: &str, + query: &str, + memory: &MemoryStore, +) -> Result { + if text.chars().count() <= LONG_CONTEXT_THRESHOLD { + return Ok(format!("[File: {}]\n{}", filename, text)); + } + + // Chunk and store + let chunks = chunk_text(text, CHUNK_SIZE, CHUNK_OVERLAP); + for (i, chunk) in chunks.iter().enumerate() { + let key = format!("{}::chunk_{}", filename, i); + memory.remember("document_chunk", &key, chunk, Some(filename)).await?; + } + + // RAG-retrieve relevant chunks + let results = memory.search_knowledge(query, 5).await?; + let context = results.iter() + .map(|e| e.value.as_str()) + .collect::>() + .join("\n\n---\n\n"); + + Ok(format!("[File: {} — relevant sections]\n{}", filename, context)) +} +``` + +**Step 1:** Create `src/file_processor/mod.rs` with all the above + +**Step 2:** Add `mod file_processor;` to `src/main.rs` + +**Step 3:** Run `cargo check` — iterate on type errors + +**Step 4:** Run `cargo test` + +**Step 5:** Commit: `feat: file processor module (image OCR/vision, PDF, DOCX)` + +--- + +## Task 6: Telegram Handler — Download Photos & Documents + +**Files:** +- Modify: `src/platform/telegram.rs` +- Modify: `src/platform/mod.rs` (already done in Task 2) + +In `handle_message`, before the text-only early return, add handling for photo and document: + +```rust +async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseResult<()> { + let user = match msg.from.as_ref() { ... }; + + // Determine text content (may be empty if message is photo/doc only) + let text = msg.text() + .or_else(|| msg.caption()) // use caption for media messages + .unwrap_or("") + .to_string(); + + // Collect attachments + let mut attachments = Vec::new(); + let temp_dir = std::env::temp_dir().join(format!("rustfox_{}", uuid::Uuid::new_v4())); + std::fs::create_dir_all(&temp_dir).ok(); + + // Handle photo + if let Some(photos) = msg.photo() { + if let Some(largest) = photos.last() { + match download_telegram_file(&bot, &largest.file.id, &temp_dir, None).await { + Ok((path, mime)) => attachments.push(Attachment { + kind: AttachmentKind::Image, + path, + mime_type: mime, + file_name: None, + }), + Err(e) => warn!("Failed to download photo: {}", e), + } + } + } + + // Handle document + if let Some(doc) = msg.document() { + let file_name = doc.file_name.clone(); + let kind = classify_document_kind(&doc.mime_type, &file_name); + match download_telegram_file(&bot, &doc.file.id, &temp_dir, file_name.as_deref()).await { + Ok((path, mime)) => attachments.push(Attachment { + kind, + path, + mime_type: mime, + file_name, + }), + Err(e) => warn!("Failed to download document: {}", e), + } + } + + // Skip if nothing to process + if text.is_empty() && attachments.is_empty() { + return Ok(()); + } + + // ... existing command handling and streaming setup ... + + let incoming = IncomingMessage { + platform: "telegram".to_string(), + user_id: user_id.to_string(), + chat_id: msg.chat.id.0.to_string(), + user_name, + text, + attachments, + }; + + // Cleanup temp dir after processing + let process_result = agent.process_message(&incoming, tool_event_tx, Some(stream_token_tx)).await; + std::fs::remove_dir_all(&temp_dir).ok(); + + ... +} + +/// Download a Telegram file to temp_dir. Returns (path, mime_type). +async fn download_telegram_file( + bot: &Bot, + file_id: &str, + temp_dir: &Path, + filename: Option<&str>, +) -> Result<(PathBuf, String)> { + use teloxide::net::Download; + + let file = bot.get_file(file_id).await.context("get_file failed")?; + let ext = Path::new(&file.path).extension() + .and_then(|e| e.to_str()) + .unwrap_or("bin"); + let dest_name = filename.map(String::from) + .unwrap_or_else(|| format!("{}.{}", uuid::Uuid::new_v4(), ext)); + let dest = temp_dir.join(&dest_name); + + let mut bytes: Vec = Vec::new(); + bot.download_file(&file.path, &mut bytes).await.context("download_file failed")?; + std::fs::write(&dest, &bytes)?; + + // Detect MIME + let mime = infer::get(&bytes) + .map(|t| t.mime_type().to_string()) + .unwrap_or_else(|| mime_from_ext(ext)); + + Ok((dest, mime)) +} + +fn classify_document_kind(mime: &Option, filename: &Option) -> AttachmentKind { + let mime_str = mime.as_deref().unwrap_or(""); + let name_str = filename.as_deref().unwrap_or(""); + if mime_str.starts_with("image/") { return AttachmentKind::Image; } + if mime_str == "application/pdf" || name_str.ends_with(".pdf") { return AttachmentKind::Pdf; } + if mime_str.contains("wordprocessingml") || name_str.ends_with(".docx") { return AttachmentKind::Docx; } + AttachmentKind::Other +} + +fn mime_from_ext(ext: &str) -> String { + match ext { + "jpg" | "jpeg" => "image/jpeg", + "png" => "image/png", + "gif" => "image/gif", + "pdf" => "application/pdf", + "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + _ => "application/octet-stream", + }.to_string() +} +``` + +**Step 1:** Edit `src/platform/telegram.rs` + +**Step 2:** Run `cargo check` + +**Step 3:** Run `cargo test` + +**Step 4:** Commit: `feat: telegram handler downloads photos and documents` + +--- + +## Task 7: Agent — Process Attachments + +**Files:** +- Modify: `src/agent.rs` + +In `process_message()`, after building the `user_msg`, check for attachments and process them: + +```rust +// Process attachments into text context and/or vision content parts +let (attachment_text, image_parts) = if !incoming.attachments.is_empty() { + crate::file_processor::process_attachments( + &incoming.attachments, + &incoming.text, + &self.config, + &self.memory, + ).await +} else { + (String::new(), vec![]) +}; + +// Build user message: text + attachment context + optional image parts +let user_msg_content = if image_parts.is_empty() { + // Text-only: combine user text + any extracted document text + let mut combined = incoming.text.clone(); + if !attachment_text.is_empty() { + combined.push_str("\n\n"); + combined.push_str(&attachment_text); + } + MessageContent::from_text(combined) +} else { + // Multi-modal: text part + image parts + let mut parts = Vec::new(); + let mut text_content = incoming.text.clone(); + if !attachment_text.is_empty() { + text_content.push_str("\n\n"); + text_content.push_str(&attachment_text); + } + if !text_content.is_empty() { + parts.push(ContentPart::Text { text: text_content }); + } + parts.extend(image_parts); + MessageContent::Parts(parts) +}; + +let user_msg = ChatMessage { + role: "user".to_string(), + content: Some(user_msg_content), + tool_calls: None, + tool_call_id: None, +}; +``` + +Also update the RAG retrieval to use `incoming.text` as the query (unchanged), and the message saved to DB: save with text-only content (strip image parts for DB storage to avoid bloat): + +```rust +// Save text-only version to DB (don't store base64 image data in message history) +let db_msg = ChatMessage { + role: "user".to_string(), + content: Some(MessageContent::from_text({ + let mut t = incoming.text.clone(); + if !attachment_text.is_empty() { + t.push_str("\n\n[Attachment processed]"); + } + t + })), + tool_calls: None, + tool_call_id: None, +}; +self.memory.save_message(&conversation_id, &db_msg).await?; +messages.push(user_msg); // push the full message (with images) to in-memory context only +``` + +**Step 1:** Edit `src/agent.rs` + +**Step 2:** Run `cargo check` + +**Step 3:** Run `cargo test` + +**Step 4:** Commit: `feat: agent processes file attachments` + +--- + +## Task 8: Final Wiring and Tests + +**Step 1:** Run `cargo clippy -- -D warnings` and fix all warnings + +**Step 2:** Run `cargo test` + +**Step 3:** Add unit tests for: +- `classify_document_kind()` in `telegram.rs` +- `chunk_text()` in `file_processor/mod.rs` +- `MessageContent` serialization (text stays as string, parts serialize correctly) + +**Step 4:** Commit: `test: add unit tests for file attachment pipeline` + +--- + +## Notes on OCR Model Download + +`ocrs` requires two `.rten` model files. On first OCR use: +1. If `~/.cache/ocrs/text-detection.rten` doesn't exist → download from S3 +2. Same for `text-recognition.rten` + +This is done by `ensure_ocr_models()` in the file_processor. The download uses `reqwest` (already a dependency). Models are ~100MB total; download is one-time. + +If the bot is deployed without internet access, operators should pre-download models and point `[ocr] model_dir` to their location in config.toml. + +## config.example.toml additions + +```toml +[openrouter] +# ... existing fields ... +# Set to true if your model supports vision (image inputs) +# supports_vision = false + +[ocr] +# Directory where OCR model files are cached (downloaded on first use) +# model_dir = "~/.cache/ocrs" +``` diff --git a/src/agent.rs b/src/agent.rs index c2f209e..0b8e2ec 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -6,7 +6,7 @@ use teloxide::Bot; use crate::config::Config; use crate::langsmith::LangSmithClient; -use crate::llm::{ChatMessage, FunctionDefinition, LlmClient, ToolDefinition}; +use crate::llm::{ChatMessage, ContentPart, FunctionDefinition, LlmClient, MessageContent, ToolDefinition}; use crate::mcp::McpManager; use crate::memory::MemoryStore; use crate::platform::IncomingMessage; @@ -152,7 +152,7 @@ impl Agent { if messages.is_empty() { let system_msg = ChatMessage { role: "system".to_string(), - content: Some(current_system_prompt), + content: Some(MessageContent::from_text(current_system_prompt)), tool_calls: None, tool_call_id: None, }; @@ -165,7 +165,7 @@ impl Agent { // on the very next message without restarting the bot. // Find the system message by role (defensive: don't assume messages[0] is system). if let Some(system_msg) = messages.iter_mut().find(|m| m.role == "system") { - system_msg.content = Some(current_system_prompt); + system_msg.content = Some(MessageContent::from_text(current_system_prompt)); } } @@ -191,24 +191,78 @@ impl Agent { .await { if let Some(system_msg) = messages.iter_mut().find(|m| m.role == "system") { - if let Some(ref mut content) = system_msg.content { - content.push_str("\n\n"); - content.push_str(&rag_block); + if let Some(MessageContent::Text(ref mut s)) = system_msg.content { + s.push_str("\n\n"); + s.push_str(&rag_block); } } } } - // Add user message - let user_msg = ChatMessage { + // Process attachments (images → vision parts or OCR text; PDFs/DOCXs → extracted text) + let (attachment_text, image_parts) = if !incoming.attachments.is_empty() { + crate::file_processor::process_attachments( + &incoming.attachments, + &incoming.text, + &self.config, + &self.memory, + ) + .await + } else { + (String::new(), vec![]) + }; + + // Build user message content + let user_msg_content = if image_parts.is_empty() { + // Text-only path: combine user text with any extracted document text + let mut combined = incoming.text.clone(); + if !attachment_text.is_empty() { + combined.push_str("\n\n"); + combined.push_str(&attachment_text); + } + MessageContent::from_text(combined) + } else { + // Multi-modal path: text part + image content parts + let mut parts: Vec = Vec::new(); + let mut text_content = incoming.text.clone(); + if !attachment_text.is_empty() { + text_content.push_str("\n\n"); + text_content.push_str(&attachment_text); + } + if !text_content.is_empty() { + parts.push(ContentPart::Text { text: text_content }); + } + parts.extend(image_parts); + MessageContent::Parts(parts) + }; + + // Save a text-only version to DB (avoid storing base64 image data in message history) + let db_content = if incoming.attachments.is_empty() { + user_msg_content.clone() + } else { + let mut db_text = incoming.text.clone(); + if !attachment_text.is_empty() { + db_text.push_str("\n\n[Attachment processed]"); + } + MessageContent::from_text(db_text) + }; + let db_msg = ChatMessage { role: "user".to_string(), - content: Some(incoming.text.clone()), + content: Some(db_content), tool_calls: None, tool_call_id: None, }; self.memory - .save_message(&conversation_id, &user_msg) + .save_message(&conversation_id, &db_msg) .await?; + + // Push the full message (with image parts if any) to in-memory context + let user_msg = ChatMessage { + role: "user".to_string(), + content: Some(user_msg_content), + tool_calls: None, + tool_call_id: None, + }; messages.push(user_msg); // Gather all tool definitions @@ -375,7 +429,7 @@ impl Agent { let tool_msg = ChatMessage { role: "tool".to_string(), - content: Some(tool_result), + content: Some(MessageContent::from_text(tool_result)), tool_calls: None, tool_call_id: Some(tool_call.id.clone()), }; @@ -391,7 +445,7 @@ impl Agent { } // Final response — no tool calls - let content = response.content.clone().unwrap_or_default(); + let content = response.content.as_ref().map(|c| c.as_text()).unwrap_or_default(); if content.is_empty() { warn!( @@ -490,6 +544,7 @@ impl Agent { chat_id: cid, user_name: String::new(), text: prompt, + attachments: vec![], }; let req = ScheduledJobRequest { incoming, @@ -1014,13 +1069,13 @@ impl Agent { let mut messages = vec![ ChatMessage { role: "system".to_string(), - content: Some(system_content), + content: Some(MessageContent::from_text(system_content)), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "user".to_string(), - content: Some(prompt.to_string()), + content: Some(MessageContent::from_text(prompt)), tool_calls: None, tool_call_id: None, }, @@ -1081,7 +1136,7 @@ impl Agent { messages.push(ChatMessage { role: "tool".to_string(), - content: Some(result), + content: Some(MessageContent::from_text(result)), tool_calls: None, tool_call_id: Some(tool_call.id.clone()), }); @@ -1092,7 +1147,7 @@ impl Agent { } // Final response — no tool calls - return response.content.unwrap_or_default(); + return response.content.map(|c| c.as_text()).unwrap_or_default(); } format!( @@ -1138,7 +1193,7 @@ impl Agent { if let Ok(msgs) = self.memory.search_messages(query, limit).await { for msg in msgs { if let Some(content) = &msg.content { - results.push(format!("[{}]: {}", msg.role, content)); + results.push(format!("[{}]: {}", msg.role, content.as_text())); } } } @@ -1247,6 +1302,7 @@ impl Agent { chat_id: cid, user_name: String::new(), text: prompt, + attachments: vec![], }; let req = ScheduledJobRequest { incoming, diff --git a/src/config.rs b/src/config.rs index 20bef84..aafa49b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -22,6 +22,8 @@ pub struct Config { pub embedding: Option, #[serde(default)] pub langsmith: Option, + #[serde(default = "default_ocr_config")] + pub ocr: OcrConfig, } #[derive(Debug, Deserialize, Clone)] @@ -52,6 +54,19 @@ pub struct OpenRouterConfig { pub max_tokens: u32, #[serde(default = "default_system_prompt")] pub system_prompt: String, + /// Whether the configured model supports vision (image inputs). + /// When true, images are sent as base64-encoded content parts. + /// When false, OCR is used to extract text from images. + #[serde(default)] + pub supports_vision: bool, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct OcrConfig { + /// Directory where OCR model files are cached. + /// Models are downloaded automatically on first OCR use. + #[serde(default = "default_ocr_model_dir")] + pub model_dir: std::path::PathBuf, } #[derive(Debug, Deserialize, Clone)] @@ -244,6 +259,19 @@ fn default_langsmith_base_url() -> String { "https://api.smith.langchain.com".to_string() } +fn default_ocr_model_dir() -> std::path::PathBuf { + std::env::var("HOME") + .map(std::path::PathBuf::from) + .unwrap_or_else(|_| std::path::PathBuf::from(".")) + .join(".cache/ocrs") +} + +fn default_ocr_config() -> OcrConfig { + OcrConfig { + model_dir: default_ocr_model_dir(), + } +} + impl Config { /// Location string from [general], injected into the system prompt. pub fn user_location(&self) -> Option<&str> { @@ -331,4 +359,50 @@ mod tests { let ls = cfg.langsmith.unwrap(); assert_eq!(ls.project, "default"); } + + #[test] + fn test_supports_vision_defaults_false() { + let toml = r#" + [telegram] + bot_token = "tok" + allowed_user_ids = [1] + [openrouter] + api_key = "key" + [sandbox] + allowed_directory = "/tmp" + "#; + let cfg: Config = toml::from_str(toml).unwrap(); + assert!(!cfg.openrouter.supports_vision); + } + + #[test] + fn test_supports_vision_parses_true() { + let toml = r#" + [telegram] + bot_token = "tok" + allowed_user_ids = [1] + [openrouter] + api_key = "key" + supports_vision = true + [sandbox] + allowed_directory = "/tmp" + "#; + let cfg: Config = toml::from_str(toml).unwrap(); + assert!(cfg.openrouter.supports_vision); + } + + #[test] + fn test_ocr_config_default_model_dir() { + let toml = r#" + [telegram] + bot_token = "tok" + allowed_user_ids = [1] + [openrouter] + api_key = "key" + [sandbox] + allowed_directory = "/tmp" + "#; + let cfg: Config = toml::from_str(toml).unwrap(); + assert!(cfg.ocr.model_dir.to_string_lossy().contains("ocrs")); + } } diff --git a/src/file_processor/mod.rs b/src/file_processor/mod.rs new file mode 100644 index 0000000..0529222 --- /dev/null +++ b/src/file_processor/mod.rs @@ -0,0 +1,323 @@ +use anyhow::{Context, Result}; +use base64::Engine as _; +use std::path::Path; + +use crate::config::Config; +use crate::llm::{ContentPart, ImageUrlContent}; +use crate::memory::MemoryStore; +use crate::platform::{Attachment, AttachmentKind}; + +const LONG_CONTEXT_THRESHOLD: usize = 6000; +const CHUNK_SIZE: usize = 1000; +const CHUNK_OVERLAP: usize = 100; + + + +/// Returned by `process_image` to indicate whether we got a vision part or OCR text. +pub enum ImageResult { + VisionPart(ContentPart), + OcrText(String), +} + +/// Process all attachments for a message. +/// - Images: base64 vision part (if supports_vision) OR OCR text (if not) +/// - PDFs: text extraction +/// - DOCXs: text extraction +/// - Long text (>6000 chars): chunked into knowledge store, RAG-retrieved +pub async fn process_attachments( + attachments: &[Attachment], + user_query: &str, + config: &Config, + memory: &MemoryStore, +) -> (String, Vec) { + let mut text_parts: Vec = Vec::new(); + let mut image_parts: Vec = Vec::new(); + + for attachment in attachments { + match attachment.kind { + AttachmentKind::Image => { + match process_image( + &attachment.path, + &attachment.mime_type, + config.openrouter.supports_vision, + &config.ocr.model_dir, + ) + .await + { + Ok(ImageResult::VisionPart(part)) => image_parts.push(part), + Ok(ImageResult::OcrText(text)) => { + let fname = attachment.file_name.as_deref().unwrap_or("image"); + text_parts.push(format!("[Image: {}]\n{}", fname, text)); + } + Err(e) => { + tracing::warn!("Image processing failed: {}", e); + text_parts.push(format!("[Image processing failed: {}]", e)); + } + } + } + AttachmentKind::Pdf => { + let fname = attachment + .file_name + .as_deref() + .unwrap_or("document.pdf"); + match extract_pdf_text(&attachment.path) { + Ok(text) => { + let ctx = + handle_context_length(&text, fname, user_query, memory).await; + text_parts.push(ctx); + } + Err(e) => { + tracing::warn!("PDF extraction failed: {}", e); + text_parts.push(format!("[PDF processing failed: {}]", e)); + } + } + } + AttachmentKind::Docx => { + let fname = attachment + .file_name + .as_deref() + .unwrap_or("document.docx"); + match extract_docx_text(&attachment.path) { + Ok(text) => { + let ctx = + handle_context_length(&text, fname, user_query, memory).await; + text_parts.push(ctx); + } + Err(e) => { + tracing::warn!("DOCX extraction failed: {}", e); + text_parts.push(format!("[DOCX processing failed: {}]", e)); + } + } + } + AttachmentKind::Other => { + tracing::debug!("Skipping unsupported attachment type"); + } + } + } + + (text_parts.join("\n\n"), image_parts) +} + +/// Returns either a vision ContentPart (base64) or extracted OCR text. +async fn process_image( + path: &Path, + mime_type: &str, + supports_vision: bool, + ocr_model_dir: &Path, +) -> Result { + if supports_vision { + let bytes = tokio::fs::read(path).await?; + let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes); + let data_url = format!("data:{};base64,{}", mime_type, encoded); + Ok(ImageResult::VisionPart(ContentPart::ImageUrl { + image_url: ImageUrlContent { url: data_url }, + })) + } else { + let text = ocr_image(path, ocr_model_dir).await?; + Ok(ImageResult::OcrText(text)) + } +} + +/// Perform OCR on an image using the ocrs neural-network engine. +/// Downloads model files on first use to `model_dir`. +async fn ocr_image(path: &Path, model_dir: &Path) -> Result { + ensure_ocr_models(model_dir).await?; + + let det_path = model_dir.join("text-detection.rten"); + let rec_path = model_dir.join("text-recognition.rten"); + + let path_owned = path.to_path_buf(); + + tokio::task::spawn_blocking(move || -> Result { + let detection_model = rten::Model::load_file(&det_path) + .context("Failed to load OCR detection model")?; + let recognition_model = rten::Model::load_file(&rec_path) + .context("Failed to load OCR recognition model")?; + + let engine = ocrs::OcrEngine::new(ocrs::OcrEngineParams { + detection_model: Some(detection_model), + recognition_model: Some(recognition_model), + ..Default::default() + })?; + + let img = image::open(&path_owned) + .context("Failed to open image for OCR")? + .into_rgb8(); + let img_source = ocrs::ImageSource::from_bytes(img.as_raw(), img.dimensions())?; + let ocr_input = engine.prepare_input(img_source)?; + let text = engine.get_text(&ocr_input)?; + Ok(text) + }) + .await + .context("OCR task panicked")? +} + +/// Download OCR model files to model_dir if they don't exist. +async fn ensure_ocr_models(model_dir: &Path) -> Result<()> { + tokio::fs::create_dir_all(model_dir).await?; + + let det = model_dir.join("text-detection.rten"); + let rec = model_dir.join("text-recognition.rten"); + + const DET_URL: &str = + "https://ocrs-models.s3.us-east-1.amazonaws.com/text-detection.rten"; + const REC_URL: &str = + "https://ocrs-models.s3.us-east-1.amazonaws.com/text-recognition.rten"; + + if !det.exists() { + tracing::info!("Downloading OCR detection model to {}", det.display()); + download_model(DET_URL, &det).await?; + } + if !rec.exists() { + tracing::info!("Downloading OCR recognition model to {}", rec.display()); + download_model(REC_URL, &rec).await?; + } + Ok(()) +} + +async fn download_model(url: &str, dest: &Path) -> Result<()> { + let response = reqwest::get(url).await.context("Failed to fetch OCR model")?; + let bytes = response + .bytes() + .await + .context("Failed to read OCR model bytes")?; + tokio::fs::write(dest, &bytes) + .await + .context("Failed to write OCR model")?; + tracing::info!("OCR model saved: {} bytes", bytes.len()); + Ok(()) +} + +/// Extract text content from a PDF file. +fn extract_pdf_text(path: &Path) -> Result { + let bytes = std::fs::read(path).context("Failed to read PDF")?; + // unwrap_or_default: malformed PDFs return empty string rather than propagating + let text = pdf_extract::extract_text_from_mem(&bytes).unwrap_or_default(); + Ok(text) +} + +/// Extract text content from a DOCX file. +fn extract_docx_text(path: &Path) -> Result { + let bytes = std::fs::read(path).context("Failed to read DOCX")?; + let docx = docx_rs::read_docx(&bytes) + .map_err(|e| anyhow::anyhow!("Failed to parse DOCX: {:?}", e))?; + + let mut text = String::new(); + for child in docx.document.children { + if let docx_rs::DocumentChild::Paragraph(para) = child { + for run_child in para.children { + if let docx_rs::ParagraphChild::Run(run) = run_child { + for rc in run.children { + if let docx_rs::RunChild::Text(t) = rc { + text.push_str(&t.text); + } + } + } + } + text.push('\n'); + } + } + Ok(text) +} + +/// Chunk text with overlap. +fn chunk_text(text: &str, chunk_size: usize, overlap: usize) -> Vec { + let chars: Vec = text.chars().collect(); + let mut chunks = Vec::new(); + let mut start = 0; + while start < chars.len() { + let end = (start + chunk_size).min(chars.len()); + chunks.push(chars[start..end].iter().collect()); + if end == chars.len() { + break; + } + start += chunk_size - overlap; + } + chunks +} + +/// If text is long, store chunks in knowledge store and RAG-retrieve relevant ones. +/// If short, return it directly. +async fn handle_context_length( + text: &str, + filename: &str, + query: &str, + memory: &MemoryStore, +) -> String { + let char_count = text.chars().count(); + if char_count <= LONG_CONTEXT_THRESHOLD { + return format!("[File: {}]\n{}", filename, text); + } + + let chunks = chunk_text(text, CHUNK_SIZE, CHUNK_OVERLAP); + tracing::info!( + "Document '{}' is {} chars — storing {} chunks in knowledge base", + filename, + char_count, + chunks.len() + ); + + for (i, chunk) in chunks.iter().enumerate() { + let key = format!("{}::chunk_{}", filename, i); + if let Err(e) = memory + .remember("document_chunk", &key, chunk, Some(filename)) + .await + { + tracing::warn!("Failed to store document chunk {}: {}", i, e); + } + } + + match memory.search_knowledge(query, 5).await { + Ok(results) if !results.is_empty() => { + let context = results + .iter() + .map(|e| e.value.as_str()) + .collect::>() + .join("\n\n---\n\n"); + format!("[File: {} — relevant sections]\n{}", filename, context) + } + _ => format!( + "[File: {} — document indexed, but no relevant sections found for this query]", + filename + ), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_chunk_text_short_returns_one_chunk() { + let text = "hello world"; + let chunks = chunk_text(text, 1000, 100); + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0], text); + } + + #[test] + fn test_chunk_text_long_splits_with_overlap() { + let text = "a".repeat(2500); + let chunks = chunk_text(&text, 1000, 100); + // chunk 0: [0, 1000) + // chunk 1: [900, 1900) + // chunk 2: [1800, 2500) (last chunk, smaller) + assert_eq!(chunks.len(), 3); + assert_eq!(chunks[0].chars().count(), 1000); + assert_eq!(chunks[1].chars().count(), 1000); + } + + #[test] + fn test_chunk_text_exact_boundary() { + let text = "b".repeat(1000); + let chunks = chunk_text(&text, 1000, 100); + assert_eq!(chunks.len(), 1); + } + + #[test] + fn test_chunk_text_just_over_boundary() { + let text = "b".repeat(1001); + let chunks = chunk_text(&text, 1000, 100); + assert_eq!(chunks.len(), 2); + } +} diff --git a/src/llm.rs b/src/llm.rs index fb8bd5d..aaf274c 100644 --- a/src/llm.rs +++ b/src/llm.rs @@ -5,11 +5,71 @@ use tracing::{debug, warn}; use crate::config::OpenRouterConfig; +/// A single part in a multi-modal message +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ContentPart { + Text { text: String }, + ImageUrl { image_url: ImageUrlContent }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImageUrlContent { + /// "data:image/jpeg;base64,..." or a URL + pub url: String, +} + +/// Either a plain text string or a list of content parts (multi-modal). +/// Serializes as a plain JSON string for text-only, or as a JSON array for multi-modal. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum MessageContent { + Text(String), + Parts(Vec), +} + +impl MessageContent { + /// Extract all text from the content (for logging, RAG, DB storage, etc.) + pub fn as_text(&self) -> String { + match self { + Self::Text(s) => s.clone(), + Self::Parts(parts) => parts + .iter() + .filter_map(|p| { + if let ContentPart::Text { text } = p { + Some(text.as_str()) + } else { + None + } + }) + .collect::>() + .join(" "), + } + } + + pub fn from_text(s: impl Into) -> Self { + Self::Text(s.into()) + } + + pub fn is_empty(&self) -> bool { + match self { + Self::Text(s) => s.is_empty(), + Self::Parts(parts) => parts.is_empty(), + } + } +} + +impl Default for MessageContent { + fn default() -> Self { + Self::Text(String::new()) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ChatMessage { pub role: String, #[serde(skip_serializing_if = "Option::is_none")] - pub content: Option, + pub content: Option, #[serde(skip_serializing_if = "Option::is_none")] pub tool_calls: Option>, #[serde(skip_serializing_if = "Option::is_none")] @@ -174,7 +234,7 @@ impl LlmClient { tool_call_count = choice.message.tool_calls.as_ref().map_or(0, |t| t.len()), "Received LLM response" ); - if choice.message.content.as_deref().is_none_or(str::is_empty) + if choice.message.content.as_ref().is_none_or(MessageContent::is_empty) && choice.message.tool_calls.as_ref().is_none_or(Vec::is_empty) { warn!( @@ -293,6 +353,39 @@ impl LlmClient { mod tests { use super::*; + #[test] + fn test_message_content_text_serializes_as_string() { + let content = MessageContent::from_text("hello world"); + let json = serde_json::to_string(&content).unwrap(); + assert_eq!(json, r#""hello world""#); + } + + #[test] + fn test_message_content_parts_serializes_as_array() { + let content = MessageContent::Parts(vec![ContentPart::Text { + text: "hello".to_string(), + }]); + let json = serde_json::to_value(&content).unwrap(); + assert!(json.is_array()); + assert_eq!(json[0]["type"], "text"); + assert_eq!(json[0]["text"], "hello"); + } + + #[test] + fn test_message_content_as_text_from_parts() { + let content = MessageContent::Parts(vec![ + ContentPart::Text { + text: "hello".to_string(), + }, + ContentPart::ImageUrl { + image_url: ImageUrlContent { + url: "data:image/png;base64,abc".to_string(), + }, + }, + ]); + assert_eq!(content.as_text(), "hello"); + } + #[test] fn test_chat_request_serializes_model_field() { // Verifies the model string will appear in the JSON POST body diff --git a/src/main.rs b/src/main.rs index 8a96440..1f1b157 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,7 @@ mod platform; mod scheduler; mod skills; mod tools; +mod file_processor; mod utils; use std::path::PathBuf; diff --git a/src/memory/conversations.rs b/src/memory/conversations.rs index 4bf4669..7a3765e 100644 --- a/src/memory/conversations.rs +++ b/src/memory/conversations.rs @@ -2,7 +2,7 @@ use anyhow::{Context, Result}; use uuid::Uuid; use super::MemoryStore; -use crate::llm::ChatMessage; +use crate::llm::{ChatMessage, MessageContent}; /// Cast a &[f32] to &[u8] for SQLite blob storage pub(crate) fn f32_slice_to_bytes(floats: &[f32]) -> &[u8] { @@ -62,7 +62,8 @@ impl MemoryStore { .map(|tc| serde_json::to_string(tc).unwrap_or_default()); // Generate embedding before acquiring the DB lock (async HTTP call) - let embedding = if let Some(content) = &message.content { + let content_text: Option = message.content.as_ref().map(|c| c.as_text()); + let embedding = if let Some(ref content) = content_text { if !content.is_empty() && message.role != "tool" { self.embeddings.try_embed_one(content).await } else { @@ -81,7 +82,7 @@ impl MemoryStore { &id, conversation_id, &message.role, - &message.content, + &content_text, &tool_calls_json, &message.tool_call_id, ], @@ -409,9 +410,10 @@ fn parse_message_row(row: &rusqlite::Row) -> rusqlite::Result { let tool_calls_json: Option = row.get(2)?; let tool_calls = tool_calls_json.and_then(|json| serde_json::from_str(&json).ok()); + let content_str: Option = row.get(1)?; Ok(ChatMessage { role: row.get(0)?, - content: row.get(1)?, + content: content_str.map(MessageContent::Text), tool_calls, tool_call_id: row.get(3)?, }) @@ -420,12 +422,12 @@ fn parse_message_row(row: &rusqlite::Row) -> rusqlite::Result { #[cfg(test)] mod tests { use super::*; - use crate::llm::ChatMessage; + use crate::llm::{ChatMessage, MessageContent}; fn make_msg(role: &str, content: &str) -> ChatMessage { ChatMessage { role: role.to_string(), - content: Some(content.to_string()), + content: Some(MessageContent::from_text(content)), tool_calls: None, tool_call_id: None, } @@ -457,7 +459,7 @@ mod tests { .await .unwrap(); assert_eq!(results.len(), 1); - assert!(results[0].content.as_deref().unwrap().contains("love")); + assert!(results[0].content.as_ref().map(|c| c.as_text()).unwrap().contains("love")); } #[tokio::test] diff --git a/src/memory/query_rewriter.rs b/src/memory/query_rewriter.rs index 8051e29..55ee154 100644 --- a/src/memory/query_rewriter.rs +++ b/src/memory/query_rewriter.rs @@ -1,4 +1,4 @@ -use crate::llm::{ChatMessage, LlmClient}; +use crate::llm::{ChatMessage, LlmClient, MessageContent}; /// Rewrite an ambiguous follow-up question into a self-contained search query. /// Uses the last ≤3 non-system messages as conversation context. @@ -32,16 +32,15 @@ pub async fn rewrite_for_rag( let messages = vec![ ChatMessage { role: "system".to_string(), - content: Some( - "You are a query rewriter. Output only the rewritten query, nothing else." - .to_string(), - ), + content: Some(MessageContent::from_text( + "You are a query rewriter. Output only the rewritten query, nothing else.", + )), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "user".to_string(), - content: Some(prompt), + content: Some(MessageContent::from_text(prompt)), tool_calls: None, tool_call_id: None, }, @@ -51,6 +50,7 @@ pub async fn rewrite_for_rag( Ok(response) => { let rewritten = response .content + .map(|c| c.as_text()) .unwrap_or_default() .trim() .lines() @@ -94,7 +94,8 @@ fn format_history(messages: &[ChatMessage]) -> String { .iter() .filter_map(|m| { m.content.as_ref().map(|c| { - let snippet = crate::utils::strings::truncate_chars(c, 200); + let text = c.as_text(); + let snippet = crate::utils::strings::truncate_chars(&text, 200); format!("{}: {}", m.role, snippet) }) }) @@ -105,12 +106,12 @@ fn format_history(messages: &[ChatMessage]) -> String { #[cfg(test)] mod tests { use super::*; - use crate::llm::ChatMessage; + use crate::llm::{ChatMessage, MessageContent}; fn msg(role: &str, text: &str) -> ChatMessage { ChatMessage { role: role.to_string(), - content: Some(text.to_string()), + content: Some(MessageContent::from_text(text)), tool_calls: None, tool_call_id: None, } diff --git a/src/memory/rag.rs b/src/memory/rag.rs index c5eca1d..a5eb685 100644 --- a/src/memory/rag.rs +++ b/src/memory/rag.rs @@ -42,7 +42,8 @@ pub async fn auto_retrieve_context( for msg in &results { if let Some(content) = &msg.content { let role = &msg.role; - let snippet = crate::utils::strings::truncate_chars(content, 300); + let text = content.as_text(); + let snippet = crate::utils::strings::truncate_chars(&text, 300); block.push_str(&format!("[{}] {}\n", role, snippet)); } } @@ -59,13 +60,13 @@ pub async fn auto_retrieve_context( #[cfg(test)] mod tests { use super::*; - use crate::llm::ChatMessage; + use crate::llm::{ChatMessage, MessageContent}; use crate::memory::MemoryStore; fn user_msg(text: &str) -> ChatMessage { ChatMessage { role: "user".to_string(), - content: Some(text.to_string()), + content: Some(MessageContent::from_text(text)), tool_calls: None, tool_call_id: None, } @@ -165,7 +166,7 @@ mod tests { let msg = crate::llm::ChatMessage { role: "user".to_string(), - content: Some("I prefer TypeScript for frontend work".to_string()), + content: Some(crate::llm::MessageContent::from_text("I prefer TypeScript for frontend work")), tool_calls: None, tool_call_id: None, }; diff --git a/src/memory/summarizer.rs b/src/memory/summarizer.rs index a19f3a1..350e1c0 100644 --- a/src/memory/summarizer.rs +++ b/src/memory/summarizer.rs @@ -1,7 +1,7 @@ use anyhow::Result; use tracing::{info, warn}; -use crate::llm::{ChatMessage, LlmClient}; +use crate::llm::{ChatMessage, LlmClient, MessageContent}; use super::MemoryStore; @@ -44,23 +44,22 @@ pub async fn summarize_conversation( let messages = vec![ ChatMessage { role: "system".to_string(), - content: Some( - "You produce concise, factual conversation summaries. Output only bullet points." - .to_string(), - ), + content: Some(MessageContent::from_text( + "You produce concise, factual conversation summaries. Output only bullet points.", + )), tool_calls: None, tool_call_id: None, }, ChatMessage { role: "user".to_string(), - content: Some(summarization_prompt), + content: Some(MessageContent::from_text(summarization_prompt)), tool_calls: None, tool_call_id: None, }, ]; let response = llm.chat(&messages, &[]).await?; - let summary_text = response.content.unwrap_or_default(); + let summary_text = response.content.map(|c| c.as_text()).unwrap_or_default(); if summary_text.trim().is_empty() { warn!(conversation_id = %conversation_id, "LLM returned empty summary — skipping"); @@ -69,7 +68,7 @@ pub async fn summarize_conversation( let summary_msg = ChatMessage { role: "system".to_string(), - content: Some(format!("[SUMMARY]\n{}", summary_text.trim())), + content: Some(MessageContent::from_text(format!("[SUMMARY]\n{}", summary_text.trim()))), tool_calls: None, tool_call_id: None, }; @@ -115,13 +114,13 @@ pub async fn summarize_all_active( #[cfg(test)] mod tests { use super::*; - use crate::llm::ChatMessage; + use crate::llm::{ChatMessage, MessageContent}; use crate::memory::MemoryStore; fn user_msg(text: &str) -> ChatMessage { ChatMessage { role: "user".to_string(), - content: Some(text.to_string()), + content: Some(MessageContent::from_text(text)), tool_calls: None, tool_call_id: None, } diff --git a/src/platform/mod.rs b/src/platform/mod.rs index a97d93e..c48c9a2 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -1,6 +1,26 @@ pub mod telegram; pub mod tool_notifier; +/// What kind of attachment was received +#[derive(Debug, Clone, PartialEq)] +pub enum AttachmentKind { + Image, + Pdf, + Docx, + Other, +} + +/// A file attachment received from a platform +#[derive(Debug, Clone)] +pub struct Attachment { + pub kind: AttachmentKind, + /// Absolute path to the downloaded temp file + pub path: std::path::PathBuf, + pub mime_type: String, + /// Original filename, if known + pub file_name: Option, +} + /// A message received from any platform #[derive(Debug, Clone)] #[allow(dead_code)] @@ -15,4 +35,6 @@ pub struct IncomingMessage { pub user_name: String, /// The message text pub text: String, + /// Attached files, if any + pub attachments: Vec, } diff --git a/src/platform/telegram.rs b/src/platform/telegram.rs index 640f8a2..647ccab 100644 --- a/src/platform/telegram.rs +++ b/src/platform/telegram.rs @@ -1,11 +1,13 @@ +use std::path::{Path, PathBuf}; use std::sync::Arc; -use anyhow::Result; +use anyhow::{Context, Result}; +use teloxide::net::Download; use teloxide::prelude::*; use tracing::{error, info, warn}; use crate::agent::Agent; -use crate::platform::IncomingMessage; +use crate::platform::{Attachment, AttachmentKind, IncomingMessage}; /// Split long messages for Telegram's 4096 char limit #[cfg(test)] @@ -85,16 +87,63 @@ async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseRe }; let user_id = user.id.0; - let text = match msg.text() { - Some(t) => t.to_string(), - None => return Ok(()), - }; - let user_name = user.first_name.clone(); + // For media messages, use caption as text; for text messages, use msg.text() + let text = msg.text().or_else(|| msg.caption()).unwrap_or("").to_string(); + + // Temp dir for file downloads — created lazily by download_telegram_file + let temp_dir = std::env::temp_dir().join(format!("rustfox_{}", uuid::Uuid::new_v4())); + + let mut attachments: Vec = Vec::new(); + + // Handle photo attachments — last PhotoSize is the highest resolution + if let Some(photos) = msg.photo() { + if let Some(largest) = photos.last() { + let file_id = largest.file.id.to_string(); + match download_telegram_file(&bot, &file_id, &temp_dir, None).await { + Ok((path, mime)) => { + attachments.push(Attachment { + kind: AttachmentKind::Image, + path, + mime_type: mime, + file_name: None, + }); + } + Err(e) => warn!("Failed to download photo: {:#}", e), + } + } + } + + // Handle document attachments + if let Some(doc) = msg.document() { + let file_id = doc.file.id.to_string(); + let file_name = doc.file_name.clone(); + match download_telegram_file(&bot, &file_id, &temp_dir, file_name.as_deref()).await { + Ok((path, mime)) => { + let kind = classify_attachment_kind(&mime, file_name.as_deref()); + attachments.push(Attachment { + kind, + path, + mime_type: mime, + file_name, + }); + } + Err(e) => warn!("Failed to download document: {:#}", e), + } + } + + // Skip if there is nothing to process + if text.is_empty() && attachments.is_empty() { + return Ok(()); + } + info!( - "Telegram message from {} ({}): {}", - user_name, user_id, text + "Telegram message from {} ({}): {} [attachments: {}]", + user_name, + user_id, + if text.is_empty() { "(no text)" } else { &text }, + attachments.len() ); // Handle commands @@ -291,6 +340,7 @@ async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseRe chat_id: msg.chat.id.0.to_string(), user_name, text, + attachments, }; // Process through agent — moves stream_token_tx and tool_event_tx @@ -314,6 +364,11 @@ async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseRe // Wait for stream receiver to complete its final edit stream_handle.await.ok(); + // Cleanup temp dir used for file downloads (async to avoid blocking the executor) + if temp_dir.exists() { + tokio::fs::remove_dir_all(&temp_dir).await.ok(); + } + if let Err(e) = process_result { warn!(error = %e, "Agent processing failed"); bot.send_message(msg.chat.id, format!("Error: {:#}", e)) @@ -324,6 +379,88 @@ async fn handle_message(bot: Bot, msg: Message, agent: Arc) -> ResponseRe Ok(()) } +/// Download a Telegram file to the given directory, creating it if needed. +/// Returns (local_path, detected_mime_type). +async fn download_telegram_file( + bot: &Bot, + file_id: &str, + dest_dir: &Path, + filename: Option<&str>, +) -> Result<(PathBuf, String)> { + std::fs::create_dir_all(dest_dir).context("Failed to create temp directory")?; + + let file = bot + .get_file(file_id.to_string().into()) + .await + .context("Failed to get file info from Telegram")?; + + let ext = Path::new(&file.path) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("bin"); + + let dest_name = match filename { + Some(n) => n.to_string(), + None => format!("{}.{}", uuid::Uuid::new_v4(), ext), + }; + let dest_path = dest_dir.join(&dest_name); + + let mut bytes: Vec = Vec::new(); + bot.download_file(&file.path, &mut bytes) + .await + .context("Failed to download file from Telegram")?; + + std::fs::write(&dest_path, &bytes).context("Failed to write downloaded file")?; + + let mime = infer::get(&bytes) + .map(|t| t.mime_type().to_string()) + .unwrap_or_else(|| mime_from_extension(ext).to_string()); + + Ok((dest_path, mime)) +} + +/// Classify an attachment based on MIME type and filename extension fallback. +fn classify_attachment_kind(mime_type: &str, file_name: Option<&str>) -> AttachmentKind { + if mime_type.starts_with("image/") { + return AttachmentKind::Image; + } + if mime_type == "application/pdf" { + return AttachmentKind::Pdf; + } + if mime_type.contains("wordprocessingml") || mime_type == "application/msword" { + return AttachmentKind::Docx; + } + // Fallback: check extension + let name = file_name.unwrap_or(""); + if name.ends_with(".pdf") { + return AttachmentKind::Pdf; + } + if name.ends_with(".docx") || name.ends_with(".doc") { + return AttachmentKind::Docx; + } + if name.ends_with(".jpg") + || name.ends_with(".jpeg") + || name.ends_with(".png") + || name.ends_with(".gif") + || name.ends_with(".webp") + { + return AttachmentKind::Image; + } + AttachmentKind::Other +} + +fn mime_from_extension(ext: &str) -> &'static str { + match ext { + "jpg" | "jpeg" => "image/jpeg", + "png" => "image/png", + "gif" => "image/gif", + "webp" => "image/webp", + "pdf" => "application/pdf", + "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + _ => "application/octet-stream", + } +} + #[cfg(test)] mod tests { use super::*; @@ -381,4 +518,49 @@ mod tests { "Zero-width-space placeholder must be removed from stream_handle" ); } + + #[test] + fn test_classify_attachment_kind_image_jpeg() { + assert_eq!(classify_attachment_kind("image/jpeg", None), AttachmentKind::Image); + } + + #[test] + fn test_classify_attachment_kind_pdf() { + assert_eq!(classify_attachment_kind("application/pdf", None), AttachmentKind::Pdf); + } + + #[test] + fn test_classify_attachment_kind_docx() { + assert_eq!( + classify_attachment_kind( + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + None + ), + AttachmentKind::Docx + ); + } + + #[test] + fn test_classify_attachment_kind_fallback_to_extension() { + assert_eq!( + classify_attachment_kind("application/octet-stream", Some("report.pdf")), + AttachmentKind::Pdf + ); + assert_eq!( + classify_attachment_kind("application/octet-stream", Some("letter.docx")), + AttachmentKind::Docx + ); + assert_eq!( + classify_attachment_kind("application/octet-stream", Some("photo.jpg")), + AttachmentKind::Image + ); + } + + #[test] + fn test_classify_attachment_kind_unknown() { + assert_eq!( + classify_attachment_kind("application/zip", Some("archive.zip")), + AttachmentKind::Other + ); + } }