From 19879454137a029ab6ca32f8bf3050888d9589a5 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 9 Feb 2026 23:16:19 +0100 Subject: [PATCH 01/25] feat(hf): support writing and reading from both http and xet --- core/Cargo.lock | 1497 ++++++++++++++++++++++++++++--- core/Cargo.toml | 2 + core/services/hf/Cargo.toml | 29 + core/services/hf/src/backend.rs | 239 +++-- core/services/hf/src/config.rs | 150 +--- core/services/hf/src/core.rs | 807 +++++++++-------- core/services/hf/src/docs.md | 7 +- core/services/hf/src/lib.rs | 3 + core/services/hf/src/lister.rs | 166 +--- core/services/hf/src/reader.rs | 298 ++++++ core/services/hf/src/uri.rs | 522 +++++++++++ core/services/hf/src/writer.rs | 348 +++++++ 12 files changed, 3202 insertions(+), 866 deletions(-) create mode 100644 core/services/hf/src/reader.rs create mode 100644 core/services/hf/src/uri.rs create mode 100644 core/services/hf/src/writer.rs diff --git a/core/Cargo.lock b/core/Cargo.lock index b4359c518160..fab12d63dcf0 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -27,7 +27,7 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "cipher", "cpufeatures", ] @@ -49,7 +49,7 @@ version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "getrandom 0.3.4", "once_cell", "version_check", @@ -411,13 +411,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc" dependencies = [ "autocfg", - "cfg-if", + "cfg-if 1.0.4", "concurrent-queue", "futures-io", "futures-lite", "parking", "polling", - "rustix 1.1.2", + "rustix 1.1.3", "slab", "windows-sys 0.61.2", ] @@ -610,7 +610,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "70af449c9a763cb655c6a1e5338b42d99c67190824ff90658c1e30be844c0775" dependencies = [ "awaitable-error", - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -1085,10 +1085,13 @@ checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" dependencies = [ "axum-core 0.5.5", "bytes", + "form_urlencoded", "futures-util", "http 1.4.0", "http-body 1.0.1", "http-body-util", + "hyper 1.8.1", + "hyper-util", "itoa", "matchit 0.8.4", "memchr", @@ -1096,10 +1099,15 @@ dependencies = [ "percent-encoding", "pin-project-lite", "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", "sync_wrapper 1.0.2", + "tokio", "tower 0.5.2", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -1135,6 +1143,7 @@ dependencies = [ "sync_wrapper 1.0.2", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -1305,7 +1314,7 @@ dependencies = [ "arrayref", "arrayvec", "cc", - "cfg-if", + "cfg-if 1.0.4", "constant_time_eq", ] @@ -1403,6 +1412,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ "memchr", + "regex-automata", "serde", ] @@ -1539,6 +1549,90 @@ dependencies = [ "serde_json", ] +[[package]] +name = "cas_client" +version = "0.14.5" +dependencies = [ + "anyhow", + "async-trait", + "axum 0.8.7", + "base64 0.22.1", + "bytes", + "cas_object", + "cas_types", + "chrono", + "clap", + "deduplication", + "error_printer", + "file_utils", + "futures", + "futures-util", + "heed", + "http 1.4.0", + "hyper 1.8.1", + "lazy_static", + "mdb_shard", + "merklehash", + "more-asserts", + "progress_tracking", + "rand 0.9.2", + "reqwest 0.13.2", + "reqwest-middleware", + "reqwest-retry", + "serde", + "serde_json", + "statrs", + "tempfile", + "thiserror 2.0.17", + "tokio", + "tokio-retry", + "tower-http", + "tracing", + "tracing-log", + "tracing-subscriber", + "url", + "utils", + 
"warp", + "web-time", + "xet_runtime", +] + +[[package]] +name = "cas_object" +version = "0.1.0" +dependencies = [ + "anyhow", + "blake3", + "bytes", + "clap", + "countio", + "csv", + "deduplication", + "futures", + "half", + "lz4_flex", + "mdb_shard", + "merklehash", + "more-asserts", + "rand 0.9.2", + "serde", + "thiserror 2.0.17", + "tokio", + "tracing", + "utils", + "xet_runtime", +] + +[[package]] +name = "cas_types" +version = "0.1.0" +dependencies = [ + "merklehash", + "serde", + "serde_repr", + "thiserror 2.0.17", +] + [[package]] name = "cast" version = "0.3.0" @@ -1632,6 +1726,12 @@ dependencies = [ "unicode-security", ] +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cexpr" version = "0.6.0" @@ -1641,6 +1741,12 @@ dependencies = [ "nom", ] +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.4" @@ -1856,7 +1962,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff5c12800e82a01d12046ccc29b014e1cbbb2fbe38c52534e0d40d4fc58881d5" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "cfg_aliases", "compio-buf", "compio-log", @@ -1880,7 +1986,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c568022f90c2e2e8ea7ff4c4e8fde500753b5b9b6b6d870e25b5e656f9ea2892" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "cfg_aliases", "compio-buf", "compio-driver", @@ -1918,7 +2024,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bffab78b8a876111ca76450912ca6a5a164b0dd93973e342c5f438a6f478c735" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "compio-buf", "compio-driver", "compio-io", @@ 
-1938,7 +2044,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fd890a129a8086af857bbe18401689c130aa6ccfc7f3c029a7800f7256af3e" dependencies = [ "async-task", - "cfg-if", + "cfg-if 1.0.4", "compio-buf", "compio-driver", "compio-log", @@ -2006,6 +2112,21 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const-str" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18f12cc9948ed9604230cdddc7c86e270f9401ccbe3c2e98a4378c5e7632212f" + +[[package]] +name = "const_panic" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" +dependencies = [ + "typewit", +] + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -2058,6 +2179,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "countio" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9702aee5d1d744c01d82f6915644f950f898e014903385464c773b96fefdecb" +dependencies = [ + "futures-io", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -2116,7 +2246,7 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -2263,6 +2393,27 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + [[package]] name = "ctor" version = "0.6.3" @@ 
-2399,7 +2550,7 @@ version = "5.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "hashbrown 0.14.5", "lock_api", "once_cell", @@ -2412,7 +2563,7 @@ version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "crossbeam-utils", "hashbrown 0.14.5", "lock_api", @@ -2420,12 +2571,66 @@ dependencies = [ "parking_lot_core 0.9.12", ] +[[package]] +name = "data" +version = "0.14.5" +dependencies = [ + "anyhow", + "async-trait", + "bytes", + "cas_client", + "cas_object", + "cas_types", + "chrono", + "clap", + "deduplication", + "error_printer", + "file_reconstruction", + "futures", + "hub_client", + "lazy_static", + "mdb_shard", + "merklehash", + "more-asserts", + "progress_tracking", + "prometheus 0.14.0", + "rand 0.9.2", + "regex", + "serde", + "serde_json", + "sha2", + "tempfile", + "thiserror 2.0.17", + "tokio", + "tracing", + "ulid", + "utils", + "walkdir", + "xet_runtime", +] + [[package]] name = "data-encoding" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +[[package]] +name = "deduplication" +version = "0.14.5" +dependencies = [ + "async-trait", + "bytes", + "gearhash", + "lazy_static", + "mdb_shard", + "merklehash", + "more-asserts", + "progress_tracking", + "utils", + "xet_runtime", +] + [[package]] name = "der" version = "0.6.1" @@ -2457,6 +2662,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name 
= "derive-new" version = "0.5.9" @@ -2610,16 +2826,37 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs-next" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "dirs-sys-next", ] +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.2", + "windows-sys 0.61.2", +] + [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -2627,7 +2864,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", - "redox_users", + "redox_users 0.4.6", "winapi", ] @@ -2648,7 +2885,7 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a405457ec78b8fe08b0e32b4a3570ab5dff6dd16eb9e76a5ee0a9d9cbd898933" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "clap", "condtype", "divan-macros", @@ -2692,7 +2929,7 @@ version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e39034cee21a2f5bbb66ba0e3689819c4bb5d00382a282006e802a7ffa6c41d" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "libc", "socket2 0.6.1", "windows-sys 0.60.2", @@ -2710,6 +2947,15 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" +[[package]] +name = "doxygen-rs" +version = "0.4.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" +dependencies = [ + "phf", +] + [[package]] name = "dtoa" version = "1.0.10" @@ -2746,6 +2992,20 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "duration-str" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12494809f9915b6132014cc259c4e204ab53ab6c6dd2225672703b5359267d82" +dependencies = [ + "chrono", + "rust_decimal", + "serde", + "thiserror 2.0.17", + "time", + "winnow", +] + [[package]] name = "dyn-clone" version = "1.0.20" @@ -2853,7 +3113,7 @@ version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -2910,6 +3170,13 @@ dependencies = [ "version_check", ] +[[package]] +name = "error_printer" +version = "0.14.5" +dependencies = [ + "tracing", +] + [[package]] name = "escape8259" version = "0.5.3" @@ -2940,7 +3207,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "home", "windows-sys 0.48.0", ] @@ -3000,7 +3267,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ad55eb18b0326516b75e7ff961ad85033154e82265e7819f77acf38a500acb" dependencies = [ "anyhow", - "cfg-if", + "cfg-if 1.0.4", "foldhash 0.2.0", "itoa", "parking_lot 0.12.5", @@ -3075,6 +3342,38 @@ dependencies = [ "subtle", ] +[[package]] +name = "file_reconstruction" +version = "0.14.5" +dependencies = [ + "async-trait", + "bytes", + "cas_client", + "cas_types", + "merklehash", + "more-asserts", + "progress_tracking", + "thiserror 
2.0.17", + "tokio", + "tracing", + "utils", + "xet_config", + "xet_runtime", +] + +[[package]] +name = "file_utils" +version = "0.14.2" +dependencies = [ + "colored", + "lazy_static", + "libc", + "rand 0.9.2", + "tracing", + "whoami 2.1.1", + "winapi", +] + [[package]] name = "find-msvc-tools" version = "0.1.5" @@ -3264,7 +3563,7 @@ checksum = "9db9c0e4648b13e9216d785b308d43751ca975301aeb83e607ec630b6f956944" dependencies = [ "bincode", "bytes", - "cfg-if", + "cfg-if 1.0.4", "itertools 0.14.0", "madsim-tokio", "mixtrics", @@ -3359,7 +3658,7 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8640e34b88f7652208ce9e88b1a37a2ae95227d84abec377ccd3c5cfeb141ed4" dependencies = [ - "rustix 1.1.2", + "rustix 1.1.3", "windows-sys 0.59.0", ] @@ -3567,6 +3866,15 @@ version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "312d2295c7302019c395cfb90dacd00a82a2eabd700429bba9c7a3f38dbbe11b" +[[package]] +name = "gearhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8cf82cf76cd16485e56295a1377c775ce708c9f1a0be6b029076d60a245d213" +dependencies = [ + "cfg-if 0.1.10", +] + [[package]] name = "generator" version = "0.7.5" @@ -3657,7 +3965,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "libc", "wasi 0.9.0+wasi-snapshot-preview1", ] @@ -3668,7 +3976,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "js-sys", "libc", "wasi 0.11.1+wasi-snapshot-preview1", @@ -3681,7 +3989,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "js-sys", "libc", "r-efi", @@ -3689,6 +3997,21 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if 1.0.4", + "js-sys", + "libc", + "r-efi", + "wasip2", + "wasip3", + "wasm-bindgen", +] + [[package]] name = "ghac" version = "0.2.0" @@ -3735,7 +4058,7 @@ version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9efcab3c1958580ff1f25a2a41be1668f7603d849bb63af523b208a3cc1223b8" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "dashmap 6.1.0", "futures-sink", "futures-timer", @@ -3807,7 +4130,7 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "crunchy", "zerocopy", ] @@ -3932,7 +4255,7 @@ dependencies = [ "tokio", "url", "uuid", - "whoami", + "whoami 1.6.1", ] [[package]] @@ -3959,6 +4282,36 @@ dependencies = [ "log", ] +[[package]] +name = "headers" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3314d5adb5d94bcdf56771f2e50dbbc80bb4bdf88967526706205ac9eff24eb" +dependencies = [ + "base64 0.22.1", + "bytes", + "headers-core", + "http 1.4.0", + "httpdate", + "mime", + "sha1", +] + +[[package]] +name = "headers-core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" +dependencies = [ + "http 1.4.0", +] + +[[package]] +name = "heapify" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0049b265b7f201ca9ab25475b22b47fe444060126a51abe00f77d986fc5cc52e" + 
[[package]] name = "heapless" version = "0.6.1" @@ -4001,13 +4354,51 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] -name = "hermit-abi" -version = "0.5.2" +name = "heed" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393" +dependencies = [ + "bitflags 2.10.0", + "byteorder", + "heed-traits", + "heed-types", + "libc", + "lmdb-master-sys", + "once_cell", + "page_size", + "serde", + "synchronoise", + "url", +] [[package]] -name = "hex" +name = "heed-traits" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff" + +[[package]] +name = "heed-types" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c255bdf46e07fb840d120a36dcc81f385140d7191c76a7391672675c01a55d" +dependencies = [ + "bincode", + "byteorder", + "heed-traits", + "serde", + "serde_json", +] + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" @@ -4019,7 +4410,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" dependencies = [ "async-trait", - "cfg-if", + "cfg-if 1.0.4", "data-encoding", "enum-as-inner", "futures-channel", @@ -4043,7 +4434,7 @@ version = "0.25.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "futures-util", "hickory-proto", "ipconfig", @@ -4178,6 +4569,20 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "hub_client" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "cas_client", + "reqwest 0.13.2", + "reqwest-middleware", + "serde", + "thiserror 2.0.17", + "urlencoding", +] + [[package]] name = "humantime" version = "2.3.0" @@ -4437,6 +4842,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" @@ -4527,7 +4938,7 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -4547,7 +4958,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd7bddefd0a8833b88a4b68f90dae22c7450d11b354198baee3874fd811b344" dependencies = [ "bitflags 2.10.0", - "cfg-if", + "cfg-if 1.0.4", "libc", ] @@ -4559,7 +4970,7 @@ checksum = "bbe9ac631d954bb17eee5c932bd71bce3d5726c949c27729dd5c946b0de65471" dependencies = [ "bytes", "io-uring 0.7.11", - "rustix 1.1.2", + "rustix 1.1.3", ] [[package]] @@ -4699,6 +5110,28 @@ dependencies = [ "jiff-tzdb", ] +[[package]] +name = "jni" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" +dependencies = [ + "cesu8", + "cfg-if 1.0.4", + "combine", + "jni-sys", + "log", + "thiserror 1.0.69", + "walkdir", + "windows-sys 0.45.0", +] + 
+[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + [[package]] name = "jobserver" version = "0.1.34" @@ -4711,9 +5144,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -4734,6 +5167,23 @@ dependencies = [ "simple_asn1", ] +[[package]] +name = "konst" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f660d5f887e3562f9ab6f4a14988795b694099d66b4f5dedc02d197ba9becb1d" +dependencies = [ + "const_panic", + "konst_proc_macros", + "typewit", +] + +[[package]] +name = "konst_proc_macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a" + [[package]] name = "kv-log-macro" version = "1.0.7" @@ -4812,6 +5262,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "lexicmp" version = "0.1.0" @@ -4823,9 +5279,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.178" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = 
"libfuzzer-sys" @@ -4843,7 +5299,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "windows-link", ] @@ -4853,7 +5309,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "windows-link", ] @@ -4865,13 +5321,13 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libredox" -version = "0.1.10" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ "bitflags 2.10.0", "libc", - "redox_syscall 0.5.18", + "redox_syscall 0.7.1", ] [[package]] @@ -4958,6 +5414,17 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +[[package]] +name = "lmdb-master-sys" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df" +dependencies = [ + "cc", + "doxygen-rs", + "libc", +] + [[package]] name = "lock_api" version = "0.4.14" @@ -5050,7 +5517,7 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "generator", "scoped-tls", "tracing", @@ -5091,6 +5558,15 @@ dependencies = [ "libc", ] +[[package]] +name = "lz4_flex" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +dependencies = [ + "twox-hash", +] + [[package]] name = "mac" version = "0.1.1" @@ -5265,10 +5741,39 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "digest", ] +[[package]] +name = "mdb_shard" +version = "0.14.5" +dependencies = [ + "anyhow", + "async-trait", + "blake3", + "bytes", + "clap", + "futures", + "futures-util", + "heapify", + "itertools 0.14.0", + "lazy_static", + "merklehash", + "more-asserts", + "rand 0.9.2", + "regex", + "serde", + "static_assertions", + "tempfile", + "thiserror 2.0.17", + "tokio", + "tracing", + "utils", + "uuid", + "xet_runtime", +] + [[package]] name = "mea" version = "0.5.3" @@ -5320,6 +5825,20 @@ dependencies = [ "autocfg", ] +[[package]] +name = "merklehash" +version = "0.14.5" +dependencies = [ + "base64 0.22.1", + "blake3", + "bytemuck", + "getrandom 0.4.1", + "heed", + "rand 0.9.2", + "safe-transmute", + "serde", +] + [[package]] name = "metrics" version = "0.24.3" @@ -5563,6 +6082,12 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "more-asserts" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fafa6961cabd9c63bcd77a45d7e3b7f3b552b70417831fb0f56db717e72407e" + [[package]] name = "multer" version = "3.1.0" @@ -5592,6 +6117,23 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "034a0ad7deebf0c2abcf2435950a6666c3c15ea9d8fad0c0f48efa8a7f843fed" +[[package]] +name = "nalgebra" +version = "0.33.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26aecdf64b707efd1310e3544d709c5c0ac61c13756046aaaba41be5c4f66a3b" +dependencies = [ + "approx 0.5.1", + "matrixmultiply", + "num-complex", + "num-rational", + "num-traits", + "rand 0.8.5", + "rand_distr", + "simba", + "typenum", 
+] + [[package]] name = "nanoid" version = "0.4.0" @@ -5678,7 +6220,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" dependencies = [ "bitflags 1.3.2", - "cfg-if", + "cfg-if 1.0.4", "libc", "memoffset 0.7.1", "pin-utils", @@ -5799,6 +6341,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -5819,6 +6372,24 @@ dependencies = [ "libc", ] +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags 2.10.0", +] + +[[package]] +name = "objc2-system-configuration" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7216bd11cbda54ccabcab84d523dc93b858ec75ecfb3a7d89513fa22464da396" +dependencies = [ + "objc2-core-foundation", +] + [[package]] name = "object" version = "0.32.2" @@ -5868,6 +6439,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oneshot" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" + [[package]] name = "oorandom" version = "11.1.5" @@ -5976,7 +6553,7 @@ dependencies = [ "opendal-service-yandex-disk", "opendal-testkit", "rand 0.8.5", - "reqwest", + "reqwest 0.12.24", "sha2", "size", "tokio", @@ -6029,7 +6606,7 @@ dependencies = [ "pretty_assertions", "quick-xml", "rand 0.8.5", - 
"reqwest", + "reqwest 0.12.24", "serde", "serde_json", "sha2", @@ -6476,7 +7053,7 @@ dependencies = [ "opendal-core", "quick-xml", "reqsign", - "reqwest", + "reqwest 0.12.24", "serde", "tokio", ] @@ -6593,7 +7170,7 @@ dependencies = [ "percent-encoding", "quick-xml", "reqsign", - "reqwest", + "reqwest 0.12.24", "serde", "serde_json", "tokio", @@ -6686,15 +7263,23 @@ dependencies = [ name = "opendal-service-hf" version = "0.55.0" dependencies = [ + "async-trait", + "base64 0.22.1", "bytes", + "cas_types", + "data", "futures", "http 1.4.0", "log", "opendal-core", "percent-encoding", + "reqwest 0.12.24", "serde", "serde_json", + "sha2", + "tempfile", "tokio", + "utils", ] [[package]] @@ -6971,7 +7556,7 @@ dependencies = [ "reqsign-core", "reqsign-file-read-tokio", "reqsign-http-send-reqwest", - "reqwest", + "reqwest 0.12.24", "serde", "serde_json", "tokio", @@ -7270,7 +7855,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ "bitflags 2.10.0", - "cfg-if", + "cfg-if 1.0.4", "foreign-types", "libc", "once_cell", @@ -7337,7 +7922,7 @@ dependencies = [ "bytes", "http 1.4.0", "opentelemetry", - "reqwest", + "reqwest 0.12.24", ] [[package]] @@ -7352,7 +7937,7 @@ dependencies = [ "opentelemetry-proto", "opentelemetry_sdk", "prost 0.14.1", - "reqwest", + "reqwest 0.12.24", "thiserror 2.0.17", "tokio", "tonic 0.14.2", @@ -7389,6 +7974,12 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-multimap" version = "0.7.3" @@ -7418,6 +8009,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" +dependencies = [ + "memchr", +] + [[package]] name = "outref" version = "0.5.2" @@ -7435,6 +8035,16 @@ dependencies = [ "sha2", ] +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "panic-message" version = "0.3.0" @@ -7474,7 +8084,7 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "instant", "libc", "redox_syscall 0.2.16", @@ -7488,7 +8098,7 @@ version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "libc", "redox_syscall 0.5.18", "smallvec", @@ -7814,11 +8424,11 @@ version = "3.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "concurrent-queue", "hermit-abi", "pin-project-lite", - "rustix 1.1.2", + "rustix 1.1.3", "windows-sys 0.61.2", ] @@ -7955,13 +8565,24 @@ dependencies = [ "hex", ] +[[package]] +name = "progress_tracking" +version = "0.1.0" +dependencies = [ + "async-trait", + "merklehash", + "more-asserts", + "tokio", + "utils", +] + [[package]] name = "prometheus" version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "fnv", "lazy_static", "memchr", @@ -7975,7 +8596,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "fnv", "lazy_static", "libc", @@ -8259,6 +8880,7 @@ version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", @@ -8411,6 +9033,16 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rand_hc" version = "0.2.0" @@ -8489,7 +9121,7 @@ dependencies = [ "arcstr", "backon", "bytes", - "cfg-if", + "cfg-if 1.0.4", "combine", "crc16", "futures-channel", @@ -8532,6 +9164,15 @@ dependencies = [ "bitflags 2.10.0", ] +[[package]] +name = "redox_syscall" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b" +dependencies = [ + "bitflags 2.10.0", +] + [[package]] name = "redox_users" version = "0.4.6" @@ -8543,6 +9184,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 2.0.17", +] + [[package]] name = "ref-cast" version = "1.0.25" @@ -8569,9 +9221,9 @@ version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23bbed272e39c47a095a5242218a67412a220006842558b03fe2935e8f3d7b92" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "libc", - "rustix 1.1.2", + "rustix 1.1.3", "windows 0.62.2", ] @@ -8640,7 +9292,7 @@ dependencies = [ "once_cell", 
"percent-encoding", "rand 0.8.5", - "reqwest", + "reqwest 0.12.24", "rsa", "serde", "serde_json", @@ -8734,7 +9386,7 @@ dependencies = [ "http 1.4.0", "http-body-util", "reqsign-core", - "reqwest", + "reqwest 0.12.24", "wasm-bindgen-futures", ] @@ -8782,29 +9434,113 @@ dependencies = [ "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams", + "wasm-streams 0.4.2", "web-sys", "webpki-roots 1.0.4", ] [[package]] -name = "resolv-conf" -version = "0.7.6" +name = "reqwest" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" +checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.35", + "rustls-pki-types", + "rustls-platform-verifier", + "serde", + "serde_json", + "sync_wrapper 1.0.2", + "tokio", + "tokio-rustls 0.26.4", + "tokio-util", + "tower 0.5.2", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams 0.5.0", + "web-sys", +] [[package]] -name = "revision" -version = "0.11.0" +name = "reqwest-middleware" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b8ee532f15b2f0811eb1a50adf10d036e14a6cdae8d99893e7f3b921cb227d" +checksum = "199dda04a536b532d0cc04d7979e39b1c763ea749bf91507017069c00b96056f" dependencies = [ - "chrono", - "geo", - "regex", - "revision-derive", - "roaring", - "rust_decimal", + "anyhow", + "async-trait", + "http 1.4.0", + "reqwest 0.13.2", + "thiserror 2.0.17", + "tower-service", +] + +[[package]] +name = "reqwest-retry" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fe2412db2af7d2268e7a5406be0431f37d9eb67ff390f35b395716f5f06c2eaa" +dependencies = [ + "anyhow", + "async-trait", + "futures", + "getrandom 0.2.16", + "http 1.4.0", + "hyper 1.8.1", + "reqwest 0.13.2", + "reqwest-middleware", + "retry-policies", + "thiserror 2.0.17", + "tokio", + "tracing", + "wasmtimer 0.4.3", +] + +[[package]] +name = "resolv-conf" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" + +[[package]] +name = "retry-policies" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a4bd6027df676bcb752d3724db0ea3c0c5fc1dd0376fec51ac7dcaf9cc69be" +dependencies = [ + "rand 0.9.2", +] + +[[package]] +name = "revision" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b8ee532f15b2f0811eb1a50adf10d036e14a6cdae8d99893e7f3b921cb227d" +dependencies = [ + "chrono", + "geo", + "regex", + "revision-derive", + "roaring", + "rust_decimal", "uuid", ] @@ -8837,7 +9573,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "getrandom 0.2.16", "libc", "untrusted", @@ -9024,7 +9760,7 @@ version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "ordered-multimap", ] @@ -9109,9 +9845,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags 2.10.0", "errno", @@ -9191,6 +9927,33 @@ dependencies = [ 
"zeroize", ] +[[package]] +name = "rustls-platform-verifier" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls 0.23.35", + "rustls-native-certs 0.8.3", + "rustls-platform-verifier-android", + "rustls-webpki 0.103.8", + "security-framework 3.5.1", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -9225,6 +9988,21 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "safe-transmute" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3944826ff8fa8093089aba3acb4ef44b9446a99a16f3bf4e74af3f77d340ab7d" + +[[package]] +name = "safe_arch" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b02de82ddbe1b636e6170c21be622223aea188ef2e139be0a5b219ec215323" +dependencies = [ + "bytemuck", +] + [[package]] name = "salsa20" version = "0.10.2" @@ -9463,6 +10241,28 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "serde_spanned" version = "1.0.4" @@ -9521,7 +10321,7 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "cpufeatures", "digest", ] @@ -9532,7 +10332,7 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "cpufeatures", "digest", ] @@ -9549,9 +10349,19 @@ version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "cpufeatures", "digest", + "sha2-asm", +] + +[[package]] +name = "sha2-asm" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b845214d6175804686b2bd482bcffe96651bb2d1200742b712003504a2dac1ab" +dependencies = [ + "cc", ] [[package]] @@ -9569,6 +10379,17 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45bb67a18fa91266cc7807181f62f9178a6873bfad7dc788c42e6430db40184f" +[[package]] +name = "shellexpand" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb" +dependencies = [ + "bstr", + "dirs", + "os_str_bytes", +] + [[package]] name = "shlex" version = "1.3.0" @@ -9604,6 +10425,19 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "simba" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c99284beb21666094ba2b75bbceda012e610f5479dfcc2d6e2426f53197ffd95" +dependencies = [ + "approx 0.5.1", 
+ "num-complex", + "num-traits", + "paste", + "wide", +] + [[package]] name = "simdutf8" version = "0.1.5" @@ -9897,7 +10731,7 @@ dependencies = [ "stringprep", "thiserror 2.0.17", "tracing", - "whoami", + "whoami 1.6.1", ] [[package]] @@ -9934,7 +10768,7 @@ dependencies = [ "stringprep", "thiserror 2.0.17", "tracing", - "whoami", + "whoami 1.6.1", ] [[package]] @@ -10011,7 +10845,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "libc", "psm", "windows-sys 0.59.0", @@ -10029,6 +10863,18 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7beae5182595e9a8b683fa98c4317f956c9a2dec3b9716990d20023cc60c766" +[[package]] +name = "statrs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e" +dependencies = [ + "approx 0.5.1", + "nalgebra", + "num-traits", + "rand 0.8.5", +] + [[package]] name = "storekey" version = "0.5.0" @@ -10175,7 +11021,7 @@ dependencies = [ "path-clean", "pharos", "reblessive", - "reqwest", + "reqwest 0.12.24", "revision", "ring", "rust_decimal", @@ -10195,7 +11041,7 @@ dependencies = [ "url", "uuid", "wasm-bindgen-futures", - "wasmtimer", + "wasmtimer 0.2.1", "ws_stream_wasm", ] @@ -10283,7 +11129,7 @@ dependencies = [ "uuid", "vart", "wasm-bindgen-futures", - "wasmtimer", + "wasmtimer 0.2.1", "ws_stream_wasm", ] @@ -10402,6 +11248,15 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synchronoise" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dbc01390fc626ce8d1cffe3376ded2b72a11bb70e1c75f404a210e4daa4def2" +dependencies = [ + "crossbeam-queue", +] + [[package]] name = "synstructure" version = "0.13.2" @@ -10477,14 +11332,14 @@ dependencies = [ [[package]] name = "tempfile" -version 
= "3.23.0" +version = "3.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", "getrandom 0.3.4", "once_cell", - "rustix 1.1.2", + "rustix 1.1.3", "windows-sys 0.61.2", ] @@ -10516,7 +11371,7 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" dependencies = [ - "rustix 1.1.2", + "rustix 1.1.3", "windows-sys 0.60.2", ] @@ -10572,7 +11427,7 @@ version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -10698,9 +11553,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.48.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ "bytes", "libc", @@ -10754,6 +11609,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-retry" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f" +dependencies = [ + "pin-project", + "rand 0.8.5", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.24.1" @@ -11006,9 +11872,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.7" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf146f99d442e8e68e585f5d798ccd3cad9a7835b917e09728880a862706456" +checksum = 
"d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "bitflags 2.10.0", "bytes", @@ -11097,6 +11963,16 @@ dependencies = [ "web-time", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.22" @@ -11107,12 +11983,15 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex-automata", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] @@ -11230,6 +12109,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typewit" +version = "1.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" + [[package]] name = "ucd-trie" version = "0.1.7" @@ -11362,6 +12247,32 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "utils" +version = "0.14.5" +dependencies = [ + "async-trait", + "bincode", + "bytes", + "chrono", + "ctor", + "derivative", + "duration-str", + "error_printer", + "futures", + "lazy_static", + "merklehash", + "pin-project", + "rand 0.9.2", + "serde", + "shellexpand", + "thiserror 2.0.17", + "tokio", + "tokio-util", + "tracing", + "web-time", +] + [[package]] name = "uuid" version = "1.19.0" @@ -11470,6 +12381,35 @@ dependencies = [ "try-lock", ] +[[package]] +name = "warp" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d06d9202adc1f15d709c4f4a2069be5428aa912cc025d6f268ac441ab066b0" 
+dependencies = [ + "bytes", + "futures-util", + "headers", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "log", + "mime", + "mime_guess", + "percent-encoding", + "pin-project", + "scoped-tls", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-util", + "tower-service", + "tracing", +] + [[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" @@ -11482,13 +12422,31 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.46.0", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", ] [[package]] @@ -11497,6 +12455,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" +[[package]] +name = "wasite" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" +dependencies = [ + "wasi 0.14.7+wasi-0.2.4", +] + [[package]] name = "wasix" version = "0.12.21" @@ -11508,11 +12475,11 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = 
"0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "once_cell", "rustversion", "wasm-bindgen-macro", @@ -11521,11 +12488,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -11534,9 +12502,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -11544,9 +12512,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", @@ -11557,18 +12525,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] [[package]] name = 
"wasm-bindgen-test" -version = "0.3.56" +version = "0.3.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25e90e66d265d3a1efc0e72a54809ab90b9c0c515915c67cdf658689d2c22c6c" +checksum = "45649196a53b0b7a15101d845d44d2dda7374fc1b5b5e2bbf58b7577ff4b346d" dependencies = [ "async-trait", "cast", @@ -11583,19 +12551,48 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", + "wasm-bindgen-test-shared", ] [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.56" +version = "0.3.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7150335716dce6028bead2b848e72f47b45e7b9422f64cccdc23bedca89affc1" +checksum = "f579cdd0123ac74b94e1a4a72bd963cf30ebac343f2df347da0b8df24cdebed2" dependencies = [ "proc-macro2", "quote", "syn 2.0.111", ] +[[package]] +name = "wasm-bindgen-test-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8145dd1593bf0fb137dbfa85b8be79ec560a447298955877804640e40c2d6ea" + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.12.1", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" version = "0.4.2" @@ -11609,6 +12606,31 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasm-streams" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + 
"web-sys", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.10.0", + "hashbrown 0.15.5", + "indexmap 2.12.1", + "semver", +] + [[package]] name = "wasmtimer" version = "0.2.1" @@ -11622,6 +12644,20 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "wasmtimer" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c598d6b99ea013e35844697fc4670d08339d5cda15588f193c6beedd12f644b" +dependencies = [ + "futures", + "js-sys", + "parking_lot 0.12.5", + "pin-utils", + "slab", + "wasm-bindgen", +] + [[package]] name = "weak-table" version = "0.3.2" @@ -11630,9 +12666,9 @@ checksum = "323f4da9523e9a669e1eaf9c6e763892769b1d38c623913647bfdc1532fe4549" [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -11660,6 +12696,15 @@ dependencies = [ "string_cache_codegen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" version = "0.26.11" @@ -11685,10 +12730,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" dependencies = [ "libredox", - "wasite", + "wasite 0.1.0", + "web-sys", +] + +[[package]] +name = "whoami" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d6a5b12f9df4f978d2cfdb1bd3bac52433f44393342d7ee9c25f5a1c14c0f45d" +dependencies = [ + "libc", + "libredox", + "objc2-system-configuration", + "wasite 1.0.2", "web-sys", ] +[[package]] +name = "wide" +version = "0.7.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce5da8ecb62bcd8ec8b7ea19f69a51275e91299be594ea5cc6ef7819e16cd03" +dependencies = [ + "bytemuck", + "safe_arch", +] + [[package]] name = "widestring" version = "1.2.1" @@ -11900,6 +12968,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -11945,6 +13022,21 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -12002,6 +13094,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -12020,6 +13118,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -12038,6 +13142,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -12068,6 +13178,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -12086,6 +13202,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -12104,6 +13226,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = 
"windows_x86_64_gnullvm" version = "0.48.5" @@ -12122,6 +13250,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -12155,7 +13289,7 @@ version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "windows-sys 0.48.0", ] @@ -12165,6 +13299,94 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap 2.12.1", + "prettyplease", + "syn 2.0.111", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.111", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.10.0", + "indexmap 2.12.1", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap 2.12.1", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + [[package]] name = "writeable" version = "0.6.2" @@ -12206,7 +13428,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix 1.1.2", + "rustix 1.1.3", +] + +[[package]] +name = "xet_config" +version = "0.14.5" +dependencies = [ + "const-str", + "konst", + "utils", +] + +[[package]] +name = "xet_runtime" +version = "0.1.0" +dependencies = [ + "dirs", + "error_printer", + "libc", + "oneshot", + "reqwest 0.13.2", + "thiserror 2.0.17", + "tokio", + "tracing", + "utils", + "xet_config", ] [[package]] diff --git a/core/Cargo.toml b/core/Cargo.toml index 93be3579f2e0..55e4754db988 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -147,8 +147,10 @@ services-gridfs = ["dep:opendal-service-gridfs"] services-hdfs = ["dep:opendal-service-hdfs"] services-hdfs-native = ["dep:opendal-service-hdfs-native"] services-hf = ["dep:opendal-service-hf"] +services-hf-xet = 
["dep:opendal-service-hf", "opendal-service-hf?/xet"] services-http = ["dep:opendal-service-http"] services-huggingface = ["services-hf"] +services-huggingface-xet = ["services-hf-xet"] services-ipfs = ["dep:opendal-service-ipfs"] services-ipmfs = ["dep:opendal-service-ipmfs"] services-koofr = ["dep:opendal-service-koofr"] diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index cb42c287e23b..b274098c0cf7 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -30,7 +30,20 @@ version = { workspace = true } [package.metadata.docs.rs] all-features = true +[features] +default = [] +xet = [ + "dep:reqwest", + "dep:xet-data", + "dep:cas_types", + "dep:xet-utils", + "dep:tokio", + "dep:futures", + "dep:async-trait", +] + [dependencies] +base64 = { workspace = true } bytes = { workspace = true } http = { workspace = true } log = { workspace = true } @@ -38,7 +51,23 @@ opendal-core = { path = "../../core", version = "0.55.0", default-features = fal percent-encoding = "2" serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } +sha2 = "0.10" +tempfile = "3" + +# XET storage protocol support (optional) +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"], optional = true } +# xet-data = { package = "data", git = "https://github.com/huggingface/xet-core", optional = true } +# cas_types = { git = "https://github.com/huggingface/xet-core", optional = true } +# xet-utils = { package = "utils", git = "https://github.com/huggingface/xet-core", optional = true } +xet-data = { package = "data", path = "/Users/kszucs/Workspace/xet-core/data", optional = true } +cas_types = { path = "/Users/kszucs/Workspace/xet-core/cas_types", optional = true } +xet-utils = { package = "utils", path = "/Users/kszucs/Workspace/xet-core/utils", optional = true } +futures = { workspace = true, optional = true } +async-trait = { version = "0.1", optional = true } +tokio = { workspace = true, features = 
["sync", "rt"], optional = true } [dev-dependencies] futures = { workspace = true } +opendal-core = { path = "../../core", version = "0.55.0", features = ["reqwest-rustls-tls"] } +serde_json = { workspace = true } tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 1f79dec7bf2e..629dc00ecb85 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -17,17 +17,15 @@ use std::sync::Arc; -use bytes::Buf; -use http::Response; -use http::StatusCode; use log::debug; use super::HF_SCHEME; use super::config::HfConfig; use super::core::HfCore; -use super::core::HfStatus; -use super::error::parse_error; use super::lister::HfLister; +use super::reader::HfReader; +use super::uri::{HfRepo, RepoType}; +use super::writer::HfWriter; use opendal_core::raw::*; use opendal_core::*; @@ -50,7 +48,9 @@ impl HfBuilder { /// [Reference](https://huggingface.co/docs/hub/repositories) pub fn repo_type(mut self, repo_type: &str) -> Self { if !repo_type.is_empty() { - self.config.repo_type = Some(repo_type.to_string()); + if let Ok(rt) = RepoType::parse(repo_type) { + self.config.repo_type = rt; + } } self } @@ -118,27 +118,26 @@ impl HfBuilder { } self } + + /// Enable XET storage protocol for reads. + /// + /// When true and the `xet` feature is compiled in, reads will + /// check for XET-backed files and use the XET protocol for + /// downloading. Default is false. + pub fn xet(mut self, xet: bool) -> Self { + self.config.xet = xet; + self + } } impl Builder for HfBuilder { type Config = HfConfig; - /// Build an HfBackend. + /// Build a HfBackend. 
fn build(self) -> Result { debug!("backend build started: {:?}", &self); - let repo_type = match self.config.repo_type.as_deref() { - Some("model") => Ok(RepoType::Model), - Some("dataset") | Some("datasets") => Ok(RepoType::Dataset), - Some("space") => Ok(RepoType::Space), - Some(repo_type) => Err(Error::new( - ErrorKind::ConfigInvalid, - format!("unknown repo_type: {repo_type}").as_str(), - ) - .with_operation("Builder::build") - .with_context("service", HF_SCHEME)), - None => Ok(RepoType::Model), - }?; + let repo_type = self.config.repo_type; debug!("backend use repo_type: {:?}", &repo_type); let repo_id = match &self.config.repo_id { @@ -174,26 +173,30 @@ impl Builder for HfBuilder { }; debug!("backend use endpoint: {}", &endpoint); + let info: Arc = { + let am = AccessorInfo::default(); + am.set_scheme(HF_SCHEME) + .set_native_capability(Capability { + stat: true, + read: true, + write: true, + list: true, + list_with_recursive: true, + shared: true, + ..Default::default() + }); + am.into() + }; + Ok(HfBackend { core: Arc::new(HfCore { - info: { - let am = AccessorInfo::default(); - am.set_scheme(HF_SCHEME).set_native_capability(Capability { - stat: true, - read: true, - list: true, - list_with_recursive: true, - shared: true, - ..Default::default() - }); - am.into() - }, - repo_type, - repo_id, - revision, + info, + repo: HfRepo::new(repo_type, repo_id, Some(revision)), root, token, endpoint, + #[cfg(feature = "xet")] + xet_enabled: self.config.xet, }), }) } @@ -206,8 +209,8 @@ pub struct HfBackend { } impl Access for HfBackend { - type Reader = HttpBody; - type Writer = (); + type Reader = HfReader; + type Writer = oio::OneShotWriter; type Lister = oio::PageLister; type Deleter = (); @@ -221,64 +224,13 @@ impl Access for HfBackend { return Ok(RpStat::new(Metadata::new(EntryMode::DIR))); } - let resp = self.core.hf_path_info(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => { - let mut meta = parse_into_metadata(path, 
resp.headers())?; - let bs = resp.into_body(); - - let decoded_response: Vec = - serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; - - // NOTE: if the file is not found, the server will return 200 with an empty array - if let Some(status) = decoded_response.first() { - if let Some(commit_info) = status.last_commit.as_ref() { - meta.set_last_modified(commit_info.date.parse::()?); - } - - meta.set_content_length(status.size); - - // Use LFS OID as ETag if available, otherwise use regular OID - let etag = if let Some(lfs) = &status.lfs { - &lfs.oid - } else { - &status.oid - }; - meta.set_etag(etag); - - match status.type_.as_str() { - "directory" => meta.set_mode(EntryMode::DIR), - "file" => meta.set_mode(EntryMode::FILE), - _ => return Err(Error::new(ErrorKind::Unexpected, "unknown status type")), - }; - } else { - return Err(Error::new(ErrorKind::NotFound, "path not found")); - } - - Ok(RpStat::new(meta)) - } - _ => Err(parse_error(resp)), - } + let info = self.core.path_info(path).await?; + Ok(RpStat::new(info.metadata()?)) } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.hf_resolve(path, args.range(), &args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - Ok((RpRead::default(), resp.into_body())) - } - _ => { - let (part, mut body) = resp.into_parts(); - let buf = body.to_buffer().await?; - Err(parse_error(Response::from_parts(part, buf))) - } - } + let reader = HfReader::try_new(&self.core, path, args.range()).await?; + Ok((RpRead::default(), reader)) } async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { @@ -286,15 +238,11 @@ impl Access for HfBackend { Ok((RpList::default(), oio::PageLister::new(l))) } -} -/// Repository type of Hugging Face. Supports `model`, `dataset`, and `space`. 
-/// [Reference](https://huggingface.co/docs/hub/repositories) -#[derive(Debug, Clone, Copy)] -pub enum RepoType { - Model, - Dataset, - Space, + async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { + let writer = HfWriter::new(&self.core, path, args); + Ok((RpWrite::default(), oio::OneShotWriter::new(writer))) + } } #[cfg(test)] @@ -318,4 +266,97 @@ mod tests { .build() .expect("builder should accept space repo type"); } + + #[test] + fn test_both_schemes_are_supported() { + use opendal_core::OperatorRegistry; + + let registry = OperatorRegistry::new(); + super::super::register_hf_service(®istry); + + // Test short scheme "hf" + let op = registry + .load("hf://user/repo") + .expect("short scheme should be registered and work"); + assert_eq!(op.info().scheme(), "hf"); + + // Test long scheme "huggingface" + let op = registry + .load("huggingface://user/repo") + .expect("long scheme should be registered and work"); + assert_eq!(op.info().scheme(), "hf"); + } + + /// Parquet magic bytes: "PAR1" + const PARQUET_MAGIC: &[u8] = b"PAR1"; + + fn mbpp_operator() -> Operator { + let builder = HfBuilder::default() + .repo_type("dataset") + .repo_id("google-research-datasets/mbpp") + .revision("main") + .root("/"); + + Operator::new(builder).unwrap().finish() + } + + #[tokio::test] + #[ignore = "requires network access"] + async fn test_read_parquet_http() { + let op = mbpp_operator(); + let path = "full/train-00000-of-00001.parquet"; + + let meta = op.stat(path).await.expect("stat should succeed"); + assert!(meta.content_length() > 0); + + // Read the first 4 bytes to check parquet header magic + let header = op + .read_with(path) + .range(0..4) + .await + .expect("read header should succeed"); + assert_eq!(&header.to_vec(), PARQUET_MAGIC); + + // Read the last 4 bytes to check parquet footer magic + let size = meta.content_length(); + let footer = op + .read_with(path) + .range(size - 4..size) + .await + .expect("read footer should 
succeed"); + assert_eq!(&footer.to_vec(), PARQUET_MAGIC); + } + + #[cfg(feature = "xet")] + fn mbpp_operator_xet() -> Operator { + let repo_id = std::env::var("HF_OPENDAL_DATASET") + .unwrap_or_else(|_| "google-research-datasets/mbpp".to_string()); + let mut builder = HfBuilder::default() + .repo_type("dataset") + .repo_id(&repo_id) + .revision("main") + .root("/") + .xet(true); + + if let Ok(token) = std::env::var("HF_OPENDAL_TOKEN") { + builder = builder.token(&token); + } + + Operator::new(builder).unwrap().finish() + } + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore = "requires network access"] + async fn test_read_parquet_xet() { + let op = mbpp_operator_xet(); + let path = "full/train-00000-of-00001.parquet"; + + // Full read via XET and verify parquet magic at both ends + let data = op.read(path).await.expect("xet read should succeed"); + let bytes = data.to_vec(); + assert!(bytes.len() > 8); + assert_eq!(&bytes[..4], PARQUET_MAGIC); + assert_eq!(&bytes[bytes.len() - 4..], PARQUET_MAGIC); + } } diff --git a/core/services/hf/src/config.rs b/core/services/hf/src/config.rs index 15fb405e4679..d174db75aa14 100644 --- a/core/services/hf/src/config.rs +++ b/core/services/hf/src/config.rs @@ -22,6 +22,8 @@ use serde::Serialize; use super::HF_SCHEME; use super::backend::HfBuilder; +use super::uri::HfUri; +use super::uri::RepoType; /// Configuration for Hugging Face service support. #[derive(Default, Serialize, Deserialize, Clone, PartialEq, Eq)] @@ -30,11 +32,8 @@ use super::backend::HfBuilder; pub struct HfConfig { /// Repo type of this backend. Default is model. /// - /// Available values: - /// - model - /// - dataset - /// - datasets (alias for dataset) - pub repo_type: Option, + /// Default is model + pub repo_type: RepoType, /// Repo id of this backend. /// /// This is required. @@ -46,6 +45,7 @@ pub struct HfConfig { /// Root of this backend. Can be "/path/to/dir". /// /// Default is "/". 
+ /// TODO(kszucs): consider removing it pub root: Option, /// Token of this backend. /// @@ -55,6 +55,12 @@ pub struct HfConfig { /// /// Default is "https://huggingface.co". pub endpoint: Option, + /// Enable XET storage protocol for reads. + /// + /// When true and the `xet` feature is compiled in, reads will + /// check for XET-backed files and use the XET protocol for + /// downloading. Default is false. + pub xet: bool, } impl Debug for HfConfig { @@ -72,57 +78,31 @@ impl opendal_core::Configurator for HfConfig { type Builder = HfBuilder; fn from_uri(uri: &opendal_core::OperatorUri) -> opendal_core::Result { - let mut map = uri.options().clone(); - map.retain(|_, v| !v.is_empty()); - - if let Some(repo_type) = uri.name() { - if !repo_type.is_empty() { - map.insert("repo_type".to_string(), repo_type.to_string()); + // Reconstruct the full path from authority (name) and root. + // OperatorUri splits "hf://datasets/user/repo" into + // name="datasets" and root="user/repo". + let mut path = String::new(); + if let Some(name) = uri.name() { + if !name.is_empty() { + path.push_str(name); } } - - if let Some(raw_path) = uri.root() { - let parts: Vec<_> = raw_path.split('/').filter(|s| !s.is_empty()).collect(); - - if parts.len() >= 2 { - map.insert("repo_id".to_string(), format!("{}/{}", parts[0], parts[1])); - - if parts.len() >= 3 { - if map.contains_key("revision") { - let root_value = parts[2..].join("/"); - if !root_value.is_empty() { - map.insert("root".to_string(), root_value); - } - } else { - map.insert("revision".to_string(), parts[2].to_string()); - if parts.len() > 3 { - let root_value = parts[3..].join("/"); - if !root_value.is_empty() { - map.insert("root".to_string(), root_value); - } - } - } + if let Some(root) = uri.root() { + if !root.is_empty() { + if !path.is_empty() { + path.push('/'); } - } else if parts.is_empty() { - // no owner/repo provided, fall back to options-only - } else { - return Err(opendal_core::Error::new( - 
opendal_core::ErrorKind::ConfigInvalid, - "repository owner and name are required in uri path", - ) - .with_context("service", HF_SCHEME)); + path.push_str(root); } } - if !map.contains_key("repo_id") { - return Err(opendal_core::Error::new( - opendal_core::ErrorKind::ConfigInvalid, - "repo_id is required via uri path or option", - ) - .with_context("service", HF_SCHEME)); - } - - Self::from_iter(map) + let parsed = HfUri::parse(&path)?; + Ok(Self { + repo_type: parsed.repo.repo_type, + repo_id: Some(parsed.repo.repo_id), + revision: parsed.repo.revision, + ..Default::default() + }) } fn into_builder(self) -> Self::Builder { @@ -133,78 +113,22 @@ impl opendal_core::Configurator for HfConfig { #[cfg(test)] mod tests { use super::*; - use opendal_core::Configurator; - use opendal_core::OperatorUri; #[test] - fn from_uri_sets_repo_type_id_and_revision() { - let uri = OperatorUri::new( - "hf://model/opendal/sample/main/dataset", - Vec::<(String, String)>::new(), - ) - .unwrap(); + fn from_uri_with_all_components() { + use opendal_core::Configurator; + use opendal_core::OperatorUri; - let cfg = HfConfig::from_uri(&uri).unwrap(); - assert_eq!(cfg.repo_type.as_deref(), Some("model")); - assert_eq!(cfg.repo_id.as_deref(), Some("opendal/sample")); - assert_eq!(cfg.revision.as_deref(), Some("main")); - assert_eq!(cfg.root.as_deref(), Some("dataset")); - } - - #[test] - fn from_uri_uses_existing_revision_and_sets_root() { let uri = OperatorUri::new( - "hf://dataset/opendal/sample/data/train", - vec![("revision".to_string(), "dev".to_string())], + "hf://datasets/username/my_dataset@dev/train/data.csv", + Vec::<(String, String)>::new(), ) .unwrap(); let cfg = HfConfig::from_uri(&uri).unwrap(); - assert_eq!(cfg.repo_type.as_deref(), Some("dataset")); - assert_eq!(cfg.repo_id.as_deref(), Some("opendal/sample")); + assert_eq!(cfg.repo_type, RepoType::Dataset); + assert_eq!(cfg.repo_id.as_deref(), Some("username/my_dataset")); assert_eq!(cfg.revision.as_deref(), Some("dev")); - 
assert_eq!(cfg.root.as_deref(), Some("data/train")); - } - - #[test] - fn from_uri_allows_options_only() { - let uri = OperatorUri::new( - "hf", - vec![ - ("repo_type".to_string(), "model".to_string()), - ("repo_id".to_string(), "opendal/sample".to_string()), - ("revision".to_string(), "main".to_string()), - ("root".to_string(), "".to_string()), - ], - ) - .unwrap(); - - let cfg = HfConfig::from_uri(&uri).unwrap(); - assert_eq!(cfg.repo_type.as_deref(), Some("model")); - assert_eq!(cfg.repo_id.as_deref(), Some("opendal/sample")); - assert_eq!(cfg.revision.as_deref(), Some("main")); assert!(cfg.root.is_none()); } - - #[test] - fn from_uri_requires_owner_and_repo() { - let uri = OperatorUri::new("hf://model/opendal", Vec::<(String, String)>::new()).unwrap(); - - assert!(HfConfig::from_uri(&uri).is_err()); - } - - #[test] - fn from_uri_huggingface_alias_works() { - let uri = OperatorUri::new( - "huggingface://model/opendal/sample/main/dataset", - Vec::<(String, String)>::new(), - ) - .unwrap(); - - let cfg = HfConfig::from_uri(&uri).unwrap(); - assert_eq!(cfg.repo_type.as_deref(), Some("model")); - assert_eq!(cfg.repo_id.as_deref(), Some("opendal/sample")); - assert_eq!(cfg.revision.as_deref(), Some("main")); - assert_eq!(cfg.root.as_deref(), Some("dataset")); - } } diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index 55ad405efb2d..adfc3ee02272 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -18,325 +18,503 @@ use std::fmt::Debug; use std::sync::Arc; +use bytes::Buf; use bytes::Bytes; use http::Request; -use http::Response; use http::header; -use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; use serde::Deserialize; -use super::backend::RepoType; +#[cfg(feature = "xet")] +use xet_utils::auth::TokenRefresher; + +use super::error::parse_error; +use super::uri::HfRepo; use opendal_core::raw::*; use opendal_core::*; -fn percent_encode_revision(revision: &str) -> String { - utf8_percent_encode(revision, 
NON_ALPHANUMERIC).to_string() +/// API payload structures for preupload operations +#[derive(serde::Serialize)] +pub(super) struct PreuploadFile { + pub path: String, + pub size: u64, + pub sample: String, + #[serde(rename = "sha256")] + pub sha256: String, +} + +#[derive(serde::Serialize)] +pub(super) struct PreuploadRequest { + pub files: Vec, +} + +/// API payload structures for commit operations +#[derive(Debug, serde::Serialize)] +pub(super) struct CommitFile { + pub path: String, + pub content: String, + pub encoding: String, } +#[derive(Debug, serde::Serialize)] +pub(super) struct LfsFile { + pub path: String, + pub oid: String, + pub algo: String, + pub size: u64, +} + +#[derive(serde::Serialize)] +pub(super) struct MixedCommitPayload { + pub summary: String, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub files: Vec, + #[serde(rename = "lfsFiles", skip_serializing_if = "Vec::is_empty")] + pub lfs_files: Vec, +} + +// API response types + +#[derive(serde::Deserialize, Debug)] +pub(super) struct PreuploadFileResponse { + #[allow(dead_code)] + pub path: String, + #[serde(rename = "uploadMode")] + pub upload_mode: String, +} + +#[derive(serde::Deserialize, Debug)] +pub(super) struct PreuploadResponse { + pub files: Vec, +} + +#[derive(Deserialize, Eq, PartialEq, Debug)] +#[serde(rename_all = "camelCase")] +pub(super) struct PathInfo { + #[serde(rename = "type")] + pub type_: String, + pub oid: String, + pub size: u64, + #[serde(default)] + pub lfs: Option, + pub path: String, + #[serde(default)] + pub last_commit: Option, +} + +impl PathInfo { + pub fn entry_mode(&self) -> EntryMode { + match self.type_.as_str() { + "directory" => EntryMode::DIR, + "file" => EntryMode::FILE, + _ => EntryMode::Unknown, + } + } + + pub fn metadata(&self) -> Result { + let mode = self.entry_mode(); + let mut meta = Metadata::new(mode); + + if let Some(commit_info) = self.last_commit.as_ref() { + meta.set_last_modified(commit_info.date.parse::()?); + } + + if mode == 
EntryMode::FILE { + meta.set_content_length(self.size); + let etag = if let Some(lfs) = &self.lfs { + &lfs.oid + } else { + &self.oid + }; + meta.set_etag(etag); + } + + Ok(meta) + } +} + +#[derive(Deserialize, Eq, PartialEq, Debug)] +pub(super) struct LfsInfo { + pub oid: String, +} + +#[derive(Deserialize, Eq, PartialEq, Debug)] +pub(super) struct LastCommit { + pub date: String, +} + +/// Response from the tree/list API endpoint +#[derive(Debug)] +pub(super) struct FileTree { + pub files: Vec, + pub next_cursor: Option, +} + +#[cfg(feature = "xet")] +#[derive(Clone, Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(super) struct XetToken { + pub access_token: String, + pub cas_url: String, + pub exp: u64, +} + +#[cfg(feature = "xet")] +#[derive(Clone, Debug)] +pub(super) struct XetFile { + pub hash: String, + pub size: u64, +} + +// Core HuggingFace client that manages API interactions, authentication +// and shared logic for reader/writer/lister. + +#[derive(Clone)] pub struct HfCore { pub info: Arc, - pub repo_type: RepoType, - pub repo_id: String, - pub revision: String, + pub repo: HfRepo, pub root: String, pub token: Option, pub endpoint: String, + + #[cfg(feature = "xet")] + pub xet_enabled: bool, } impl Debug for HfCore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("HfCore") - .field("repo_type", &self.repo_type) - .field("repo_id", &self.repo_id) - .field("revision", &self.revision) + let mut s = f.debug_struct("HfCore"); + s.field("repo", &self.repo) .field("root", &self.root) - .field("endpoint", &self.endpoint) - .finish_non_exhaustive() + .field("endpoint", &self.endpoint); + #[cfg(feature = "xet")] + s.field("xet_enabled", &self.xet_enabled); + s.finish_non_exhaustive() } } -impl HfCore { - pub async fn hf_path_info(&self, path: &str) -> Result> { - let p = build_abs_path(&self.root, path) - .trim_end_matches('/') - .to_string(); - - let url = match self.repo_type { - RepoType::Model => format!( 
- "{}/api/models/{}/paths-info/{}", - &self.endpoint, - &self.repo_id, - percent_encode_revision(&self.revision) - ), - RepoType::Dataset => format!( - "{}/api/datasets/{}/paths-info/{}", - &self.endpoint, - &self.repo_id, - percent_encode_revision(&self.revision) - ), - RepoType::Space => format!( - "{}/api/spaces/{}/paths-info/{}", - &self.endpoint, - &self.repo_id, - percent_encode_revision(&self.revision) - ), - }; +/// Extract the cursor value from a Link header's "next" URL. +fn parse_next_cursor(link_str: &str) -> Option { + for link in link_str.split(',') { + if link.contains("rel=\"next\"") || link.contains("rel='next'") { + let (_, rest) = link.split_once('<')?; + let (url, _) = rest.split_once('>')?; + let query = url.split_once('?')?.1; + return query + .split('&') + .find_map(|p| p.strip_prefix("cursor=")) + .map(|v| v.to_string()); + } + } + None +} - let mut req = Request::post(&url); - // Inject operation to the request. - req = req.extension(Operation::Stat); +impl HfCore { + /// Build an authenticated HTTP request. + pub(super) fn request( + &self, + method: http::Method, + url: &str, + op: Operation, + ) -> http::request::Builder { + let mut req = Request::builder().method(method).uri(url).extension(op); if let Some(token) = &self.token { - let auth_header_content = format_authorization_by_bearer(token)?; - req = req.header(header::AUTHORIZATION, auth_header_content); + if let Ok(auth) = format_authorization_by_bearer(token) { + req = req.header(header::AUTHORIZATION, auth); + } } + req + } - req = req.header(header::CONTENT_TYPE, "application/x-www-form-urlencoded"); + /// Send a request, check for success, and deserialize the JSON response. + /// + /// Returns the response parts (status, headers, etc.) alongside the + /// deserialized body so callers can inspect headers when needed. 
+ async fn send_request( + &self, + req: Request, + ) -> Result<(http::response::Parts, T)> { + let resp = self.info.http_client().send(req).await?; + if !resp.status().is_success() { + return Err(parse_error(resp)); + } + let (parts, body) = resp.into_parts(); + let parsed = serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; + Ok((parts, parsed)) + } - let req_body = format!("paths={}&expand=True", percent_encode_path(&p)); + pub async fn path_info(&self, path: &str) -> Result { + let uri = self.repo.uri(&self.root, path); + let url = uri.paths_info_url(&self.endpoint); + let form_body = format!("paths={}&expand=True", percent_encode_path(&uri.path)); - let req = req - .body(Buffer::from(Bytes::from(req_body))) + let req = self + .request(http::Method::POST, &url, Operation::Stat) + .header(header::CONTENT_TYPE, "application/x-www-form-urlencoded") + .body(Buffer::from(Bytes::from(form_body))) .map_err(new_request_build_error)?; + let (_, mut files) = self.send_request::>(req).await?; + + // NOTE: if the file is not found, the server will return 200 with an empty array + if files.is_empty() { + return Err(Error::new(ErrorKind::NotFound, "path not found")); + } - self.info.http_client().send(req).await + Ok(files.remove(0)) } - pub async fn hf_list( + pub async fn file_tree( &self, path: &str, recursive: bool, cursor: Option<&str>, - ) -> Result> { - let p = build_abs_path(&self.root, path) - .trim_end_matches('/') - .to_string(); - - let mut url = match self.repo_type { - RepoType::Model => format!( - "{}/api/models/{}/tree/{}/{}?expand=True", - &self.endpoint, - &self.repo_id, - percent_encode_revision(&self.revision), - percent_encode_path(&p) - ), - RepoType::Dataset => format!( - "{}/api/datasets/{}/tree/{}/{}?expand=True", - &self.endpoint, - &self.repo_id, - percent_encode_revision(&self.revision), - percent_encode_path(&p) - ), - RepoType::Space => format!( - "{}/api/spaces/{}/tree/{}/{}?expand=True", - &self.endpoint, - 
&self.repo_id, - percent_encode_revision(&self.revision), - percent_encode_path(&p) - ), - }; - - if recursive { - url.push_str("&recursive=True"); - } + ) -> Result { + let uri = self.repo.uri(&self.root, path); + let url = uri.file_tree_url(&self.endpoint, recursive, cursor); - if let Some(cursor_val) = cursor { - url.push_str(&format!("&cursor={}", cursor_val)); - } + let req = self + .request(http::Method::GET, &url, Operation::List) + .body(Buffer::new()) + .map_err(new_request_build_error)?; + let (parts, files) = self.send_request::>(req).await?; - let mut req = Request::get(&url); - // Inject operation to the request. - req = req.extension(Operation::List); - if let Some(token) = &self.token { - let auth_header_content = format_authorization_by_bearer(token)?; - req = req.header(header::AUTHORIZATION, auth_header_content); - } + let next_cursor = parts + .headers + .get(http::header::LINK) + .and_then(|v| v.to_str().ok()) + .and_then(parse_next_cursor); - let req = req.body(Buffer::new()).map_err(new_request_build_error)?; + Ok(FileTree { files, next_cursor }) + } - self.info.http_client().send(req).await + #[cfg(feature = "xet")] + pub(super) async fn get_xet_token(&self, token_type: &str) -> Result { + let url = self.repo.xet_token_url(&self.endpoint, token_type); + let req = self + .request(http::Method::GET, &url, Operation::Read) + .body(Buffer::new()) + .map_err(new_request_build_error)?; + let (_, token) = self.send_request(req).await?; + Ok(token) } - pub async fn hf_list_with_url(&self, url: &str) -> Result> { - let mut req = Request::get(url); - // Inject operation to the request. - req = req.extension(Operation::List); - if let Some(token) = &self.token { - let auth_header_content = format_authorization_by_bearer(token)?; - req = req.header(header::AUTHORIZATION, auth_header_content); - } + /// Issue a HEAD request and extract XET file info (hash and size). 
+ /// + /// Uses a custom HTTP client that does NOT follow redirects so we can + /// inspect response headers (e.g. `X-Xet-Hash`) from the 302 response. + /// + /// Returns `None` if the `X-Xet-Hash` header is absent or empty. + #[cfg(feature = "xet")] + pub(super) async fn get_xet_file(&self, path: &str) -> Result> { + let uri = self.repo.uri(&self.root, path); + let url = uri.resolve_url(&self.endpoint); + + let reqwest_client = reqwest::Client::builder() + .redirect(reqwest::redirect::Policy::none()) + .build() + .map_err(|err| { + Error::new(ErrorKind::Unexpected, "failed to build http client").set_source(err) + })?; + let client = HttpClient::with(reqwest_client); + + let req = self + .request(http::Method::HEAD, &url, Operation::Stat) + .body(Buffer::new()) + .map_err(new_request_build_error)?; + + let resp = client.send(req).await?; - let req = req.body(Buffer::new()).map_err(new_request_build_error)?; + let hash = resp + .headers() + .get("X-Xet-Hash") + .and_then(|v| v.to_str().ok()) + .filter(|s| !s.is_empty()); - self.info.http_client().send(req).await + let Some(hash) = hash else { + return Ok(None); + }; + + let size = resp + .headers() + .get("X-Linked-Size") + .or_else(|| resp.headers().get(header::CONTENT_LENGTH)) + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse::().ok()) + .unwrap_or(0); + + Ok(Some(XetFile { + hash: hash.to_string(), + size, + })) } - pub async fn hf_resolve( + /// Call the preupload API to determine upload strategy for files. 
+ pub(super) async fn preupload_files( &self, - path: &str, - range: BytesRange, - _args: &OpRead, - ) -> Result> { - let p = build_abs_path(&self.root, path) - .trim_end_matches('/') - .to_string(); - - let url = match self.repo_type { - RepoType::Model => format!( - "{}/{}/resolve/{}/{}", - &self.endpoint, - &self.repo_id, - percent_encode_revision(&self.revision), - percent_encode_path(&p) - ), - RepoType::Dataset => format!( - "{}/datasets/{}/resolve/{}/{}", - &self.endpoint, - &self.repo_id, - percent_encode_revision(&self.revision), - percent_encode_path(&p) - ), - RepoType::Space => format!( - "{}/spaces/{}/resolve/{}/{}", - &self.endpoint, - &self.repo_id, - percent_encode_revision(&self.revision), - percent_encode_path(&p) - ), - }; + files: Vec, + ) -> Result { + let _token = self.token.as_deref().ok_or_else(|| { + Error::new( + ErrorKind::PermissionDenied, + "token is required for write operations", + ) + .with_operation("preupload") + })?; + + let first_path = files + .first() + .ok_or_else(|| Error::new(ErrorKind::Unexpected, "no files to preupload"))?; + + let uri = self.repo.uri(&self.root, &first_path.path); + let url = uri.preupload_url(&self.endpoint); + + let payload = PreuploadRequest { files }; + let json_body = serde_json::to_vec(&payload).map_err(new_json_serialize_error)?; + + let req = self + .request(http::Method::POST, &url, Operation::Write) + .header(header::CONTENT_TYPE, "application/json") + .body(Buffer::from(json_body)) + .map_err(new_request_build_error)?; - let mut req = Request::get(&url); + let (_, resp) = self.send_request(req).await?; + Ok(resp) + } - if let Some(token) = &self.token { - let auth_header_content = format_authorization_by_bearer(token)?; - req = req.header(header::AUTHORIZATION, auth_header_content); + /// Commit uploaded files to the repository. 
+ pub(super) async fn commit_files( + &self, + regular_files: Vec, + lfs_files: Vec, + ) -> Result> { + let _token = self.token.as_deref().ok_or_else(|| { + Error::new( + ErrorKind::PermissionDenied, + "token is required for write operations", + ) + .with_operation("commit") + })?; + + let mut summary_paths = Vec::new(); + for file in ®ular_files { + summary_paths.push(file.path.clone()); } - - if !range.is_full() { - req = req.header(header::RANGE, range.to_header()); + for file in &lfs_files { + summary_paths.push(file.path.clone()); } - // Inject operation to the request. - let req = req.extension(Operation::Read); - let req = req.body(Buffer::new()).map_err(new_request_build_error)?; - self.info.http_client().fetch(req).await - } -} + let summary = if summary_paths.len() == 1 { + format!("Upload {} via OpenDAL", summary_paths[0]) + } else { + format!("Upload {} files via OpenDAL", summary_paths.len()) + }; -#[derive(Deserialize, Eq, PartialEq, Debug)] -#[serde(rename_all = "camelCase")] -#[allow(dead_code)] -pub(super) struct HfStatus { - #[serde(rename = "type")] - pub type_: String, - pub oid: String, - pub size: u64, - pub lfs: Option, - pub path: String, - pub last_commit: Option, - pub security: Option, -} + let client = self.info.http_client(); + // Use the first file's path to determine the commit URL + let first_path = summary_paths + .first() + .ok_or_else(|| Error::new(ErrorKind::Unexpected, "no files to commit"))?; + let uri = self.repo.uri(&self.root, first_path); + let url = uri.commit_url(&self.endpoint); + + let payload = MixedCommitPayload { + summary, + files: regular_files, + lfs_files, + }; -#[derive(Deserialize, Eq, PartialEq, Debug)] -#[serde(rename_all = "camelCase")] -#[allow(dead_code)] -pub(super) struct HfLfs { - pub oid: String, - pub size: u64, - pub pointer_size: u64, -} + let json_body = serde_json::to_vec(&payload).map_err(new_json_serialize_error)?; -#[derive(Deserialize, Eq, PartialEq, Debug)] -#[serde(rename_all = "camelCase")] 
-#[allow(dead_code)] -pub(super) struct HfLastCommit { - pub id: String, - pub title: String, - pub date: String, -} + let req = self + .request(http::Method::POST, &url, Operation::Write) + .header(header::CONTENT_TYPE, "application/json") + .header(header::CONTENT_LENGTH, json_body.len()) + .body(Buffer::from(json_body)) + .map_err(new_request_build_error)?; -#[derive(Deserialize, Eq, PartialEq, Debug)] -#[serde(rename_all = "camelCase")] -#[allow(dead_code)] -pub(super) struct HfSecurity { - pub blob_id: String, - pub safe: bool, - pub av_scan: Option, - pub pickle_import_scan: Option, + client.send(req).await + } } -#[derive(Deserialize, Eq, PartialEq, Debug)] -#[allow(dead_code)] -#[serde(rename_all = "camelCase")] -pub(super) struct HfAvScan { - pub virus_found: bool, - pub virus_names: Option>, +#[cfg(feature = "xet")] +pub(super) struct XetTokenRefresher { + core: HfCore, + token_type: &'static str, } -#[derive(Deserialize, Eq, PartialEq, Debug)] -#[serde(rename_all = "camelCase")] -#[allow(dead_code)] -pub(super) struct HfPickleImportScan { - pub highest_safety_level: String, - pub imports: Vec, +#[cfg(feature = "xet")] +impl XetTokenRefresher { + pub(super) fn new(core: &HfCore, token_type: &'static str) -> Self { + Self { + core: core.clone(), + token_type, + } + } } -#[derive(Deserialize, Eq, PartialEq, Debug)] -#[allow(dead_code)] -pub(super) struct HfImport { - pub module: String, - pub name: String, - pub safety: String, +#[cfg(feature = "xet")] +#[async_trait::async_trait] +impl TokenRefresher for XetTokenRefresher { + async fn refresh(&self) -> std::result::Result<(String, u64), xet_utils::errors::AuthError> { + let token = self + .core + .get_xet_token(self.token_type) + .await + .map_err(xet_utils::errors::AuthError::token_refresh_failure)?; + Ok((token.access_token, token.exp)) + } } #[cfg(test)] -mod tests { - use bytes::Bytes; +pub(crate) mod test_utils { use http::{Request, Response, StatusCode}; use std::sync::{Arc, Mutex}; + use 
super::super::uri::RepoType; use super::*; - // Mock HTTP client that captures the request URL and headers #[derive(Clone)] - struct MockHttpClient { + pub(crate) struct MockHttpClient { url: Arc>>, - headers: Arc>>, } impl MockHttpClient { - fn new() -> Self { + pub(crate) fn new() -> Self { Self { url: Arc::new(Mutex::new(None)), - headers: Arc::new(Mutex::new(None)), } } - fn get_captured_url(&self) -> String { + pub(crate) fn get_captured_url(&self) -> String { self.url.lock().unwrap().clone().unwrap() } - - fn get_captured_headers(&self) -> http::HeaderMap { - self.headers.lock().unwrap().clone().unwrap() - } } impl HttpFetch for MockHttpClient { async fn fetch(&self, req: Request) -> Result> { - // Capture the URL and headers *self.url.lock().unwrap() = Some(req.uri().to_string()); - *self.headers.lock().unwrap() = Some(req.headers().clone()); - // Return a mock response with empty body + // Return a minimal valid JSON response for API requests + let body = if req.uri().to_string().contains("/paths-info/") + || req.uri().to_string().contains("/tree/") + { + let data = + Bytes::from(r#"[{"type":"file","oid":"abc123","size":100,"path":"test.txt"}]"#); + let size = data.len() as u64; + let buffer = Buffer::from(data); + HttpBody::new(futures::stream::iter(vec![Ok(buffer)]), Some(size)) + } else { + HttpBody::new(futures::stream::empty(), Some(0)) + }; + Ok(Response::builder() .status(StatusCode::OK) - .body(HttpBody::new(futures::stream::empty(), Some(0))) + .body(body) .unwrap()) } } - /// Utility function to create HfCore with mocked HTTP client - fn create_test_core( + pub(crate) fn create_test_core( repo_type: RepoType, repo_id: &str, revision: &str, @@ -352,16 +530,25 @@ mod tests { let core = HfCore { info: Arc::new(info), - repo_type, - repo_id: repo_id.to_string(), - revision: revision.to_string(), + repo: HfRepo::new(repo_type, repo_id.to_string(), Some(revision.to_string())), root: "/".to_string(), token: None, endpoint: endpoint.to_string(), + 
#[cfg(feature = "xet")] + xet_enabled: false, }; (core, mock_client) } +} + +#[cfg(test)] +mod tests { + use bytes::Bytes; + + use super::super::uri::RepoType; + use super::test_utils::create_test_core; + use super::*; #[tokio::test] async fn test_hf_path_info_url_model() -> Result<()> { @@ -372,7 +559,7 @@ mod tests { "https://huggingface.co", ); - core.hf_path_info("test.txt").await?; + core.path_info("test.txt").await?; let url = mock_client.get_captured_url(); assert_eq!( @@ -392,7 +579,7 @@ mod tests { "https://huggingface.co", ); - core.hf_path_info("data/file.csv").await?; + core.path_info("data/file.csv").await?; let url = mock_client.get_captured_url(); assert_eq!( @@ -412,7 +599,7 @@ mod tests { "https://custom-hf.example.com", ); - core.hf_path_info("model.bin").await?; + core.path_info("model.bin").await?; let url = mock_client.get_captured_url(); assert_eq!( @@ -432,7 +619,7 @@ mod tests { "https://huggingface.co", ); - core.hf_list("path1", false, None).await?; + core.file_tree("path1", false, None).await?; let url = mock_client.get_captured_url(); assert_eq!( @@ -452,7 +639,7 @@ mod tests { "https://huggingface.co", ); - core.hf_list("path2", true, None).await?; + core.file_tree("path2", true, None).await?; let url = mock_client.get_captured_url(); assert_eq!( @@ -472,7 +659,7 @@ mod tests { "https://huggingface.co", ); - core.hf_list("path3", false, Some("abc123")).await?; + core.file_tree("path3", false, Some("abc123")).await?; let url = mock_client.get_captured_url(); assert_eq!( @@ -483,50 +670,6 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_hf_resolve_url_model() -> Result<()> { - let (core, mock_client) = create_test_core( - RepoType::Model, - "user/model", - "main", - "https://huggingface.co", - ); - - let args = OpRead::default(); - core.hf_resolve("config.json", BytesRange::default(), &args) - .await?; - - let url = mock_client.get_captured_url(); - assert_eq!( - url, - "https://huggingface.co/user/model/resolve/main/config.json" 
- ); - - Ok(()) - } - - #[tokio::test] - async fn test_hf_resolve_url_dataset() -> Result<()> { - let (core, mock_client) = create_test_core( - RepoType::Dataset, - "org/data", - "v1.0", - "https://huggingface.co", - ); - - let args = OpRead::default(); - core.hf_resolve("train.csv", BytesRange::default(), &args) - .await?; - - let url = mock_client.get_captured_url(); - assert_eq!( - url, - "https://huggingface.co/datasets/org/data/resolve/v1%2E0/train.csv" - ); - - Ok(()) - } - #[tokio::test] async fn test_hf_path_info_url_space() -> Result<()> { let (core, mock_client) = create_test_core( @@ -536,7 +679,7 @@ mod tests { "https://huggingface.co", ); - core.hf_path_info("app.py").await?; + core.path_info("app.py").await?; let url = mock_client.get_captured_url(); assert_eq!( @@ -556,7 +699,7 @@ mod tests { "https://huggingface.co", ); - core.hf_list("static", false, None).await?; + core.file_tree("static", false, None).await?; let url = mock_client.get_captured_url(); assert_eq!( @@ -567,52 +710,6 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_hf_resolve_url_space() -> Result<()> { - let (core, mock_client) = create_test_core( - RepoType::Space, - "user/space", - "main", - "https://huggingface.co", - ); - - let args = OpRead::default(); - core.hf_resolve("README.md", BytesRange::default(), &args) - .await?; - - let url = mock_client.get_captured_url(); - assert_eq!( - url, - "https://huggingface.co/spaces/user/space/resolve/main/README.md" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_hf_resolve_with_range() -> Result<()> { - let (core, mock_client) = create_test_core( - RepoType::Model, - "user/model", - "main", - "https://huggingface.co", - ); - - let args = OpRead::default(); - let range = BytesRange::new(0, Some(1024)); - core.hf_resolve("large_file.bin", range, &args).await?; - - let url = mock_client.get_captured_url(); - let headers = mock_client.get_captured_headers(); - assert_eq!( - url, - 
"https://huggingface.co/user/model/resolve/main/large_file.bin" - ); - assert_eq!(headers.get(http::header::RANGE).unwrap(), "bytes=0-1023"); - - Ok(()) - } - #[test] fn parse_list_response_test() -> Result<()> { let resp = Bytes::from( @@ -640,34 +737,30 @@ mod tests { ); let decoded_response = - serde_json::from_slice::>(&resp).map_err(new_json_deserialize_error)?; + serde_json::from_slice::>(&resp).map_err(new_json_deserialize_error)?; assert_eq!(decoded_response.len(), 2); - let file_entry = HfStatus { + let file_entry = PathInfo { type_: "file".to_string(), oid: "45fa7c3d85ee7dd4139adbc056da25ae136a65f2".to_string(), size: 69512435, - lfs: Some(HfLfs { + lfs: Some(LfsInfo { oid: "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c".to_string(), - size: 69512435, - pointer_size: 133, }), path: "maelstrom/lib/maelstrom.jar".to_string(), last_commit: None, - security: None, }; assert_eq!(decoded_response[0], file_entry); - let dir_entry = HfStatus { + let dir_entry = PathInfo { type_: "directory".to_string(), oid: "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c".to_string(), size: 69512435, lfs: None, path: "maelstrom/lib/plugins".to_string(), last_commit: None, - security: None, }; assert_eq!(decoded_response[1], dir_entry); @@ -719,58 +812,21 @@ mod tests { ); let decoded_response = - serde_json::from_slice::>(&resp).map_err(new_json_deserialize_error)?; + serde_json::from_slice::>(&resp).map_err(new_json_deserialize_error)?; assert_eq!(decoded_response.len(), 1); - let file_info = HfStatus { + let file_info = PathInfo { type_: "file".to_string(), oid: "45fa7c3d85ee7dd4139adbc056da25ae136a65f2".to_string(), size: 69512435, - lfs: Some(HfLfs { + lfs: Some(LfsInfo { oid: "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c".to_string(), - size: 69512435, - pointer_size: 133, }), path: "maelstrom/lib/maelstrom.jar".to_string(), - last_commit: Some(HfLastCommit { - id: 
/// Convert any error surfaced by the XET stack into an OpenDAL error,
/// preserving the original error as the source for diagnostics.
#[cfg(feature = "xet")]
pub(super) fn map_xet_error(err: impl std::error::Error + Send + Sync + 'static) -> Error {
    let wrapped = Error::new(ErrorKind::Unexpected, "xet operation failed");
    wrapped.set_source(err)
}
impl oio::PageList for HfLister {
    /// Fetch one page of directory entries from the Hugging Face tree API.
    ///
    /// `ctx.token` carries the pagination cursor between calls: it is empty
    /// on the first call, then holds the cursor returned by the previous
    /// page. Listing is finished (`ctx.done`) once the response carries no
    /// `next_cursor`.
    async fn next_page(&self, ctx: &mut oio::PageContext) -> Result<()> {
        // An empty token means "first page"; otherwise resume from cursor.
        let cursor = if ctx.token.is_empty() {
            None
        } else {
            Some(ctx.token.as_str())
        };
        let response = self
            .core
            .file_tree(&self.path, self.recursive, cursor)
            .await?;

        if let Some(next_cursor) = response.next_cursor {
            ctx.token = next_cursor;
        } else {
            ctx.done = true;
        }

        for info in response.files {
            let meta = info.metadata()?;
            // OpenDAL convention: directory paths carry a trailing slash.
            let path = if meta.mode() == EntryMode::DIR {
                format!("{}/", &info.path)
            } else {
                info.path.clone()
            };
            ctx.entries.push_back(oio::Entry::new(
                &build_rel_path(&self.core.root, &path),
                meta,
            ));
        }

        Ok(())
    }
}
-/// Example: ; rel="next" -fn parse_link_header(headers: &http::HeaderMap) -> Option { - let link_header = headers.get(http::header::LINK)?; - let link_str = link_header.to_str().ok()?; - - // Parse Link header format: ; rel="next" - for link in link_str.split(',') { - if link.contains("rel=\"next\"") || link.contains("rel='next'") { - // Extract URL from using split_once for cleaner parsing - let (_, rest) = link.split_once('<')?; - let (inside, _) = rest.split_once('>')?; - return Some(inside.to_string()); - } - } - - None -} - -#[cfg(test)] -mod tests { - use super::*; - use http::HeaderMap; - use http::HeaderValue; - - #[test] - fn test_parse_link_header_with_next() { - let mut headers = HeaderMap::new(); - headers.insert( - http::header::LINK, - HeaderValue::from_static( - r#"; rel="next""#, - ), - ); - - let result = parse_link_header(&headers); - assert_eq!( - result, - Some("https://huggingface.co/api/models/test/tree/main?cursor=abc123".to_string()) - ); - } - - #[test] - fn test_parse_link_header_with_single_quotes() { - let mut headers = HeaderMap::new(); - headers.insert( - http::header::LINK, - HeaderValue::from_static( - r#"; rel='next'"#, - ), - ); - - let result = parse_link_header(&headers); - assert_eq!( - result, - Some("https://huggingface.co/api/models/test/tree/main?cursor=xyz".to_string()) - ); - } - - #[test] - fn test_parse_link_header_without_next() { - let mut headers = HeaderMap::new(); - headers.insert( - http::header::LINK, - HeaderValue::from_static( - r#"; rel="prev""#, - ), - ); - - let result = parse_link_header(&headers); - assert_eq!(result, None); - } - - #[test] - fn test_parse_link_header_multiple_links() { - let mut headers = HeaderMap::new(); - headers.insert( - http::header::LINK, - HeaderValue::from_static( - r#"; rel="prev", ; rel="next""#, - ), - ); - - let result = parse_link_header(&headers); - assert_eq!( - result, - Some("https://huggingface.co/api/next?cursor=456".to_string()) - ); - } - - #[test] - fn 
// NOTE(review): the stream item type below was lost during extraction; it is
// whatever `xet_data::data_client::download_bytes_async` yields per chunk
// (a `Result<Bytes, _>` stream) — confirm the exact error type against that
// API before reusing this alias elsewhere.
#[cfg(feature = "xet")]
type XetByteStream =
    Pin<Box<dyn futures::Stream<Item = std::result::Result<Bytes, xet_data::errors::DataProcessingError>> + Send + Sync>>;

/// Reader over a file in a Hugging Face repository, backed either by a
/// plain HTTP `resolve` download or, when available, by the XET/CAS
/// content-addressed protocol.
pub enum HfReader {
    Http(HttpBody),
    #[cfg(feature = "xet")]
    Xet(XetByteStream),
}

impl HfReader {
    /// Create a reader, automatically choosing between XET and HTTP.
    ///
    /// When XET is enabled a HEAD request probes for the `X-Xet-Hash`
    /// header. Files stored on XET are downloaded via the CAS protocol;
    /// all others fall back to a regular HTTP GET.
    pub async fn try_new(core: &HfCore, path: &str, range: BytesRange) -> Result<Self> {
        #[cfg(feature = "xet")]
        if core.xet_enabled {
            // Probe the hub; `None` means the file is not XET-backed.
            if let Some(xet_file) = core.get_xet_file(path).await? {
                return Self::download_xet(core, &xet_file, range).await;
            }
        }

        Self::download_http(core, path, range).await
    }

    /// Download via the plain HTTP `resolve` endpoint, honoring `range`
    /// through a standard `Range` header.
    pub async fn download_http(core: &HfCore, path: &str, range: BytesRange) -> Result<Self> {
        let client = core.info.http_client();
        let uri = core.repo.uri(&core.root, path);
        let url = uri.resolve_url(&core.endpoint);

        let mut req = core.request(http::Method::GET, &url, Operation::Read);

        if !range.is_full() {
            req = req.header(header::RANGE, range.to_header());
        }

        let req = req.body(Buffer::new()).map_err(new_request_build_error)?;

        let resp = client.fetch(req).await?;
        let status = resp.status();

        match status {
            // 200 for full reads, 206 when the server honored the Range header.
            StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(Self::Http(resp.into_body())),
            _ => {
                // Drain the body so the error can include the server message.
                let (part, mut body) = resp.into_parts();
                let buf = body.to_buffer().await?;
                Err(super::error::parse_error(Response::from_parts(part, buf)))
            }
        }
    }

    /// Download via the XET/CAS protocol using a short-lived read token;
    /// `range` is translated into a CAS `FileRange`.
    #[cfg(feature = "xet")]
    pub async fn download_xet(
        core: &HfCore,
        xet_file: &XetFile,
        range: BytesRange,
    ) -> Result<Self> {
        let token = core.get_xet_token("read").await?;

        let file_info = xet_data::XetFileInfo::new(xet_file.hash.clone(), xet_file.size);

        // Translate an OpenDAL range into a half-open CAS byte range; an
        // open-ended range falls back to "offset .. file size".
        let file_range = if !range.is_full() {
            let offset = range.offset();
            let size = range.size().unwrap_or(xet_file.size - offset);
            let end = offset + size;
            Some(FileRange::new(offset, end))
        } else {
            None
        };

        // The refresher lets the transfer renew its token if it outlives
        // the initial credential's lifetime.
        let refresher = Arc::new(XetTokenRefresher::new(core, "read"));

        let mut streams = xet_data::data_client::download_bytes_async(
            vec![file_info],
            Some(vec![file_range]),
            Some(token.cas_url),
            Some((token.access_token, token.exp)),
            Some(refresher),
            None,
            "opendal/1.0".to_string(),
            256,
        )
        .await
        .map_err(map_xet_error)?;

        // One file requested, so exactly one stream comes back.
        let stream = streams.remove(0);
        Ok(Self::Xet(Box::pin(stream)))
    }
}

impl oio::Read for HfReader {
    /// Yield the next chunk; an empty buffer signals end of stream.
    async fn read(&mut self) -> Result<Buffer> {
        match self {
            Self::Http(body) => body.read().await,
            #[cfg(feature = "xet")]
            Self::Xet(stream) => match stream.next().await {
                Some(Ok(bytes)) => Ok(Buffer::from(bytes)),
                Some(Err(e)) => Err(map_xet_error(e)),
                None => Ok(Buffer::new()),
            },
        }
    }
}
let range = BytesRange::new(0, Some(4)); + let mut reader = HfReader::download_http(&core, "full/train-00000-of-00001.parquet", range) + .await + .expect("download should succeed"); + + let data = read_all(&mut reader).await; + assert_eq!(&data, PARQUET_MAGIC); + } + + #[tokio::test] + #[ignore] + async fn test_download_http_range() { + let core = testing_core(RepoType::Dataset, "google-research-datasets/mbpp", false); + let range = BytesRange::new(0, Some(4)); + let mut reader = HfReader::download_http(&core, "full/train-00000-of-00001.parquet", range) + .await + .expect("range download should succeed"); + + let data = read_all(&mut reader).await; + assert_eq!(data.len(), 4); + assert_eq!(&data, PARQUET_MAGIC); + } + + #[tokio::test] + async fn test_download_dispatches_to_http() { + let core = testing_core(RepoType::Model, "openai-community/gpt2", false); + let reader = HfReader::try_new(&core, "config.json", BytesRange::default()) + .await + .expect("download should succeed"); + + assert!(matches!(reader, HfReader::Http(_))); + } + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_download_xet_parquet() { + let core = testing_core(RepoType::Dataset, "google-research-datasets/mbpp", true); + let xet_file = core + .get_xet_file("full/train-00000-of-00001.parquet") + .await + .expect("xet probe should succeed") + .expect("parquet file should be xet-backed"); + + let mut reader = HfReader::download_xet(&core, &xet_file, BytesRange::default()) + .await + .expect("xet download should succeed"); + + let data = read_all(&mut reader).await; + assert!(data.len() > 8); + assert_eq!(&data[..4], PARQUET_MAGIC); + assert_eq!(&data[data.len() - 4..], PARQUET_MAGIC); + } + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_download_xet_range() { + let core = testing_core(RepoType::Dataset, "google-research-datasets/mbpp", true); + let xet_file = core + .get_xet_file("full/train-00000-of-00001.parquet") + .await + .expect("xet probe should 
succeed") + .expect("parquet file should be xet-backed"); + + let range = BytesRange::new(0, Some(4)); + let mut reader = HfReader::download_xet(&core, &xet_file, range) + .await + .expect("xet range download should succeed"); + + let data = read_all(&mut reader).await; + assert_eq!(data.len(), 4); + assert_eq!(&data, PARQUET_MAGIC); + } + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_download_dispatches_to_xet() { + let core = testing_core(RepoType::Dataset, "google-research-datasets/mbpp", true); + let reader = HfReader::try_new( + &core, + "full/train-00000-of-00001.parquet", + BytesRange::default(), + ) + .await + .expect("download should succeed"); + + assert!(matches!(reader, HfReader::Xet(_))); + } +} diff --git a/core/services/hf/src/uri.rs b/core/services/hf/src/uri.rs new file mode 100644 index 000000000000..7cc8a5e76f1b --- /dev/null +++ b/core/services/hf/src/uri.rs @@ -0,0 +1,522 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; +use serde::Deserialize; +use serde::Serialize; + +use super::HUGGINGFACE_SCHEME; +use opendal_core::raw::*; + +/// Repository type of Huggingface. 
Supports `model`, `dataset`, and `space`. +/// [Reference](https://huggingface.co/docs/hub/repositories) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "lowercase")] +pub enum RepoType { + #[default] + Model, + Dataset, + Space, +} + +impl RepoType { + pub fn parse(s: &str) -> opendal_core::Result { + match s.to_lowercase().replace(' ', "").as_str() { + "model" | "models" => Ok(Self::Model), + "dataset" | "datasets" => Ok(Self::Dataset), + "space" | "spaces" => Ok(Self::Space), + other => Err(opendal_core::Error::new( + opendal_core::ErrorKind::ConfigInvalid, + format!("unknown repo type: {other}"), + ) + .with_context("service", HUGGINGFACE_SCHEME)), + } + } + + pub fn as_str(&self) -> &'static str { + match self { + Self::Model => "model", + Self::Dataset => "dataset", + Self::Space => "space", + } + } + + pub fn as_plural_str(&self) -> &'static str { + match self { + Self::Model => "models", + Self::Dataset => "datasets", + Self::Space => "spaces", + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct HfRepo { + pub repo_type: RepoType, + pub repo_id: String, + pub revision: Option, +} + +impl HfRepo { + pub fn new(repo_type: RepoType, repo_id: String, revision: Option) -> Self { + Self { + repo_type, + repo_id, + revision, + } + } + + /// Return the revision, defaulting to "main" if unset. + pub fn revision(&self) -> &str { + self.revision.as_deref().unwrap_or("main") + } + + /// Create an `HfUri` for the given root and path within this repo. + pub fn uri(&self, root: &str, path: &str) -> HfUri { + HfUri { + repo: self.clone(), + path: build_abs_path(root, path) + .trim_start_matches('/') + .trim_end_matches('/') + .to_string(), + } + } + + /// Build the paths-info API URL for this repository. 
impl HfRepo {
    /// Build the paths-info API URL for this repository:
    /// `{endpoint}/api/{models|datasets|spaces}/{repo_id}/paths-info/{revision}`.
    pub fn paths_info_url(&self, endpoint: &str) -> String {
        format!(
            "{}/api/{}/{}/paths-info/{}",
            endpoint,
            self.repo_type.as_plural_str(),
            &self.repo_id,
            percent_encode_revision(self.revision()),
        )
    }

    /// Build the XET token API URL (`xet-read-token` / `xet-write-token`).
    // NOTE(review): unlike `paths_info_url`, the revision here is NOT
    // percent-encoded, so refs like "refs/pr/10" embed extra path segments
    // — confirm the Hub endpoint accepts that, or encode for consistency.
    #[cfg(feature = "xet")]
    pub fn xet_token_url(&self, endpoint: &str, token_type: &str) -> String {
        format!(
            "{}/api/{}/{}/xet-{}-token/{}",
            endpoint,
            self.repo_type.as_plural_str(),
            &self.repo_id,
            token_type,
            self.revision(),
        )
    }
}

/// Parsed Hugging Face URI following the official format:
/// `hf://[<repo_type>/]<repo_id>[@<revision>][/<path_in_repo>]`
///
/// Use this directly when you need access to the in-repo path separately
/// from the config (e.g. to resolve a specific file within the repo).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HfUri {
    pub repo: HfRepo,
    pub path: String,
}

impl HfUri {
    /// Parse a Hugging Face path into its components.
    /// Path format: `[<repo_type>/]<repo_id>[@<revision>][/<path_in_repo>]`
    pub fn parse(path: &str) -> opendal_core::Result<Self> {
        if path.is_empty() {
            return Err(opendal_core::Error::new(
                opendal_core::ErrorKind::ConfigInvalid,
                "repo_id is required in uri path",
            )
            .with_context("service", HUGGINGFACE_SCHEME));
        }

        let mut path = path.to_string();

        // Strip repo_type prefix if present (e.g. "datasets/user/repo" → "user/repo");
        // anything that is not a recognized type defaults to Model.
        let repo_type = if let Some((first, rest)) = path.split_once('/') {
            if let Ok(rt) = RepoType::parse(first) {
                path = rest.to_string();
                rt
            } else {
                RepoType::Model
            }
        } else if RepoType::parse(&path).is_ok() {
            // A bare "datasets"/"models" with no repository name is invalid.
            return Err(opendal_core::Error::new(
                opendal_core::ErrorKind::ConfigInvalid,
                "repository name is required in uri path",
            )
            .with_context("service", HUGGINGFACE_SCHEME));
        } else {
            RepoType::Model
        };

        // Parse repo_id, revision, and path_in_repo.
        // Path is now: <repo_id>[@<revision>][/<path_in_repo>]
        let (repo_id, revision, path_in_repo) = if path.contains('/') {
            // Check if '@' appears in the first two segments (the repo_id portion).
            // This distinguishes "user/repo@rev/file" from "user/repo/path/to/@file".
            let first_two: String = path.splitn(3, '/').take(2).collect::<Vec<_>>().join("/");

            if first_two.contains('@') {
                let (repo_id, rev_and_path) = path.split_once('@').unwrap();
                // Revisions may arrive URL-encoded (e.g. "refs%2Fpr%2F10").
                let rev_and_path = rev_and_path.replace("%2F", "/");
                let (revision, path_in_repo) = Self::parse_revision(&rev_and_path);
                (repo_id.to_string(), Some(revision), path_in_repo)
            } else {
                let segments: Vec<_> = path.splitn(3, '/').collect();
                let repo_id = format!("{}/{}", segments[0], segments[1]);
                let path_in_repo = segments.get(2).copied().unwrap_or("").to_string();
                (repo_id, None, path_in_repo)
            }
        } else if let Some((repo_id, rev)) = path.split_once('@') {
            let rev = rev.replace("%2F", "/");
            (
                repo_id.to_string(),
                if rev.is_empty() { None } else { Some(rev) },
                String::new(),
            )
        } else {
            // Bare repo id, e.g. "gpt2".
            (path, None, String::new())
        };

        Ok(Self {
            repo: HfRepo::new(repo_type, repo_id, revision),
            path: path_in_repo,
        })
    }
}
+ fn parse_revision(rev_and_path: &str) -> (String, String) { + if !rev_and_path.contains('/') { + return (rev_and_path.to_string(), String::new()); + } + + // Match special refs: refs/(convert|pr)/ + if let Some(rest) = rev_and_path.strip_prefix("refs/convert/") { + return if let Some(slash) = rest.find('/') { + ( + rev_and_path[..14 + slash].to_string(), + rest[slash + 1..].to_string(), + ) + } else { + (rev_and_path.to_string(), String::new()) + }; + } + if let Some(rest) = rev_and_path.strip_prefix("refs/pr/") { + return if let Some(slash) = rest.find('/') { + let revision = format!("refs/pr/{}", &rest[..slash]); + (revision, rest[slash + 1..].to_string()) + } else { + (rev_and_path.to_string(), String::new()) + }; + } + + // Regular revision: split on first / + let (rev, path) = rev_and_path.split_once('/').unwrap(); + (rev.to_string(), path.to_string()) + } + + /// Return the revision, defaulting to "main" if unset. + pub fn revision(&self) -> &str { + self.repo.revision() + } + + /// Build the resolve URL for this URI. + pub fn resolve_url(&self, endpoint: &str) -> String { + let revision = percent_encode_revision(self.revision()); + let path = percent_encode_path(&self.path); + match self.repo.repo_type { + RepoType::Model => { + format!( + "{}/{}/resolve/{}/{}", + endpoint, &self.repo.repo_id, revision, path + ) + } + RepoType::Dataset => { + format!( + "{}/datasets/{}/resolve/{}/{}", + endpoint, &self.repo.repo_id, revision, path + ) + } + RepoType::Space => { + format!( + "{}/spaces/{}/resolve/{}/{}", + endpoint, &self.repo.repo_id, revision, path + ) + } + } + } + + /// Build the paths-info API URL for this URI. + pub fn paths_info_url(&self, endpoint: &str) -> String { + self.repo.paths_info_url(endpoint) + } + + /// Build the file tree API URL for this URI. 
+ pub fn file_tree_url(&self, endpoint: &str, recursive: bool, cursor: Option<&str>) -> String { + let mut url = format!( + "{}/api/{}/{}/tree/{}/{}?expand=True", + endpoint, + self.repo.repo_type.as_plural_str(), + &self.repo.repo_id, + percent_encode_revision(self.revision()), + percent_encode_path(&self.path), + ); + + if recursive { + url.push_str("&recursive=True"); + } + + if let Some(cursor_val) = cursor { + url.push_str(&format!("&cursor={}", cursor_val)); + } + + url + } + + /// Build the preupload API URL for this URI. + pub fn preupload_url(&self, endpoint: &str) -> String { + // Split repo_id into namespace and repo (e.g., "user/repo" -> "user", "repo") + let parts: Vec<&str> = self.repo.repo_id.splitn(2, '/').collect(); + let (namespace, repo) = if parts.len() == 2 { + (parts[0], parts[1]) + } else { + ("", self.repo.repo_id.as_str()) + }; + + format!( + "{}/api/{}/{}/{}/preupload/{}", + endpoint, + self.repo.repo_type.as_plural_str(), + namespace, + repo, + percent_encode_revision(self.revision()), + ) + } + + /// Build the commit API URL for this URI. 
+ pub fn commit_url(&self, endpoint: &str) -> String { + // Split repo_id into namespace and repo (e.g., "user/repo" -> "user", "repo") + let parts: Vec<&str> = self.repo.repo_id.splitn(2, '/').collect(); + let (namespace, repo) = if parts.len() == 2 { + (parts[0], parts[1]) + } else { + // Handle case where repo_id doesn't contain a slash (shouldn't happen normally) + ("", self.repo.repo_id.as_str()) + }; + + format!( + "{}/api/{}/{}/{}/commit/{}", + endpoint, + self.repo.repo_type.as_plural_str(), + namespace, + repo, + percent_encode_revision(self.revision()), + ) + } +} + +pub(super) fn percent_encode_revision(revision: &str) -> String { + utf8_percent_encode(revision, NON_ALPHANUMERIC).to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn resolve(path: &str) -> HfUri { + HfUri::parse(path).unwrap() + } + + #[test] + fn test_repo_type_parse() { + assert_eq!(RepoType::parse("models").unwrap(), RepoType::Model); + assert_eq!(RepoType::parse("Models").unwrap(), RepoType::Model); + assert_eq!(RepoType::parse("MODELS").unwrap(), RepoType::Model); + assert_eq!(RepoType::parse("datasets").unwrap(), RepoType::Dataset); + assert_eq!(RepoType::parse("Datasets").unwrap(), RepoType::Dataset); + assert_eq!(RepoType::parse("spaces").unwrap(), RepoType::Space); + assert_eq!(RepoType::parse("Spaces").unwrap(), RepoType::Space); + assert_eq!(RepoType::parse("model").unwrap(), RepoType::Model); + assert_eq!(RepoType::parse("dataset").unwrap(), RepoType::Dataset); + assert_eq!(RepoType::parse("space").unwrap(), RepoType::Space); + assert_eq!(RepoType::parse("data sets").unwrap(), RepoType::Dataset); + assert_eq!(RepoType::parse("Data Sets").unwrap(), RepoType::Dataset); + assert!(RepoType::parse("unknown").is_err()); + assert!(RepoType::parse("foobar").is_err()); + } + + #[test] + fn resolve_with_namespace() { + let p = resolve("username/my_model"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "username/my_model"); + 
assert!(p.repo.revision.is_none()); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_with_revision() { + let p = resolve("username/my_model@dev"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "username/my_model"); + assert_eq!(p.repo.revision.as_deref(), Some("dev")); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_datasets_prefix() { + let p = resolve("datasets/username/my_dataset"); + assert_eq!(p.repo.repo_type, RepoType::Dataset); + assert_eq!(p.repo.repo_id, "username/my_dataset"); + assert!(p.repo.revision.is_none()); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_datasets_prefix_and_revision() { + let p = resolve("datasets/username/my_dataset@dev"); + assert_eq!(p.repo.repo_type, RepoType::Dataset); + assert_eq!(p.repo.repo_id, "username/my_dataset"); + assert_eq!(p.repo.revision.as_deref(), Some("dev")); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_with_path_in_repo() { + let p = resolve("username/my_model/config.json"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "username/my_model"); + assert!(p.repo.revision.is_none()); + assert_eq!(p.path, "config.json"); + } + + #[test] + fn resolve_with_revision_and_path() { + let p = resolve("username/my_model@dev/path/to/file.txt"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "username/my_model"); + assert_eq!(p.repo.revision.as_deref(), Some("dev")); + assert_eq!(p.path, "path/to/file.txt"); + } + + #[test] + fn resolve_datasets_revision_and_path() { + let p = resolve("datasets/username/my_dataset@dev/train/data.csv"); + assert_eq!(p.repo.repo_type, RepoType::Dataset); + assert_eq!(p.repo.repo_id, "username/my_dataset"); + assert_eq!(p.repo.revision.as_deref(), Some("dev")); + assert_eq!(p.path, "train/data.csv"); + } + + #[test] + fn resolve_refs_convert_revision() { + let p = resolve("datasets/squad@refs/convert/parquet"); + assert_eq!(p.repo.repo_type, RepoType::Dataset); + 
assert_eq!(p.repo.repo_id, "squad"); + assert_eq!(p.repo.revision.as_deref(), Some("refs/convert/parquet")); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_refs_pr_revision() { + let p = resolve("username/my_model@refs/pr/10"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "username/my_model"); + assert_eq!(p.repo.revision.as_deref(), Some("refs/pr/10")); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_encoded_revision() { + let p = resolve("username/my_model@refs%2Fpr%2F10"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "username/my_model"); + assert_eq!(p.repo.revision.as_deref(), Some("refs/pr/10")); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_at_in_path_not_revision() { + let p = resolve("username/my_model/path/to/@not-a-revision.txt"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "username/my_model"); + assert!(p.repo.revision.is_none()); + assert_eq!(p.path, "path/to/@not-a-revision.txt"); + } + + #[test] + fn resolve_bare_repo_type_fails() { + assert!(HfUri::parse("datasets").is_err()); + assert!(HfUri::parse("").is_err()); + } + + #[test] + fn resolve_bare_repo_no_namespace() { + let p = resolve("gpt2"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "gpt2"); + assert!(p.repo.revision.is_none()); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_bare_repo_with_revision() { + let p = resolve("gpt2@dev"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "gpt2"); + assert_eq!(p.repo.revision.as_deref(), Some("dev")); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_bare_dataset_no_namespace() { + let p = resolve("datasets/squad"); + assert_eq!(p.repo.repo_type, RepoType::Dataset); + assert_eq!(p.repo.repo_id, "squad"); + assert!(p.repo.revision.is_none()); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_bare_dataset_with_revision() { + let p = 
resolve("datasets/squad@dev"); + assert_eq!(p.repo.repo_type, RepoType::Dataset); + assert_eq!(p.repo.repo_id, "squad"); + assert_eq!(p.repo.revision.as_deref(), Some("dev")); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_models_prefix() { + let p = resolve("models/username/my_model"); + assert_eq!(p.repo.repo_type, RepoType::Model); + assert_eq!(p.repo.repo_id, "username/my_model"); + assert!(p.repo.revision.is_none()); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_spaces_prefix() { + let p = resolve("spaces/username/my_space"); + assert_eq!(p.repo.repo_type, RepoType::Space); + assert_eq!(p.repo.repo_id, "username/my_space"); + assert!(p.repo.revision.is_none()); + assert_eq!(p.path, ""); + } +} diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs new file mode 100644 index 000000000000..36431ec0d078 --- /dev/null +++ b/core/services/hf/src/writer.rs @@ -0,0 +1,348 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use base64::Engine; +use http::StatusCode; +use sha2::{Digest, Sha256}; + +use super::core::{CommitFile, HfCore, LfsFile, PreuploadFile}; +#[cfg(feature = "xet")] +use super::core::{XetTokenRefresher, map_xet_error}; +use opendal_core::raw::*; +use opendal_core::*; + +pub struct HfWriter { + core: Arc, + #[allow(dead_code)] + op: OpWrite, + path: String, +} + +impl HfWriter { + /// Create a writer. + pub fn new(core: &Arc, path: &str, op: OpWrite) -> Self { + Self { + core: core.clone(), + op, + path: path.to_string(), + } + } + + /// Determine upload mode via preupload API. + async fn determine_upload_mode(core: &HfCore, path: &str, body: &Buffer) -> Result { + let bytes = body.to_bytes(); + let size = bytes.len() as u64; + + // Compute SHA256 hash + let mut hasher = Sha256::new(); + hasher.update(&bytes); + let sha256_hash = format!("{:x}", hasher.finalize()); + + // Get sample (first 512 bytes, base64 encoded) + let sample_size = std::cmp::min(512, bytes.len()); + let sample = base64::engine::general_purpose::STANDARD.encode(&bytes[..sample_size]); + + // Call preupload endpoint + let preupload_files = vec![PreuploadFile { + path: path.to_string(), + size, + sample, + sha256: sha256_hash, + }]; + + let preupload_resp = core.preupload_files(preupload_files).await?; + + // Get upload mode from response + let mode = preupload_resp + .files + .first() + .ok_or_else(|| Error::new(ErrorKind::Unexpected, "no files in preupload response"))? + .upload_mode + .clone(); + + Ok(mode) + } + + /// Prepare file content for HTTP storage (base64 encode for regular upload). + async fn upload_http(path: &str, body: Buffer) -> Result { + let bytes = body.to_bytes(); + let content = base64::engine::general_purpose::STANDARD.encode(bytes); + Ok(CommitFile { + path: path.to_string(), + content, + encoding: "base64".to_string(), + }) + } + + /// Upload file content to XET storage. 
+ #[cfg(feature = "xet")] + async fn upload_xet( + core: &HfCore, + path: &str, + body: Buffer, + ) -> Result { + let bytes = body.to_bytes(); + let size = bytes.len() as u64; + + // Compute SHA256 hash for LFS OID + let mut hasher = Sha256::new(); + hasher.update(&bytes); + let sha256_hash = format!("{:x}", hasher.finalize()); + + // Upload to XET storage + let token = core.get_xet_token("write").await?; + let refresher = Arc::new(XetTokenRefresher::new(core, "write")); + + let file_contents = vec![bytes.to_vec()]; + + let results = xet_data::data_client::upload_bytes_async( + file_contents, + Some(token.cas_url), + Some((token.access_token, token.exp)), + Some(refresher), + None, + "opendal/1.0".to_string(), + ) + .await + .map_err(map_xet_error)?; + + let _file_info = results.first().ok_or_else(|| { + Error::new( + ErrorKind::Unexpected, + "No file info returned from XET upload", + ) + })?; + + Ok(LfsFile { + path: path.to_string(), + oid: sha256_hash, + algo: "sha256".to_string(), + size, + }) + } + + /// Upload file and commit based on determined mode. + /// + /// Retries on commit conflicts (HTTP 412) and transient server errors + /// (HTTP 5xx), matching the behavior of the official HuggingFace Hub + /// client. 
+ async fn upload_and_commit(&self, body: Buffer) -> Result { + const MAX_RETRIES: usize = 3; + + let mut last_err = None; + for _ in 0..MAX_RETRIES { + match self.try_upload_and_commit(body.clone()).await { + Ok(meta) => return Ok(meta), + Err(err) + if err.kind() == ErrorKind::ConditionNotMatch || err.is_temporary() => + { + last_err = Some(err); + continue; + } + Err(err) => return Err(err), + } + } + Err(last_err.unwrap()) + } + + async fn try_upload_and_commit(&self, body: Buffer) -> Result { + #[cfg_attr(not(feature = "xet"), allow(unused_variables))] + let mode = Self::determine_upload_mode(&self.core, &self.path, &body).await?; + + // Prepare file based on mode + let (commit_file, lfs_file) = { + #[cfg(feature = "xet")] + { + if self.core.xet_enabled && mode == "xet" { + let lfs = Self::upload_xet(&self.core, &self.path, body).await?; + (None, Some(lfs)) + } else { + let commit = Self::upload_http(&self.path, body).await?; + (Some(commit), None) + } + } + #[cfg(not(feature = "xet"))] + { + let commit = Self::upload_http(&self.path, body).await?; + (Some(commit), None) + } + }; + + // Commit the files + let regular_files: Vec<_> = commit_file.into_iter().collect(); + let lfs_files: Vec<_> = lfs_file.into_iter().collect(); + let resp = self.core.commit_files(regular_files, lfs_files).await?; + + match resp.status() { + StatusCode::OK | StatusCode::CREATED => Ok(Metadata::default()), + _ => Err(super::error::parse_error(resp)), + } + } +} + +impl oio::OneShotWrite for HfWriter { + async fn write_once(&self, bs: Buffer) -> Result { + self.upload_and_commit(bs).await + } +} + +#[cfg(test)] +mod tests { + use super::super::core::HfCore; + use super::super::uri::{HfRepo, RepoType}; + use super::*; + use oio::OneShotWrite; + + fn testing_core(_xet: bool) -> HfCore { + let repo_id = std::env::var("HF_OPENDAL_DATASET").expect("HF_OPENDAL_DATASET must be set"); + + let info = AccessorInfo::default(); + info.set_scheme("huggingface") + .set_native_capability(Capability 
{ + write: true, + ..Default::default() + }); + + HfCore { + info: info.into(), + repo: HfRepo::new(RepoType::Dataset, repo_id, Some("main".to_string())), + root: "/".to_string(), + token: std::env::var("HF_OPENDAL_TOKEN").ok(), + endpoint: "https://huggingface.co".to_string(), + #[cfg(feature = "xet")] + xet_enabled: _xet, + } + } + + #[tokio::test] + #[ignore] + async fn test_upload_http() { + let core = testing_core(false); + + let test_data = b"Hello, HuggingFace!"; + let buffer = Buffer::from(test_data.as_slice()); + + let commit_file = HfWriter::upload_http("test-file.txt", buffer) + .await + .expect("upload should succeed"); + + let resp = core + .commit_files(vec![commit_file], vec![]) + .await + .expect("commit should succeed"); + + assert!( + resp.status() == StatusCode::OK || resp.status() == StatusCode::CREATED, + "expected OK or CREATED status, got {}", + resp.status() + ); + } + + #[tokio::test] + #[ignore] + async fn test_write_once_http() { + let core = Arc::new(testing_core(false)); + + let test_data = b"Test content for write_once"; + let buffer = Buffer::from(test_data.as_slice()); + + let writer = HfWriter::new(&core, "write-once-test.txt", OpWrite::default()); + let result = writer.write_once(buffer).await; + + assert!(result.is_ok(), "write_once should succeed: {:?}", result); + } + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_upload_xet() { + let core = testing_core(true); + + let test_data = b"Binary data for XET test"; + let buffer = Buffer::from(test_data.as_slice()); + + let result = HfWriter::upload_xet(&core, "test-xet.bin", buffer).await; + assert!(result.is_ok(), "xet upload should succeed: {:?}", result); + } + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_upload_and_commit_xet() { + let core = testing_core(true); + + let test_data = b"Binary data for XET commit test"; + let buffer = Buffer::from(test_data.as_slice()); + + let lfs_file = HfWriter::upload_xet(&core, "test-xet.bin", 
buffer) + .await + .expect("xet upload should succeed"); + + let resp = core + .commit_files(vec![], vec![lfs_file]) + .await + .expect("commit should succeed"); + + assert!( + resp.status() == StatusCode::OK || resp.status() == StatusCode::CREATED, + "expected OK or CREATED status, got {}", + resp.status() + ); + } + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_write_once_dispatches_to_xet() { + let core = Arc::new(testing_core(true)); + + let test_data = b"Binary content for XET dispatch"; + let buffer = Buffer::from(test_data.as_slice()); + + let writer = HfWriter::new(&core, "test-file.bin", OpWrite::default()); + let result = writer.write_once(buffer).await; + + assert!( + result.is_ok(), + "write_once with binary file should use xet: {:?}", + result + ); + } + + #[tokio::test] + #[ignore] + async fn test_upload_with_content_type() { + let core = Arc::new(testing_core(false)); + + let test_data = br#"{"test": "data"}"#; + let buffer = Buffer::from(test_data.as_slice()); + + let mut op = OpWrite::default(); + op = op.with_content_type("application/json"); + + let writer = HfWriter::new(&core, "test.json", op); + let result = writer.write_once(buffer).await; + + assert!( + result.is_ok(), + "upload with content type should succeed: {:?}", + result + ); + } +} From 15ae2660ed7ae666106e9a056ce784d58e06011d Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 9 Feb 2026 23:24:30 +0100 Subject: [PATCH 02/25] style(hf): cargo and taplo fmt --- core/services/hf/Cargo.toml | 28 ++++++++++++++++------------ core/services/hf/src/reader.rs | 6 +----- core/services/hf/src/writer.rs | 10 ++-------- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index b274098c0cf7..b943163b86f8 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -33,13 +33,13 @@ all-features = true [features] default = [] xet = [ - "dep:reqwest", - "dep:xet-data", - 
"dep:cas_types", - "dep:xet-utils", - "dep:tokio", - "dep:futures", - "dep:async-trait", + "dep:reqwest", + "dep:xet-data", + "dep:cas_types", + "dep:xet-utils", + "dep:tokio", + "dep:futures", + "dep:async-trait", ] [dependencies] @@ -55,19 +55,23 @@ sha2 = "0.10" tempfile = "3" # XET storage protocol support (optional) -reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"], optional = true } +reqwest = { version = "0.12", default-features = false, features = [ + "rustls-tls", +], optional = true } # xet-data = { package = "data", git = "https://github.com/huggingface/xet-core", optional = true } # cas_types = { git = "https://github.com/huggingface/xet-core", optional = true } # xet-utils = { package = "utils", git = "https://github.com/huggingface/xet-core", optional = true } -xet-data = { package = "data", path = "/Users/kszucs/Workspace/xet-core/data", optional = true } +async-trait = { version = "0.1", optional = true } cas_types = { path = "/Users/kszucs/Workspace/xet-core/cas_types", optional = true } -xet-utils = { package = "utils", path = "/Users/kszucs/Workspace/xet-core/utils", optional = true } futures = { workspace = true, optional = true } -async-trait = { version = "0.1", optional = true } tokio = { workspace = true, features = ["sync", "rt"], optional = true } +xet-data = { package = "data", path = "/Users/kszucs/Workspace/xet-core/data", optional = true } +xet-utils = { package = "utils", path = "/Users/kszucs/Workspace/xet-core/utils", optional = true } [dev-dependencies] futures = { workspace = true } -opendal-core = { path = "../../core", version = "0.55.0", features = ["reqwest-rustls-tls"] } +opendal-core = { path = "../../core", version = "0.55.0", features = [ + "reqwest-rustls-tls", +] } serde_json = { workspace = true } tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/core/services/hf/src/reader.rs b/core/services/hf/src/reader.rs index bc10a537bf0b..79ffcffd0f26 100644 --- 
a/core/services/hf/src/reader.rs +++ b/core/services/hf/src/reader.rs @@ -162,11 +162,7 @@ mod tests { HfCore { info: info.into(), - repo: HfRepo::new( - repo_type, - repo_id.to_string(), - Some("main".to_string()), - ), + repo: HfRepo::new(repo_type, repo_id.to_string(), Some("main".to_string())), root: "/".to_string(), token: None, endpoint: "https://huggingface.co".to_string(), diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 36431ec0d078..3ba955527e11 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -92,11 +92,7 @@ impl HfWriter { /// Upload file content to XET storage. #[cfg(feature = "xet")] - async fn upload_xet( - core: &HfCore, - path: &str, - body: Buffer, - ) -> Result { + async fn upload_xet(core: &HfCore, path: &str, body: Buffer) -> Result { let bytes = body.to_bytes(); let size = bytes.len() as u64; @@ -149,9 +145,7 @@ impl HfWriter { for _ in 0..MAX_RETRIES { match self.try_upload_and_commit(body.clone()).await { Ok(meta) => return Ok(meta), - Err(err) - if err.kind() == ErrorKind::ConditionNotMatch || err.is_temporary() => - { + Err(err) if err.kind() == ErrorKind::ConditionNotMatch || err.is_temporary() => { last_err = Some(err); continue; } From 44a3682bb3be1f2ad4c9f2b3fd78fa671201f243 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 9 Feb 2026 23:27:18 +0100 Subject: [PATCH 03/25] build(hf): switch to use github branch as xet-core optional dependency --- core/Cargo.lock | 15 +++++++++++++++ core/services/hf/Cargo.toml | 13 +++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index fab12d63dcf0..5a0f8bec7158 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -1552,6 +1552,7 @@ dependencies = [ [[package]] name = "cas_client" version = "0.14.5" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "anyhow", "async-trait", @@ -1600,6 
+1601,7 @@ dependencies = [ [[package]] name = "cas_object" version = "0.1.0" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "anyhow", "blake3", @@ -1626,6 +1628,7 @@ dependencies = [ [[package]] name = "cas_types" version = "0.1.0" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "merklehash", "serde", @@ -2574,6 +2577,7 @@ dependencies = [ [[package]] name = "data" version = "0.14.5" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "anyhow", "async-trait", @@ -2618,6 +2622,7 @@ checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" [[package]] name = "deduplication" version = "0.14.5" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "async-trait", "bytes", @@ -3173,6 +3178,7 @@ dependencies = [ [[package]] name = "error_printer" version = "0.14.5" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "tracing", ] @@ -3345,6 +3351,7 @@ dependencies = [ [[package]] name = "file_reconstruction" version = "0.14.5" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "async-trait", "bytes", @@ -3364,6 +3371,7 @@ dependencies = [ [[package]] name = "file_utils" version = "0.14.2" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "colored", "lazy_static", @@ -4572,6 +4580,7 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hub_client" version = "0.1.0" +source = 
"git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "anyhow", "async-trait", @@ -5748,6 +5757,7 @@ dependencies = [ [[package]] name = "mdb_shard" version = "0.14.5" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "anyhow", "async-trait", @@ -5828,6 +5838,7 @@ dependencies = [ [[package]] name = "merklehash" version = "0.14.5" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "base64 0.22.1", "blake3", @@ -8568,6 +8579,7 @@ dependencies = [ [[package]] name = "progress_tracking" version = "0.1.0" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "async-trait", "merklehash", @@ -12250,6 +12262,7 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "utils" version = "0.14.5" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "async-trait", "bincode", @@ -13434,6 +13447,7 @@ dependencies = [ [[package]] name = "xet_config" version = "0.14.5" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "const-str", "konst", @@ -13443,6 +13457,7 @@ dependencies = [ [[package]] name = "xet_runtime" version = "0.1.0" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" dependencies = [ "dirs", "error_printer", diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index b943163b86f8..303a305cffc3 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -55,18 +55,15 @@ sha2 = "0.10" tempfile = "3" # XET storage protocol support (optional) +async-trait = { version = "0.1", 
optional = true } +cas_types = { git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } +futures = { workspace = true, optional = true } reqwest = { version = "0.12", default-features = false, features = [ "rustls-tls", ], optional = true } -# xet-data = { package = "data", git = "https://github.com/huggingface/xet-core", optional = true } -# cas_types = { git = "https://github.com/huggingface/xet-core", optional = true } -# xet-utils = { package = "utils", git = "https://github.com/huggingface/xet-core", optional = true } -async-trait = { version = "0.1", optional = true } -cas_types = { path = "/Users/kszucs/Workspace/xet-core/cas_types", optional = true } -futures = { workspace = true, optional = true } tokio = { workspace = true, features = ["sync", "rt"], optional = true } -xet-data = { package = "data", path = "/Users/kszucs/Workspace/xet-core/data", optional = true } -xet-utils = { package = "utils", path = "/Users/kszucs/Workspace/xet-core/utils", optional = true } +xet-data = { package = "data", git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } +xet-utils = { package = "utils", git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } [dev-dependencies] futures = { workspace = true } From 3fdcfb14293987240c48ed0ab74910e340d0adaa Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 9 Feb 2026 23:50:00 +0100 Subject: [PATCH 04/25] fix(hf): support config from options --- .github/workflows/ci_core.yml | 2 +- core/services/hf/src/config.rs | 69 +++++++++++++++++++++++++++++----- 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci_core.yml b/.github/workflows/ci_core.yml index 26466107755f..09d6f46ea66e 100644 --- a/.github/workflows/ci_core.yml +++ b/.github/workflows/ci_core.yml @@ -94,7 +94,7 @@ jobs: cargo update zerofrom --precise 0.1.5 cargo update idna_adapter --precise 1.2.0 cargo update litemap --precise 0.7.4 - 
cargo update ctor --precise 0.6.1 + cargo update ctor@0.6.3 --precise 0.6.1 cargo +${OPENDAL_MSRV} clippy -- -D warnings build_default_features: diff --git a/core/services/hf/src/config.rs b/core/services/hf/src/config.rs index d174db75aa14..d4915a86e2cb 100644 --- a/core/services/hf/src/config.rs +++ b/core/services/hf/src/config.rs @@ -78,9 +78,11 @@ impl opendal_core::Configurator for HfConfig { type Builder = HfBuilder; fn from_uri(uri: &opendal_core::OperatorUri) -> opendal_core::Result { + let opts = uri.options(); + // Reconstruct the full path from authority (name) and root. - // OperatorUri splits "hf://datasets/user/repo" into - // name="datasets" and root="user/repo". + // OperatorUri splits "hf://datasets/user/repo@rev/path" into + // name="datasets" and root="user/repo@rev/path". let mut path = String::new(); if let Some(name) = uri.name() { if !name.is_empty() { @@ -96,13 +98,35 @@ impl opendal_core::Configurator for HfConfig { } } - let parsed = HfUri::parse(&path)?; - Ok(Self { - repo_type: parsed.repo.repo_type, - repo_id: Some(parsed.repo.repo_id), - revision: parsed.repo.revision, - ..Default::default() - }) + if !path.is_empty() { + // Full URI like "hf://datasets/user/repo@rev/path" + let parsed = HfUri::parse(&path)?; + Ok(Self { + repo_type: parsed.repo.repo_type, + repo_id: Some(parsed.repo.repo_id), + revision: parsed.repo.revision, + token: opts.get("token").cloned(), + endpoint: opts.get("endpoint").cloned(), + ..Default::default() + }) + } else { + // Bare scheme from via_iter, all config is in options. + let repo_type = opts + .get("repo_type") + .map(|s| RepoType::parse(s)) + .transpose()? 
+ .unwrap_or_default(); + Ok(Self { + repo_type, + repo_id: opts.get("repo_id").cloned(), + revision: opts.get("revision").cloned(), + root: opts.get("root").cloned(), + token: opts.get("token").cloned(), + endpoint: opts.get("endpoint").cloned(), + xet: opts.get("xet").is_some_and(|v| v == "true"), + ..Default::default() + }) + } } fn into_builder(self) -> Self::Builder { @@ -131,4 +155,31 @@ mod tests { assert_eq!(cfg.revision.as_deref(), Some("dev")); assert!(cfg.root.is_none()); } + + #[test] + fn from_uri_via_iter_options() { + use opendal_core::Configurator; + use opendal_core::OperatorUri; + + // Simulates the via_iter path: bare scheme with options map. + let uri = OperatorUri::new( + "huggingface", + vec![ + ("repo_type".to_string(), "dataset".to_string()), + ( + "repo_id".to_string(), + "opendal/huggingface-testdata".to_string(), + ), + ("revision".to_string(), "main".to_string()), + ("root".to_string(), "/testdata/".to_string()), + ], + ) + .unwrap(); + + let cfg = HfConfig::from_uri(&uri).unwrap(); + assert_eq!(cfg.repo_type, RepoType::Dataset); + assert_eq!(cfg.repo_id.as_deref(), Some("opendal/huggingface-testdata")); + assert_eq!(cfg.revision.as_deref(), Some("main")); + assert_eq!(cfg.root.as_deref(), Some("/testdata/")); + } } From 3e0c68403f8bf651ebdd23187012fe95ba6efeac Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 9 Feb 2026 23:58:27 +0100 Subject: [PATCH 05/25] fix(hf): only enable write capability if token is provided --- core/services/hf/src/backend.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 629dc00ecb85..3e4f87f07c10 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -179,7 +179,7 @@ impl Builder for HfBuilder { .set_native_capability(Capability { stat: true, read: true, - write: true, + write: token.is_some(), list: true, list_with_recursive: true, shared: true, From 
c9921eb6a0e07af1e455da242819c33c28e0deac Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Tue, 10 Feb 2026 09:19:22 +0100 Subject: [PATCH 06/25] feat(hf): add support for deleting files --- core/services/hf/src/backend.rs | 11 +++- core/services/hf/src/core.rs | 47 +++++++------- core/services/hf/src/deleter.rs | 107 ++++++++++++++++++++++++++++++++ core/services/hf/src/docs.md | 2 +- core/services/hf/src/lib.rs | 1 + core/services/hf/src/writer.rs | 33 +++------- 6 files changed, 152 insertions(+), 49 deletions(-) create mode 100644 core/services/hf/src/deleter.rs diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 3e4f87f07c10..762586fddc86 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -22,6 +22,7 @@ use log::debug; use super::HF_SCHEME; use super::config::HfConfig; use super::core::HfCore; +use super::deleter::HfDeleter; use super::lister::HfLister; use super::reader::HfReader; use super::uri::{HfRepo, RepoType}; @@ -180,6 +181,7 @@ impl Builder for HfBuilder { stat: true, read: true, write: token.is_some(), + delete: token.is_some(), list: true, list_with_recursive: true, shared: true, @@ -212,7 +214,7 @@ impl Access for HfBackend { type Reader = HfReader; type Writer = oio::OneShotWriter; type Lister = oio::PageLister; - type Deleter = (); + type Deleter = oio::OneShotDeleter; fn info(&self) -> Arc { self.core.info.clone() @@ -243,6 +245,13 @@ impl Access for HfBackend { let writer = HfWriter::new(&self.core, path, args); Ok((RpWrite::default(), oio::OneShotWriter::new(writer))) } + + async fn delete(&self) -> Result<(RpDelete, Self::Deleter)> { + Ok(( + RpDelete::default(), + oio::OneShotDeleter::new(HfDeleter::new(self.core.clone())), + )) + } } #[cfg(test)] diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index adfc3ee02272..ac45652f86ac 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use 
bytes::Buf; use bytes::Bytes; use http::Request; +use http::StatusCode; use http::header; use serde::Deserialize; @@ -63,6 +64,11 @@ pub(super) struct LfsFile { pub size: u64, } +#[derive(Debug, serde::Serialize)] +pub(super) struct DeletedFile { + pub path: String, +} + #[derive(serde::Serialize)] pub(super) struct MixedCommitPayload { pub summary: String, @@ -70,6 +76,8 @@ pub(super) struct MixedCommitPayload { pub files: Vec, #[serde(rename = "lfsFiles", skip_serializing_if = "Vec::is_empty")] pub lfs_files: Vec, + #[serde(rename = "deletedFiles", skip_serializing_if = "Vec::is_empty")] + pub deleted_files: Vec, } // API response types @@ -381,46 +389,37 @@ impl HfCore { Ok(resp) } - /// Commit uploaded files to the repository. + /// Commit file changes (uploads and/or deletions) to the repository. pub(super) async fn commit_files( &self, regular_files: Vec, lfs_files: Vec, - ) -> Result> { + deleted_files: Vec, + ) -> Result<()> { let _token = self.token.as_deref().ok_or_else(|| { Error::new( ErrorKind::PermissionDenied, - "token is required for write operations", + "token is required for commit operations", ) .with_operation("commit") })?; - let mut summary_paths = Vec::new(); - for file in ®ular_files { - summary_paths.push(file.path.clone()); - } - for file in &lfs_files { - summary_paths.push(file.path.clone()); - } - - let summary = if summary_paths.len() == 1 { - format!("Upload {} via OpenDAL", summary_paths[0]) - } else { - format!("Upload {} files via OpenDAL", summary_paths.len()) - }; - - let client = self.info.http_client(); - // Use the first file's path to determine the commit URL - let first_path = summary_paths + let first_path = regular_files .first() + .map(|f| f.path.as_str()) + .or_else(|| lfs_files.first().map(|f| f.path.as_str())) + .or_else(|| deleted_files.first().map(|f| f.path.as_str())) .ok_or_else(|| Error::new(ErrorKind::Unexpected, "no files to commit"))?; + + let client = self.info.http_client(); let uri = 
self.repo.uri(&self.root, first_path); let url = uri.commit_url(&self.endpoint); let payload = MixedCommitPayload { - summary, + summary: "Commit via OpenDAL".to_string(), files: regular_files, lfs_files, + deleted_files, }; let json_body = serde_json::to_vec(&payload).map_err(new_json_serialize_error)?; @@ -432,7 +431,11 @@ impl HfCore { .body(Buffer::from(json_body)) .map_err(new_request_build_error)?; - client.send(req).await + let resp = client.send(req).await?; + match resp.status() { + StatusCode::OK | StatusCode::CREATED => Ok(()), + _ => Err(parse_error(resp)), + } } } diff --git a/core/services/hf/src/deleter.rs b/core/services/hf/src/deleter.rs new file mode 100644 index 000000000000..8d6cd15e0c88 --- /dev/null +++ b/core/services/hf/src/deleter.rs @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use super::core::{DeletedFile, HfCore}; +use opendal_core::raw::*; +use opendal_core::*; + +pub struct HfDeleter { + core: Arc, +} + +impl HfDeleter { + pub fn new(core: Arc) -> Self { + Self { core } + } +} + +impl oio::OneShotDelete for HfDeleter { + async fn delete_once(&self, path: String, _: OpDelete) -> Result<()> { + let deleted = vec![DeletedFile { path }]; + match self.core.commit_files(vec![], vec![], deleted).await { + Ok(()) => Ok(()), + Err(err) if err.kind() == ErrorKind::NotFound => Ok(()), + Err(err) => Err(err), + } + } +} + +#[cfg(test)] +mod tests { + use super::super::core::HfCore; + use super::super::uri::{HfRepo, RepoType}; + use super::super::writer::HfWriter; + use super::*; + use oio::OneShotDelete; + use oio::OneShotWrite; + + fn testing_core() -> HfCore { + let repo_id = std::env::var("HF_OPENDAL_DATASET").expect("HF_OPENDAL_DATASET must be set"); + + let info = AccessorInfo::default(); + info.set_scheme("huggingface") + .set_native_capability(Capability { + write: true, + delete: true, + ..Default::default() + }); + + HfCore { + info: info.into(), + repo: HfRepo::new(RepoType::Dataset, repo_id, Some("main".to_string())), + root: "/".to_string(), + token: std::env::var("HF_OPENDAL_TOKEN").ok(), + endpoint: "https://huggingface.co".to_string(), + #[cfg(feature = "xet")] + xet_enabled: false, + } + } + + #[tokio::test] + #[ignore] + async fn test_delete_once() { + let core = Arc::new(testing_core()); + + // First write a file so we have something to delete + let writer = HfWriter::new(&core, "delete-test.txt", OpWrite::default()); + writer + .write_once(Buffer::from("temporary content")) + .await + .expect("write should succeed"); + + // Now delete it + let deleter = HfDeleter::new(core); + deleter + .delete_once("delete-test.txt".to_string(), OpDelete::default()) + .await + .expect("delete should succeed"); + } + + #[tokio::test] + #[ignore] + async fn test_delete_nonexistent() { + let core = 
Arc::new(testing_core()); + + let deleter = HfDeleter::new(core); + deleter + .delete_once("nonexistent-file.txt".to_string(), OpDelete::default()) + .await + .expect("deleting nonexistent file should succeed"); + } +} diff --git a/core/services/hf/src/docs.md b/core/services/hf/src/docs.md index 5564419c1f29..f7620f0cd241 100644 --- a/core/services/hf/src/docs.md +++ b/core/services/hf/src/docs.md @@ -12,7 +12,7 @@ This service can be used to: - [x] stat - [x] read - [x] write -- [ ] delete +- [x] delete - [x] list - [ ] copy - [ ] rename diff --git a/core/services/hf/src/lib.rs b/core/services/hf/src/lib.rs index 018b8feb4675..0d4adbac9f1b 100644 --- a/core/services/hf/src/lib.rs +++ b/core/services/hf/src/lib.rs @@ -35,6 +35,7 @@ mod lister; mod reader; mod writer; mod uri; +mod deleter; pub use backend::HfBuilder as Hf; pub use config::HfConfig; diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 3ba955527e11..0d378e0eddbc 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -18,12 +18,11 @@ use std::sync::Arc; use base64::Engine; -use http::StatusCode; use sha2::{Digest, Sha256}; -use super::core::{CommitFile, HfCore, LfsFile, PreuploadFile}; +use super::core::{CommitFile, HfCore, PreuploadFile}; #[cfg(feature = "xet")] -use super::core::{XetTokenRefresher, map_xet_error}; +use super::core::{LfsFile, XetTokenRefresher, map_xet_error}; use opendal_core::raw::*; use opendal_core::*; @@ -181,12 +180,10 @@ impl HfWriter { // Commit the files let regular_files: Vec<_> = commit_file.into_iter().collect(); let lfs_files: Vec<_> = lfs_file.into_iter().collect(); - let resp = self.core.commit_files(regular_files, lfs_files).await?; - - match resp.status() { - StatusCode::OK | StatusCode::CREATED => Ok(Metadata::default()), - _ => Err(super::error::parse_error(resp)), - } + self.core + .commit_files(regular_files, lfs_files, vec![]) + .await?; + Ok(Metadata::default()) } } @@ -236,16 +233,9 @@ mod tests { 
.await .expect("upload should succeed"); - let resp = core - .commit_files(vec![commit_file], vec![]) + core.commit_files(vec![commit_file], vec![], vec![]) .await .expect("commit should succeed"); - - assert!( - resp.status() == StatusCode::OK || resp.status() == StatusCode::CREATED, - "expected OK or CREATED status, got {}", - resp.status() - ); } #[tokio::test] @@ -288,16 +278,9 @@ mod tests { .await .expect("xet upload should succeed"); - let resp = core - .commit_files(vec![], vec![lfs_file]) + core.commit_files(vec![], vec![lfs_file], vec![]) .await .expect("commit should succeed"); - - assert!( - resp.status() == StatusCode::OK || resp.status() == StatusCode::CREATED, - "expected OK or CREATED status, got {}", - resp.status() - ); } #[cfg(feature = "xet")] From 219a787d162606db17e7135417459d863f5ce365 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Tue, 10 Feb 2026 09:28:59 +0100 Subject: [PATCH 07/25] feat(hf): implement BatchDelete instead of OneShotDelete --- core/services/hf/src/backend.rs | 8 +++-- core/services/hf/src/core.rs | 2 +- core/services/hf/src/deleter.rs | 58 +++++++++++++++++++++++++++++---- 3 files changed, 59 insertions(+), 9 deletions(-) diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 762586fddc86..47b0159eaaf8 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -182,6 +182,7 @@ impl Builder for HfBuilder { read: true, write: token.is_some(), delete: token.is_some(), + delete_max_size: Some(100), list: true, list_with_recursive: true, shared: true, @@ -214,7 +215,7 @@ impl Access for HfBackend { type Reader = HfReader; type Writer = oio::OneShotWriter; type Lister = oio::PageLister; - type Deleter = oio::OneShotDeleter; + type Deleter = oio::BatchDeleter; fn info(&self) -> Arc { self.core.info.clone() @@ -249,7 +250,10 @@ impl Access for HfBackend { async fn delete(&self) -> Result<(RpDelete, Self::Deleter)> { Ok(( RpDelete::default(), - 
oio::OneShotDeleter::new(HfDeleter::new(self.core.clone())), + oio::BatchDeleter::new( + HfDeleter::new(self.core.clone()), + self.core.info.full_capability().delete_max_size, + ), )) } } diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index ac45652f86ac..66bc64899fea 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -64,7 +64,7 @@ pub(super) struct LfsFile { pub size: u64, } -#[derive(Debug, serde::Serialize)] +#[derive(Clone, Debug, serde::Serialize)] pub(super) struct DeletedFile { pub path: String, } diff --git a/core/services/hf/src/deleter.rs b/core/services/hf/src/deleter.rs index 8d6cd15e0c88..0c62fa87c6c5 100644 --- a/core/services/hf/src/deleter.rs +++ b/core/services/hf/src/deleter.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use super::core::{DeletedFile, HfCore}; +use opendal_core::raw::oio::BatchDeleteResult; use opendal_core::raw::*; use opendal_core::*; @@ -29,12 +30,9 @@ impl HfDeleter { pub fn new(core: Arc) -> Self { Self { core } } -} -impl oio::OneShotDelete for HfDeleter { - async fn delete_once(&self, path: String, _: OpDelete) -> Result<()> { - let deleted = vec![DeletedFile { path }]; - match self.core.commit_files(vec![], vec![], deleted).await { + async fn commit_delete(&self, deleted_files: Vec) -> Result<()> { + match self.core.commit_files(vec![], vec![], deleted_files).await { Ok(()) => Ok(()), Err(err) if err.kind() == ErrorKind::NotFound => Ok(()), Err(err) => Err(err), @@ -42,13 +40,32 @@ impl oio::OneShotDelete for HfDeleter { } } +impl oio::BatchDelete for HfDeleter { + async fn delete_once(&self, path: String, _: OpDelete) -> Result<()> { + self.commit_delete(vec![DeletedFile { path }]).await + } + + async fn delete_batch(&self, batch: Vec<(String, OpDelete)>) -> Result { + let deleted_files: Vec = batch + .iter() + .map(|(path, _)| DeletedFile { path: path.clone() }) + .collect(); + + self.commit_delete(deleted_files).await?; + Ok(BatchDeleteResult { + succeeded: batch, + 
failed: vec![], + }) + } +} + #[cfg(test)] mod tests { use super::super::core::HfCore; use super::super::uri::{HfRepo, RepoType}; use super::super::writer::HfWriter; use super::*; - use oio::OneShotDelete; + use oio::BatchDelete; use oio::OneShotWrite; fn testing_core() -> HfCore { @@ -104,4 +121,33 @@ mod tests { .await .expect("deleting nonexistent file should succeed"); } + + #[tokio::test] + #[ignore] + async fn test_delete_batch() { + let core = Arc::new(testing_core()); + + // Write two files + for name in &["batch-del-1.txt", "batch-del-2.txt"] { + let writer = HfWriter::new(&core, name, OpWrite::default()); + writer + .write_once(Buffer::from("temp")) + .await + .expect("write should succeed"); + } + + // Batch delete them in a single commit + let deleter = HfDeleter::new(core); + let batch = vec![ + ("batch-del-1.txt".to_string(), OpDelete::default()), + ("batch-del-2.txt".to_string(), OpDelete::default()), + ]; + let result = deleter + .delete_batch(batch) + .await + .expect("batch delete should succeed"); + + assert_eq!(result.succeeded.len(), 2); + assert!(result.failed.is_empty()); + } } From 82d5de4df84bc07e24831557cd24aabb2e079f88 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Tue, 10 Feb 2026 09:54:19 +0100 Subject: [PATCH 08/25] feat(hf): retry requests by default --- core/services/hf/src/backend.rs | 22 ++++++--- core/services/hf/src/config.rs | 5 +++ core/services/hf/src/core.rs | 80 ++++++++++++++++++++++++--------- core/services/hf/src/deleter.rs | 1 + core/services/hf/src/reader.rs | 1 + core/services/hf/src/writer.rs | 23 +--------- 6 files changed, 82 insertions(+), 50 deletions(-) diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 47b0159eaaf8..9ee49c266666 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -129,6 +129,15 @@ impl HfBuilder { self.config.xet = xet; self } + + /// Set the maximum number of retries for commit operations. 
+ /// + /// Retries on commit conflicts (HTTP 412) and transient server + /// errors (HTTP 5xx). Default is 3. + pub fn max_retries(mut self, max_retries: usize) -> Self { + self.config.max_retries = Some(max_retries); + self + } } impl Builder for HfBuilder { @@ -198,6 +207,7 @@ impl Builder for HfBuilder { root, token, endpoint, + max_retries: self.config.max_retries.unwrap_or(3), #[cfg(feature = "xet")] xet_enabled: self.config.xet, }), @@ -237,9 +247,8 @@ impl Access for HfBackend { } async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { - let l = HfLister::new(self.core.clone(), path.to_string(), args.recursive()); - - Ok((RpList::default(), oio::PageLister::new(l))) + let lister = HfLister::new(self.core.clone(), path.to_string(), args.recursive()); + Ok((RpList::default(), oio::PageLister::new(lister))) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -248,12 +257,11 @@ impl Access for HfBackend { } async fn delete(&self) -> Result<(RpDelete, Self::Deleter)> { + let deleter = HfDeleter::new(self.core.clone()); + let delete_max_size = self.core.info.full_capability().delete_max_size; Ok(( RpDelete::default(), - oio::BatchDeleter::new( - HfDeleter::new(self.core.clone()), - self.core.info.full_capability().delete_max_size, - ), + oio::BatchDeleter::new(deleter, delete_max_size), )) } } diff --git a/core/services/hf/src/config.rs b/core/services/hf/src/config.rs index d4915a86e2cb..1bba36f37dc3 100644 --- a/core/services/hf/src/config.rs +++ b/core/services/hf/src/config.rs @@ -61,6 +61,11 @@ pub struct HfConfig { /// check for XET-backed files and use the XET protocol for /// downloading. Default is false. pub xet: bool, + /// Maximum number of retries for commit operations. + /// + /// Retries on commit conflicts (HTTP 412) and transient server + /// errors (HTTP 5xx). Default is 3. 
+ pub max_retries: Option, } impl Debug for HfConfig { diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index 66bc64899fea..3f48604a21be 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -21,7 +21,6 @@ use std::sync::Arc; use bytes::Buf; use bytes::Bytes; use http::Request; -use http::StatusCode; use http::header; use serde::Deserialize; @@ -184,6 +183,7 @@ pub struct HfCore { pub root: String, pub token: Option, pub endpoint: String, + pub max_retries: usize, #[cfg(feature = "xet")] pub xet_enabled: bool, @@ -238,17 +238,42 @@ impl HfCore { /// /// Returns the response parts (status, headers, etc.) alongside the /// deserialized body so callers can inspect headers when needed. + /// + /// When `max_retries` > 1, retries on commit conflicts (HTTP 412) and + /// transient server errors (HTTP 5xx), matching the behavior of the + /// official HuggingFace Hub client. async fn send_request( &self, req: Request, + max_retries: usize, ) -> Result<(http::response::Parts, T)> { - let resp = self.info.http_client().send(req).await?; - if !resp.status().is_success() { - return Err(parse_error(resp)); + let client = self.info.http_client(); + let mut attempt = 0; + loop { + match client.send(req.clone()).await { + Ok(resp) if resp.status().is_success() => { + let (parts, body) = resp.into_parts(); + let parsed = serde_json::from_reader(body.reader()) + .map_err(new_json_deserialize_error)?; + return Ok((parts, parsed)); + } + Ok(resp) => { + attempt += 1; + let err = parse_error(resp); + let retryable = + err.kind() == ErrorKind::ConditionNotMatch || err.is_temporary(); + if attempt >= max_retries || !retryable { + return Err(err); + } + } + Err(err) => { + attempt += 1; + if attempt >= max_retries || !err.is_temporary() { + return Err(err); + } + } + } } - let (parts, body) = resp.into_parts(); - let parsed = serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; - Ok((parts, parsed)) } pub async fn 
path_info(&self, path: &str) -> Result { @@ -261,7 +286,7 @@ impl HfCore { .header(header::CONTENT_TYPE, "application/x-www-form-urlencoded") .body(Buffer::from(Bytes::from(form_body))) .map_err(new_request_build_error)?; - let (_, mut files) = self.send_request::>(req).await?; + let (_, mut files) = self.send_request::>(req, 1).await?; // NOTE: if the file is not found, the server will return 200 with an empty array if files.is_empty() { @@ -284,7 +309,7 @@ impl HfCore { .request(http::Method::GET, &url, Operation::List) .body(Buffer::new()) .map_err(new_request_build_error)?; - let (parts, files) = self.send_request::>(req).await?; + let (parts, files) = self.send_request::>(req, 1).await?; let next_cursor = parts .headers @@ -302,16 +327,17 @@ impl HfCore { .request(http::Method::GET, &url, Operation::Read) .body(Buffer::new()) .map_err(new_request_build_error)?; - let (_, token) = self.send_request(req).await?; + let (_, token) = self.send_request(req, 1).await?; Ok(token) } /// Issue a HEAD request and extract XET file info (hash and size). /// - /// Uses a custom HTTP client that does NOT follow redirects so we can - /// inspect response headers (e.g. `X-Xet-Hash`) from the 302 response. - /// /// Returns `None` if the `X-Xet-Hash` header is absent or empty. + /// + /// NOTE: Cannot use `send_request` here because we need a custom + /// no-redirect HTTP client to inspect headers (e.g. `X-Xet-Hash`) + /// from the 302 response, and the response is not JSON. #[cfg(feature = "xet")] pub(super) async fn get_xet_file(&self, path: &str) -> Result> { let uri = self.repo.uri(&self.root, path); @@ -330,7 +356,17 @@ impl HfCore { .body(Buffer::new()) .map_err(new_request_build_error)?; - let resp = client.send(req).await?; + // Retry on transient errors, same as send_request. 
+ let mut attempt = 0; + let resp = loop { + let resp = client.send(req.clone()).await?; + + attempt += 1; + let retryable = resp.status().is_server_error(); + if attempt >= self.max_retries || !retryable { + break resp; + } + }; let hash = resp .headers() @@ -385,11 +421,15 @@ impl HfCore { .body(Buffer::from(json_body)) .map_err(new_request_build_error)?; - let (_, resp) = self.send_request(req).await?; + let (_, resp) = self.send_request(req, 1).await?; Ok(resp) } /// Commit file changes (uploads and/or deletions) to the repository. + /// + /// Retries on commit conflicts (HTTP 412) and transient server errors + /// (HTTP 5xx), matching the behavior of the official HuggingFace Hub + /// client. pub(super) async fn commit_files( &self, regular_files: Vec, @@ -411,7 +451,6 @@ impl HfCore { .or_else(|| deleted_files.first().map(|f| f.path.as_str())) .ok_or_else(|| Error::new(ErrorKind::Unexpected, "no files to commit"))?; - let client = self.info.http_client(); let uri = self.repo.uri(&self.root, first_path); let url = uri.commit_url(&self.endpoint); @@ -431,11 +470,9 @@ impl HfCore { .body(Buffer::from(json_body)) .map_err(new_request_build_error)?; - let resp = client.send(req).await?; - match resp.status() { - StatusCode::OK | StatusCode::CREATED => Ok(()), - _ => Err(parse_error(resp)), - } + self.send_request::(req, self.max_retries) + .await?; + Ok(()) } } @@ -537,6 +574,7 @@ pub(crate) mod test_utils { root: "/".to_string(), token: None, endpoint: endpoint.to_string(), + max_retries: 3, #[cfg(feature = "xet")] xet_enabled: false, }; diff --git a/core/services/hf/src/deleter.rs b/core/services/hf/src/deleter.rs index 0c62fa87c6c5..d5dc5d7907b3 100644 --- a/core/services/hf/src/deleter.rs +++ b/core/services/hf/src/deleter.rs @@ -85,6 +85,7 @@ mod tests { root: "/".to_string(), token: std::env::var("HF_OPENDAL_TOKEN").ok(), endpoint: "https://huggingface.co".to_string(), + max_retries: 3, #[cfg(feature = "xet")] xet_enabled: false, } diff --git 
a/core/services/hf/src/reader.rs b/core/services/hf/src/reader.rs index 79ffcffd0f26..b219a2037ed2 100644 --- a/core/services/hf/src/reader.rs +++ b/core/services/hf/src/reader.rs @@ -166,6 +166,7 @@ mod tests { root: "/".to_string(), token: None, endpoint: "https://huggingface.co".to_string(), + max_retries: 3, #[cfg(feature = "xet")] xet_enabled: _xet, } diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 0d378e0eddbc..285f5a1180dc 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -132,29 +132,7 @@ impl HfWriter { }) } - /// Upload file and commit based on determined mode. - /// - /// Retries on commit conflicts (HTTP 412) and transient server errors - /// (HTTP 5xx), matching the behavior of the official HuggingFace Hub - /// client. async fn upload_and_commit(&self, body: Buffer) -> Result { - const MAX_RETRIES: usize = 3; - - let mut last_err = None; - for _ in 0..MAX_RETRIES { - match self.try_upload_and_commit(body.clone()).await { - Ok(meta) => return Ok(meta), - Err(err) if err.kind() == ErrorKind::ConditionNotMatch || err.is_temporary() => { - last_err = Some(err); - continue; - } - Err(err) => return Err(err), - } - } - Err(last_err.unwrap()) - } - - async fn try_upload_and_commit(&self, body: Buffer) -> Result { #[cfg_attr(not(feature = "xet"), allow(unused_variables))] let mode = Self::determine_upload_mode(&self.core, &self.path, &body).await?; @@ -216,6 +194,7 @@ mod tests { root: "/".to_string(), token: std::env::var("HF_OPENDAL_TOKEN").ok(), endpoint: "https://huggingface.co".to_string(), + max_retries: 3, #[cfg(feature = "xet")] xet_enabled: _xet, } From d212bd3a0c8f6bfc751fbb9855f5b7a42a1819f6 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Tue, 10 Feb 2026 14:56:36 +0100 Subject: [PATCH 09/25] feat(hf): support multiple upload modes and more thorough testing --- core/services/hf/Cargo.toml | 6 +- core/services/hf/src/backend.rs | 260 ++++++++-- core/services/hf/src/core.rs | 
525 +++++--------------- core/services/hf/src/deleter.rs | 90 ++-- core/services/hf/src/lister.rs | 110 ++++- core/services/hf/src/reader.rs | 230 ++++----- core/services/hf/src/uri.rs | 16 + core/services/hf/src/writer.rs | 827 ++++++++++++++++++++++++++------ 8 files changed, 1258 insertions(+), 806 deletions(-) diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index 303a305cffc3..ded041281dc0 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -37,7 +37,8 @@ xet = [ "dep:xet-data", "dep:cas_types", "dep:xet-utils", - "dep:tokio", + "tokio/sync", + "tokio/rt", "dep:futures", "dep:async-trait", ] @@ -53,6 +54,7 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } sha2 = "0.10" tempfile = "3" +tokio = { workspace = true, features = ["time"] } # XET storage protocol support (optional) async-trait = { version = "0.1", optional = true } @@ -61,7 +63,6 @@ futures = { workspace = true, optional = true } reqwest = { version = "0.12", default-features = false, features = [ "rustls-tls", ], optional = true } -tokio = { workspace = true, features = ["sync", "rt"], optional = true } xet-data = { package = "data", git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } xet-utils = { package = "utils", git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } @@ -70,5 +71,6 @@ futures = { workspace = true } opendal-core = { path = "../../core", version = "0.55.0", features = [ "reqwest-rustls-tls", ] } +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } serde_json = { workspace = true } tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 9ee49c266666..84464fa88c45 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -122,11 +122,17 @@ impl HfBuilder { /// Enable XET 
storage protocol for reads. /// - /// When true and the `xet` feature is compiled in, reads will - /// check for XET-backed files and use the XET protocol for - /// downloading. Default is false. - pub fn xet(mut self, xet: bool) -> Self { - self.config.xet = xet; + /// When the `xet` feature is compiled in, reads will check for + /// XET-backed files and use the XET protocol for downloading. + /// Default is disabled. + pub fn enable_xet(mut self) -> Self { + self.config.xet = true; + self + } + + /// Disable XET storage protocol for reads. + pub fn disable_xet(mut self) -> Self { + self.config.xet = false; self } @@ -143,7 +149,6 @@ impl HfBuilder { impl Builder for HfBuilder { type Config = HfConfig; - /// Build a HfBackend. fn build(self) -> Result { debug!("backend build started: {:?}", &self); @@ -200,17 +205,23 @@ impl Builder for HfBuilder { am.into() }; + let repo = HfRepo::new(repo_type, repo_id, Some(revision.clone())); + debug!("backend repo uri: {:?}", repo.uri(&root, "")); + + let max_retries = self.config.max_retries.unwrap_or(3); + debug!("backend max_retries: {}", max_retries); + Ok(HfBackend { - core: Arc::new(HfCore { + core: Arc::new(HfCore::new( info, - repo: HfRepo::new(repo_type, repo_id, Some(revision)), + repo, root, token, endpoint, - max_retries: self.config.max_retries.unwrap_or(3), + max_retries, #[cfg(feature = "xet")] - xet_enabled: self.config.xet, - }), + self.config.xet, + )?), }) } } @@ -218,7 +229,7 @@ impl Builder for HfBuilder { /// Backend for Hugging Face service #[derive(Debug, Clone)] pub struct HfBackend { - core: Arc, + pub(crate) core: Arc, } impl Access for HfBackend { @@ -258,16 +269,108 @@ impl Access for HfBackend { async fn delete(&self) -> Result<(RpDelete, Self::Deleter)> { let deleter = HfDeleter::new(self.core.clone()); - let delete_max_size = self.core.info.full_capability().delete_max_size; + let max_batch_size = self.core.info.full_capability().delete_max_size; Ok(( RpDelete::default(), - 
oio::BatchDeleter::new(deleter, delete_max_size), + oio::BatchDeleter::new(deleter, max_batch_size), )) } } +#[cfg(test)] +pub(super) mod test_utils { + use super::HfBuilder; + use opendal_core::Operator; + use opendal_core::layers::HttpClientLayer; + use opendal_core::raw::HttpClient; + + /// Create an operator with a fresh HTTP client so parallel tests + /// don't share the global static reqwest client (which causes + /// "dispatch task is gone" errors when runtimes are dropped). + fn finish_operator(op: Operator) -> Operator { + let client = HttpClient::with(reqwest::Client::new()); + op.layer(HttpClientLayer::new(client)) + } + + pub fn testing_credentials() -> (String, String) { + let repo_id = std::env::var("HF_OPENDAL_DATASET").expect("HF_OPENDAL_DATASET must be set"); + let token = std::env::var("HF_OPENDAL_TOKEN").expect("HF_OPENDAL_TOKEN must be set"); + (repo_id, token) + } + + /// Operator for a private dataset requiring HF_OPENDAL_DATASET and HF_OPENDAL_TOKEN. + /// Uses higher max_retries to tolerate concurrent commit conflicts (412). 
+ pub fn testing_operator() -> Operator { + let (repo_id, token) = testing_credentials(); + let op = Operator::new( + HfBuilder::default() + .repo_type("dataset") + .repo_id(&repo_id) + .token(&token) + .max_retries(10), + ) + .unwrap() + .finish(); + finish_operator(op) + } + + #[cfg(feature = "xet")] + pub fn testing_xet_operator() -> Operator { + let (repo_id, token) = testing_credentials(); + let op = Operator::new( + HfBuilder::default() + .repo_type("dataset") + .repo_id(&repo_id) + .token(&token) + .enable_xet() + .max_retries(10), + ) + .unwrap() + .finish(); + finish_operator(op) + } + + pub fn gpt2_operator() -> Operator { + let op = Operator::new( + HfBuilder::default() + .repo_type("model") + .repo_id("openai-community/gpt2"), + ) + .unwrap() + .finish(); + finish_operator(op) + } + + pub fn mbpp_operator() -> Operator { + let op = Operator::new( + HfBuilder::default() + .repo_type("dataset") + .repo_id("google-research-datasets/mbpp"), + ) + .unwrap() + .finish(); + finish_operator(op) + } + + #[cfg(feature = "xet")] + pub fn mbpp_xet_operator() -> Operator { + let mut builder = HfBuilder::default() + .repo_type("dataset") + .repo_id("google-research-datasets/mbpp") + .enable_xet(); + if let Ok(token) = std::env::var("HF_OPENDAL_TOKEN") { + builder = builder.token(&token); + } + let op = Operator::new(builder).unwrap().finish(); + finish_operator(op) + } +} + #[cfg(test)] mod tests { + use super::test_utils::mbpp_operator; + #[cfg(feature = "xet")] + use super::test_utils::mbpp_xet_operator; use super::*; #[test] @@ -311,16 +414,6 @@ mod tests { /// Parquet magic bytes: "PAR1" const PARQUET_MAGIC: &[u8] = b"PAR1"; - fn mbpp_operator() -> Operator { - let builder = HfBuilder::default() - .repo_type("dataset") - .repo_id("google-research-datasets/mbpp") - .revision("main") - .root("/"); - - Operator::new(builder).unwrap().finish() - } - #[tokio::test] #[ignore = "requires network access"] async fn test_read_parquet_http() { @@ -348,29 +441,11 @@ mod 
tests { assert_eq!(&footer.to_vec(), PARQUET_MAGIC); } - #[cfg(feature = "xet")] - fn mbpp_operator_xet() -> Operator { - let repo_id = std::env::var("HF_OPENDAL_DATASET") - .unwrap_or_else(|_| "google-research-datasets/mbpp".to_string()); - let mut builder = HfBuilder::default() - .repo_type("dataset") - .repo_id(&repo_id) - .revision("main") - .root("/") - .xet(true); - - if let Ok(token) = std::env::var("HF_OPENDAL_TOKEN") { - builder = builder.token(&token); - } - - Operator::new(builder).unwrap().finish() - } - #[cfg(feature = "xet")] #[tokio::test] #[ignore = "requires network access"] async fn test_read_parquet_xet() { - let op = mbpp_operator_xet(); + let op = mbpp_xet_operator(); let path = "full/train-00000-of-00001.parquet"; // Full read via XET and verify parquet magic at both ends @@ -380,4 +455,103 @@ mod tests { assert_eq!(&bytes[..4], PARQUET_MAGIC); assert_eq!(&bytes[bytes.len() - 4..], PARQUET_MAGIC); } + + /// List files in a known dataset directory. + #[tokio::test] + #[ignore = "requires network access"] + async fn test_list_directory() { + let op = mbpp_operator(); + let entries = op.list("full/").await.expect("list should succeed"); + assert!(!entries.is_empty(), "directory should contain files"); + assert!( + entries.iter().any(|e| e.path().ends_with(".parquet")), + "should contain parquet files" + ); + } + + /// List files recursively from root. + #[tokio::test] + #[ignore = "requires network access"] + async fn test_list_recursive() { + let op = mbpp_operator(); + let entries = op + .list_with("/") + .recursive(true) + .await + .expect("recursive list should succeed"); + assert!( + entries.len() > 1, + "recursive listing should find multiple files" + ); + } + + /// Stat a known file and verify metadata fields. 
+ #[tokio::test] + #[ignore = "requires network access"] + async fn test_stat_known_file() { + let op = mbpp_operator(); + let meta = op + .stat("full/train-00000-of-00001.parquet") + .await + .expect("stat should succeed"); + assert!(meta.content_length() > 0); + assert!(!meta.etag().unwrap_or_default().is_empty()); + } + + /// Stat a nonexistent path should return NotFound. + #[tokio::test] + #[ignore = "requires network access"] + async fn test_stat_nonexistent() { + let op = mbpp_operator(); + let err = op + .stat("this/path/does/not/exist.txt") + .await + .expect_err("stat on nonexistent path should fail"); + assert_eq!(err.kind(), ErrorKind::NotFound); + } + + /// Read a nonexistent file should return NotFound. + #[tokio::test] + #[ignore = "requires network access"] + async fn test_read_nonexistent() { + let op = mbpp_operator(); + let err = op + .read("this/path/does/not/exist.txt") + .await + .expect_err("read on nonexistent path should fail"); + assert_eq!(err.kind(), ErrorKind::NotFound); + } + + /// Read a middle range of a known file. + #[tokio::test] + #[ignore = "requires network access"] + async fn test_read_range_middle() { + let op = mbpp_operator(); + let data = op + .read_with("full/train-00000-of-00001.parquet") + .range(100..200) + .await + .expect("range read should succeed"); + assert_eq!(data.to_bytes().len(), 100); + } + + /// Read the last N bytes of a file to exercise tail-range handling. 
+ #[tokio::test] + #[ignore = "requires network access"] + async fn test_read_range_tail() { + let op = mbpp_operator(); + let path = "full/train-00000-of-00001.parquet"; + let meta = op.stat(path).await.expect("stat should succeed"); + let size = meta.content_length(); + + let data = op + .read_with(path) + .range(size - 100..size) + .await + .expect("tail range read should succeed"); + let bytes = data.to_bytes(); + assert_eq!(bytes.len(), 100); + // Parquet files end with "PAR1" magic + assert_eq!(&bytes[bytes.len() - 4..], PARQUET_MAGIC); + } } diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index 3f48604a21be..2345fe1c8a23 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use bytes::Buf; use bytes::Bytes; use http::Request; +use http::Response; use http::header; use serde::Deserialize; @@ -32,21 +33,6 @@ use super::uri::HfRepo; use opendal_core::raw::*; use opendal_core::*; -/// API payload structures for preupload operations -#[derive(serde::Serialize)] -pub(super) struct PreuploadFile { - pub path: String, - pub size: u64, - pub sample: String, - #[serde(rename = "sha256")] - pub sha256: String, -} - -#[derive(serde::Serialize)] -pub(super) struct PreuploadRequest { - pub files: Vec, -} - /// API payload structures for commit operations #[derive(Debug, serde::Serialize)] pub(super) struct CommitFile { @@ -82,16 +68,12 @@ pub(super) struct MixedCommitPayload { // API response types #[derive(serde::Deserialize, Debug)] -pub(super) struct PreuploadFileResponse { +pub(super) struct CommitResponse { + #[serde(rename = "commitOid")] + pub commit_oid: Option, #[allow(dead_code)] - pub path: String, - #[serde(rename = "uploadMode")] - pub upload_mode: String, -} - -#[derive(serde::Deserialize, Debug)] -pub(super) struct PreuploadResponse { - pub files: Vec, + #[serde(rename = "commitUrl")] + pub commit_url: Option, } #[derive(Deserialize, Eq, PartialEq, Debug)] @@ -149,13 
+131,6 @@ pub(super) struct LastCommit { pub date: String, } -/// Response from the tree/list API endpoint -#[derive(Debug)] -pub(super) struct FileTree { - pub files: Vec, - pub next_cursor: Option, -} - #[cfg(feature = "xet")] #[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -165,13 +140,6 @@ pub(super) struct XetToken { pub exp: u64, } -#[cfg(feature = "xet")] -#[derive(Clone, Debug)] -pub(super) struct XetFile { - pub hash: String, - pub size: u64, -} - // Core HuggingFace client that manages API interactions, authentication // and shared logic for reader/writer/lister. @@ -185,8 +153,16 @@ pub struct HfCore { pub endpoint: String, pub max_retries: usize, + // Whether XET storage protocol is enabled for reads. When true + // and the `xet` feature is compiled in, reads will check for + // XET-backed files and use the XET protocol for downloading. #[cfg(feature = "xet")] pub xet_enabled: bool, + + /// HTTP client with redirects disabled, used by XET probes to + /// inspect headers on 302 responses. + #[cfg(feature = "xet")] + pub no_redirect_client: HttpClient, } impl Debug for HfCore { @@ -201,23 +177,46 @@ impl Debug for HfCore { } } -/// Extract the cursor value from a Link header's "next" URL. -fn parse_next_cursor(link_str: &str) -> Option { - for link in link_str.split(',') { - if link.contains("rel=\"next\"") || link.contains("rel='next'") { - let (_, rest) = link.split_once('<')?; - let (url, _) = rest.split_once('>')?; - let query = url.split_once('?')?.1; - return query - .split('&') - .find_map(|p| p.strip_prefix("cursor=")) - .map(|v| v.to_string()); - } +impl HfCore { + pub fn new( + info: Arc, + repo: HfRepo, + root: String, + token: Option, + endpoint: String, + max_retries: usize, + #[cfg(feature = "xet")] xet_enabled: bool, + ) -> Result { + // When xet is enabled at runtime, use dedicated reqwest clients instead + // of the global one. 
This avoids "dispatch task is gone" errors when + // multiple tokio runtimes exist (e.g. in tests) and ensures the + // no-redirect client shares the same runtime as the standard client. + // When xet is disabled, preserve whatever HTTP client is already set + // on `info` (important for mock-based unit tests). + #[cfg(feature = "xet")] + let no_redirect_client = if xet_enabled { + let standard = HttpClient::with(build_reqwest(reqwest::redirect::Policy::default())?); + let no_redirect = HttpClient::with(build_reqwest(reqwest::redirect::Policy::none())?); + info.update_http_client(|_| standard); + no_redirect + } else { + info.http_client() + }; + + Ok(Self { + info, + repo, + root, + token, + endpoint, + max_retries, + #[cfg(feature = "xet")] + xet_enabled, + #[cfg(feature = "xet")] + no_redirect_client, + }) } - None -} -impl HfCore { /// Build an authenticated HTTP request. pub(super) fn request( &self, @@ -234,50 +233,64 @@ impl HfCore { req } - /// Send a request, check for success, and deserialize the JSON response. - /// - /// Returns the response parts (status, headers, etc.) alongside the - /// deserialized body so callers can inspect headers when needed. + pub(super) fn uri(&self, path: &str) -> super::uri::HfUri { + self.repo.uri(&self.root, path) + } + + /// Exponential backoff: 200ms, 400ms, 800ms, … capped at ~6s. + async fn backoff(attempt: usize) { + let millis = 200u64 * (1u64 << attempt.min(5)); + tokio::time::sleep(std::time::Duration::from_millis(millis)).await; + } + + /// Send a request with retries, returning the successful response. /// - /// When `max_retries` > 1, retries on commit conflicts (HTTP 412) and - /// transient server errors (HTTP 5xx), matching the behavior of the - /// official HuggingFace Hub client. 
- async fn send_request( - &self, - req: Request, - max_retries: usize, - ) -> Result<(http::response::Parts, T)> { + /// Retries on commit conflicts (HTTP 412) and transient server errors + /// (HTTP 5xx) up to `self.max_retries` attempts with exponential backoff. + pub(super) async fn send(&self, req: Request) -> Result> { let client = self.info.http_client(); let mut attempt = 0; loop { match client.send(req.clone()).await { Ok(resp) if resp.status().is_success() => { - let (parts, body) = resp.into_parts(); - let parsed = serde_json::from_reader(body.reader()) - .map_err(new_json_deserialize_error)?; - return Ok((parts, parsed)); + return Ok(resp); } Ok(resp) => { attempt += 1; let err = parse_error(resp); let retryable = err.kind() == ErrorKind::ConditionNotMatch || err.is_temporary(); - if attempt >= max_retries || !retryable { + if attempt >= self.max_retries || !retryable { return Err(err); } + Self::backoff(attempt).await; } Err(err) => { attempt += 1; - if attempt >= max_retries || !err.is_temporary() { + if attempt >= self.max_retries || !err.is_temporary() { return Err(err); } + Self::backoff(attempt).await; } } } } - pub async fn path_info(&self, path: &str) -> Result { - let uri = self.repo.uri(&self.root, path); + /// Send a request, check for success, and deserialize the JSON response. + /// + /// Returns the response parts (status, headers, etc.) alongside the + /// deserialized body so callers can inspect headers when needed. 
+ pub(super) async fn send_parse( + &self, + req: Request, + ) -> Result<(http::response::Parts, T)> { + let (parts, body) = self.send(req).await?.into_parts(); + let parsed = serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; + Ok((parts, parsed)) + } + + pub(super) async fn path_info(&self, path: &str) -> Result { + let uri = self.uri(path); let url = uri.paths_info_url(&self.endpoint); let form_body = format!("paths={}&expand=True", percent_encode_path(&uri.path)); @@ -286,7 +299,7 @@ impl HfCore { .header(header::CONTENT_TYPE, "application/x-www-form-urlencoded") .body(Buffer::from(Bytes::from(form_body))) .map_err(new_request_build_error)?; - let (_, mut files) = self.send_request::>(req, 1).await?; + let (_, mut files) = self.send_parse::>(req).await?; // NOTE: if the file is not found, the server will return 200 with an empty array if files.is_empty() { @@ -296,30 +309,6 @@ impl HfCore { Ok(files.remove(0)) } - pub async fn file_tree( - &self, - path: &str, - recursive: bool, - cursor: Option<&str>, - ) -> Result { - let uri = self.repo.uri(&self.root, path); - let url = uri.file_tree_url(&self.endpoint, recursive, cursor); - - let req = self - .request(http::Method::GET, &url, Operation::List) - .body(Buffer::new()) - .map_err(new_request_build_error)?; - let (parts, files) = self.send_request::>(req, 1).await?; - - let next_cursor = parts - .headers - .get(http::header::LINK) - .and_then(|v| v.to_str().ok()) - .and_then(parse_next_cursor); - - Ok(FileTree { files, next_cursor }) - } - #[cfg(feature = "xet")] pub(super) async fn get_xet_token(&self, token_type: &str) -> Result { let url = self.repo.xet_token_url(&self.endpoint, token_type); @@ -327,104 +316,10 @@ impl HfCore { .request(http::Method::GET, &url, Operation::Read) .body(Buffer::new()) .map_err(new_request_build_error)?; - let (_, token) = self.send_request(req, 1).await?; + let (_, token) = self.send_parse(req).await?; Ok(token) } - /// Issue a HEAD request and extract 
XET file info (hash and size). - /// - /// Returns `None` if the `X-Xet-Hash` header is absent or empty. - /// - /// NOTE: Cannot use `send_request` here because we need a custom - /// no-redirect HTTP client to inspect headers (e.g. `X-Xet-Hash`) - /// from the 302 response, and the response is not JSON. - #[cfg(feature = "xet")] - pub(super) async fn get_xet_file(&self, path: &str) -> Result> { - let uri = self.repo.uri(&self.root, path); - let url = uri.resolve_url(&self.endpoint); - - let reqwest_client = reqwest::Client::builder() - .redirect(reqwest::redirect::Policy::none()) - .build() - .map_err(|err| { - Error::new(ErrorKind::Unexpected, "failed to build http client").set_source(err) - })?; - let client = HttpClient::with(reqwest_client); - - let req = self - .request(http::Method::HEAD, &url, Operation::Stat) - .body(Buffer::new()) - .map_err(new_request_build_error)?; - - // Retry on transient errors, same as send_request. - let mut attempt = 0; - let resp = loop { - let resp = client.send(req.clone()).await?; - - attempt += 1; - let retryable = resp.status().is_server_error(); - if attempt >= self.max_retries || !retryable { - break resp; - } - }; - - let hash = resp - .headers() - .get("X-Xet-Hash") - .and_then(|v| v.to_str().ok()) - .filter(|s| !s.is_empty()); - - let Some(hash) = hash else { - return Ok(None); - }; - - let size = resp - .headers() - .get("X-Linked-Size") - .or_else(|| resp.headers().get(header::CONTENT_LENGTH)) - .and_then(|v| v.to_str().ok()) - .and_then(|s| s.parse::().ok()) - .unwrap_or(0); - - Ok(Some(XetFile { - hash: hash.to_string(), - size, - })) - } - - /// Call the preupload API to determine upload strategy for files. 
- pub(super) async fn preupload_files( - &self, - files: Vec, - ) -> Result { - let _token = self.token.as_deref().ok_or_else(|| { - Error::new( - ErrorKind::PermissionDenied, - "token is required for write operations", - ) - .with_operation("preupload") - })?; - - let first_path = files - .first() - .ok_or_else(|| Error::new(ErrorKind::Unexpected, "no files to preupload"))?; - - let uri = self.repo.uri(&self.root, &first_path.path); - let url = uri.preupload_url(&self.endpoint); - - let payload = PreuploadRequest { files }; - let json_body = serde_json::to_vec(&payload).map_err(new_json_serialize_error)?; - - let req = self - .request(http::Method::POST, &url, Operation::Write) - .header(header::CONTENT_TYPE, "application/json") - .body(Buffer::from(json_body)) - .map_err(new_request_build_error)?; - - let (_, resp) = self.send_request(req, 1).await?; - Ok(resp) - } - /// Commit file changes (uploads and/or deletions) to the repository. /// /// Retries on commit conflicts (HTTP 412) and transient server errors @@ -435,7 +330,7 @@ impl HfCore { regular_files: Vec, lfs_files: Vec, deleted_files: Vec, - ) -> Result<()> { + ) -> Result { let _token = self.token.as_deref().ok_or_else(|| { Error::new( ErrorKind::PermissionDenied, @@ -451,7 +346,7 @@ impl HfCore { .or_else(|| deleted_files.first().map(|f| f.path.as_str())) .ok_or_else(|| Error::new(ErrorKind::Unexpected, "no files to commit"))?; - let uri = self.repo.uri(&self.root, first_path); + let uri = self.uri(first_path); let url = uri.commit_url(&self.endpoint); let payload = MixedCommitPayload { @@ -470,9 +365,8 @@ impl HfCore { .body(Buffer::from(json_body)) .map_err(new_request_build_error)?; - self.send_request::(req, self.max_retries) - .await?; - Ok(()) + let (_, resp) = self.send_parse::(req).await?; + Ok(resp) } } @@ -568,16 +462,17 @@ pub(crate) mod test_utils { .set_native_capability(Capability::default()); info.update_http_client(|_| http_client); - let core = HfCore { - info: Arc::new(info), - repo: 
HfRepo::new(repo_type, repo_id.to_string(), Some(revision.to_string())), - root: "/".to_string(), - token: None, - endpoint: endpoint.to_string(), - max_retries: 3, + let core = HfCore::new( + Arc::new(info), + HfRepo::new(repo_type, repo_id.to_string(), Some(revision.to_string())), + "/".to_string(), + None, + endpoint.to_string(), + 3, #[cfg(feature = "xet")] - xet_enabled: false, - }; + false, + ) + .unwrap(); (core, mock_client) } @@ -585,8 +480,6 @@ pub(crate) mod test_utils { #[cfg(test)] mod tests { - use bytes::Bytes; - use super::super::uri::RepoType; use super::test_utils::create_test_core; use super::*; @@ -651,66 +544,6 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_hf_list_url_non_recursive() -> Result<()> { - let (core, mock_client) = create_test_core( - RepoType::Model, - "org/model", - "main", - "https://huggingface.co", - ); - - core.file_tree("path1", false, None).await?; - - let url = mock_client.get_captured_url(); - assert_eq!( - url, - "https://huggingface.co/api/models/org/model/tree/main/path1?expand=True" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_hf_list_url_recursive() -> Result<()> { - let (core, mock_client) = create_test_core( - RepoType::Model, - "org/model", - "main", - "https://huggingface.co", - ); - - core.file_tree("path2", true, None).await?; - - let url = mock_client.get_captured_url(); - assert_eq!( - url, - "https://huggingface.co/api/models/org/model/tree/main/path2?expand=True&recursive=True" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_hf_list_url_with_cursor() -> Result<()> { - let (core, mock_client) = create_test_core( - RepoType::Model, - "org/model", - "main", - "https://huggingface.co", - ); - - core.file_tree("path3", false, Some("abc123")).await?; - - let url = mock_client.get_captured_url(); - assert_eq!( - url, - "https://huggingface.co/api/models/org/model/tree/main/path3?expand=True&cursor=abc123" - ); - - Ok(()) - } - #[tokio::test] async fn test_hf_path_info_url_space() -> 
Result<()> { let (core, mock_client) = create_test_core( @@ -730,153 +563,19 @@ mod tests { Ok(()) } - - #[tokio::test] - async fn test_hf_list_url_space() -> Result<()> { - let (core, mock_client) = create_test_core( - RepoType::Space, - "org/space", - "main", - "https://huggingface.co", - ); - - core.file_tree("static", false, None).await?; - - let url = mock_client.get_captured_url(); - assert_eq!( - url, - "https://huggingface.co/api/spaces/org/space/tree/main/static?expand=True" - ); - - Ok(()) - } - - #[test] - fn parse_list_response_test() -> Result<()> { - let resp = Bytes::from( - r#" - [ - { - "type": "file", - "oid": "45fa7c3d85ee7dd4139adbc056da25ae136a65f2", - "size": 69512435, - "lfs": { - "oid": "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c", - "size": 69512435, - "pointerSize": 133 - }, - "path": "maelstrom/lib/maelstrom.jar" - }, - { - "type": "directory", - "oid": "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c", - "size": 69512435, - "path": "maelstrom/lib/plugins" - } - ] - "#, - ); - - let decoded_response = - serde_json::from_slice::>(&resp).map_err(new_json_deserialize_error)?; - - assert_eq!(decoded_response.len(), 2); - - let file_entry = PathInfo { - type_: "file".to_string(), - oid: "45fa7c3d85ee7dd4139adbc056da25ae136a65f2".to_string(), - size: 69512435, - lfs: Some(LfsInfo { - oid: "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c".to_string(), - }), - path: "maelstrom/lib/maelstrom.jar".to_string(), - last_commit: None, - }; - - assert_eq!(decoded_response[0], file_entry); - - let dir_entry = PathInfo { - type_: "directory".to_string(), - oid: "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c".to_string(), - size: 69512435, - lfs: None, - path: "maelstrom/lib/plugins".to_string(), - last_commit: None, - }; - - assert_eq!(decoded_response[1], dir_entry); - - Ok(()) - } - - #[test] - fn parse_files_info_test() -> Result<()> { - let resp = Bytes::from( - r#" - [ - { - 
"type": "file", - "oid": "45fa7c3d85ee7dd4139adbc056da25ae136a65f2", - "size": 69512435, - "lfs": { - "oid": "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c", - "size": 69512435, - "pointerSize": 133 - }, - "path": "maelstrom/lib/maelstrom.jar", - "lastCommit": { - "id": "bc1ef030bf3743290d5e190695ab94582e51ae2f", - "title": "Upload 141 files", - "date": "2023-11-17T23:50:28.000Z" - }, - "security": { - "blobId": "45fa7c3d85ee7dd4139adbc056da25ae136a65f2", - "name": "maelstrom/lib/maelstrom.jar", - "safe": true, - "avScan": { - "virusFound": false, - "virusNames": null - }, - "pickleImportScan": { - "highestSafetyLevel": "innocuous", - "imports": [ - {"module": "torch", "name": "FloatStorage", "safety": "innocuous"}, - {"module": "collections", "name": "OrderedDict", "safety": "innocuous"}, - {"module": "torch", "name": "LongStorage", "safety": "innocuous"}, - {"module": "torch._utils", "name": "_rebuild_tensor_v2", "safety": "innocuous"} - ] - } - } - } - ] - "#, - ); - - let decoded_response = - serde_json::from_slice::>(&resp).map_err(new_json_deserialize_error)?; - - assert_eq!(decoded_response.len(), 1); - - let file_info = PathInfo { - type_: "file".to_string(), - oid: "45fa7c3d85ee7dd4139adbc056da25ae136a65f2".to_string(), - size: 69512435, - lfs: Some(LfsInfo { - oid: "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c".to_string(), - }), - path: "maelstrom/lib/maelstrom.jar".to_string(), - last_commit: Some(LastCommit { - date: "2023-11-17T23:50:28.000Z".to_string(), - }), - }; - - assert_eq!(decoded_response[0], file_info); - - Ok(()) - } } #[cfg(feature = "xet")] pub(super) fn map_xet_error(err: impl std::error::Error + Send + Sync + 'static) -> Error { Error::new(ErrorKind::Unexpected, "xet operation failed").set_source(err) } + +#[cfg(feature = "xet")] +fn build_reqwest(policy: reqwest::redirect::Policy) -> Result { + reqwest::Client::builder() + .redirect(policy) + .build() + .map_err(|err| { + 
Error::new(ErrorKind::Unexpected, "failed to build http client").set_source(err) + }) +} diff --git a/core/services/hf/src/deleter.rs b/core/services/hf/src/deleter.rs index d5dc5d7907b3..474f691697cf 100644 --- a/core/services/hf/src/deleter.rs +++ b/core/services/hf/src/deleter.rs @@ -33,7 +33,7 @@ impl HfDeleter { async fn commit_delete(&self, deleted_files: Vec) -> Result<()> { match self.core.commit_files(vec![], vec![], deleted_files).await { - Ok(()) => Ok(()), + Ok(_) => Ok(()), Err(err) if err.kind() == ErrorKind::NotFound => Ok(()), Err(err) => Err(err), } @@ -61,64 +61,34 @@ impl oio::BatchDelete for HfDeleter { #[cfg(test)] mod tests { - use super::super::core::HfCore; - use super::super::uri::{HfRepo, RepoType}; - use super::super::writer::HfWriter; - use super::*; - use oio::BatchDelete; - use oio::OneShotWrite; - - fn testing_core() -> HfCore { - let repo_id = std::env::var("HF_OPENDAL_DATASET").expect("HF_OPENDAL_DATASET must be set"); - - let info = AccessorInfo::default(); - info.set_scheme("huggingface") - .set_native_capability(Capability { - write: true, - delete: true, - ..Default::default() - }); - - HfCore { - info: info.into(), - repo: HfRepo::new(RepoType::Dataset, repo_id, Some("main".to_string())), - root: "/".to_string(), - token: std::env::var("HF_OPENDAL_TOKEN").ok(), - endpoint: "https://huggingface.co".to_string(), - max_retries: 3, - #[cfg(feature = "xet")] - xet_enabled: false, - } - } + use super::super::backend::test_utils::testing_operator; + use opendal_core::*; #[tokio::test] #[ignore] async fn test_delete_once() { - let core = Arc::new(testing_core()); + let op = testing_operator(); + let path = "tests/delete-test.txt"; - // First write a file so we have something to delete - let writer = HfWriter::new(&core, "delete-test.txt", OpWrite::default()); - writer - .write_once(Buffer::from("temporary content")) + op.write(path, b"temporary content".as_slice()) .await .expect("write should succeed"); - // Now delete it - let 
deleter = HfDeleter::new(core); - deleter - .delete_once("delete-test.txt".to_string(), OpDelete::default()) + op.delete(path).await.expect("delete should succeed"); + + let err = op + .stat(path) .await - .expect("delete should succeed"); + .expect_err("stat should fail after delete"); + assert_eq!(err.kind(), ErrorKind::NotFound); } #[tokio::test] #[ignore] async fn test_delete_nonexistent() { - let core = Arc::new(testing_core()); + let op = testing_operator(); - let deleter = HfDeleter::new(core); - deleter - .delete_once("nonexistent-file.txt".to_string(), OpDelete::default()) + op.delete("nonexistent-file.txt") .await .expect("deleting nonexistent file should succeed"); } @@ -126,29 +96,25 @@ mod tests { #[tokio::test] #[ignore] async fn test_delete_batch() { - let core = Arc::new(testing_core()); + let op = testing_operator(); + let paths = ["tests/batch-del-1.txt", "tests/batch-del-2.txt"]; - // Write two files - for name in &["batch-del-1.txt", "batch-del-2.txt"] { - let writer = HfWriter::new(&core, name, OpWrite::default()); - writer - .write_once(Buffer::from("temp")) + for path in &paths { + op.write(path, b"temp".as_slice()) .await .expect("write should succeed"); } - // Batch delete them in a single commit - let deleter = HfDeleter::new(core); - let batch = vec![ - ("batch-del-1.txt".to_string(), OpDelete::default()), - ("batch-del-2.txt".to_string(), OpDelete::default()), - ]; - let result = deleter - .delete_batch(batch) - .await - .expect("batch delete should succeed"); + for path in &paths { + op.delete(path).await.expect("delete should succeed"); + } - assert_eq!(result.succeeded.len(), 2); - assert!(result.failed.is_empty()); + for path in &paths { + let err = op + .stat(path) + .await + .expect_err("stat should fail after delete"); + assert_eq!(err.kind(), ErrorKind::NotFound); + } } } diff --git a/core/services/hf/src/lister.rs b/core/services/hf/src/lister.rs index f5de04cc3dc0..6cbe0710c141 100644 --- a/core/services/hf/src/lister.rs +++ 
b/core/services/hf/src/lister.rs @@ -17,10 +17,31 @@ use std::sync::Arc; -use super::core::HfCore; +use super::core::{HfCore, PathInfo}; use opendal_core::raw::*; use opendal_core::*; +struct FileTree { + files: Vec, + next_cursor: Option, +} + +/// Extract the cursor value from a Link header's "next" URL. +fn parse_next_cursor(link_str: &str) -> Option { + for link in link_str.split(',') { + if link.contains("rel=\"next\"") || link.contains("rel='next'") { + let (_, rest) = link.split_once('<')?; + let (url, _) = rest.split_once('>')?; + let query = url.split_once('?')?.1; + return query + .split('&') + .find_map(|p| p.strip_prefix("cursor=")) + .map(|v| v.to_string()); + } + } + None +} + pub struct HfLister { core: Arc, path: String, @@ -35,6 +56,31 @@ impl HfLister { recursive, } } + + async fn file_tree( + &self, + path: &str, + recursive: bool, + cursor: Option<&str>, + ) -> Result { + let uri = self.core.uri(path); + let url = uri.file_tree_url(&self.core.endpoint, recursive, cursor); + + let req = self + .core + .request(http::Method::GET, &url, Operation::List) + .body(Buffer::new()) + .map_err(new_request_build_error)?; + let (parts, files) = self.core.send_parse::>(req).await?; + + let next_cursor = parts + .headers + .get(http::header::LINK) + .and_then(|v| v.to_str().ok()) + .and_then(parse_next_cursor); + + Ok(FileTree { files, next_cursor }) + } } impl oio::PageList for HfLister { @@ -44,10 +90,7 @@ impl oio::PageList for HfLister { } else { Some(ctx.token.as_str()) }; - let response = self - .core - .file_tree(&self.path, self.recursive, cursor) - .await?; + let response = self.file_tree(&self.path, self.recursive, cursor).await?; if let Some(next_cursor) = response.next_cursor { ctx.token = next_cursor; @@ -71,3 +114,60 @@ impl oio::PageList for HfLister { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::super::backend::test_utils::{gpt2_operator, mbpp_operator}; + use super::*; + + #[test] + fn test_parse_next_cursor() { + let link = + r#"; 
rel="next""#; + assert_eq!(parse_next_cursor(link), Some("abc123".to_string())); + } + + #[test] + fn test_parse_next_cursor_no_next() { + let link = + r#"; rel="prev""#; + assert_eq!(parse_next_cursor(link), None); + } + + #[tokio::test] + async fn test_list_model_root() { + let op = gpt2_operator(); + let entries = op.list("/").await.expect("list should succeed"); + let names: Vec<&str> = entries.iter().map(|e| e.name()).collect(); + assert!(names.contains(&"config.json")); + } + + #[tokio::test] + #[ignore = "requires network access"] + async fn test_list_dataset_root() { + let op = mbpp_operator(); + let entries = op.list("/").await.expect("list should succeed"); + let names: Vec<&str> = entries.iter().map(|e| e.name()).collect(); + assert!(names.contains(&"full/")); + } + + #[tokio::test] + #[ignore = "requires network access"] + async fn test_list_dataset_subdirectory() { + let op = mbpp_operator(); + let entries = op.list("full/").await.expect("list should succeed"); + let names: Vec<&str> = entries.iter().map(|e| e.name()).collect(); + assert!(names.iter().any(|n| n.ends_with(".parquet"))); + } + + #[tokio::test] + #[ignore = "requires network access"] + async fn test_list_nonexistent_directory() { + let op = gpt2_operator(); + let err = op + .list("nonexistent-dir/") + .await + .expect_err("list on nonexistent dir should fail"); + assert_eq!(err.kind(), opendal_core::ErrorKind::NotFound); + } +} diff --git a/core/services/hf/src/reader.rs b/core/services/hf/src/reader.rs index b219a2037ed2..1e0c44a6115b 100644 --- a/core/services/hf/src/reader.rs +++ b/core/services/hf/src/reader.rs @@ -33,10 +33,16 @@ use futures::StreamExt; use super::core::HfCore; #[cfg(feature = "xet")] -use super::core::{XetFile, XetTokenRefresher, map_xet_error}; +use super::core::{XetTokenRefresher, map_xet_error}; use opendal_core::raw::*; use opendal_core::*; +#[cfg(feature = "xet")] +struct XetFile { + hash: String, + size: u64, +} + #[cfg(feature = "xet")] type XetByteStream = 
Pin> + Send + Sync>>; @@ -56,7 +62,7 @@ impl HfReader { pub async fn try_new(core: &HfCore, path: &str, range: BytesRange) -> Result { #[cfg(feature = "xet")] if core.xet_enabled { - if let Some(xet_file) = core.get_xet_file(path).await? { + if let Some(xet_file) = Self::maybe_xet_file(core, path).await? { return Self::download_xet(core, &xet_file, range).await; } } @@ -64,9 +70,60 @@ impl HfReader { Self::download_http(core, path, range).await } + /// Issue a HEAD request and extract XET file info (hash and size). + /// + /// Returns `None` if the `X-Xet-Hash` header is absent or empty. + /// + /// Uses a dedicated no-redirect HTTP client so we can inspect + /// headers (e.g. `X-Xet-Hash`) on the 302 response. + #[cfg(feature = "xet")] + async fn maybe_xet_file(core: &HfCore, path: &str) -> Result> { + let uri = core.uri(path); + let url = uri.resolve_url(&core.endpoint); + + let req = core + .request(http::Method::HEAD, &url, Operation::Stat) + .body(Buffer::new()) + .map_err(new_request_build_error)?; + + let mut attempt = 0; + let resp = loop { + let resp = core.no_redirect_client.send(req.clone()).await?; + + attempt += 1; + let retryable = resp.status().is_server_error(); + if attempt >= core.max_retries || !retryable { + break resp; + } + }; + + let hash = resp + .headers() + .get("X-Xet-Hash") + .and_then(|v| v.to_str().ok()) + .filter(|s| !s.is_empty()); + + let Some(hash) = hash else { + return Ok(None); + }; + + let size = resp + .headers() + .get("X-Linked-Size") + .or_else(|| resp.headers().get(http::header::CONTENT_LENGTH)) + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse::().ok()) + .unwrap_or(0); + + Ok(Some(XetFile { + hash: hash.to_string(), + size, + })) + } + pub async fn download_http(core: &HfCore, path: &str, range: BytesRange) -> Result { let client = core.info.http_client(); - let uri = core.repo.uri(&core.root, path); + let uri = core.uri(path); let url = uri.resolve_url(&core.endpoint); let mut req = 
core.request(http::Method::GET, &url, Operation::Read); @@ -91,11 +148,7 @@ impl HfReader { } #[cfg(feature = "xet")] - pub async fn download_xet( - core: &HfCore, - xet_file: &XetFile, - range: BytesRange, - ) -> Result { + async fn download_xet(core: &HfCore, xet_file: &XetFile, range: BytesRange) -> Result { let token = core.get_xet_token("read").await?; let file_info = xet_data::XetFileInfo::new(xet_file.hash.clone(), xet_file.size); @@ -145,151 +198,60 @@ impl oio::Read for HfReader { #[cfg(test)] mod tests { - use super::super::core::HfCore; - use super::super::uri::{HfRepo, RepoType}; - use super::*; + #[cfg(feature = "xet")] + use super::super::backend::test_utils::mbpp_xet_operator; + use super::super::backend::test_utils::{gpt2_operator, mbpp_operator}; /// Parquet magic bytes: "PAR1" const PARQUET_MAGIC: &[u8] = b"PAR1"; - fn testing_core(repo_type: RepoType, repo_id: &str, _xet: bool) -> HfCore { - let info = AccessorInfo::default(); - info.set_scheme("huggingface") - .set_native_capability(Capability { - read: true, - ..Default::default() - }); - - HfCore { - info: info.into(), - repo: HfRepo::new(repo_type, repo_id.to_string(), Some("main".to_string())), - root: "/".to_string(), - token: None, - endpoint: "https://huggingface.co".to_string(), - max_retries: 3, - #[cfg(feature = "xet")] - xet_enabled: _xet, - } - } - - async fn read_all(reader: &mut HfReader) -> Vec { - use oio::Read; - - let mut buf = Vec::new(); - loop { - let chunk = reader.read().await.expect("read should succeed"); - if chunk.is_empty() { - break; - } - buf.extend_from_slice(&chunk.to_bytes()); - } - buf - } - #[tokio::test] - async fn test_download_http_model() { - let core = testing_core(RepoType::Model, "openai-community/gpt2", false); - let mut reader = HfReader::download_http(&core, "config.json", BytesRange::default()) - .await - .expect("download should succeed"); - - let data = read_all(&mut reader).await; - serde_json::from_slice::(&data) + async fn 
test_read_model_config() { + let op = gpt2_operator(); + let data = op.read("config.json").await.expect("read should succeed"); + serde_json::from_slice::(&data.to_vec()) .expect("config.json should be valid JSON"); } #[tokio::test] - #[ignore] - async fn test_download_http_dataset_parquet() { - let core = testing_core(RepoType::Dataset, "google-research-datasets/mbpp", false); - let range = BytesRange::new(0, Some(4)); - let mut reader = HfReader::download_http(&core, "full/train-00000-of-00001.parquet", range) - .await - .expect("download should succeed"); - - let data = read_all(&mut reader).await; - assert_eq!(&data, PARQUET_MAGIC); - } - - #[tokio::test] - #[ignore] - async fn test_download_http_range() { - let core = testing_core(RepoType::Dataset, "google-research-datasets/mbpp", false); - let range = BytesRange::new(0, Some(4)); - let mut reader = HfReader::download_http(&core, "full/train-00000-of-00001.parquet", range) + #[ignore = "requires network access"] + async fn test_read_http_parquet_header() { + let op = mbpp_operator(); + let data = op + .read_with("full/train-00000-of-00001.parquet") + .range(0..4) .await - .expect("range download should succeed"); - - let data = read_all(&mut reader).await; - assert_eq!(data.len(), 4); - assert_eq!(&data, PARQUET_MAGIC); - } - - #[tokio::test] - async fn test_download_dispatches_to_http() { - let core = testing_core(RepoType::Model, "openai-community/gpt2", false); - let reader = HfReader::try_new(&core, "config.json", BytesRange::default()) - .await - .expect("download should succeed"); - - assert!(matches!(reader, HfReader::Http(_))); + .expect("read should succeed"); + assert_eq!(&data.to_vec(), PARQUET_MAGIC); } #[cfg(feature = "xet")] #[tokio::test] - #[ignore] - async fn test_download_xet_parquet() { - let core = testing_core(RepoType::Dataset, "google-research-datasets/mbpp", true); - let xet_file = core - .get_xet_file("full/train-00000-of-00001.parquet") + #[ignore = "requires network access"] + async 
fn test_read_xet_parquet() { + let op = mbpp_xet_operator(); + let data = op + .read("full/train-00000-of-00001.parquet") .await - .expect("xet probe should succeed") - .expect("parquet file should be xet-backed"); - - let mut reader = HfReader::download_xet(&core, &xet_file, BytesRange::default()) - .await - .expect("xet download should succeed"); - - let data = read_all(&mut reader).await; - assert!(data.len() > 8); - assert_eq!(&data[..4], PARQUET_MAGIC); - assert_eq!(&data[data.len() - 4..], PARQUET_MAGIC); + .expect("xet read should succeed"); + let bytes = data.to_vec(); + assert!(bytes.len() > 8); + assert_eq!(&bytes[..4], PARQUET_MAGIC); + assert_eq!(&bytes[bytes.len() - 4..], PARQUET_MAGIC); } #[cfg(feature = "xet")] #[tokio::test] - #[ignore] - async fn test_download_xet_range() { - let core = testing_core(RepoType::Dataset, "google-research-datasets/mbpp", true); - let xet_file = core - .get_xet_file("full/train-00000-of-00001.parquet") + #[ignore = "requires network access"] + async fn test_read_xet_range() { + let op = mbpp_xet_operator(); + let data = op + .read_with("full/train-00000-of-00001.parquet") + .range(0..4) .await - .expect("xet probe should succeed") - .expect("parquet file should be xet-backed"); - - let range = BytesRange::new(0, Some(4)); - let mut reader = HfReader::download_xet(&core, &xet_file, range) - .await - .expect("xet range download should succeed"); - - let data = read_all(&mut reader).await; - assert_eq!(data.len(), 4); - assert_eq!(&data, PARQUET_MAGIC); - } - - #[cfg(feature = "xet")] - #[tokio::test] - #[ignore] - async fn test_download_dispatches_to_xet() { - let core = testing_core(RepoType::Dataset, "google-research-datasets/mbpp", true); - let reader = HfReader::try_new( - &core, - "full/train-00000-of-00001.parquet", - BytesRange::default(), - ) - .await - .expect("download should succeed"); - - assert!(matches!(reader, HfReader::Xet(_))); + .expect("xet range read should succeed"); + let bytes = data.to_vec(); + 
assert_eq!(bytes.len(), 4); + assert_eq!(&bytes, PARQUET_MAGIC); } } diff --git a/core/services/hf/src/uri.rs b/core/services/hf/src/uri.rs index 7cc8a5e76f1b..f089ce72a8a1 100644 --- a/core/services/hf/src/uri.rs +++ b/core/services/hf/src/uri.rs @@ -107,6 +107,22 @@ impl HfRepo { ) } + /// Build the Git LFS batch API URL for this repository. + /// + /// Pattern: `{endpoint}/{type_prefix}{repo_id}.git/info/lfs/objects/batch` + /// where type_prefix is "" for models, "datasets/" for datasets, "spaces/" for spaces. + pub fn lfs_batch_url(&self, endpoint: &str) -> String { + let type_prefix = match self.repo_type { + RepoType::Model => "", + RepoType::Dataset => "datasets/", + RepoType::Space => "spaces/", + }; + format!( + "{}/{}{}.git/info/lfs/objects/batch", + endpoint, type_prefix, &self.repo_id, + ) + } + /// Build the XET token API URL for this repository. #[cfg(feature = "xet")] pub fn xet_token_url(&self, endpoint: &str, token_type: &str) -> String { diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 285f5a1180dc..45e4a16e523b 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -15,17 +15,120 @@ // specific language governing permissions and limitations // under the License. 
+use std::collections::HashMap; use std::sync::Arc; use base64::Engine; +use http::Request; +use http::header; use sha2::{Digest, Sha256}; -use super::core::{CommitFile, HfCore, PreuploadFile}; #[cfg(feature = "xet")] -use super::core::{LfsFile, XetTokenRefresher, map_xet_error}; +use super::core::XetTokenRefresher; +use super::core::{CommitFile, CommitResponse, HfCore, LfsFile}; use opendal_core::raw::*; use opendal_core::*; +#[derive(serde::Serialize)] +struct PreuploadFile { + path: String, + size: u64, + sample: String, + #[serde(rename = "sha256")] + sha256: String, +} + +#[derive(serde::Serialize)] +struct PreuploadRequest { + files: Vec, +} + +#[derive(serde::Deserialize, Debug)] +struct PreuploadFileResponse { + #[allow(dead_code)] + path: String, + #[serde(rename = "uploadMode")] + upload_mode: String, +} + +#[derive(serde::Deserialize, Debug)] +struct PreuploadResponse { + files: Vec, +} + +#[derive(serde::Serialize)] +struct LfsBatchRequest { + operation: String, + transfers: Vec, + objects: Vec, + hash_algo: String, +} + +#[derive(serde::Serialize)] +struct LfsBatchRequestObject { + oid: String, + size: u64, +} + +#[derive(serde::Deserialize)] +struct LfsBatchResponse { + transfer: Option, + #[serde(default)] + objects: Vec, +} + +#[derive(serde::Deserialize)] +struct LfsBatchResponseObject { + actions: Option, + error: Option, +} + +#[derive(serde::Deserialize)] +struct LfsBatchActions { + upload: LfsBatchAction, + verify: Option, +} + +#[derive(serde::Deserialize)] +struct LfsBatchAction { + href: String, + #[serde(default)] + header: HashMap, +} + +#[derive(serde::Deserialize)] +struct LfsBatchError { + message: String, +} + +#[derive(serde::Serialize)] +struct LfsVerifyRequest { + oid: String, + size: u64, +} + +/// Resolved upload strategy after consulting the preupload and LFS batch APIs. +enum UploadMode { + /// Small file: base64 encode inline in commit payload. + Regular, + /// File already exists in LFS storage, just commit pointer. 
+ LfsExists, + /// Single-part LFS upload: PUT entire body to pre-signed URL. + LfsSinglepart { + upload: LfsBatchAction, + verify: Option, + }, + /// Multi-part LFS upload: PUT chunks to numbered pre-signed URLs. + LfsMultipart { + upload: LfsBatchAction, + verify: Option, + chunk_size: usize, + }, + /// XET transfer protocol. + #[cfg(feature = "xet")] + Xet, +} + pub struct HfWriter { core: Arc, #[allow(dead_code)] @@ -43,31 +146,51 @@ impl HfWriter { } } - /// Determine upload mode via preupload API. - async fn determine_upload_mode(core: &HfCore, path: &str, body: &Buffer) -> Result { + /// Determine the upload strategy for a file. + /// + /// Follows the HuggingFace Hub upload protocol: + /// 1. Compute SHA256 hash and a content sample for the preupload API. + /// 2. Call the preupload API to determine if the file should be uploaded + /// as "regular" (base64 inline in commit) or "lfs" (Git LFS). + /// 3. For LFS files, negotiate the transfer adapter with the LFS batch + /// API which returns pre-signed upload URLs or Xet. + /// + /// Returns the resolved upload mode and the SHA256 OID. + async fn determine_upload_mode(&self, body: &Buffer) -> Result<(UploadMode, String)> { let bytes = body.to_bytes(); let size = bytes.len() as u64; - // Compute SHA256 hash + // Step 1: compute SHA256 and content sample. let mut hasher = Sha256::new(); hasher.update(&bytes); - let sha256_hash = format!("{:x}", hasher.finalize()); + let oid = format!("{:x}", hasher.finalize()); - // Get sample (first 512 bytes, base64 encoded) let sample_size = std::cmp::min(512, bytes.len()); let sample = base64::engine::general_purpose::STANDARD.encode(&bytes[..sample_size]); - // Call preupload endpoint - let preupload_files = vec![PreuploadFile { - path: path.to_string(), - size, - sample, - sha256: sha256_hash, - }]; + // Step 2: call preupload API to get "regular" or "lfs". 
+ let uri = self.core.uri(&self.path); + let preupload_url = uri.preupload_url(&self.core.endpoint); + + let preupload_payload = PreuploadRequest { + files: vec![PreuploadFile { + path: self.path.clone(), + size, + sample, + sha256: oid.clone(), + }], + }; + let json_body = serde_json::to_vec(&preupload_payload).map_err(new_json_serialize_error)?; + + let req = self + .core + .request(http::Method::POST, &preupload_url, Operation::Write) + .header(header::CONTENT_TYPE, "application/json") + .body(Buffer::from(json_body)) + .map_err(new_request_build_error)?; - let preupload_resp = core.preupload_files(preupload_files).await?; + let (_, preupload_resp): (_, PreuploadResponse) = self.core.send_parse(req).await?; - // Get upload mode from response let mode = preupload_resp .files .first() @@ -75,34 +198,244 @@ impl HfWriter { .upload_mode .clone(); - Ok(mode) + if mode != "lfs" { + return Ok((UploadMode::Regular, oid)); + } + + // Step 3: negotiate transfer adapter with the LFS batch API. + let url = self.core.repo.lfs_batch_url(&self.core.endpoint); + + #[allow(unused_mut)] + let mut transfers = vec!["basic".to_string(), "multipart".to_string()]; + #[cfg(feature = "xet")] + if self.core.xet_enabled { + transfers.push("xet".to_string()); + } + + let payload = LfsBatchRequest { + operation: "upload".to_string(), + transfers, + objects: vec![LfsBatchRequestObject { + oid: oid.clone(), + size, + }], + hash_algo: "sha256".to_string(), + }; + let json_body = serde_json::to_vec(&payload).map_err(new_json_serialize_error)?; + + let req = self + .core + .request(http::Method::POST, &url, Operation::Write) + .header(header::ACCEPT, "application/vnd.git-lfs+json") + .header(header::CONTENT_TYPE, "application/vnd.git-lfs+json") + .body(Buffer::from(json_body)) + .map_err(new_request_build_error)?; + + let (_, batch_resp): (_, LfsBatchResponse) = self.core.send_parse(req).await?; + + #[cfg_attr(not(feature = "xet"), allow(unused_variables))] + let chosen_transfer = 
batch_resp.transfer; + + let obj = batch_resp + .objects + .into_iter() + .next() + .ok_or_else(|| Error::new(ErrorKind::Unexpected, "empty LFS batch response"))?; + + if let Some(err) = obj.error { + return Err(Error::new(ErrorKind::Unexpected, err.message)); + } + + // No actions means the file already exists on the server. + let Some(actions) = obj.actions else { + return Ok((UploadMode::LfsExists, oid)); + }; + + // If the server chose XET transfer, delegate to the XET protocol. + #[cfg(feature = "xet")] + if self.core.xet_enabled && chosen_transfer.as_deref() == Some("xet") { + return Ok((UploadMode::Xet, oid)); + } + + // Decide singlepart vs multipart based on whether the server + // provided a chunk_size in the upload action headers (matches + // the huggingface_hub Python client detection logic). + let chunk_size = actions.upload.header.get("chunk_size").and_then(|v| { + v.as_u64() + .map(|n| n as usize) + .or_else(|| v.as_str().and_then(|s| s.parse().ok())) + }); + + let mode = if let Some(chunk_size) = chunk_size { + UploadMode::LfsMultipart { + upload: actions.upload, + verify: actions.verify, + chunk_size, + } + } else { + UploadMode::LfsSinglepart { + upload: actions.upload, + verify: actions.verify, + } + }; + + Ok((mode, oid)) } - /// Prepare file content for HTTP storage (base64 encode for regular upload). - async fn upload_http(path: &str, body: Buffer) -> Result { - let bytes = body.to_bytes(); - let content = base64::engine::general_purpose::STANDARD.encode(bytes); - Ok(CommitFile { + /// Prepare file content for regular HTTP commit (base64 encoded inline). + fn prepare_commit_file(path: &str, body: &Buffer) -> CommitFile { + let content = base64::engine::general_purpose::STANDARD.encode(body.to_bytes()); + CommitFile { path: path.to_string(), content, encoding: "base64".to_string(), - }) + } + } + + /// Singlepart LFS upload: PUT entire body to the upload URL. 
+ async fn lfs_upload_singlepart(&self, upload: &LfsBatchAction, body: Buffer) -> Result<()> { + let req = Request::builder() + .method(http::Method::PUT) + .uri(&upload.href) + .extension(Operation::Write) + .body(body) + .map_err(new_request_build_error)?; + + self.core.send(req).await?; + Ok(()) + } + + /// Multi-part LFS upload: PUT chunks to numbered part URLs, then POST completion. + async fn lfs_upload_multipart( + &self, + upload: &LfsBatchAction, + oid: &str, + body: Buffer, + chunk_size: usize, + ) -> Result<()> { + let bytes = body.to_bytes(); + let total_parts = bytes.len().div_ceil(chunk_size); + + // Collect presigned part URLs from the upload header. The server + // stores them as digit-only keys (e.g. "1", "2", "3"). We collect + // all such keys, sort by numeric value, and use them in order — + // matching the huggingface_hub Python client's `_get_sorted_parts_urls`. + let mut part_urls: Vec<(usize, String)> = upload + .header + .iter() + .filter_map(|(k, v)| { + let num: usize = k.parse().ok()?; + let url = v.as_str()?; + Some((num, url.to_string())) + }) + .collect(); + part_urls.sort_by_key(|(num, _)| *num); + + let part_urls: Vec = part_urls.into_iter().map(|(_, url)| url).collect(); + if part_urls.len() != total_parts { + return Err(Error::new( + ErrorKind::Unexpected, + format!( + "expected {} part URLs but server returned {} \ + (file size: {}, chunk size: {}, header keys: {:?})", + total_parts, + part_urls.len(), + bytes.len(), + chunk_size, + upload.header.keys().collect::>(), + ), + )); + } + + let mut etags = Vec::with_capacity(total_parts); + + for (part_num, part_url) in part_urls.iter().enumerate() { + let start = part_num * chunk_size; + let end = std::cmp::min(start + chunk_size, bytes.len()); + let chunk = bytes.slice(start..end); + + let req = Request::builder() + .method(http::Method::PUT) + .uri(part_url.as_str()) + .extension(Operation::Write) + .body(Buffer::from(chunk)) + .map_err(new_request_build_error)?; + + let parts = 
self.core.send(req).await?.into_parts().0; + let etag = parts + .headers + .get(header::ETAG) + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + etags.push(etag); + } + + let parts: Vec<_> = etags + .into_iter() + .enumerate() + .map(|(i, etag)| { + serde_json::json!({ + "partNumber": i + 1, + "etag": etag, + }) + }) + .collect(); + + let completion = serde_json::json!({ + "oid": oid, + "parts": parts, + }); + let completion_body = serde_json::to_vec(&completion).map_err(new_json_serialize_error)?; + + let req = self + .core + .request(http::Method::POST, &upload.href, Operation::Write) + .header(header::CONTENT_TYPE, "application/json") + .body(Buffer::from(completion_body)) + .map_err(new_request_build_error)?; + + self.core.send(req).await?; + Ok(()) + } + + /// Verify an LFS upload if the server requested verification. + async fn lfs_verify( + &self, + verify: &Option, + oid: &str, + size: u64, + ) -> Result<()> { + let Some(verify) = verify else { + return Ok(()); + }; + + let payload = LfsVerifyRequest { + oid: oid.to_string(), + size, + }; + let body = serde_json::to_vec(&payload).map_err(new_json_serialize_error)?; + + let req = self + .core + .request(http::Method::POST, &verify.href, Operation::Write) + .header(header::CONTENT_TYPE, "application/vnd.git-lfs+json") + .body(Buffer::from(body)) + .map_err(new_request_build_error)?; + + self.core.send(req).await?; + Ok(()) } /// Upload file content to XET storage. 
#[cfg(feature = "xet")] - async fn upload_xet(core: &HfCore, path: &str, body: Buffer) -> Result { - let bytes = body.to_bytes(); - let size = bytes.len() as u64; + async fn xet_upload(&self, body: Buffer) -> Result<()> { + use super::core::map_xet_error; - // Compute SHA256 hash for LFS OID - let mut hasher = Sha256::new(); - hasher.update(&bytes); - let sha256_hash = format!("{:x}", hasher.finalize()); + let bytes = body.to_bytes(); - // Upload to XET storage - let token = core.get_xet_token("write").await?; - let refresher = Arc::new(XetTokenRefresher::new(core, "write")); + let token = self.core.get_xet_token("write").await?; + let refresher = Arc::new(XetTokenRefresher::new(&self.core, "write")); let file_contents = vec![bytes.to_vec()]; @@ -117,188 +450,388 @@ impl HfWriter { .await .map_err(map_xet_error)?; - let _file_info = results.first().ok_or_else(|| { + results.first().ok_or_else(|| { Error::new( ErrorKind::Unexpected, "No file info returned from XET upload", ) })?; - Ok(LfsFile { - path: path.to_string(), - oid: sha256_hash, - algo: "sha256".to_string(), - size, - }) + Ok(()) } - async fn upload_and_commit(&self, body: Buffer) -> Result { - #[cfg_attr(not(feature = "xet"), allow(unused_variables))] - let mode = Self::determine_upload_mode(&self.core, &self.path, &body).await?; + async fn upload_and_commit(&self, body: Buffer) -> Result { + let (mode, oid) = self.determine_upload_mode(&body).await?; + let size = body.len() as u64; - // Prepare file based on mode - let (commit_file, lfs_file) = { - #[cfg(feature = "xet")] - { - if self.core.xet_enabled && mode == "xet" { - let lfs = Self::upload_xet(&self.core, &self.path, body).await?; - (None, Some(lfs)) - } else { - let commit = Self::upload_http(&self.path, body).await?; - (Some(commit), None) - } + match mode { + UploadMode::Regular => { + let file = Self::prepare_commit_file(&self.path, &body); + return self.core.commit_files(vec![file], vec![], vec![]).await; } - #[cfg(not(feature = "xet"))] - { 
- let commit = Self::upload_http(&self.path, body).await?; - (Some(commit), None) + UploadMode::LfsExists => {} + UploadMode::LfsSinglepart { upload, verify } => { + self.lfs_upload_singlepart(&upload, body).await?; + self.lfs_verify(&verify, &oid, size).await?; } - }; + UploadMode::LfsMultipart { + upload, + verify, + chunk_size, + } => { + self.lfs_upload_multipart(&upload, &oid, body, chunk_size) + .await?; + self.lfs_verify(&verify, &oid, size).await?; + } + #[cfg(feature = "xet")] + UploadMode::Xet => { + self.xet_upload(body).await?; + } + } - // Commit the files - let regular_files: Vec<_> = commit_file.into_iter().collect(); - let lfs_files: Vec<_> = lfs_file.into_iter().collect(); - self.core - .commit_files(regular_files, lfs_files, vec![]) - .await?; - Ok(Metadata::default()) + let lfs_file = LfsFile { + path: self.path.clone(), + oid, + algo: "sha256".to_string(), + size, + }; + self.core.commit_files(vec![], vec![lfs_file], vec![]).await } } impl oio::OneShotWrite for HfWriter { async fn write_once(&self, bs: Buffer) -> Result { - self.upload_and_commit(bs).await + let size = bs.len() as u64; + let resp = self.upload_and_commit(bs).await?; + + let mut meta = Metadata::default().with_content_length(size); + if let Some(oid) = resp.commit_oid { + meta = meta.with_version(oid); + } + Ok(meta) } } #[cfg(test)] mod tests { - use super::super::core::HfCore; - use super::super::uri::{HfRepo, RepoType}; + use super::super::backend::test_utils::testing_operator; + #[cfg(feature = "xet")] + use super::super::backend::test_utils::testing_xet_operator; use super::*; - use oio::OneShotWrite; - - fn testing_core(_xet: bool) -> HfCore { - let repo_id = std::env::var("HF_OPENDAL_DATASET").expect("HF_OPENDAL_DATASET must be set"); - - let info = AccessorInfo::default(); - info.set_scheme("huggingface") - .set_native_capability(Capability { - write: true, - ..Default::default() - }); - - HfCore { - info: info.into(), - repo: HfRepo::new(RepoType::Dataset, repo_id, 
Some("main".to_string())), - root: "/".to_string(), - token: std::env::var("HF_OPENDAL_TOKEN").ok(), - endpoint: "https://huggingface.co".to_string(), - max_retries: 3, - #[cfg(feature = "xet")] - xet_enabled: _xet, - } + use base64::Engine; + + // --- Unit tests (no network required) --- + + #[test] + fn test_prepare_commit_file() { + let content = b"Hello, World!"; + let buf = Buffer::from(content.to_vec()); + let file = HfWriter::prepare_commit_file("data/test.txt", &buf); + + assert_eq!(file.path, "data/test.txt"); + assert_eq!(file.encoding, "base64"); + let decoded = base64::engine::general_purpose::STANDARD + .decode(&file.content) + .unwrap(); + assert_eq!(decoded, content); + } + + #[test] + fn test_prepare_commit_file_empty() { + let buf = Buffer::from(Vec::::new()); + let file = HfWriter::prepare_commit_file("empty.bin", &buf); + + assert_eq!(file.path, "empty.bin"); + assert_eq!(file.encoding, "base64"); + let decoded = base64::engine::general_purpose::STANDARD + .decode(&file.content) + .unwrap(); + assert!(decoded.is_empty()); } + // --- Integration tests (require HF_OPENDAL_DATASET and HF_OPENDAL_TOKEN) --- + #[tokio::test] #[ignore] - async fn test_upload_http() { - let core = testing_core(false); + async fn test_write_http() { + let op = testing_operator(); + op.write("test-file.txt", b"Hello, HuggingFace!".as_slice()) + .await + .expect("write should succeed"); + } - let test_data = b"Hello, HuggingFace!"; - let buffer = Buffer::from(test_data.as_slice()); + #[tokio::test] + #[ignore] + async fn test_write_http_with_content_type() { + let op = testing_operator(); + op.write_with("test.json", br#"{"test": "data"}"#.as_slice()) + .content_type("application/json") + .await + .expect("write with content type should succeed"); + } - let commit_file = HfWriter::upload_http("test-file.txt", buffer) + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_write_xet() { + let op = testing_xet_operator(); + op.write("test-xet.bin", b"Binary 
data for XET test".as_slice()) .await - .expect("upload should succeed"); + .expect("xet write should succeed"); + } - core.commit_files(vec![commit_file], vec![], vec![]) + /// Write a small text file (should use Regular upload mode — base64 inline + /// in commit) and verify the content roundtrips correctly. + #[tokio::test] + #[ignore] + async fn test_write_regular_roundtrip() { + let op = testing_operator(); + let path = "tests/regular-roundtrip.txt"; + let content = b"Small text file for regular upload."; + + op.write(path, content.as_slice()) .await - .expect("commit should succeed"); + .expect("write should succeed"); + + let data = op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), content); + + // Cleanup is best-effort; transient 500s from concurrent ops are OK. + let _ = op.delete(path).await; } + /// Write a 1 MB binary file with XET disabled. The preupload API should + /// classify this as LFS, and the LFS batch API should choose basic + /// (singlepart) transfer since the file is below the multipart threshold. 
#[tokio::test] #[ignore] - async fn test_write_once_http() { - let core = Arc::new(testing_core(false)); + async fn test_write_lfs_singlepart_roundtrip() { + let op = testing_operator(); + let path = "tests/lfs-singlepart.bin"; + let content: Vec = (0..1_048_576u32).map(|i| (i % 256) as u8).collect(); - let test_data = b"Test content for write_once"; - let buffer = Buffer::from(test_data.as_slice()); + op.write(path, content.clone()) + .await + .expect("LFS singlepart write should succeed"); - let writer = HfWriter::new(&core, "write-once-test.txt", OpWrite::default()); - let result = writer.write_once(buffer).await; + let data = op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), content.as_slice()); - assert!(result.is_ok(), "write_once should succeed: {:?}", result); + let _ = op.delete(path).await; } - #[cfg(feature = "xet")] + /// Write a large binary file with XET disabled. The server decides + /// whether to use singlepart or multipart LFS transfer based on size. #[tokio::test] #[ignore] - async fn test_upload_xet() { - let core = testing_core(true); + async fn test_write_lfs_large_roundtrip() { + let op = testing_operator(); + let path = "tests/lfs-large.bin"; + // 12 MB of patterned data — above the ~10 MB multipart threshold. + let content: Vec = (0..12_000_000u32).map(|i| (i % 251) as u8).collect(); + + op.write(path, content.clone()) + .await + .expect("LFS large write should succeed"); - let test_data = b"Binary data for XET test"; - let buffer = Buffer::from(test_data.as_slice()); + let data = op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().len(), content.len()); + assert_eq!(data.to_bytes().as_ref(), content.as_slice()); - let result = HfWriter::upload_xet(&core, "test-xet.bin", buffer).await; - assert!(result.is_ok(), "xet upload should succeed: {:?}", result); + let _ = op.delete(path).await; } - #[cfg(feature = "xet")] + /// Verify stat returns correct metadata after writing. 
#[tokio::test] #[ignore] - async fn test_upload_and_commit_xet() { - let core = testing_core(true); + async fn test_write_and_stat() { + let op = testing_operator(); + let path = "tests/stat-after-write.txt"; + let content = b"Content for stat verification."; + + op.write(path, content.as_slice()) + .await + .expect("write should succeed"); + + let meta = op.stat(path).await.expect("stat should succeed"); + assert_eq!(meta.content_length(), content.len() as u64); + + let _ = op.delete(path).await; + } - let test_data = b"Binary data for XET commit test"; - let buffer = Buffer::from(test_data.as_slice()); + /// Overwriting an existing file should replace its content. + #[tokio::test] + #[ignore] + async fn test_write_overwrite() { + let op = testing_operator(); + let path = "tests/overwrite-test.txt"; - let lfs_file = HfWriter::upload_xet(&core, "test-xet.bin", buffer) + op.write(path, b"first version".as_slice()) .await - .expect("xet upload should succeed"); + .expect("first write should succeed"); - core.commit_files(vec![], vec![lfs_file], vec![]) + op.write(path, b"second version".as_slice()) .await - .expect("commit should succeed"); + .expect("overwrite should succeed"); + + let data = op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), b"second version"); + + let _ = op.delete(path).await; } - #[cfg(feature = "xet")] + /// Full lifecycle: write → stat → read → delete → confirm gone. 
#[tokio::test] #[ignore] - async fn test_write_once_dispatches_to_xet() { - let core = Arc::new(testing_core(true)); + async fn test_write_delete_lifecycle() { + let op = testing_operator(); + let path = "tests/lifecycle-test.txt"; + + op.write(path, b"temporary file".as_slice()) + .await + .expect("write should succeed"); - let test_data = b"Binary content for XET dispatch"; - let buffer = Buffer::from(test_data.as_slice()); + assert!(op.stat(path).await.is_ok()); - let writer = HfWriter::new(&core, "test-file.bin", OpWrite::default()); - let result = writer.write_once(buffer).await; + let data = op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), b"temporary file"); - assert!( - result.is_ok(), - "write_once with binary file should use xet: {:?}", - result - ); + op.delete(path).await.expect("delete should succeed"); + assert!(op.stat(path).await.is_err()); } + /// Write an empty (0-byte) file and verify roundtrip. #[tokio::test] #[ignore] - async fn test_upload_with_content_type() { - let core = Arc::new(testing_core(false)); + async fn test_write_empty_file_roundtrip() { + let op = testing_operator(); + let path = "tests/empty-file.txt"; - let test_data = br#"{"test": "data"}"#; - let buffer = Buffer::from(test_data.as_slice()); + op.write(path, Vec::::new()) + .await + .expect("write empty file should succeed"); + + let data = op.read(path).await.expect("read should succeed"); + assert!(data.to_bytes().is_empty()); + + let meta = op.stat(path).await.expect("stat should succeed"); + assert_eq!(meta.content_length(), 0); + + let _ = op.delete(path).await; + } + + /// Write a file in a deeply nested directory structure. + /// HuggingFace creates intermediate directories implicitly. 
+ #[tokio::test] + #[ignore] + async fn test_write_nested_directory() { + let op = testing_operator(); + let path = "tests/deep/nested/dir/file.txt"; + let content = b"nested directory test"; + + op.write(path, content.as_slice()) + .await + .expect("write to nested path should succeed"); + + let data = op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), content); + + let _ = op.delete(path).await; + } + + /// Write a file with special characters in the path. + #[tokio::test] + #[ignore] + async fn test_write_special_characters_in_path() { + let op = testing_operator(); + let path = "tests/special chars (1).txt"; + let content = b"special character path test"; + + op.write(path, content.as_slice()) + .await + .expect("write with special chars should succeed"); + + let data = op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), content); + + let _ = op.delete(path).await; + } + + /// Upload identical LFS content to two different paths. The second + /// write should hit the LfsExists code path (LFS batch returns no + /// actions because the object already exists in storage). + #[tokio::test] + #[ignore] + async fn test_write_lfs_reupload() { + let op = testing_operator(); + let path1 = "tests/lfs-reupload-1.bin"; + let path2 = "tests/lfs-reupload-2.bin"; + let content: Vec = (0..1_048_576u32).map(|i| (i % 256) as u8).collect(); + + // First upload — should use LFS singlepart. + op.write(path1, content.clone()) + .await + .expect("first LFS write should succeed"); + + // Second upload of identical content to a different path — should hit LfsExists. 
+ op.write(path2, content.clone()) + .await + .expect("LFS re-upload should succeed (LfsExists path)"); + + let data = op.read(path2).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), content.as_slice()); + + let _ = op.delete(path1).await; + let _ = op.delete(path2).await; + } + + /// Delete a file and confirm read returns NotFound. + #[tokio::test] + #[ignore] + async fn test_delete_then_read() { + let op = testing_operator(); + let path = "tests/delete-then-read.txt"; - let mut op = OpWrite::default(); - op = op.with_content_type("application/json"); + op.write(path, b"will be deleted".as_slice()) + .await + .expect("write should succeed"); + + op.delete(path).await.expect("delete should succeed"); - let writer = HfWriter::new(&core, "test.json", op); - let result = writer.write_once(buffer).await; + let err = op + .read(path) + .await + .expect_err("read after delete should fail"); + assert_eq!(err.kind(), ErrorKind::NotFound); + } - assert!( - result.is_ok(), - "upload with content type should succeed: {:?}", - result - ); + /// Write multiple files, delete them all, and verify each is gone. 
+ #[tokio::test] + #[ignore] + async fn test_batch_delete() { + let op = testing_operator(); + let paths = [ + "tests/batch-del-a.txt", + "tests/batch-del-b.txt", + "tests/batch-del-c.txt", + ]; + + for path in &paths { + op.write(path, b"batch delete test".as_slice()) + .await + .expect("write should succeed"); + } + + for path in &paths { + op.delete(path).await.expect("delete should succeed"); + } + + for path in &paths { + let err = op + .stat(path) + .await + .expect_err("stat should fail after delete"); + assert_eq!(err.kind(), ErrorKind::NotFound); + } } } From 24ea83f5a6e2f7716501fa1ded31375f40a5a751 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 11 Feb 2026 00:39:08 +0100 Subject: [PATCH 10/25] style(hf): run taplo format --- core/services/hf/Cargo.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index ded041281dc0..1920722a748e 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -71,6 +71,8 @@ futures = { workspace = true } opendal-core = { path = "../../core", version = "0.55.0", features = [ "reqwest-rustls-tls", ] } -reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } +reqwest = { version = "0.12", default-features = false, features = [ + "rustls-tls", +] } serde_json = { workspace = true } tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } From f4db6662c15b9c9bd4758f51e4ce0915d724155b Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 11 Feb 2026 09:27:29 +0100 Subject: [PATCH 11/25] chore(deps): remove tokio dependency and use backon for exponential backoff --- core/Cargo.lock | 1 + core/Cargo.toml | 2 +- core/services/hf/Cargo.toml | 5 +--- core/services/hf/src/core.rs | 49 ++++++++++++++---------------------- 4 files changed, 22 insertions(+), 35 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 5a0f8bec7158..393fca5e4afc 100644 --- a/core/Cargo.lock +++ 
b/core/Cargo.lock @@ -7275,6 +7275,7 @@ name = "opendal-service-hf" version = "0.55.0" dependencies = [ "async-trait", + "backon", "base64 0.22.1", "bytes", "cas_types", diff --git a/core/Cargo.toml b/core/Cargo.toml index 55e4754db988..08dbc67e3e80 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -147,7 +147,7 @@ services-gridfs = ["dep:opendal-service-gridfs"] services-hdfs = ["dep:opendal-service-hdfs"] services-hdfs-native = ["dep:opendal-service-hdfs-native"] services-hf = ["dep:opendal-service-hf"] -services-hf-xet = ["dep:opendal-service-hf", "opendal-service-hf?/xet"] +services-hf-xet = ["dep:opendal-service-hf", "opendal-service-hf/xet"] services-http = ["dep:opendal-service-http"] services-huggingface = ["services-hf"] services-huggingface-xet = ["services-hf-xet"] diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index 1920722a748e..8039f98065a9 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -37,13 +37,12 @@ xet = [ "dep:xet-data", "dep:cas_types", "dep:xet-utils", - "tokio/sync", - "tokio/rt", "dep:futures", "dep:async-trait", ] [dependencies] +backon = "1.6" base64 = { workspace = true } bytes = { workspace = true } http = { workspace = true } @@ -54,8 +53,6 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } sha2 = "0.10" tempfile = "3" -tokio = { workspace = true, features = ["time"] } - # XET storage protocol support (optional) async-trait = { version = "0.1", optional = true } cas_types = { git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index 2345fe1c8a23..2b243aae186b 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -18,6 +18,8 @@ use std::fmt::Debug; use std::sync::Arc; +use backon::ExponentialBuilder; +use backon::Retryable; use bytes::Buf; use bytes::Bytes; use http::Request; @@ -237,43 +239,30 @@ impl HfCore 
{ self.repo.uri(&self.root, path) } - /// Exponential backoff: 200ms, 400ms, 800ms, … capped at ~6s. - async fn backoff(attempt: usize) { - let millis = 200u64 * (1u64 << attempt.min(5)); - tokio::time::sleep(std::time::Duration::from_millis(millis)).await; - } - /// Send a request with retries, returning the successful response. /// /// Retries on commit conflicts (HTTP 412) and transient server errors /// (HTTP 5xx) up to `self.max_retries` attempts with exponential backoff. pub(super) async fn send(&self, req: Request) -> Result> { + let backoff = ExponentialBuilder::default() + .with_min_delay(std::time::Duration::from_millis(200)) + .with_max_delay(std::time::Duration::from_millis(6400)) + .with_max_times(self.max_retries.saturating_sub(1)); let client = self.info.http_client(); - let mut attempt = 0; - loop { - match client.send(req.clone()).await { - Ok(resp) if resp.status().is_success() => { - return Ok(resp); - } - Ok(resp) => { - attempt += 1; - let err = parse_error(resp); - let retryable = - err.kind() == ErrorKind::ConditionNotMatch || err.is_temporary(); - if attempt >= self.max_retries || !retryable { - return Err(err); - } - Self::backoff(attempt).await; - } - Err(err) => { - attempt += 1; - if attempt >= self.max_retries || !err.is_temporary() { - return Err(err); - } - Self::backoff(attempt).await; - } + + let send_once = || async { + let resp = client.send(req.clone()).await?; + if resp.status().is_success() { + Ok(resp) + } else { + Err(parse_error(resp)) } - } + }; + + send_once + .retry(backoff) + .when(|e: &Error| e.kind() == ErrorKind::ConditionNotMatch || e.is_temporary()) + .await } /// Send a request, check for success, and deserialize the JSON response. 
From a05b3103d76ebd24885a07bf39bf71fd1cc94a6a Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 11 Feb 2026 09:50:30 +0100 Subject: [PATCH 12/25] chore(deps): align ctor version in xet --- .github/workflows/ci_core.yml | 2 +- core/Cargo.lock | 53 ++++++++++++++++++++++------------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci_core.yml b/.github/workflows/ci_core.yml index 09d6f46ea66e..26466107755f 100644 --- a/.github/workflows/ci_core.yml +++ b/.github/workflows/ci_core.yml @@ -94,7 +94,7 @@ jobs: cargo update zerofrom --precise 0.1.5 cargo update idna_adapter --precise 1.2.0 cargo update litemap --precise 0.7.4 - cargo update ctor@0.6.3 --precise 0.6.1 + cargo update ctor --precise 0.6.1 cargo +${OPENDAL_MSRV} clippy -- -D warnings build_default_features: diff --git a/core/Cargo.lock b/core/Cargo.lock index 393fca5e4afc..f3c42e4a38c8 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -1552,7 +1552,7 @@ dependencies = [ [[package]] name = "cas_client" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "anyhow", "async-trait", @@ -1601,7 +1601,7 @@ dependencies = [ [[package]] name = "cas_object" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "anyhow", "blake3", @@ -1628,7 +1628,7 @@ dependencies = [ [[package]] name = "cas_types" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ 
"merklehash", "serde", @@ -1897,6 +1897,16 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys 0.59.0", +] + [[package]] name = "colored" version = "3.0.0" @@ -2577,7 +2587,7 @@ dependencies = [ [[package]] name = "data" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "anyhow", "async-trait", @@ -2622,7 +2632,7 @@ checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" [[package]] name = "deduplication" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "async-trait", "bytes", @@ -3178,7 +3188,7 @@ dependencies = [ [[package]] name = "error_printer" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "tracing", ] @@ -3351,7 +3361,7 @@ dependencies = [ [[package]] name = "file_reconstruction" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "async-trait", 
"bytes", @@ -3371,7 +3381,7 @@ dependencies = [ [[package]] name = "file_utils" version = "0.14.2" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "colored", "lazy_static", @@ -3948,7 +3958,7 @@ version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24f8647af4005fa11da47cd56252c6ef030be8fa97bdbf355e7dfb6348f0a82c" dependencies = [ - "approx 0.5.1", + "approx 0.4.0", "num-traits", "rstar 0.10.0", "rstar 0.11.0", @@ -4580,7 +4590,7 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hub_client" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "anyhow", "async-trait", @@ -4758,7 +4768,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.62.2", + "windows-core 0.57.0", ] [[package]] @@ -5757,7 +5767,7 @@ dependencies = [ [[package]] name = "mdb_shard" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "anyhow", "async-trait", @@ -5838,7 +5848,7 @@ dependencies = [ [[package]] name = "merklehash" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "base64 0.22.1", "blake3", @@ -8580,7 +8590,7 @@ dependencies = [ [[package]] name = 
"progress_tracking" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "async-trait", "merklehash", @@ -8680,7 +8690,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" dependencies = [ "heck", - "itertools 0.14.0", + "itertools 0.10.5", "log", "multimap", "once_cell", @@ -8715,7 +8725,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.111", @@ -8728,7 +8738,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.111", @@ -12263,12 +12273,15 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "utils" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "async-trait", "bincode", "bytes", +<<<<<<< HEAD "chrono", +======= +>>>>>>> 9a258842f (chore(deps): align ctor version in xet) "ctor", "derivative", "duration-str", @@ -13448,7 +13461,7 @@ dependencies = [ [[package]] name = "xet_config" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = 
"git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "const-str", "konst", @@ -13458,7 +13471,7 @@ dependencies = [ [[package]] name = "xet_runtime" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#59afe24c7867cb22b4b630eb6ac22485ee6b0478" +source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" dependencies = [ "dirs", "error_printer", From 2a7229f52f33d9f0088b8e2e524616d4128ad49b Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 11 Feb 2026 11:01:15 +0100 Subject: [PATCH 13/25] chore(deps): remove not unused tempfile dependency --- core/Cargo.lock | 1 - core/services/hf/Cargo.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index f3c42e4a38c8..95e538342766 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -7299,7 +7299,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "tempfile", "tokio", "utils", ] diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index 8039f98065a9..a9b166f450bb 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -52,7 +52,6 @@ percent-encoding = "2" serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } sha2 = "0.10" -tempfile = "3" # XET storage protocol support (optional) async-trait = { version = "0.1", optional = true } cas_types = { git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } From f651476e79a3882e4100f609fa151c65cde4e887 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 11 Feb 2026 19:13:50 +0100 Subject: [PATCH 14/25] refactor(hf): use a more recent streaming xet client --- core/Cargo.lock | 1220 ++++++++++++++++--------------- core/services/hf/Cargo.toml | 7 +- core/services/hf/src/backend.rs | 8 +- core/services/hf/src/core.rs | 185 ++++- core/services/hf/src/reader.rs | 116 +-- 
core/services/hf/src/writer.rs | 671 ++++------------- 6 files changed, 943 insertions(+), 1264 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 95e538342766..747b75c56f16 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -7,10 +7,6 @@ name = "Inflector" version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" -dependencies = [ - "lazy_static", - "regex", -] [[package]] name = "addr" @@ -38,7 +34,7 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "version_check", ] @@ -157,9 +153,9 @@ checksum = "90c6333e01ba7235575b6ab53e5af10f1c327927fd97c36462917e289557ea64" [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" [[package]] name = "approx" @@ -181,9 +177,9 @@ dependencies = [ [[package]] name = "ar_archive_writer" -version = "0.2.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" dependencies = [ "object", ] @@ -199,9 +195,12 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.7.1" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +checksum = "9ded5f9a03ac8f24d1b8a25101ee812cd32cdc8c50a4c50237de2c4915850e73" +dependencies = [ + "rustversion", +] [[package]] name = "arcstr" @@ -278,7 +277,7 @@ checksum = 
"affbba0d438add06462a0371997575927bc05052f7ec486e7a4ca405c956c3d7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -350,7 +349,7 @@ dependencies = [ "futures-timer", "futures-util", "http 1.4.0", - "indexmap 2.12.1", + "indexmap 2.13.0", "mime", "multer", "num-traits", @@ -365,26 +364,26 @@ dependencies = [ [[package]] name = "async-graphql-derive" -version = "7.0.16" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29db05b624fb6352fc11bfe30c54ab1b16a1fe937d7c05a783f4e88ef1292b3b" +checksum = "2e6cbeadc8515e66450fba0985ce722192e28443697799988265d86304d7cc68" dependencies = [ "Inflector", "async-graphql-parser", - "darling 0.20.11", + "darling 0.23.0", "proc-macro-crate", "proc-macro2", "quote", - "strum 0.26.3", - "syn 2.0.111", - "thiserror 1.0.69", + "strum", + "syn 2.0.114", + "thiserror 2.0.18", ] [[package]] name = "async-graphql-parser" -version = "7.0.16" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4904895044116aab098ca82c6cec831ec43ed99efd04db9b70a390419bc88c5b" +checksum = "e64ef70f77a1c689111e52076da1cd18f91834bcb847de0a9171f83624b07fbf" dependencies = [ "async-graphql-value", "pest", @@ -394,12 +393,12 @@ dependencies = [ [[package]] name = "async-graphql-value" -version = "7.0.16" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0cde74de18e3a00c5dd5cfa002ab6f532e1a06c2a79ee6671e2fc353b400b92" +checksum = "3e3ef112905abea9dea592fc868a6873b10ebd3f983e83308f995d6284e9ba41" dependencies = [ "bytes", - "indexmap 2.12.1", + "indexmap 2.13.0", "serde", "serde_json", ] @@ -424,9 +423,9 @@ dependencies = [ [[package]] name = "async-lock" -version = "3.4.1" +version = "3.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" +checksum = 
"290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" dependencies = [ "event-listener 5.4.1", "event-listener-strategy", @@ -452,7 +451,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -500,7 +499,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -517,7 +516,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -563,7 +562,7 @@ checksum = "fd73835ad7deb4bd2b389e6f10333b143f025d607c55ca04c66a0bcc6bb2fc6d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -575,7 +574,7 @@ dependencies = [ "derive_utils", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -663,9 +662,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.15.1" +version = "1.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b5ce75405893cd713f9ab8e297d8e438f624dde7d706108285f7e17a25a180f" +checksum = "7b7b6141e96a8c160799cc2d5adecd5cbbe5054cb8c7c4af53da0f83bb7ad256" dependencies = [ "aws-lc-sys", "zeroize", @@ -673,9 +672,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.34.0" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "179c3777a8b5e70e90ea426114ffc565b2c1a9f82f6c4a0c5a34aa6ef5e781b6" +checksum = "5c34dda4df7017c8db52132f0f8a2e0f8161649d15723ed63fc00c82d0f2081a" dependencies = [ "cc", "cmake", @@ -911,7 +910,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "h2 0.3.27", - "h2 0.4.12", + "h2 0.4.13", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -922,12 +921,12 @@ dependencies = [ "hyper-util", "pin-project-lite", "rustls 
0.21.12", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-native-certs 0.8.3", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", - "tower 0.5.2", + "tower 0.5.3", "tracing", ] @@ -1079,11 +1078,11 @@ dependencies = [ [[package]] name = "axum" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ - "axum-core 0.5.5", + "axum-core 0.5.6", "bytes", "form_urlencoded", "futures-util", @@ -1104,7 +1103,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper 1.0.2", "tokio", - "tower 0.5.2", + "tower 0.5.3", "tower-layer", "tower-service", "tracing", @@ -1129,9 +1128,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", @@ -1187,9 +1186,9 @@ dependencies = [ [[package]] name = "base64ct" -version = "1.8.1" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e050f626429857a27ddccb31e0aca21356bfa709c04041aefddac081a8f068a" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" [[package]] name = "bcrypt" @@ -1199,7 +1198,7 @@ checksum = "e65938ed058ef47d92cf8b346cc76ef48984572ade631927e9937b5ffc7662c7" dependencies = [ "base64 0.22.1", "blowfish", - "getrandom 0.2.16", + "getrandom 0.2.17", "subtle", "zeroize", ] @@ -1231,7 +1230,7 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1251,7 +1250,7 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1307,15 +1306,16 @@ 
dependencies = [ [[package]] name = "blake3" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if 1.0.4", "constant_time_eq", + "cpufeatures", ] [[package]] @@ -1379,7 +1379,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1391,10 +1391,10 @@ dependencies = [ "ahash 0.8.12", "base64 0.22.1", "bitvec", - "getrandom 0.2.16", + "getrandom 0.2.17", "getrandom 0.3.4", "hex", - "indexmap 2.12.1", + "indexmap 2.13.0", "js-sys", "once_cell", "rand 0.9.2", @@ -1418,9 +1418,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "bytecheck" @@ -1452,9 +1452,9 @@ checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" [[package]] name = "byteorder" @@ -1464,9 +1464,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" dependencies = [ "serde", ] @@ -1520,9 
+1520,9 @@ dependencies = [ [[package]] name = "camino" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "276a59bf2b2c967788139340c9f0c5b12d7fd6630315c15c217e559de85d2609" +checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" dependencies = [ "serde_core", ] @@ -1552,11 +1552,11 @@ dependencies = [ [[package]] name = "cas_client" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "anyhow", "async-trait", - "axum 0.8.7", + "axum 0.8.8", "base64 0.22.1", "bytes", "cas_object", @@ -1584,7 +1584,7 @@ dependencies = [ "serde_json", "statrs", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-retry", "tower-http", @@ -1601,7 +1601,7 @@ dependencies = [ [[package]] name = "cas_object" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "anyhow", "blake3", @@ -1618,7 +1618,7 @@ dependencies = [ "more-asserts", "rand 0.9.2", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "utils", @@ -1628,12 +1628,12 @@ dependencies = [ [[package]] name = "cas_types" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "merklehash", "serde", "serde_repr", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -1662,9 +1662,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.49" +version = "1.2.55" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "jobserver", @@ -1764,9 +1764,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", @@ -1826,9 +1826,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.53" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" +checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" dependencies = [ "clap_builder", "clap_derive", @@ -1836,9 +1836,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.53" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" dependencies = [ "anstream", "anstyle", @@ -1849,27 +1849,27 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.49" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "clap_lex" -version = "0.7.6" +version = "1.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cmake" -version = "0.1.54" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" dependencies = [ "cc", ] @@ -1882,9 +1882,9 @@ checksum = "d7ee2cfacbd29706479902b06d75ad8f1362900836aa32799eabc7e004bfd854" [[package]] name = "coarsetime" -version = "0.1.36" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91849686042de1b41cd81490edc83afbcb0abe5a9b6f2c4114f23ce8cca1bcf4" +checksum = "e58eb270476aa4fc7843849f8a35063e8743b4dbcdf6dd0f8ea0886980c204c2" dependencies = [ "libc", "wasix", @@ -1909,11 +1909,11 @@ dependencies = [ [[package]] name = "colored" -version = "3.0.0" +version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -1948,9 +1948,9 @@ dependencies = [ [[package]] name = "compio-buf" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0aa3ebe7f9830a33aa801a223411c8dc011c3271cd5beed56284c86d227bc32e" +checksum = "5ebb4036bf394915196c09362e4fd5581ee8bf0f3302ab598bff9d646aea2061" dependencies = [ "arrayvec", "bytes", @@ -1989,7 +1989,7 @@ dependencies = [ "paste", "polling", "slab", - "socket2 0.6.1", + "socket2 0.6.2", "windows-sys 0.61.2", ] @@ -2045,7 +2045,7 @@ dependencies = [ "either", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.6.2", "widestring", 
"windows-sys 0.61.2", ] @@ -2069,7 +2069,7 @@ dependencies = [ "pin-project-lite", "scoped-tls", "slab", - "socket2 0.6.1", + "socket2 0.6.2", "windows-sys 0.61.2", ] @@ -2120,7 +2120,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] @@ -2142,9 +2142,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" [[package]] name = "convert_case" @@ -2403,7 +2403,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2482,6 +2482,16 @@ dependencies = [ "darling_macro 0.21.3", ] +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core 0.23.0", + "darling_macro 0.23.0", +] + [[package]] name = "darling_core" version = "0.14.4" @@ -2507,7 +2517,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2521,7 +2531,20 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.111", + "syn 2.0.114", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim 0.11.1", + "syn 2.0.114", ] 
[[package]] @@ -2543,7 +2566,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2554,7 +2577,18 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.111", + "syn 2.0.114", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core 0.23.0", + "quote", + "syn 2.0.114", ] [[package]] @@ -2587,7 +2621,7 @@ dependencies = [ [[package]] name = "data" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "anyhow", "async-trait", @@ -2614,7 +2648,7 @@ dependencies = [ "serde_json", "sha2", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "ulid", @@ -2625,14 +2659,14 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" [[package]] name = "deduplication" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "async-trait", "bytes", @@ -2669,9 +2703,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" dependencies = [ "powerfmt", "serde_core", @@ -2707,7 +2741,7 @@ checksum = "d65d7ce8132b7c0e54497a4d9a55a1c2a0912a0d786cf894472ba818fba45762" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2718,7 +2752,7 @@ checksum = "ef941ded77d15ca19b40374869ac6000af1c9f2a4c0f3d4c70926287e6364a8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2729,7 +2763,7 @@ checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2750,7 +2784,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2760,7 +2794,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2771,29 +2805,29 @@ checksum = "64b697ac90ff296f0fc031ee5a61c7ac31fb9fff50e3fb32873b09223613fc0c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "derive_more" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" dependencies = [ "derive_more-impl", ] [[package]] name = "derive_more-impl" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b" +checksum = 
"799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" dependencies = [ "convert_case", "proc-macro2", "quote", "rustc_version", - "syn 2.0.111", + "syn 2.0.114", "unicode-xid", ] @@ -2805,7 +2839,7 @@ checksum = "ccfae181bab5ab6c5478b2ccb69e4c68a02f8c3ec72f6616bfec9dbc599d2ee0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2891,7 +2925,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2916,7 +2950,7 @@ checksum = "9556bc800956545d6420a640173e5ba7dfa82f38d3ea5a167eb555bc69ac3323" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2946,7 +2980,7 @@ checksum = "6e39034cee21a2f5bbb66ba0e3689819c4bb5d00382a282006e802a7ffa6c41d" dependencies = [ "cfg-if 1.0.4", "libc", - "socket2 0.6.1", + "socket2 0.6.2", "windows-sys 0.60.2", ] @@ -2973,9 +3007,9 @@ dependencies = [ [[package]] name = "dtoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" [[package]] name = "dtoa-short" @@ -3016,7 +3050,7 @@ dependencies = [ "chrono", "rust_decimal", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", "winnow", ] @@ -3115,9 +3149,9 @@ dependencies = [ [[package]] name = "ena" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d248bdd43ce613d87415282f69b9bb99d947d290b10962dd6c56233312c2ad5" +checksum = "eabffdaee24bd1bf95c5ef7cec31260444317e72ea56c4c91750e8b7ee58d5f1" dependencies = [ "log", ] @@ -3146,7 +3180,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -3188,7 +3222,7 @@ dependencies = [ [[package]] name = 
"error_printer" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "tracing", ] @@ -3206,14 +3240,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8acfe553027cd07fc5fafa81a84f19a7a87eaffaccd2162b6db05e8d6ce98084" dependencies = [ "http 1.4.0", - "prost 0.14.1", + "prost 0.14.3", "tokio", "tokio-stream", - "tonic 0.14.2", + "tonic 0.14.3", "tonic-build", "tonic-prost", "tonic-prost-build", - "tower 0.5.2", + "tower 0.5.3", "tower-service", ] @@ -3293,11 +3327,11 @@ dependencies = [ [[package]] name = "fastpool" -version = "1.0.2" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6777b4743839a42fd32141d95d7adc4c98e3a1d5200a2598cf32ffd102c81e1" +checksum = "505402589aaeb2f89357bf8dfb259046c693a3c9a68b874a0ca8c0fb99e0fb4c" dependencies = [ - "mea 0.5.3", + "mea", "scopeguard", ] @@ -3336,7 +3370,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -3345,7 +3379,7 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] @@ -3361,7 +3395,7 @@ dependencies = [ [[package]] name = "file_reconstruction" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "async-trait", "bytes", @@ -3370,7 +3404,7 @@ dependencies = [ "merklehash", "more-asserts", "progress_tracking", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", 
"tracing", "utils", @@ -3381,7 +3415,7 @@ dependencies = [ [[package]] name = "file_utils" version = "0.14.2" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "colored", "lazy_static", @@ -3394,9 +3428,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.5" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -3530,7 +3564,7 @@ checksum = "9be610412e5a92d89855fb15b099a57792b7dbdcf8ac74c5a0e24d9b7b1b6f7f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "try_map", ] @@ -3568,7 +3602,7 @@ dependencies = [ "mixtrics", "pin-project", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", ] @@ -3588,7 +3622,7 @@ dependencies = [ "parking_lot 0.12.5", "pin-project", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "twox-hash", ] @@ -3621,7 +3655,7 @@ dependencies = [ "parking_lot 0.12.5", "pin-project", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", ] @@ -3653,7 +3687,7 @@ dependencies = [ "pin-project", "rand 0.9.2", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "twox-hash", @@ -3788,7 +3822,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -3798,7 +3832,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f2f12607f92c69b12ed746fabf9ca4f5c482cba46679c1a75b874ed7c26adb" dependencies = [ "futures-io", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", ] @@ -3865,7 
+3899,7 @@ dependencies = [ "g2poly", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -3958,7 +3992,7 @@ version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24f8647af4005fa11da47cd56252c6ef030be8fa97bdbf355e7dfb6348f0a82c" dependencies = [ - "approx 0.4.0", + "approx 0.5.1", "num-traits", "rstar 0.10.0", "rstar 0.11.0", @@ -3970,9 +4004,9 @@ dependencies = [ [[package]] name = "geographiclib-rs" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" +checksum = "bc8f647bd562db28a15e0dce4a77d89e3a78f6f85943e782418ebdbb420ea3c4" dependencies = [ "libm", ] @@ -3990,9 +4024,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if 1.0.4", "js-sys", @@ -4022,12 +4056,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" dependencies = [ "cfg-if 1.0.4", - "js-sys", "libc", "r-efi", "wasip2", "wasip3", - "wasm-bindgen", ] [[package]] @@ -4116,7 +4148,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.12.1", + "indexmap 2.13.0", "slab", "tokio", "tokio-util", @@ -4125,9 +4157,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", @@ -4135,7 +4167,7 @@ dependencies = [ "futures-core", 
"futures-sink", "http 1.4.0", - "indexmap 2.12.1", + "indexmap 2.13.0", "slab", "tokio", "tokio-util", @@ -4203,6 +4235,10 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.12", + "allocator-api2", +] [[package]] name = "hashbrown" @@ -4263,13 +4299,13 @@ dependencies = [ "md-5", "num-traits", "once_cell", - "prost 0.14.1", + "prost 0.14.3", "prost-types", "rand 0.9.2", "regex", "roxmltree", - "socket2 0.6.1", - "thiserror 2.0.17", + "socket2 0.6.2", + "thiserror 2.0.18", "tokio", "url", "uuid", @@ -4439,7 +4475,7 @@ dependencies = [ "once_cell", "rand 0.9.2", "ring", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tokio", "tracing", @@ -4462,7 +4498,7 @@ dependencies = [ "rand 0.9.2", "resolv-conf", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", ] @@ -4496,18 +4532,18 @@ dependencies = [ [[package]] name = "hotpath" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8d5a16b6aedd67d44fbeb619e63d9fa649d77cabb25a4154f87deec8eeaff4a" +checksum = "3554f9fc054c95f68e9f31196ca3aa77c6ce299f2e5877788e68168d01b7cfab" dependencies = [ "hotpath-macros", ] [[package]] name = "hotpath-macros" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0326ae0362362c976fba738ba2dd585e63bf92264b7070f20219453c9a8cd6ae" +checksum = "9e8cf5fa828dd9b99de52bb85f9027c0d5205971cb3c45842b4bf6d7c7f6c679" [[package]] name = "html5ever" @@ -4590,7 +4626,7 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hub_client" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = 
"git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "anyhow", "async-trait", @@ -4598,7 +4634,7 @@ dependencies = [ "reqwest 0.13.2", "reqwest-middleware", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "urlencoding", ] @@ -4642,7 +4678,7 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2 0.4.12", + "h2 0.4.13", "http 1.4.0", "http-body 1.0.1", "httparse", @@ -4680,13 +4716,13 @@ dependencies = [ "http 1.4.0", "hyper 1.8.1", "hyper-util", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-native-certs 0.8.3", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", "tower-service", - "webpki-roots 1.0.4", + "webpki-roots 1.0.6", ] [[package]] @@ -4732,14 +4768,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", - "futures-core", "futures-util", "http 1.4.0", "http-body 1.0.1", @@ -4748,7 +4783,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", + "socket2 0.6.2", "system-configuration", "tokio", "tower-service", @@ -4758,9 +4793,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -4768,7 +4803,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.57.0", + "windows-core 0.62.2", ] [[package]] @@ -4828,9 +4863,9 @@ checksum = 
"7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" dependencies = [ "icu_collections", "icu_locale_core", @@ -4842,9 +4877,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" [[package]] name = "icu_provider" @@ -4907,9 +4942,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -4936,9 +4971,9 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "strum 0.27.2", - "syn 2.0.111", - "thiserror 2.0.17", + "strum", + "syn 2.0.114", + "thiserror 2.0.18", ] [[package]] @@ -4983,9 +5018,9 @@ dependencies = [ [[package]] name = "io_uring_buf_ring" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe9ac631d954bb17eee5c932bd71bce3d5726c949c27729dd5c946b0de65471" +checksum = "1838759bb8c2f24cf05a35429d83145c4aa6af43f8ad38477295e12a7320a80e" dependencies = [ "bytes", "io-uring 0.7.11", @@ -5012,9 +5047,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.9" +version = "0.7.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" dependencies = [ "memchr", "serde", @@ -5073,9 +5108,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "java-locator" @@ -5088,9 +5123,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.17" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a87d9b8105c23642f50cbbae03d1f75d8422c5cb98ce7ee9271f7ff7505be6b8" +checksum = "d89a5b5e10d5a9ad6e5d1f4bd58225f655d6fe9767575a5e8ac5a6fe64e04495" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -5105,20 +5140,20 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.17" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b787bebb543f8969132630c51fd0afab173a86c6abae56ff3b9e5e3e3f9f6e58" +checksum = "ff7a39c8862fc1369215ccf0a8f12dd4598c7f6484704359f0351bd617034dbf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "jiff-tzdb" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" [[package]] name = "jiff-tzdb-platform" @@ -5245,9 +5280,9 @@ dependencies = [ [[package]] name = "lazy-regex" -version = "3.4.2" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "191898e17ddee19e60bccb3945aa02339e81edd4a8c50e21fd4d48cdecda7b29" 
+checksum = "6bae91019476d3ec7147de9aa291cadb6d870abf2f3015d2da73a90325ac1496" dependencies = [ "lazy-regex-proc_macros", "once_cell", @@ -5256,14 +5291,14 @@ dependencies = [ [[package]] name = "lazy-regex-proc_macros" -version = "3.4.2" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c35dc8b0da83d1a9507e12122c80dea71a9c7c613014347392483a83ea593e04" +checksum = "4de9c1e1439d8b7b3061b2d209809f447ca33241733d9a3c01eabf2dc8d94358" dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5298,15 +5333,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.182" +version = "0.2.181" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5" [[package]] name = "libfuzzer-sys" -version = "0.4.10" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404" +checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d" dependencies = [ "arbitrary", "cc", @@ -5334,9 +5369,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" @@ -5346,7 +5381,7 @@ checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ "bitflags 2.10.0", "libc", - "redox_syscall 0.7.1", + "redox_syscall 0.7.0", ] [[package]] @@ -5525,7 +5560,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2a4674e549a59eeac8e301584143186c433181bdc5460046a130becedef6a3d" 
dependencies = [ - "colored", + "colored 3.1.1", "jiff", "logforth-core", ] @@ -5601,7 +5636,7 @@ dependencies = [ "macro_magic_core", "macro_magic_macros", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5615,7 +5650,7 @@ dependencies = [ "macro_magic_core_macros", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5626,7 +5661,7 @@ checksum = "b02abfe41815b5bd98dbd4260173db2c116dda171dc0fe7838cb206333b83308" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5637,7 +5672,7 @@ checksum = "73ea28ee64b88876bf45277ed9a5817c1817df061a74f2b988971a12570e5869" dependencies = [ "macro_magic_core", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5720,7 +5755,7 @@ checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5767,7 +5802,7 @@ dependencies = [ [[package]] name = "mdb_shard" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "anyhow", "async-trait", @@ -5786,7 +5821,7 @@ dependencies = [ "serde", "static_assertions", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "utils", @@ -5796,27 +5831,18 @@ dependencies = [ [[package]] name = "mea" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef98beae251f03af02d54d7964b1f5fbc1a042bbf7e2296e5261d2962a762dda" -dependencies = [ - "slab", -] - -[[package]] -name = "mea" -version = "0.6.0" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfe9d4ca3e05f8356153d218e0503d5866dcc214cccdb2ff80d42a023837343e" +checksum = "6747f54621d156e1b47eb6b25f39a941b9fc347f98f67d25d8881ff99e8ed832" 
dependencies = [ "slab", ] [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" @@ -5848,7 +5874,7 @@ dependencies = [ [[package]] name = "merklehash" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "base64 0.22.1", "blake3", @@ -5890,7 +5916,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -5975,9 +6001,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.12" +version = "0.12.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" +checksum = "b4ac832c50ced444ef6be0767a008b02c106a909ba79d1d830501e94b96f6b7e" dependencies = [ "async-lock", "crossbeam-channel", @@ -6013,9 +6039,9 @@ checksum = "224484c5d09285a7b8cb0a0c117e847ebd14cb6e4470ecf68cdb89c503b0edb9" [[package]] name = "mongodb" -version = "3.4.1" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12f5c20217413bed97c613714e6d6dfe39ef59dd79a68999f1043b0566192975" +checksum = "803dd859e8afa084c255a8effd8000ff86f7c8076a50cd6d8c99e8f3496f75c2" dependencies = [ "base64 0.22.1", "bitflags 2.10.0", @@ -6037,36 +6063,36 @@ dependencies = [ "percent-encoding", "rand 0.9.2", "rustc_version_runtime", - "rustls 0.23.35", + "rustls 0.23.36", "rustversion", "serde", "serde_bytes", "serde_with", "sha1", "sha2", - "socket2 0.6.1", + "socket2 0.6.2", "stringprep", "strsim 
0.11.1", "take_mut", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-rustls 0.26.4", "tokio-util", "typed-builder", "uuid", - "webpki-roots 1.0.4", + "webpki-roots 1.0.6", ] [[package]] name = "mongodb-internal-macros" -version = "3.4.1" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20033442aa13664e70bc9f8be1bacabebf6a31b6d4bb5608ceb99c4ec96e9951" +checksum = "a973ef3dd3dbc6f6e65bbdecfd9ec5e781b9e7493b0f369a7c62e35d8e5ae2c8" dependencies = [ "macro_magic", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -6100,7 +6126,7 @@ checksum = "176a5f5e69613d9e88337cf2a65e11135332b4efbcc628404a7c555e4452084c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -6170,7 +6196,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] @@ -6249,9 +6275,9 @@ dependencies = [ [[package]] name = "noisy_float" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978fe6e6ebc0bf53de533cd456ca2d9de13de13856eda1518a285d7705a213af" +checksum = "c16843be85dd410c6a12251c4eca0dd1d3ee8c5725f746c4d5e0fdcec0a864b2" dependencies = [ "num-traits", ] @@ -6274,9 +6300,9 @@ checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" [[package]] name = "ntapi" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" dependencies = [ "winapi", ] @@ -6339,7 +6365,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 
2.0.114", ] [[package]] @@ -6413,18 +6439,18 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.37.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" dependencies = [ "memchr", ] [[package]] name = "object_store" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" dependencies = [ "async-trait", "bytes", @@ -6435,7 +6461,7 @@ dependencies = [ "itertools 0.14.0", "parking_lot 0.12.5", "percent-encoding", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "url", @@ -6621,7 +6647,7 @@ dependencies = [ "log", "logforth", "md-5", - "mea 0.6.0", + "mea", "moka", "percent-encoding", "pretty_assertions", @@ -6712,7 +6738,7 @@ version = "0.55.0" dependencies = [ "futures", "http 1.4.0", - "mea 0.6.0", + "mea", "opendal-core", "tokio", ] @@ -6937,7 +6963,7 @@ dependencies = [ "bytes", "http 1.4.0", "log", - "mea 0.6.0", + "mea", "opendal-core", "serde", "serde_json", @@ -7024,7 +7050,7 @@ dependencies = [ "bytes", "http 1.4.0", "log", - "mea 0.6.0", + "mea", "opendal-core", "serde", "serde_json", @@ -7122,7 +7148,7 @@ version = "0.55.0" dependencies = [ "bytes", "http 1.4.0", - "mea 0.6.0", + "mea", "opendal-core", "serde", "serde_json", @@ -7204,7 +7230,7 @@ dependencies = [ "bytes", "http 1.4.0", "log", - "mea 0.6.0", + "mea", "opendal-core", "serde", "serde_json", @@ -7248,7 +7274,7 @@ name = "opendal-service-gridfs" version = "0.55.0" dependencies = [ "futures", - "mea 0.6.0", + "mea", "mongodb", "opendal-core", "serde", @@ -7298,7 +7324,6 @@ dependencies = [ "reqwest 0.12.24", "serde", "serde_json", - "sha2", "tokio", "utils", ] @@ -7347,7 +7372,7 
@@ dependencies = [ "bytes", "http 1.4.0", "log", - "mea 0.6.0", + "mea", "opendal-core", "serde", "serde_json", @@ -7404,7 +7429,7 @@ name = "opendal-service-mongodb" version = "0.55.0" dependencies = [ "anyhow", - "mea 0.6.0", + "mea", "mongodb", "opendal-core", "serde", @@ -7428,7 +7453,7 @@ dependencies = [ name = "opendal-service-mysql" version = "0.55.0" dependencies = [ - "mea 0.6.0", + "mea", "opendal-core", "serde", "sqlx", @@ -7456,7 +7481,7 @@ dependencies = [ "bytes", "http 1.4.0", "log", - "mea 0.6.0", + "mea", "opendal-core", "serde", "serde_json", @@ -7520,7 +7545,7 @@ dependencies = [ name = "opendal-service-postgresql" version = "0.55.0" dependencies = [ - "mea 0.6.0", + "mea", "opendal-core", "serde", "sqlx", @@ -7590,7 +7615,7 @@ dependencies = [ "bytes", "http 1.4.0", "log", - "mea 0.6.0", + "mea", "opendal-core", "serde", "serde_json", @@ -7627,7 +7652,7 @@ dependencies = [ name = "opendal-service-sqlite" version = "0.55.0" dependencies = [ - "mea 0.6.0", + "mea", "opendal-core", "serde", "sqlx", @@ -7638,7 +7663,7 @@ dependencies = [ name = "opendal-service-surrealdb" version = "0.55.0" dependencies = [ - "mea 0.6.0", + "mea", "opendal-core", "serde", "surrealdb", @@ -7663,7 +7688,7 @@ dependencies = [ name = "opendal-service-tikv" version = "0.55.0" dependencies = [ - "mea 0.6.0", + "mea", "opendal-core", "serde", "tikv-client", @@ -7724,7 +7749,7 @@ dependencies = [ "bytes", "http 1.4.0", "log", - "mea 0.6.0", + "mea", "opendal-core", "quick-xml", "serde", @@ -7739,7 +7764,7 @@ dependencies = [ "bytes", "http 1.4.0", "log", - "mea 0.6.0", + "mea", "opendal-core", "serde", "serde_json", @@ -7787,7 +7812,7 @@ dependencies = [ "once_cell", "shell-escape", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", ] @@ -7839,7 +7864,7 @@ dependencies = [ "openssh", "openssh-sftp-protocol-error", "ssh_format_error", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", ] @@ -7865,7 +7890,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "42b54df62ccfd9a7708a83a9d60c46293837e478f9f4c0829360dcfa60ede8d2" dependencies = [ "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "vec-strings", ] @@ -7892,7 +7917,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -7903,9 +7928,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-probe" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "openssl-sys" @@ -7929,7 +7954,7 @@ dependencies = [ "futures-sink", "js-sys", "pin-project-lite", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", ] @@ -7957,11 +7982,11 @@ dependencies = [ "opentelemetry-http", "opentelemetry-proto", "opentelemetry_sdk", - "prost 0.14.1", - "reqwest 0.12.24", - "thiserror 2.0.17", + "prost 0.14.3", + "reqwest", + "thiserror 2.0.18", "tokio", - "tonic 0.14.2", + "tonic 0.14.3", "tracing", ] @@ -7973,8 +7998,8 @@ checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" dependencies = [ "opentelemetry", "opentelemetry_sdk", - "prost 0.14.1", - "tonic 0.14.2", + "prost 0.14.3", + "tonic 0.14.3", "tonic-prost", ] @@ -7990,7 +8015,7 @@ dependencies = [ "opentelemetry", "percent-encoding", "rand 0.9.2", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", ] @@ -8209,16 +8234,16 @@ dependencies = [ "fs2", "linked-hash-map", "rand 0.9.2", - "thiserror 2.0.17", + "thiserror 2.0.18", "unsigned-varint", "zigzag", ] [[package]] name = "pest" -version = "2.8.4" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cbcfd20a6d4eeba40179f05735784ad32bdaef05ce8e8af05f180d45bb3e7e22" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" dependencies = [ "memchr", "ucd-trie", @@ -8231,17 +8256,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset 0.4.2", - "indexmap 2.12.1", + "indexmap 2.13.0", ] [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset 0.5.7", - "indexmap 2.12.1", + "hashbrown 0.15.5", + "indexmap 2.13.0", ] [[package]] @@ -8294,7 +8320,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "unicase", ] @@ -8331,7 +8357,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -8455,15 +8481,15 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" dependencies = [ "portable-atomic", ] @@ -8515,7 +8541,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -8552,14 +8578,14 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -8589,7 +8615,7 @@ dependencies = [ [[package]] name = "progress_tracking" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "async-trait", "merklehash", @@ -8626,7 +8652,7 @@ dependencies = [ "parking_lot 0.12.5", "procfs", "protobuf", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -8649,7 +8675,7 @@ checksum = "9adf1691c04c0a5ff46ff8f262b58beb07b0dbb61f96f9f54f6cbd82106ed87f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -8674,33 +8700,32 @@ dependencies = [ [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", - "prost-derive 0.14.1", + "prost-derive 0.14.3", ] [[package]] name = "prost-build" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" +checksum = 
"343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.10.5", + "itertools 0.14.0", "log", "multimap", - "once_cell", - "petgraph 0.7.1", + "petgraph 0.8.3", "prettyplease", - "prost 0.14.1", + "prost 0.14.3", "prost-types", "pulldown-cmark 0.13.0", "pulldown-cmark-to-cmark", "regex", - "syn 2.0.111", + "syn 2.0.114", "tempfile", ] @@ -8714,7 +8739,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -8724,32 +8749,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "prost-types" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ - "prost 0.14.1", + "prost 0.14.3", ] [[package]] @@ -8780,9 +8805,9 @@ checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" [[package]] name = "psm" -version = "0.1.28" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" 
dependencies = [ "ar_archive_writer", "cc", @@ -8832,9 +8857,9 @@ dependencies = [ [[package]] name = "pulldown-cmark-to-cmark" -version = "21.1.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8246feae3db61428fd0bb94285c690b460e4517d83152377543ca802357785f1" +checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90" dependencies = [ "pulldown-cmark 0.13.0", ] @@ -8888,9 +8913,9 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash 2.1.1", - "rustls 0.23.35", - "socket2 0.6.1", - "thiserror 2.0.17", + "rustls 0.23.36", + "socket2 0.6.2", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -8909,10 +8934,10 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash 2.1.1", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", "slab", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -8927,16 +8952,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.6.2", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.42" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -8995,7 +9020,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -9025,7 +9050,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -9043,14 +9068,14 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -9135,9 +9160,9 @@ dependencies = [ [[package]] name = "redis" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfe20977fe93830c0e9817a16fbf1ed1cfd8d4bba366087a1841d2c6033c251" +checksum = "e969d1d702793536d5fda739a82b88ad7cbe7d04f8386ee8cd16ad3eff4854a5" dependencies = [ "arc-swap", "arcstr", @@ -9155,11 +9180,11 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rand 0.9.2", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-native-certs 0.8.3", "ryu", "sha1_smol", - "socket2 0.6.1", + "socket2 0.6.2", "tokio", "tokio-native-tls", "tokio-rustls 0.26.4", @@ -9188,9 +9213,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.7.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b" +checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" dependencies = [ "bitflags 2.10.0", ] @@ -9201,7 +9226,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "libredox", "thiserror 1.0.69", ] @@ -9212,9 +9237,9 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "getrandom 0.2.16", + "getrandom 
0.2.17", "libredox", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -9234,7 +9259,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -9251,9 +9276,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -9263,9 +9288,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -9274,15 +9299,15 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "rend" @@ -9304,7 +9329,7 @@ dependencies = [ "base64 0.22.1", "chrono", "form_urlencoded", - "getrandom 0.2.16", + "getrandom 0.2.17", "hex", "hmac", "home", @@ -9341,9 +9366,9 @@ dependencies = [ [[package]] name = "reqsign-aws-v4" -version = "2.0.1" +version = "2.0.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4510c2a3e42b653cf788d560a3d54b0ae4cc315a62aaba773554f18319c0db0b" +checksum = "ab367a07c335a3eaa22395a9d9b0031ac73aee5893573281b2fa27bf97dc94f2" dependencies = [ "anyhow", "async-trait", @@ -9385,9 +9410,9 @@ dependencies = [ [[package]] name = "reqsign-file-read-tokio" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "669ea66036266a9ac371d2e63cc7d345e69994da0168b4e6f3487fe21e126f76" +checksum = "702f12a867bf8e507de907fa0f4d75b96469ace7edd33fcc1fc8a8ef58f3c8d2" dependencies = [ "anyhow", "async-trait", @@ -9414,9 +9439,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.24" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ "base64 0.22.1", "bytes", @@ -9424,7 +9449,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.4.12", + "h2 0.4.13", "http 1.4.0", "http-body 1.0.1", "http-body-util", @@ -9440,7 +9465,8 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.35", + "rustls 0.23.36", + "rustls-native-certs 0.8.3", "rustls-pki-types", "serde", "serde_json", @@ -9450,7 +9476,7 @@ dependencies = [ "tokio-native-tls", "tokio-rustls 0.26.4", "tokio-util", - "tower 0.5.2", + "tower 0.5.3", "tower-http", "tower-service", "url", @@ -9458,7 +9484,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams 0.4.2", "web-sys", - "webpki-roots 1.0.4", + "webpki-roots 1.0.6", ] [[package]] @@ -9524,7 +9550,7 @@ dependencies = [ "anyhow", "async-trait", "futures", - "getrandom 0.2.16", + "getrandom 0.2.17", "http 1.4.0", "hyper 1.8.1", "reqwest 0.13.2", @@ -9574,7 +9600,7 @@ checksum = "d3415e1bc838c36f9a0a2ac60c0fa0851c72297685e66592c44870d82834dfa2" dependencies = 
[ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -9596,7 +9622,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if 1.0.4", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -9604,9 +9630,9 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.45" +version = "0.7.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" dependencies = [ "bitvec", "bytecheck", @@ -9622,9 +9648,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.7.45" +version = "0.7.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" dependencies = [ "proc-macro2", "quote", @@ -9633,22 +9659,19 @@ dependencies = [ [[package]] name = "rmp" -version = "0.8.14" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c" dependencies = [ - "byteorder", "num-traits", - "paste", ] [[package]] name = "rmpv" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58450723cd9ee93273ce44a20b6ec4efe17f8ed2e3631474387bfdecf18bb2a9" +checksum = "7a4e1d4b9b938a26d2996af33229f0ca0956c652c1375067f0b45291c1df8417" dependencies = [ - "num-traits", "rmp", ] @@ -9690,9 +9713,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"40a0376c50d0358279d9d643e4bf7b7be212f1f4ff1da9070a7b54d22ef75c88" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" dependencies = [ "const-oid", "digest", @@ -9798,9 +9821,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.39.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282" +checksum = "61f703d19852dbf87cbc513643fa81428361eb6940f1ac14fd58155d295a3eb0" dependencies = [ "arrayvec", "borsh", @@ -9892,16 +9915,16 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "aws-lc-rs", "log", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.9", "subtle", "zeroize", ] @@ -9924,7 +9947,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ - "openssl-probe 0.2.0", + "openssl-probe 0.2.1", "rustls-pki-types", "schannel", "security-framework 3.5.1", @@ -9941,9 +9964,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.1" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", @@ -9988,9 +10011,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" 
+checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "aws-lc-rs", "ring", @@ -10006,9 +10029,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "safe-transmute" @@ -10066,9 +10089,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.1.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", "ref-cast", @@ -10237,7 +10260,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -10251,11 +10274,11 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ - "indexmap 2.12.1", + "indexmap 2.13.0", "itoa", "memchr", "serde", @@ -10282,7 +10305,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -10316,9 +10339,9 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.12.1", + "indexmap 2.13.0", "schemars 0.9.0", - "schemars 1.1.0", + "schemars 1.2.1", "serde_core", "serde_json", "serde_with_macros", @@ -10334,7 +10357,7 @@ dependencies = [ "darling 
0.21.3", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -10420,10 +10443,11 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.7" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -10474,15 +10498,15 @@ checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", ] [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "size" @@ -10507,9 +10531,9 @@ dependencies = [ [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "sled" @@ -10569,9 +10593,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" dependencies = [ "libc", "windows-sys 0.60.2", @@ -10658,17 +10682,17 @@ dependencies = [ "futures-util", "hashbrown 0.15.5", "hashlink", - "indexmap 2.12.1", + "indexmap 2.13.0", "log", "memchr", "once_cell", "percent-encoding", - "rustls 0.23.35", + "rustls 
0.23.36", "serde", "serde_json", "sha2", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "tracing", @@ -10686,7 +10710,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -10709,7 +10733,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.111", + "syn 2.0.114", "tokio", "url", ] @@ -10751,7 +10775,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", "whoami 1.6.1", ] @@ -10788,7 +10812,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", "whoami 1.6.1", ] @@ -10812,7 +10836,7 @@ dependencies = [ "serde", "serde_urlencoded", "sqlx-core", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", "url", ] @@ -10862,9 +10886,9 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" dependencies = [ "cc", "cfg-if 1.0.4", @@ -10957,35 +10981,13 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros 0.26.4", -] - [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros 0.27.2", -] - -[[package]] -name = "strum_macros" -version 
= "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.111", + "strum_macros", ] [[package]] @@ -10997,7 +10999,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -11020,16 +11022,16 @@ dependencies = [ "lazy-regex", "log", "pin-project", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "surrealdb" -version = "2.4.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4636ac0af4dd619a66d55d8b5c0d1a0965ac1fe417c6a39dbc1d3db16588b969" +checksum = "62b7720b39ce2985efbfa10858b7397ffd95655a9bab6d9dfaa03622bbdc3bc2" dependencies = [ "arrayvec", "async-channel 2.5.0", @@ -11039,7 +11041,7 @@ dependencies = [ "futures", "geo", "getrandom 0.3.4", - "indexmap 2.12.1", + "indexmap 2.13.0", "path-clean", "pharos", "reblessive", @@ -11047,7 +11049,7 @@ dependencies = [ "revision", "ring", "rust_decimal", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", "semver", "serde", @@ -11069,9 +11071,9 @@ dependencies = [ [[package]] name = "surrealdb-core" -version = "2.4.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b99720b7f5119785b065d235705ca95f568a9a89745d1221871e845eedf424d" +checksum = "c48e42c81713be2f9b3dae64328999eafe8b8060dd584059445a908748b39787" dependencies = [ "addr", "ahash 0.8.12", @@ -11099,6 +11101,7 @@ dependencies = [ "geo", "geo-types", "getrandom 0.3.4", + "hashbrown 0.14.5", "hex", "http 1.4.0", "ipnet", @@ -11157,15 +11160,15 @@ dependencies = [ [[package]] name = "sval" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"502b8906c4736190684646827fbab1e954357dfe541013bbd7994d033d53a1ca" +checksum = "c1aaf178a50bbdd86043fce9bf0a5867007d9b382db89d1c96ccae4601ff1ff9" [[package]] name = "sval_buffer" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4b854348b15b6c441bdd27ce9053569b016a0723eab2d015b1fd8e6abe4f708" +checksum = "f89273e48f03807ebf51c4d81c52f28d35ffa18a593edf97e041b52de143df89" dependencies = [ "sval", "sval_ref", @@ -11173,18 +11176,18 @@ dependencies = [ [[package]] name = "sval_dynamic" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0bd9e8b74410ddad37c6962587c5f9801a2caadba9e11f3f916ee3f31ae4a1f" +checksum = "0430f4e18e7eba21a49d10d25a8dec3ce0e044af40b162347e99a8e3c3ced864" dependencies = [ "sval", ] [[package]] name = "sval_fmt" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fe17b8deb33a9441280b4266c2d257e166bafbaea6e66b4b34ca139c91766d9" +checksum = "835f51b9d7331b9d7fc48fc716c02306fa88c4a076b1573531910c91a525882d" dependencies = [ "itoa", "ryu", @@ -11193,9 +11196,9 @@ dependencies = [ [[package]] name = "sval_json" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854addb048a5bafb1f496c98e0ab5b9b581c3843f03ca07c034ae110d3b7c623" +checksum = "13cbfe3ef406ee2366e7e8ab3678426362085fa9eaedf28cb878a967159dced3" dependencies = [ "itoa", "ryu", @@ -11204,9 +11207,9 @@ dependencies = [ [[package]] name = "sval_nested" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96cf068f482108ff44ae8013477cb047a1665d5f1a635ad7cf79582c1845dce9" +checksum = "8b20358af4af787c34321a86618c3cae12eabdd0e9df22cd9dd2c6834214c518" dependencies = [ "sval", "sval_buffer", @@ -11215,18 +11218,18 @@ dependencies = [ [[package]] name = "sval_ref" -version = 
"2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed02126365ffe5ab8faa0abd9be54fbe68d03d607cd623725b0a71541f8aaa6f" +checksum = "fb5e500f8eb2efa84f75e7090f7fc43f621b9f8b6cde571c635b3855f97b332a" dependencies = [ "sval", ] [[package]] name = "sval_serde" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a263383c6aa2076c4ef6011d3bae1b356edf6ea2613e3d8e8ebaa7b57dd707d5" +checksum = "ca2032ae39b11dcc6c18d5fbc50a661ea191cac96484c59ccf49b002261ca2c1" dependencies = [ "serde_core", "sval", @@ -11246,9 +11249,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.111" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -11287,7 +11290,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -11306,9 +11309,9 @@ dependencies = [ [[package]] name = "system-configuration" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ "bitflags 2.10.0", "core-foundation 0.9.4", @@ -11359,7 +11362,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.1", "once_cell", "rustix 1.1.3", "windows-sys 0.61.2", @@ -11414,11 +11417,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.17", + "thiserror-impl 2.0.18", ] [[package]] @@ -11429,18 +11432,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -11500,30 +11503,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd" dependencies = [ "num-conv", "time-core", @@ -11585,7 +11588,7 @@ dependencies = [ 
"parking_lot 0.12.5", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.1", + "socket2 0.6.2", "tokio-macros", "windows-sys 0.61.2", ] @@ -11618,7 +11621,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -11658,15 +11661,15 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.35", + "rustls 0.23.36", "tokio", ] [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -11681,7 +11684,7 @@ checksum = "c6989540ced10490aaf14e6bad2e3d33728a2813310a0c71d1574304c49631cd" dependencies = [ "futures-util", "log", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", @@ -11691,9 +11694,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -11705,11 +11708,11 @@ dependencies = [ [[package]] name = "toml" -version = "0.9.10+spec-1.1.0" +version = "0.9.12+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0825052159284a1a8b4d6c0c86cbc801f2da5afd2b225fa548c72f2e74002f48" +checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863" dependencies = [ - "indexmap 2.12.1", + "indexmap 2.13.0", "serde_core", "serde_spanned", 
"toml_datetime", @@ -11729,11 +11732,11 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.23.9" +version = "0.23.10+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d7cbc3b4b49633d57a0509303158ca50de80ae32c265093b24c414705807832" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" dependencies = [ - "indexmap 2.12.1", + "indexmap 2.13.0", "toml_datetime", "toml_parser", "winnow", @@ -11741,9 +11744,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.0.7+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "247eaa3197818b831697600aadf81514e577e0cba5eab10f7e064e78ae154df1" dependencies = [ "winnow", ] @@ -11786,15 +11789,15 @@ dependencies = [ [[package]] name = "tonic" -version = "0.14.2" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" +checksum = "a286e33f82f8a1ee2df63f4fa35c0becf4a85a0cb03091a15fd7bf0b402dc94a" dependencies = [ "async-trait", - "axum 0.8.7", + "axum 0.8.8", "base64 0.22.1", "bytes", - "h2 0.4.12", + "h2 0.4.13", "http 1.4.0", "http-body 1.0.1", "http-body-util", @@ -11803,12 +11806,12 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "socket2 0.6.1", + "socket2 0.6.2", "sync_wrapper 1.0.2", "tokio", "tokio-rustls 0.26.4", "tokio-stream", - "tower 0.5.2", + "tower 0.5.3", "tower-layer", "tower-service", "tracing", @@ -11816,39 +11819,39 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.14.2" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c40aaccc9f9eccf2cd82ebc111adc13030d23e887244bc9cfa5d1d636049de3" +checksum = "27aac809edf60b741e2d7db6367214d078856b8a5bff0087e94ff330fb97b6fc" dependencies = [ 
"prettyplease", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "tonic-prost" -version = "0.14.2" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" +checksum = "d6c55a2d6a14174563de34409c9f92ff981d006f56da9c6ecd40d9d4a31500b0" dependencies = [ "bytes", - "prost 0.14.1", - "tonic 0.14.2", + "prost 0.14.3", + "tonic 0.14.3", ] [[package]] name = "tonic-prost-build" -version = "0.14.2" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4a16cba4043dc3ff43fcb3f96b4c5c154c64cbd18ca8dce2ab2c6a451d058a2" +checksum = "a4556786613791cfef4ed134aa670b61a85cfcacf71543ef33e8d801abae988f" dependencies = [ "prettyplease", "proc-macro2", "prost-build", "prost-types", "quote", - "syn 2.0.111", + "syn 2.0.114", "tempfile", "tonic-build", ] @@ -11875,13 +11878,13 @@ dependencies = [ [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", - "indexmap 2.12.1", + "indexmap 2.13.0", "pin-project-lite", "slab", "sync_wrapper 1.0.2", @@ -11905,7 +11908,7 @@ dependencies = [ "http-body 1.0.1", "iri-string", "pin-project-lite", - "tower 0.5.2", + "tower 0.5.3", "tower-layer", "tower-service", ] @@ -11942,7 +11945,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -11968,16 +11971,13 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.32.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1e6e5658463dd88089aba75c7791e1d3120633b1bfde22478b28f625a9bb1b8e" +checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc" dependencies = [ "js-sys", "opentelemetry", - "opentelemetry_sdk", - "rustversion", "smallvec", - "thiserror 2.0.17", "tracing", "tracing-core", "tracing-log", @@ -12082,7 +12082,7 @@ dependencies = [ "httparse", "log", "rand 0.8.5", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", "sha1", "thiserror 1.0.69", @@ -12116,7 +12116,7 @@ checksum = "0e48cea23f68d1f78eb7bc092881b6bb88d3d6b5b7e6234f6f9c911da1ffb221" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -12156,9 +12156,9 @@ dependencies = [ [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-bidi" @@ -12168,9 +12168,9 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" [[package]] name = "unicode-normalization" @@ -12235,9 +12235,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -12272,15 +12272,11 @@ checksum = 
"06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "utils" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "async-trait", "bincode", "bytes", -<<<<<<< HEAD - "chrono", -======= ->>>>>>> 9a258842f (chore(deps): align ctor version in xet) "ctor", "derivative", "duration-str", @@ -12292,7 +12288,7 @@ dependencies = [ "rand 0.9.2", "serde", "shellexpand", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-util", "tracing", @@ -12301,9 +12297,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.19.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" dependencies = [ "getrandom 0.3.4", "js-sys", @@ -12492,9 +12488,9 @@ dependencies = [ [[package]] name = "wasix" -version = "0.12.21" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1fbb4ef9bbca0c1170e0b00dd28abc9e3b68669821600cad1caaed606583c6d" +checksum = "1757e0d1f8456693c7e5c6c629bdb54884e032aa0bb53c155f6a39f94440d332" dependencies = [ "wasi 0.11.1+wasi-snapshot-preview1", ] @@ -12545,7 +12541,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "wasm-bindgen-shared", ] @@ -12588,7 +12584,35 @@ checksum = "f579cdd0123ac74b94e1a4a72bd963cf30ebac343f2df347da0b8df24cdebed2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", +] + +[[package]] +name = "wasm-bindgen-test-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8145dd1593bf0fb137dbfa85b8be79ec560a447298955877804640e40c2d6ea" + +[[package]] 
+name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.13.0", + "wasm-encoder", + "wasmparser", ] [[package]] @@ -12653,7 +12677,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags 2.10.0", "hashbrown 0.15.5", - "indexmap 2.12.1", + "indexmap 2.13.0", "semver", ] @@ -12737,14 +12761,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.4", + "webpki-roots 1.0.6", ] [[package]] name = "webpki-roots" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -12904,7 +12928,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -12915,7 +12939,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -12926,7 +12950,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -12937,7 +12961,7 @@ checksum = 
"3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -13353,9 +13377,9 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", - "indexmap 2.12.1", + "indexmap 2.13.0", "prettyplease", - "syn 2.0.111", + "syn 2.0.114", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -13371,7 +13395,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -13384,7 +13408,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags 2.10.0", - "indexmap 2.12.1", + "indexmap 2.13.0", "log", "serde", "serde_derive", @@ -13403,7 +13427,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap 2.12.1", + "indexmap 2.13.0", "log", "semver", "serde", @@ -13432,7 +13456,7 @@ dependencies = [ "pharos", "rustc_version", "send_wrapper", - "thiserror 2.0.17", + "thiserror 2.0.18", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", @@ -13460,7 +13484,7 @@ dependencies = [ [[package]] name = "xet_config" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "const-str", "konst", @@ -13470,14 +13494,14 @@ dependencies = [ [[package]] name = "xet_runtime" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core?branch=download_bytes#744b564500a98b76b6790e17a854497376c87235" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" dependencies = [ "dirs", "error_printer", "libc", "oneshot", - "reqwest 0.13.2", - "thiserror 2.0.17", + 
"reqwest", + "thiserror 2.0.18", "tokio", "tracing", "utils", @@ -13527,28 +13551,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -13568,7 +13592,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "synstructure", ] @@ -13608,7 +13632,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -13622,9 +13646,9 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.10" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e0d8dffbae3d840f64bda38e28391faef673a7b5a6017840f2a106c8145868" +checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7" [[package]] name = "zstd" diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index a9b166f450bb..2dad87be93fe 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -51,16 +51,15 @@ opendal-core = { path = "../../core", version = "0.55.0", default-features = 
fal percent-encoding = "2" serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } -sha2 = "0.10" # XET storage protocol support (optional) async-trait = { version = "0.1", optional = true } -cas_types = { git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } +cas_types = { git = "https://github.com/kszucs/xet-core.git", branch = "download_bytes", optional = true } futures = { workspace = true, optional = true } reqwest = { version = "0.12", default-features = false, features = [ "rustls-tls", ], optional = true } -xet-data = { package = "data", git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } -xet-utils = { package = "utils", git = "https://github.com/kszucs/xet-core", branch = "download_bytes", optional = true } +xet-data = { package = "data", git = "https://github.com/kszucs/xet-core.git", branch = "download_bytes", optional = true } +xet-utils = { package = "utils", git = "https://github.com/kszucs/xet-core.git", branch = "download_bytes", optional = true } [dev-dependencies] futures = { workspace = true } diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 84464fa88c45..fdb0fd996e4f 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -234,7 +234,7 @@ pub struct HfBackend { impl Access for HfBackend { type Reader = HfReader; - type Writer = oio::OneShotWriter; + type Writer = HfWriter; type Lister = oio::PageLister; type Deleter = oio::BatchDeleter; @@ -262,9 +262,9 @@ impl Access for HfBackend { Ok((RpList::default(), oio::PageLister::new(lister))) } - async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { - let writer = HfWriter::new(&self.core, path, args); - Ok((RpWrite::default(), oio::OneShotWriter::new(writer))) + async fn write(&self, path: &str, _args: OpWrite) -> Result<(RpWrite, Self::Writer)> { + let writer = HfWriter::try_new(self.core.clone(), 
path.to_string()).await?; + Ok((RpWrite::default(), writer)) } async fn delete(&self) -> Result<(RpDelete, Self::Deleter)> { diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index 2b243aae186b..f03d71afdc92 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -27,6 +27,10 @@ use http::Response; use http::header; use serde::Deserialize; +#[cfg(feature = "xet")] +use xet_data::XetFileInfo; +#[cfg(feature = "xet")] +use xet_data::streaming::XetClient; #[cfg(feature = "xet")] use xet_utils::auth::TokenRefresher; @@ -35,6 +39,32 @@ use super::uri::HfRepo; use opendal_core::raw::*; use opendal_core::*; +/// API payload structures for preupload operations +#[derive(serde::Serialize)] +struct PreuploadFile { + path: String, + size: i64, + sample: String, +} + +#[derive(serde::Serialize)] +struct PreuploadRequest { + files: Vec, +} + +#[derive(serde::Deserialize, Debug)] +struct PreuploadFileResponse { + #[allow(dead_code)] + path: String, + #[serde(rename = "uploadMode")] + upload_mode: String, +} + +#[derive(serde::Deserialize, Debug)] +struct PreuploadResponse { + files: Vec, +} + /// API payload structures for commit operations #[derive(Debug, serde::Serialize)] pub(super) struct CommitFile { @@ -142,6 +172,35 @@ pub(super) struct XetToken { pub exp: u64, } +#[cfg(feature = "xet")] +pub(super) struct XetTokenRefresher { + core: HfCore, + token_type: &'static str, +} + +#[cfg(feature = "xet")] +impl XetTokenRefresher { + pub(super) fn new(core: &HfCore, token_type: &'static str) -> Self { + Self { + core: core.clone(), + token_type, + } + } +} + +#[cfg(feature = "xet")] +#[async_trait::async_trait] +impl TokenRefresher for XetTokenRefresher { + async fn refresh(&self) -> std::result::Result<(String, u64), xet_utils::errors::AuthError> { + let token = self + .core + .xet_token(self.token_type) + .await + .map_err(xet_utils::errors::AuthError::token_refresh_failure)?; + Ok((token.access_token, token.exp)) + } +} + // Core 
HuggingFace client that manages API interactions, authentication // and shared logic for reader/writer/lister. @@ -299,7 +358,7 @@ impl HfCore { } #[cfg(feature = "xet")] - pub(super) async fn get_xet_token(&self, token_type: &str) -> Result { + pub(super) async fn xet_token(&self, token_type: &str) -> Result { let url = self.repo.xet_token_url(&self.endpoint, token_type); let req = self .request(http::Method::GET, &url, Operation::Read) @@ -309,11 +368,106 @@ impl HfCore { Ok(token) } + #[cfg(feature = "xet")] + pub(super) async fn xet_client(&self, token_type: &'static str) -> Result { + let token = self.xet_token(token_type).await?; + let refresher = Arc::new(XetTokenRefresher::new(self, token_type)); + XetClient::new( + Some(token.cas_url), + Some((token.access_token, token.exp)), + Some(refresher), + "opendal/1.0".to_string(), + ) + .map_err(map_xet_error) + } + + /// Issue a HEAD request and extract XET file info (hash and size). + /// + /// Returns `None` if the `X-Xet-Hash` header is absent or empty. + /// + /// Uses a dedicated no-redirect HTTP client so we can inspect + /// headers (e.g. `X-Xet-Hash`) on the 302 response. 
+ #[cfg(feature = "xet")] + pub(super) async fn maybe_xet_file(&self, path: &str) -> Result> { + let uri = self.uri(path); + let url = uri.resolve_url(&self.endpoint); + + let req = self + .request(http::Method::HEAD, &url, Operation::Stat) + .body(Buffer::new()) + .map_err(new_request_build_error)?; + + let mut attempt = 0; + let resp = loop { + let resp = self.no_redirect_client.send(req.clone()).await?; + + attempt += 1; + let retryable = resp.status().is_server_error(); + if attempt >= self.max_retries || !retryable { + break resp; + } + }; + + let hash = resp + .headers() + .get("X-Xet-Hash") + .and_then(|v| v.to_str().ok()) + .filter(|s| !s.is_empty()); + + let Some(hash) = hash else { + return Ok(None); + }; + + let size = resp + .headers() + .get("X-Linked-Size") + .or_else(|| resp.headers().get(header::CONTENT_LENGTH)) + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse::().ok()) + .unwrap_or(0); + + Ok(Some(XetFileInfo::new(hash.to_string(), size))) + } + /// Commit file changes (uploads and/or deletions) to the repository. /// /// Retries on commit conflicts (HTTP 412) and transient server errors /// (HTTP 5xx), matching the behavior of the official HuggingFace Hub /// client. + /// Determine upload mode by calling the preupload API. + /// + /// Returns the upload mode string from the API (e.g., "regular" or "lfs"). 
+ pub(super) async fn determine_upload_mode(&self, path: &str) -> Result { + let uri = self.uri(path); + let preupload_url = uri.preupload_url(&self.endpoint); + + let preupload_payload = PreuploadRequest { + files: vec![PreuploadFile { + path: path.to_string(), + size: -1, + sample: String::new(), + }], + }; + let json_body = serde_json::to_vec(&preupload_payload).map_err(new_json_serialize_error)?; + + let req = self + .request(http::Method::POST, &preupload_url, Operation::Write) + .header(header::CONTENT_TYPE, "application/json") + .body(Buffer::from(json_body)) + .map_err(new_request_build_error)?; + + let (_, preupload_resp): (_, PreuploadResponse) = self.send_parse(req).await?; + + let mode = preupload_resp + .files + .first() + .ok_or_else(|| Error::new(ErrorKind::Unexpected, "no files in preupload response"))? + .upload_mode + .clone(); + + Ok(mode) + } + pub(super) async fn commit_files( &self, regular_files: Vec, @@ -359,35 +513,6 @@ impl HfCore { } } -#[cfg(feature = "xet")] -pub(super) struct XetTokenRefresher { - core: HfCore, - token_type: &'static str, -} - -#[cfg(feature = "xet")] -impl XetTokenRefresher { - pub(super) fn new(core: &HfCore, token_type: &'static str) -> Self { - Self { - core: core.clone(), - token_type, - } - } -} - -#[cfg(feature = "xet")] -#[async_trait::async_trait] -impl TokenRefresher for XetTokenRefresher { - async fn refresh(&self) -> std::result::Result<(String, u64), xet_utils::errors::AuthError> { - let token = self - .core - .get_xet_token(self.token_type) - .await - .map_err(xet_utils::errors::AuthError::token_refresh_failure)?; - Ok((token.access_token, token.exp)) - } -} - #[cfg(test)] pub(crate) mod test_utils { use http::{Request, Response, StatusCode}; diff --git a/core/services/hf/src/reader.rs b/core/services/hf/src/reader.rs index 1e0c44a6115b..aeb034cc0b96 100644 --- a/core/services/hf/src/reader.rs +++ b/core/services/hf/src/reader.rs @@ -15,13 +15,6 @@ // specific language governing permissions and 
limitations // under the License. -#[cfg(feature = "xet")] -use std::pin::Pin; -#[cfg(feature = "xet")] -use std::sync::Arc; - -#[cfg(feature = "xet")] -use bytes::Bytes; use http::Response; use http::StatusCode; use http::header; @@ -33,24 +26,18 @@ use futures::StreamExt; use super::core::HfCore; #[cfg(feature = "xet")] -use super::core::{XetTokenRefresher, map_xet_error}; +use super::core::map_xet_error; use opendal_core::raw::*; use opendal_core::*; - #[cfg(feature = "xet")] -struct XetFile { - hash: String, - size: u64, -} - +use xet_data::XetFileInfo; #[cfg(feature = "xet")] -type XetByteStream = - Pin> + Send + Sync>>; +use xet_data::streaming::XetReader; pub enum HfReader { Http(HttpBody), #[cfg(feature = "xet")] - Xet(XetByteStream), + Xet(XetReader), } impl HfReader { @@ -62,66 +49,15 @@ impl HfReader { pub async fn try_new(core: &HfCore, path: &str, range: BytesRange) -> Result { #[cfg(feature = "xet")] if core.xet_enabled { - if let Some(xet_file) = Self::maybe_xet_file(core, path).await? { - return Self::download_xet(core, &xet_file, range).await; + if let Some(xet_file) = core.maybe_xet_file(path).await? { + return Self::try_new_xet(core, &xet_file, range).await; } } - Self::download_http(core, path, range).await + Self::try_new_http(core, path, range).await } - /// Issue a HEAD request and extract XET file info (hash and size). - /// - /// Returns `None` if the `X-Xet-Hash` header is absent or empty. - /// - /// Uses a dedicated no-redirect HTTP client so we can inspect - /// headers (e.g. `X-Xet-Hash`) on the 302 response. 
- #[cfg(feature = "xet")] - async fn maybe_xet_file(core: &HfCore, path: &str) -> Result> { - let uri = core.uri(path); - let url = uri.resolve_url(&core.endpoint); - - let req = core - .request(http::Method::HEAD, &url, Operation::Stat) - .body(Buffer::new()) - .map_err(new_request_build_error)?; - - let mut attempt = 0; - let resp = loop { - let resp = core.no_redirect_client.send(req.clone()).await?; - - attempt += 1; - let retryable = resp.status().is_server_error(); - if attempt >= core.max_retries || !retryable { - break resp; - } - }; - - let hash = resp - .headers() - .get("X-Xet-Hash") - .and_then(|v| v.to_str().ok()) - .filter(|s| !s.is_empty()); - - let Some(hash) = hash else { - return Ok(None); - }; - - let size = resp - .headers() - .get("X-Linked-Size") - .or_else(|| resp.headers().get(http::header::CONTENT_LENGTH)) - .and_then(|v| v.to_str().ok()) - .and_then(|s| s.parse::().ok()) - .unwrap_or(0); - - Ok(Some(XetFile { - hash: hash.to_string(), - size, - })) - } - - pub async fn download_http(core: &HfCore, path: &str, range: BytesRange) -> Result { + pub async fn try_new_http(core: &HfCore, path: &str, range: BytesRange) -> Result { let client = core.info.http_client(); let uri = core.uri(path); let url = uri.resolve_url(&core.endpoint); @@ -148,37 +84,25 @@ impl HfReader { } #[cfg(feature = "xet")] - async fn download_xet(core: &HfCore, xet_file: &XetFile, range: BytesRange) -> Result { - let token = core.get_xet_token("read").await?; - - let file_info = xet_data::XetFileInfo::new(xet_file.hash.clone(), xet_file.size); + async fn try_new_xet( + core: &HfCore, + file_info: &XetFileInfo, + range: BytesRange, + ) -> Result { + let client = core.xet_client("read").await?; let file_range = if !range.is_full() { let offset = range.offset(); - let size = range.size().unwrap_or(xet_file.size - offset); - let end = offset + size; - Some(FileRange::new(offset, end)) + let size = range.size().unwrap_or(file_info.file_size() - offset); + 
Some(FileRange::new(offset, offset + size)) } else { None }; - let refresher = Arc::new(XetTokenRefresher::new(core, "read")); - - let mut streams = xet_data::data_client::download_bytes_async( - vec![file_info], - Some(vec![file_range]), - Some(token.cas_url), - Some((token.access_token, token.exp)), - Some(refresher), - None, - "opendal/1.0".to_string(), - 256, - ) - .await - .map_err(map_xet_error)?; - - let stream = streams.remove(0); - Ok(Self::Xet(Box::pin(stream))) + let reader = client + .read(file_info.clone(), file_range, None, 256) + .map_err(map_xet_error)?; + Ok(Self::Xet(reader)) } } diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 45e4a16e523b..2b93711e73ed 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -15,500 +15,178 @@ // specific language governing permissions and limitations // under the License. -use std::collections::HashMap; use std::sync::Arc; +#[cfg(feature = "xet")] +use std::sync::Mutex; use base64::Engine; -use http::Request; -use http::header; -use sha2::{Digest, Sha256}; +use super::core::{CommitFile, HfCore}; #[cfg(feature = "xet")] -use super::core::XetTokenRefresher; -use super::core::{CommitFile, CommitResponse, HfCore, LfsFile}; +use super::core::{LfsFile, map_xet_error}; use opendal_core::raw::*; use opendal_core::*; - -#[derive(serde::Serialize)] -struct PreuploadFile { - path: String, - size: u64, - sample: String, - #[serde(rename = "sha256")] - sha256: String, -} - -#[derive(serde::Serialize)] -struct PreuploadRequest { - files: Vec, -} - -#[derive(serde::Deserialize, Debug)] -struct PreuploadFileResponse { - #[allow(dead_code)] - path: String, - #[serde(rename = "uploadMode")] - upload_mode: String, -} - -#[derive(serde::Deserialize, Debug)] -struct PreuploadResponse { - files: Vec, -} - -#[derive(serde::Serialize)] -struct LfsBatchRequest { - operation: String, - transfers: Vec, - objects: Vec, - hash_algo: String, -} - -#[derive(serde::Serialize)] 
-struct LfsBatchRequestObject { - oid: String, - size: u64, -} - -#[derive(serde::Deserialize)] -struct LfsBatchResponse { - transfer: Option, - #[serde(default)] - objects: Vec, -} - -#[derive(serde::Deserialize)] -struct LfsBatchResponseObject { - actions: Option, - error: Option, -} - -#[derive(serde::Deserialize)] -struct LfsBatchActions { - upload: LfsBatchAction, - verify: Option, -} - -#[derive(serde::Deserialize)] -struct LfsBatchAction { - href: String, - #[serde(default)] - header: HashMap, -} - -#[derive(serde::Deserialize)] -struct LfsBatchError { - message: String, -} - -#[derive(serde::Serialize)] -struct LfsVerifyRequest { - oid: String, - size: u64, -} - -/// Resolved upload strategy after consulting the preupload and LFS batch APIs. -enum UploadMode { - /// Small file: base64 encode inline in commit payload. - Regular, - /// File already exists in LFS storage, just commit pointer. - LfsExists, - /// Single-part LFS upload: PUT entire body to pre-signed URL. - LfsSinglepart { - upload: LfsBatchAction, - verify: Option, - }, - /// Multi-part LFS upload: PUT chunks to numbered pre-signed URLs. - LfsMultipart { - upload: LfsBatchAction, - verify: Option, - chunk_size: usize, +#[cfg(feature = "xet")] +use xet_data::streaming::XetWriter; + +/// Writer that handles both regular (small) and XET (large) file uploads. +pub enum HfWriter { + /// Regular writer for small files using base64 inline commit. + Regular { + core: Arc, + path: String, + size: u64, + buf: Vec, }, - /// XET transfer protocol. + /// XET writer for large files using streaming protocol. #[cfg(feature = "xet")] - Xet, -} - -pub struct HfWriter { - core: Arc, - #[allow(dead_code)] - op: OpWrite, - path: String, + Xet { + core: Arc, + path: String, + size: u64, + writer: Mutex, + }, } impl HfWriter { - /// Create a writer. 
- pub fn new(core: &Arc, path: &str, op: OpWrite) -> Self { - Self { - core: core.clone(), - op, - path: path.to_string(), - } - } - - /// Determine the upload strategy for a file. - /// - /// Follows the HuggingFace Hub upload protocol: - /// 1. Compute SHA256 hash and a content sample for the preupload API. - /// 2. Call the preupload API to determine if the file should be uploaded - /// as "regular" (base64 inline in commit) or "lfs" (Git LFS). - /// 3. For LFS files, negotiate the transfer adapter with the LFS batch - /// API which returns pre-signed upload URLs or Xet. - /// - /// Returns the resolved upload mode and the SHA256 OID. - async fn determine_upload_mode(&self, body: &Buffer) -> Result<(UploadMode, String)> { - let bytes = body.to_bytes(); - let size = bytes.len() as u64; - - // Step 1: compute SHA256 and content sample. - let mut hasher = Sha256::new(); - hasher.update(&bytes); - let oid = format!("{:x}", hasher.finalize()); - - let sample_size = std::cmp::min(512, bytes.len()); - let sample = base64::engine::general_purpose::STANDARD.encode(&bytes[..sample_size]); - - // Step 2: call preupload API to get "regular" or "lfs". - let uri = self.core.uri(&self.path); - let preupload_url = uri.preupload_url(&self.core.endpoint); - - let preupload_payload = PreuploadRequest { - files: vec![PreuploadFile { - path: self.path.clone(), - size, - sample, - sha256: oid.clone(), - }], - }; - let json_body = serde_json::to_vec(&preupload_payload).map_err(new_json_serialize_error)?; - - let req = self - .core - .request(http::Method::POST, &preupload_url, Operation::Write) - .header(header::CONTENT_TYPE, "application/json") - .body(Buffer::from(json_body)) - .map_err(new_request_build_error)?; - - let (_, preupload_resp): (_, PreuploadResponse) = self.core.send_parse(req).await?; - - let mode = preupload_resp - .files - .first() - .ok_or_else(|| Error::new(ErrorKind::Unexpected, "no files in preupload response"))? 
- .upload_mode - .clone(); - - if mode != "lfs" { - return Ok((UploadMode::Regular, oid)); - } - - // Step 3: negotiate transfer adapter with the LFS batch API. - let url = self.core.repo.lfs_batch_url(&self.core.endpoint); - - #[allow(unused_mut)] - let mut transfers = vec!["basic".to_string(), "multipart".to_string()]; - #[cfg(feature = "xet")] - if self.core.xet_enabled { - transfers.push("xet".to_string()); - } - - let payload = LfsBatchRequest { - operation: "upload".to_string(), - transfers, - objects: vec![LfsBatchRequestObject { - oid: oid.clone(), - size, - }], - hash_algo: "sha256".to_string(), - }; - let json_body = serde_json::to_vec(&payload).map_err(new_json_serialize_error)?; - - let req = self - .core - .request(http::Method::POST, &url, Operation::Write) - .header(header::ACCEPT, "application/vnd.git-lfs+json") - .header(header::CONTENT_TYPE, "application/vnd.git-lfs+json") - .body(Buffer::from(json_body)) - .map_err(new_request_build_error)?; - - let (_, batch_resp): (_, LfsBatchResponse) = self.core.send_parse(req).await?; - - #[cfg_attr(not(feature = "xet"), allow(unused_variables))] - let chosen_transfer = batch_resp.transfer; - - let obj = batch_resp - .objects - .into_iter() - .next() - .ok_or_else(|| Error::new(ErrorKind::Unexpected, "empty LFS batch response"))?; - - if let Some(err) = obj.error { - return Err(Error::new(ErrorKind::Unexpected, err.message)); - } - - // No actions means the file already exists on the server. - let Some(actions) = obj.actions else { - return Ok((UploadMode::LfsExists, oid)); - }; + /// Create a new writer by determining the upload mode from the API. + pub async fn try_new(core: Arc, path: String) -> Result { + let mode_str = core.determine_upload_mode(&path).await?; - // If the server chose XET transfer, delegate to the XET protocol. 
- #[cfg(feature = "xet")] - if self.core.xet_enabled && chosen_transfer.as_deref() == Some("xet") { - return Ok((UploadMode::Xet, oid)); - } - - // Decide singlepart vs multipart based on whether the server - // provided a chunk_size in the upload action headers (matches - // the huggingface_hub Python client detection logic). - let chunk_size = actions.upload.header.get("chunk_size").and_then(|v| { - v.as_u64() - .map(|n| n as usize) - .or_else(|| v.as_str().and_then(|s| s.parse().ok())) - }); - - let mode = if let Some(chunk_size) = chunk_size { - UploadMode::LfsMultipart { - upload: actions.upload, - verify: actions.verify, - chunk_size, - } - } else { - UploadMode::LfsSinglepart { - upload: actions.upload, - verify: actions.verify, + if mode_str == "lfs" { + #[cfg(feature = "xet")] + if core.xet_enabled { + let client = core.xet_client("write").await?; + let writer = client + .write(None, None, None) + .await + .map_err(map_xet_error)?; + return Ok(HfWriter::Xet { + core, + path, + size: 0, + writer: Mutex::new(writer), + }); } - }; + return Err(Error::new( + ErrorKind::Unsupported, + "file requires LFS; enable the xet feature for large file support", + )); + } - Ok((mode, oid)) + Ok(HfWriter::Regular { + core, + path, + size: 0, + buf: Vec::new(), + }) } - /// Prepare file content for regular HTTP commit (base64 encoded inline). - fn prepare_commit_file(path: &str, body: &Buffer) -> CommitFile { - let content = base64::engine::general_purpose::STANDARD.encode(body.to_bytes()); + fn prepare_commit_file(path: &str, body: &[u8]) -> CommitFile { + let content = base64::engine::general_purpose::STANDARD.encode(body); CommitFile { path: path.to_string(), content, encoding: "base64".to_string(), } } +} - /// Singlepart LFS upload: PUT entire body to the upload URL. 
- async fn lfs_upload_singlepart(&self, upload: &LfsBatchAction, body: Buffer) -> Result<()> { - let req = Request::builder() - .method(http::Method::PUT) - .uri(&upload.href) - .extension(Operation::Write) - .body(body) - .map_err(new_request_build_error)?; - - self.core.send(req).await?; - Ok(()) - } - - /// Multi-part LFS upload: PUT chunks to numbered part URLs, then POST completion. - async fn lfs_upload_multipart( - &self, - upload: &LfsBatchAction, - oid: &str, - body: Buffer, - chunk_size: usize, - ) -> Result<()> { - let bytes = body.to_bytes(); - let total_parts = bytes.len().div_ceil(chunk_size); - - // Collect presigned part URLs from the upload header. The server - // stores them as digit-only keys (e.g. "1", "2", "3"). We collect - // all such keys, sort by numeric value, and use them in order — - // matching the huggingface_hub Python client's `_get_sorted_parts_urls`. - let mut part_urls: Vec<(usize, String)> = upload - .header - .iter() - .filter_map(|(k, v)| { - let num: usize = k.parse().ok()?; - let url = v.as_str()?; - Some((num, url.to_string())) - }) - .collect(); - part_urls.sort_by_key(|(num, _)| *num); - - let part_urls: Vec = part_urls.into_iter().map(|(_, url)| url).collect(); - if part_urls.len() != total_parts { - return Err(Error::new( - ErrorKind::Unexpected, - format!( - "expected {} part URLs but server returned {} \ - (file size: {}, chunk size: {}, header keys: {:?})", - total_parts, - part_urls.len(), - bytes.len(), - chunk_size, - upload.header.keys().collect::>(), - ), - )); - } - - let mut etags = Vec::with_capacity(total_parts); - - for (part_num, part_url) in part_urls.iter().enumerate() { - let start = part_num * chunk_size; - let end = std::cmp::min(start + chunk_size, bytes.len()); - let chunk = bytes.slice(start..end); - - let req = Request::builder() - .method(http::Method::PUT) - .uri(part_url.as_str()) - .extension(Operation::Write) - .body(Buffer::from(chunk)) - .map_err(new_request_build_error)?; - - let parts = 
self.core.send(req).await?.into_parts().0; - let etag = parts - .headers - .get(header::ETAG) - .and_then(|v| v.to_str().ok()) - .unwrap_or("") - .to_string(); - etags.push(etag); +impl oio::Write for HfWriter { + async fn write(&mut self, bs: Buffer) -> Result<()> { + match self { + HfWriter::Regular { size, buf, .. } => { + *size += bs.len() as u64; + buf.push(bs); + Ok(()) + } + #[cfg(feature = "xet")] + HfWriter::Xet { size, writer, .. } => { + *size += bs.len() as u64; + writer + .get_mut() + .unwrap() + .write(bs.to_bytes()) + .await + .map_err(map_xet_error) + } } - - let parts: Vec<_> = etags - .into_iter() - .enumerate() - .map(|(i, etag)| { - serde_json::json!({ - "partNumber": i + 1, - "etag": etag, - }) - }) - .collect(); - - let completion = serde_json::json!({ - "oid": oid, - "parts": parts, - }); - let completion_body = serde_json::to_vec(&completion).map_err(new_json_serialize_error)?; - - let req = self - .core - .request(http::Method::POST, &upload.href, Operation::Write) - .header(header::CONTENT_TYPE, "application/json") - .body(Buffer::from(completion_body)) - .map_err(new_request_build_error)?; - - self.core.send(req).await?; - Ok(()) - } - - /// Verify an LFS upload if the server requested verification. - async fn lfs_verify( - &self, - verify: &Option, - oid: &str, - size: u64, - ) -> Result<()> { - let Some(verify) = verify else { - return Ok(()); - }; - - let payload = LfsVerifyRequest { - oid: oid.to_string(), - size, - }; - let body = serde_json::to_vec(&payload).map_err(new_json_serialize_error)?; - - let req = self - .core - .request(http::Method::POST, &verify.href, Operation::Write) - .header(header::CONTENT_TYPE, "application/vnd.git-lfs+json") - .body(Buffer::from(body)) - .map_err(new_request_build_error)?; - - self.core.send(req).await?; - Ok(()) - } - - /// Upload file content to XET storage. 
- #[cfg(feature = "xet")] - async fn xet_upload(&self, body: Buffer) -> Result<()> { - use super::core::map_xet_error; - - let bytes = body.to_bytes(); - - let token = self.core.get_xet_token("write").await?; - let refresher = Arc::new(XetTokenRefresher::new(&self.core, "write")); - - let file_contents = vec![bytes.to_vec()]; - - let results = xet_data::data_client::upload_bytes_async( - file_contents, - Some(token.cas_url), - Some((token.access_token, token.exp)), - Some(refresher), - None, - "opendal/1.0".to_string(), - ) - .await - .map_err(map_xet_error)?; - - results.first().ok_or_else(|| { - Error::new( - ErrorKind::Unexpected, - "No file info returned from XET upload", - ) - })?; - - Ok(()) } - async fn upload_and_commit(&self, body: Buffer) -> Result { - let (mode, oid) = self.determine_upload_mode(&body).await?; - let size = body.len() as u64; - - match mode { - UploadMode::Regular => { - let file = Self::prepare_commit_file(&self.path, &body); - return self.core.commit_files(vec![file], vec![], vec![]).await; - } - UploadMode::LfsExists => {} - UploadMode::LfsSinglepart { upload, verify } => { - self.lfs_upload_singlepart(&upload, body).await?; - self.lfs_verify(&verify, &oid, size).await?; - } - UploadMode::LfsMultipart { - upload, - verify, - chunk_size, + async fn close(&mut self) -> Result { + match self { + HfWriter::Regular { + core, + path, + size, + buf, + .. 
} => { - self.lfs_upload_multipart(&upload, &oid, body, chunk_size) - .await?; - self.lfs_verify(&verify, &oid, size).await?; + let content_length = *size; + + // Flatten buffer + let mut data = Vec::new(); + for buf in std::mem::take(buf) { + data.extend_from_slice(&buf.to_bytes()); + } + + let file = Self::prepare_commit_file(path, &data); + let resp = core.commit_files(vec![file], vec![], vec![]).await?; + + let mut meta = Metadata::default().with_content_length(content_length); + if let Some(commit_oid) = resp.commit_oid { + meta = meta.with_version(commit_oid); + } + Ok(meta) } #[cfg(feature = "xet")] - UploadMode::Xet => { - self.xet_upload(body).await?; + HfWriter::Xet { + core, + path, + size, + writer, + } => { + let content_length = *size; + let file_info = writer + .get_mut() + .unwrap() + .close() + .await + .map_err(map_xet_error)?; + let sha256 = file_info.sha256().ok_or_else(|| { + Error::new( + ErrorKind::Unexpected, + "xet upload did not return sha256 hash", + ) + })?; + let lfs_file = LfsFile { + path: path.clone(), + oid: sha256.to_string(), + algo: "sha256".to_string(), + size: content_length, + }; + let resp = core.commit_files(vec![], vec![lfs_file], vec![]).await?; + + let mut meta = Metadata::default().with_content_length(content_length); + if let Some(commit_oid) = resp.commit_oid { + meta = meta.with_version(commit_oid); + } + Ok(meta) } } - - let lfs_file = LfsFile { - path: self.path.clone(), - oid, - algo: "sha256".to_string(), - size, - }; - self.core.commit_files(vec![], vec![lfs_file], vec![]).await } -} - -impl oio::OneShotWrite for HfWriter { - async fn write_once(&self, bs: Buffer) -> Result { - let size = bs.len() as u64; - let resp = self.upload_and_commit(bs).await?; - let mut meta = Metadata::default().with_content_length(size); - if let Some(oid) = resp.commit_oid { - meta = meta.with_version(oid); + async fn abort(&mut self) -> Result<()> { + match self { + HfWriter::Regular { buf, .. 
} => { + buf.clear(); + } + #[cfg(feature = "xet")] + HfWriter::Xet { writer, .. } => { + let _ = writer.get_mut().unwrap().abort().await; + } } - Ok(meta) + Ok(()) } } @@ -525,8 +203,7 @@ mod tests { #[test] fn test_prepare_commit_file() { let content = b"Hello, World!"; - let buf = Buffer::from(content.to_vec()); - let file = HfWriter::prepare_commit_file("data/test.txt", &buf); + let file = HfWriter::prepare_commit_file("data/test.txt", content); assert_eq!(file.path, "data/test.txt"); assert_eq!(file.encoding, "base64"); @@ -538,8 +215,7 @@ mod tests { #[test] fn test_prepare_commit_file_empty() { - let buf = Buffer::from(Vec::::new()); - let file = HfWriter::prepare_commit_file("empty.bin", &buf); + let file = HfWriter::prepare_commit_file("empty.bin", &[]); assert_eq!(file.path, "empty.bin"); assert_eq!(file.encoding, "base64"); @@ -600,47 +276,6 @@ mod tests { let _ = op.delete(path).await; } - /// Write a 1 MB binary file with XET disabled. The preupload API should - /// classify this as LFS, and the LFS batch API should choose basic - /// (singlepart) transfer since the file is below the multipart threshold. - #[tokio::test] - #[ignore] - async fn test_write_lfs_singlepart_roundtrip() { - let op = testing_operator(); - let path = "tests/lfs-singlepart.bin"; - let content: Vec = (0..1_048_576u32).map(|i| (i % 256) as u8).collect(); - - op.write(path, content.clone()) - .await - .expect("LFS singlepart write should succeed"); - - let data = op.read(path).await.expect("read should succeed"); - assert_eq!(data.to_bytes().as_ref(), content.as_slice()); - - let _ = op.delete(path).await; - } - - /// Write a large binary file with XET disabled. The server decides - /// whether to use singlepart or multipart LFS transfer based on size. - #[tokio::test] - #[ignore] - async fn test_write_lfs_large_roundtrip() { - let op = testing_operator(); - let path = "tests/lfs-large.bin"; - // 12 MB of patterned data — above the ~10 MB multipart threshold. 
- let content: Vec = (0..12_000_000u32).map(|i| (i % 251) as u8).collect(); - - op.write(path, content.clone()) - .await - .expect("LFS large write should succeed"); - - let data = op.read(path).await.expect("read should succeed"); - assert_eq!(data.to_bytes().len(), content.len()); - assert_eq!(data.to_bytes().as_ref(), content.as_slice()); - - let _ = op.delete(path).await; - } - /// Verify stat returns correct metadata after writing. #[tokio::test] #[ignore] @@ -757,34 +392,6 @@ mod tests { let _ = op.delete(path).await; } - /// Upload identical LFS content to two different paths. The second - /// write should hit the LfsExists code path (LFS batch returns no - /// actions because the object already exists in storage). - #[tokio::test] - #[ignore] - async fn test_write_lfs_reupload() { - let op = testing_operator(); - let path1 = "tests/lfs-reupload-1.bin"; - let path2 = "tests/lfs-reupload-2.bin"; - let content: Vec = (0..1_048_576u32).map(|i| (i % 256) as u8).collect(); - - // First upload — should use LFS singlepart. - op.write(path1, content.clone()) - .await - .expect("first LFS write should succeed"); - - // Second upload of identical content to a different path — should hit LfsExists. - op.write(path2, content.clone()) - .await - .expect("LFS re-upload should succeed (LfsExists path)"); - - let data = op.read(path2).await.expect("read should succeed"); - assert_eq!(data.to_bytes().as_ref(), content.as_slice()); - - let _ = op.delete(path1).await; - let _ = op.delete(path2).await; - } - /// Delete a file and confirm read returns NotFound. 
#[tokio::test] #[ignore] From 3cc3b65cfbe25d90ec76d56ee29cc47bd07c91be Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 11 Feb 2026 19:16:41 +0100 Subject: [PATCH 15/25] chore(hf): remove duplicate deleter tests --- core/services/hf/src/deleter.rs | 40 ++++++++++++++------------ core/services/hf/src/writer.rs | 50 --------------------------------- 2 files changed, 22 insertions(+), 68 deletions(-) diff --git a/core/services/hf/src/deleter.rs b/core/services/hf/src/deleter.rs index 474f691697cf..5e2340a3b397 100644 --- a/core/services/hf/src/deleter.rs +++ b/core/services/hf/src/deleter.rs @@ -66,41 +66,45 @@ mod tests { #[tokio::test] #[ignore] - async fn test_delete_once() { + async fn test_delete_nonexistent() { let op = testing_operator(); - let path = "tests/delete-test.txt"; - - op.write(path, b"temporary content".as_slice()) - .await - .expect("write should succeed"); - - op.delete(path).await.expect("delete should succeed"); - let err = op - .stat(path) + op.delete("nonexistent-file.txt") .await - .expect_err("stat should fail after delete"); - assert_eq!(err.kind(), ErrorKind::NotFound); + .expect("deleting nonexistent file should succeed"); } #[tokio::test] #[ignore] - async fn test_delete_nonexistent() { + async fn test_delete_then_read() { let op = testing_operator(); + let path = "tests/delete-then-read.txt"; - op.delete("nonexistent-file.txt") + op.write(path, b"will be deleted".as_slice()) .await - .expect("deleting nonexistent file should succeed"); + .expect("write should succeed"); + + op.delete(path).await.expect("delete should succeed"); + + let err = op + .read(path) + .await + .expect_err("read after delete should fail"); + assert_eq!(err.kind(), ErrorKind::NotFound); } #[tokio::test] #[ignore] - async fn test_delete_batch() { + async fn test_batch_delete() { let op = testing_operator(); - let paths = ["tests/batch-del-1.txt", "tests/batch-del-2.txt"]; + let paths = [ + "tests/batch-del-a.txt", + "tests/batch-del-b.txt", + 
"tests/batch-del-c.txt", + ]; for path in &paths { - op.write(path, b"temp".as_slice()) + op.write(path, b"batch delete test".as_slice()) .await .expect("write should succeed"); } diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 2b93711e73ed..e9d36baccb65 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -391,54 +391,4 @@ mod tests { let _ = op.delete(path).await; } - - /// Delete a file and confirm read returns NotFound. - #[tokio::test] - #[ignore] - async fn test_delete_then_read() { - let op = testing_operator(); - let path = "tests/delete-then-read.txt"; - - op.write(path, b"will be deleted".as_slice()) - .await - .expect("write should succeed"); - - op.delete(path).await.expect("delete should succeed"); - - let err = op - .read(path) - .await - .expect_err("read after delete should fail"); - assert_eq!(err.kind(), ErrorKind::NotFound); - } - - /// Write multiple files, delete them all, and verify each is gone. - #[tokio::test] - #[ignore] - async fn test_batch_delete() { - let op = testing_operator(); - let paths = [ - "tests/batch-del-a.txt", - "tests/batch-del-b.txt", - "tests/batch-del-c.txt", - ]; - - for path in &paths { - op.write(path, b"batch delete test".as_slice()) - .await - .expect("write should succeed"); - } - - for path in &paths { - op.delete(path).await.expect("delete should succeed"); - } - - for path in &paths { - let err = op - .stat(path) - .await - .expect_err("stat should fail after delete"); - assert_eq!(err.kind(), ErrorKind::NotFound); - } - } } From 07ed71e9e410079dc49930ccebde7f0298a5218a Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 11 Feb 2026 19:21:13 +0100 Subject: [PATCH 16/25] chore(hf): remove essentially unused size field from HfWriter --- core/services/hf/src/writer.rs | 44 ++++++++++------------------------ 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 
e9d36baccb65..cb6430963d03 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -35,7 +35,6 @@ pub enum HfWriter { Regular { core: Arc, path: String, - size: u64, buf: Vec, }, /// XET writer for large files using streaming protocol. @@ -43,7 +42,6 @@ pub enum HfWriter { Xet { core: Arc, path: String, - size: u64, writer: Mutex, }, } @@ -64,7 +62,6 @@ impl HfWriter { return Ok(HfWriter::Xet { core, path, - size: 0, writer: Mutex::new(writer), }); } @@ -77,7 +74,6 @@ impl HfWriter { Ok(HfWriter::Regular { core, path, - size: 0, buf: Vec::new(), }) } @@ -95,35 +91,25 @@ impl HfWriter { impl oio::Write for HfWriter { async fn write(&mut self, bs: Buffer) -> Result<()> { match self { - HfWriter::Regular { size, buf, .. } => { - *size += bs.len() as u64; + HfWriter::Regular { buf, .. } => { buf.push(bs); Ok(()) } #[cfg(feature = "xet")] - HfWriter::Xet { size, writer, .. } => { - *size += bs.len() as u64; - writer - .get_mut() - .unwrap() - .write(bs.to_bytes()) - .await - .map_err(map_xet_error) - } + HfWriter::Xet { writer, .. } => writer + .get_mut() + .unwrap() + .write(bs.to_bytes()) + .await + .map_err(map_xet_error), } } async fn close(&mut self) -> Result { match self { HfWriter::Regular { - core, - path, - size, - buf, - .. + core, path, buf, .. 
} => { - let content_length = *size; - // Flatten buffer let mut data = Vec::new(); for buf in std::mem::take(buf) { @@ -133,20 +119,14 @@ impl oio::Write for HfWriter { let file = Self::prepare_commit_file(path, &data); let resp = core.commit_files(vec![file], vec![], vec![]).await?; - let mut meta = Metadata::default().with_content_length(content_length); + let mut meta = Metadata::default().with_content_length(data.len() as u64); if let Some(commit_oid) = resp.commit_oid { meta = meta.with_version(commit_oid); } Ok(meta) } #[cfg(feature = "xet")] - HfWriter::Xet { - core, - path, - size, - writer, - } => { - let content_length = *size; + HfWriter::Xet { core, path, writer } => { let file_info = writer .get_mut() .unwrap() @@ -163,11 +143,11 @@ impl oio::Write for HfWriter { path: path.clone(), oid: sha256.to_string(), algo: "sha256".to_string(), - size: content_length, + size: file_info.file_size(), }; let resp = core.commit_files(vec![], vec![lfs_file], vec![]).await?; - let mut meta = Metadata::default().with_content_length(content_length); + let mut meta = Metadata::default().with_content_length(file_info.file_size()); if let Some(commit_oid) = resp.commit_oid { meta = meta.with_version(commit_oid); } From 3ad06142b8d9d5d3a88a0cd9c324da24261797ba Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 11 Feb 2026 23:14:16 +0100 Subject: [PATCH 17/25] chore(hf): update XetClient.write() call after removing progress updater arguments --- core/Cargo.lock | 48 +++++++++++++++++----------------- core/services/hf/src/writer.rs | 5 +--- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 747b75c56f16..574c22dc7b01 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -1552,7 +1552,7 @@ dependencies = [ [[package]] name = "cas_client" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = 
"git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "anyhow", "async-trait", @@ -1601,7 +1601,7 @@ dependencies = [ [[package]] name = "cas_object" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "anyhow", "blake3", @@ -1628,7 +1628,7 @@ dependencies = [ [[package]] name = "cas_types" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "merklehash", "serde", @@ -1904,7 +1904,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] @@ -1913,7 +1913,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -2621,7 +2621,7 @@ dependencies = [ [[package]] name = "data" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "anyhow", "async-trait", @@ -2666,7 +2666,7 @@ checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" [[package]] name = "deduplication" version = "0.14.5" -source = 
"git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "async-trait", "bytes", @@ -3222,7 +3222,7 @@ dependencies = [ [[package]] name = "error_printer" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "tracing", ] @@ -3395,7 +3395,7 @@ dependencies = [ [[package]] name = "file_reconstruction" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "async-trait", "bytes", @@ -3415,7 +3415,7 @@ dependencies = [ [[package]] name = "file_utils" version = "0.14.2" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "colored", "lazy_static", @@ -3992,7 +3992,7 @@ version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24f8647af4005fa11da47cd56252c6ef030be8fa97bdbf355e7dfb6348f0a82c" dependencies = [ - "approx 0.5.1", + "approx 0.4.0", "num-traits", "rstar 0.10.0", "rstar 0.11.0", @@ -4626,7 +4626,7 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hub_client" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = 
"git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "anyhow", "async-trait", @@ -4803,7 +4803,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.62.2", + "windows-core 0.57.0", ] [[package]] @@ -5802,7 +5802,7 @@ dependencies = [ [[package]] name = "mdb_shard" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "anyhow", "async-trait", @@ -5874,7 +5874,7 @@ dependencies = [ [[package]] name = "merklehash" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "base64 0.22.1", "blake3", @@ -8052,7 +8052,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -8615,7 +8615,7 @@ dependencies = [ [[package]] name = "progress_tracking" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "async-trait", "merklehash", @@ -8715,7 +8715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.14.0", + "itertools 0.10.5", "log", "multimap", "petgraph 0.8.3", @@ -8749,7 +8749,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.114", @@ -8762,7 +8762,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.114", @@ -12272,7 +12272,7 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "utils" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "async-trait", "bincode", @@ -12835,7 +12835,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -13484,7 +13484,7 @@ dependencies = [ [[package]] name = "xet_config" version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "const-str", "konst", @@ -13494,7 +13494,7 @@ dependencies = [ [[package]] name = "xet_runtime" version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#311799ec0ef23f2603925dd3b8d6c1d3d663b8a1" +source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" dependencies = [ "dirs", "error_printer", diff --git a/core/services/hf/src/writer.rs 
b/core/services/hf/src/writer.rs index cb6430963d03..4dcb082e7229 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -55,10 +55,7 @@ impl HfWriter { #[cfg(feature = "xet")] if core.xet_enabled { let client = core.xet_client("write").await?; - let writer = client - .write(None, None, None) - .await - .map_err(map_xet_error)?; + let writer = client.write(None).await.map_err(map_xet_error)?; return Ok(HfWriter::Xet { core, path, From 9727494e4c490cf598d2fcbb1caec54591923796 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 13 Feb 2026 11:49:53 +0100 Subject: [PATCH 18/25] feat(hf): add support for buckets repository type --- core/services/hf/src/backend.rs | 26 ++++++ core/services/hf/src/core.rs | 67 ++++++++++++++-- core/services/hf/src/reader.rs | 20 ++++- core/services/hf/src/uri.rs | 133 +++++++++++++++++++++++-------- core/services/hf/src/writer.rs | 135 +++++++++++++++++++++++++++----- 5 files changed, 319 insertions(+), 62 deletions(-) diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index fdb0fd996e4f..2927a9763859 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -45,6 +45,7 @@ impl HfBuilder { /// - dataset /// - datasets (alias for dataset) /// - space + /// - bucket /// /// [Reference](https://huggingface.co/docs/hub/repositories) pub fn repo_type(mut self, repo_type: &str) -> Self { @@ -298,6 +299,13 @@ pub(super) mod test_utils { (repo_id, token) } + #[cfg(feature = "xet")] + pub fn testing_bucket_credentials() -> (String, String) { + let repo_id = std::env::var("HF_OPENDAL_BUCKET").expect("HF_OPENDAL_BUCKET must be set"); + let token = std::env::var("HF_OPENDAL_TOKEN").expect("HF_OPENDAL_TOKEN must be set"); + (repo_id, token) + } + /// Operator for a private dataset requiring HF_OPENDAL_DATASET and HF_OPENDAL_TOKEN. /// Uses higher max_retries to tolerate concurrent commit conflicts (412). 
pub fn testing_operator() -> Operator { @@ -330,6 +338,24 @@ pub(super) mod test_utils { finish_operator(op) } + /// Operator for a bucket requiring HF_OPENDAL_BUCKET and HF_OPENDAL_TOKEN. + /// Buckets always use XET for writes. + #[cfg(feature = "xet")] + pub fn testing_bucket_operator() -> Operator { + let (repo_id, token) = testing_bucket_credentials(); + let op = Operator::new( + HfBuilder::default() + .repo_type("bucket") + .repo_id(&repo_id) + .token(&token) + .enable_xet() + .max_retries(10), + ) + .unwrap() + .finish(); + finish_operator(op) + } + pub fn gpt2_operator() -> Operator { let op = Operator::new( HfBuilder::default() diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index f03d71afdc92..e00769ef7398 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -86,6 +86,18 @@ pub(super) struct DeletedFile { pub path: String, } +/// Bucket batch operation payload structures +#[cfg(feature = "xet")] +#[derive(Debug, serde::Serialize)] +#[serde(tag = "type", rename_all = "camelCase")] +pub(super) enum BucketOperation { + #[serde(rename_all = "camelCase")] + AddFile { path: String, xet_hash: String }, + #[serde(rename_all = "camelCase")] + #[allow(dead_code)] + DeleteFile { path: String }, +} + #[derive(serde::Serialize)] pub(super) struct MixedCommitPayload { pub summary: String, @@ -113,7 +125,8 @@ pub(super) struct CommitResponse { pub(super) struct PathInfo { #[serde(rename = "type")] pub type_: String, - pub oid: String, + #[serde(default)] + pub oid: Option, pub size: u64, #[serde(default)] pub lfs: Option, @@ -141,12 +154,12 @@ impl PathInfo { if mode == EntryMode::FILE { meta.set_content_length(self.size); - let etag = if let Some(lfs) = &self.lfs { - &lfs.oid - } else { - &self.oid - }; - meta.set_etag(etag); + // For buckets, oid may be None; for regular repos, prefer lfs.oid then oid + if let Some(lfs) = &self.lfs { + meta.set_etag(&lfs.oid); + } else if let Some(oid) = &self.oid { + 
meta.set_etag(oid); + } } Ok(meta) @@ -511,6 +524,46 @@ impl HfCore { let (_, resp) = self.send_parse::(req).await?; Ok(resp) } + + /// Upload files to a bucket using the batch API. + /// + /// Sends operations as JSON lines (one operation per line). + #[cfg(feature = "xet")] + pub(super) async fn bucket_batch(&self, operations: Vec) -> Result<()> { + let _token = self.token.as_deref().ok_or_else(|| { + Error::new( + ErrorKind::PermissionDenied, + "token is required for bucket operations", + ) + .with_operation("bucket_batch") + })?; + + if operations.is_empty() { + return Err(Error::new( + ErrorKind::Unexpected, + "no operations to perform", + )); + } + + let url = self.repo.bucket_batch_url(&self.endpoint); + + let mut body = String::new(); + for op in operations { + let json = serde_json::to_string(&op).map_err(new_json_serialize_error)?; + body.push_str(&json); + body.push('\n'); + } + + let req = self + .request(http::Method::POST, &url, Operation::Write) + .header(header::CONTENT_TYPE, "application/x-ndjson") + .header(header::CONTENT_LENGTH, body.len()) + .body(Buffer::from(Bytes::from(body))) + .map_err(new_request_build_error)?; + + self.send(req).await?; + Ok(()) + } } #[cfg(test)] diff --git a/core/services/hf/src/reader.rs b/core/services/hf/src/reader.rs index aeb034cc0b96..09434ceee12d 100644 --- a/core/services/hf/src/reader.rs +++ b/core/services/hf/src/reader.rs @@ -27,6 +27,8 @@ use futures::StreamExt; use super::core::HfCore; #[cfg(feature = "xet")] use super::core::map_xet_error; +#[cfg(feature = "xet")] +use super::uri::RepoType; use opendal_core::raw::*; use opendal_core::*; #[cfg(feature = "xet")] @@ -43,12 +45,24 @@ pub enum HfReader { impl HfReader { /// Create a reader, automatically choosing between XET and HTTP. /// - /// When XET is enabled a HEAD request probes for the `X-Xet-Hash` - /// header. Files stored on XET are downloaded via the CAS protocol; - /// all others fall back to a regular HTTP GET. + /// Buckets always use XET. 
For other repo types, when XET is enabled + /// a HEAD request probes for the `X-Xet-Hash` header. Files stored on + /// XET are downloaded via the CAS protocol; all others fall back to HTTP GET. pub async fn try_new(core: &HfCore, path: &str, range: BytesRange) -> Result { #[cfg(feature = "xet")] if core.xet_enabled { + // Buckets always use XET + if core.repo.repo_type == RepoType::Bucket { + if let Some(xet_file) = core.maybe_xet_file(path).await? { + return Self::try_new_xet(core, &xet_file, range).await; + } + return Err(Error::new( + ErrorKind::Unexpected, + "bucket file is missing XET metadata", + )); + } + + // For other repos, probe for XET if let Some(xet_file) = core.maybe_xet_file(path).await? { return Self::try_new_xet(core, &xet_file, range).await; } diff --git a/core/services/hf/src/uri.rs b/core/services/hf/src/uri.rs index f089ce72a8a1..c29d34431dfc 100644 --- a/core/services/hf/src/uri.rs +++ b/core/services/hf/src/uri.rs @@ -22,7 +22,7 @@ use serde::Serialize; use super::HUGGINGFACE_SCHEME; use opendal_core::raw::*; -/// Repository type of Huggingface. Supports `model`, `dataset`, and `space`. +/// Repository type of Huggingface. Supports `model`, `dataset`, `space`, and `bucket`. 
/// [Reference](https://huggingface.co/docs/hub/repositories) #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] #[serde(rename_all = "lowercase")] @@ -31,6 +31,7 @@ pub enum RepoType { Model, Dataset, Space, + Bucket, } impl RepoType { @@ -39,6 +40,7 @@ impl RepoType { "model" | "models" => Ok(Self::Model), "dataset" | "datasets" => Ok(Self::Dataset), "space" | "spaces" => Ok(Self::Space), + "bucket" | "buckets" => Ok(Self::Bucket), other => Err(opendal_core::Error::new( opendal_core::ErrorKind::ConfigInvalid, format!("unknown repo type: {other}"), @@ -52,6 +54,7 @@ impl RepoType { Self::Model => "model", Self::Dataset => "dataset", Self::Space => "space", + Self::Bucket => "bucket", } } @@ -60,6 +63,7 @@ impl RepoType { Self::Model => "models", Self::Dataset => "datasets", Self::Space => "spaces", + Self::Bucket => "buckets", } } } @@ -98,42 +102,49 @@ impl HfRepo { /// Build the paths-info API URL for this repository. pub fn paths_info_url(&self, endpoint: &str) -> String { - format!( - "{}/api/{}/{}/paths-info/{}", - endpoint, - self.repo_type.as_plural_str(), - &self.repo_id, - percent_encode_revision(self.revision()), - ) - } - - /// Build the Git LFS batch API URL for this repository. - /// - /// Pattern: `{endpoint}/{type_prefix}{repo_id}.git/info/lfs/objects/batch` - /// where type_prefix is "" for models, "datasets/" for datasets, "spaces/" for spaces. 
- pub fn lfs_batch_url(&self, endpoint: &str) -> String { - let type_prefix = match self.repo_type { - RepoType::Model => "", - RepoType::Dataset => "datasets/", - RepoType::Space => "spaces/", - }; - format!( - "{}/{}{}.git/info/lfs/objects/batch", - endpoint, type_prefix, &self.repo_id, - ) + match self.repo_type { + RepoType::Bucket => { + format!("{}/api/buckets/{}/paths-info", endpoint, &self.repo_id) + } + _ => { + format!( + "{}/api/{}/{}/paths-info/{}", + endpoint, + self.repo_type.as_plural_str(), + &self.repo_id, + percent_encode_revision(self.revision()), + ) + } + } } /// Build the XET token API URL for this repository. #[cfg(feature = "xet")] pub fn xet_token_url(&self, endpoint: &str, token_type: &str) -> String { - format!( - "{}/api/{}/{}/xet-{}-token/{}", - endpoint, - self.repo_type.as_plural_str(), - &self.repo_id, - token_type, - self.revision(), - ) + match self.repo_type { + RepoType::Bucket => { + format!( + "{}/api/buckets/{}/xet-{}-token", + endpoint, &self.repo_id, token_type + ) + } + _ => { + format!( + "{}/api/{}/{}/xet-{}-token/{}", + endpoint, + self.repo_type.as_plural_str(), + &self.repo_id, + token_type, + self.revision(), + ) + } + } + } + + /// Build the bucket batch API URL for this repository. 
+ #[cfg(feature = "xet")] + pub fn bucket_batch_url(&self, endpoint: &str) -> String { + format!("{}/api/buckets/{}/batch", endpoint, &self.repo_id) } } @@ -275,6 +286,12 @@ impl HfUri { endpoint, &self.repo.repo_id, revision, path ) } + RepoType::Bucket => { + format!( + "{}/buckets/{}/resolve/{}", + endpoint, &self.repo.repo_id, path + ) + } } } @@ -535,4 +552,56 @@ mod tests { assert!(p.repo.revision.is_none()); assert_eq!(p.path, ""); } + + #[test] + fn resolve_buckets_prefix() { + let p = resolve("buckets/username/my_bucket"); + assert_eq!(p.repo.repo_type, RepoType::Bucket); + assert_eq!(p.repo.repo_id, "username/my_bucket"); + assert!(p.repo.revision.is_none()); + assert_eq!(p.path, ""); + } + + #[test] + fn resolve_buckets_with_path() { + let p = resolve("buckets/username/my_bucket/data/file.txt"); + assert_eq!(p.repo.repo_type, RepoType::Bucket); + assert_eq!(p.repo.repo_id, "username/my_bucket"); + assert!(p.repo.revision.is_none()); + assert_eq!(p.path, "data/file.txt"); + } + + #[test] + fn test_bucket_resolve_url() { + let p = resolve("buckets/user/bucket/file.txt"); + let url = p.resolve_url("https://huggingface.co"); + assert_eq!( + url, + "https://huggingface.co/buckets/user/bucket/resolve/file.txt" + ); + } + + #[test] + #[cfg(feature = "xet")] + fn test_bucket_xet_token_urls() { + let p = resolve("buckets/user/bucket"); + let read_url = p.repo.xet_token_url("https://huggingface.co", "read"); + let write_url = p.repo.xet_token_url("https://huggingface.co", "write"); + assert_eq!( + read_url, + "https://huggingface.co/api/buckets/user/bucket/xet-read-token" + ); + assert_eq!( + write_url, + "https://huggingface.co/api/buckets/user/bucket/xet-write-token" + ); + } + + #[test] + #[cfg(feature = "xet")] + fn test_bucket_batch_url() { + let p = resolve("buckets/user/bucket"); + let url = p.repo.bucket_batch_url("https://huggingface.co"); + assert_eq!(url, "https://huggingface.co/api/buckets/user/bucket/batch"); + } } diff --git 
a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 4dcb082e7229..10605905bde9 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -21,9 +21,11 @@ use std::sync::Mutex; use base64::Engine; +#[cfg(feature = "xet")] +use super::core::{BucketOperation, LfsFile, map_xet_error}; use super::core::{CommitFile, HfCore}; #[cfg(feature = "xet")] -use super::core::{LfsFile, map_xet_error}; +use super::uri::RepoType; use opendal_core::raw::*; use opendal_core::*; #[cfg(feature = "xet")] @@ -49,6 +51,24 @@ pub enum HfWriter { impl HfWriter { /// Create a new writer by determining the upload mode from the API. pub async fn try_new(core: Arc, path: String) -> Result { + // Buckets always use XET and don't have a preupload endpoint + #[cfg(feature = "xet")] + if core.repo.repo_type == RepoType::Bucket { + if !core.xet_enabled { + return Err(Error::new( + ErrorKind::Unsupported, + "buckets require XET to be enabled", + )); + } + let client = core.xet_client("write").await?; + let writer = client.write(None).await.map_err(map_xet_error)?; + return Ok(HfWriter::Xet { + core, + path, + writer: Mutex::new(writer), + }); + } + let mode_str = core.determine_upload_mode(&path).await?; if mode_str == "lfs" { @@ -130,25 +150,38 @@ impl oio::Write for HfWriter { .close() .await .map_err(map_xet_error)?; - let sha256 = file_info.sha256().ok_or_else(|| { - Error::new( - ErrorKind::Unexpected, - "xet upload did not return sha256 hash", - ) - })?; - let lfs_file = LfsFile { - path: path.clone(), - oid: sha256.to_string(), - algo: "sha256".to_string(), - size: file_info.file_size(), - }; - let resp = core.commit_files(vec![], vec![lfs_file], vec![]).await?; - - let mut meta = Metadata::default().with_content_length(file_info.file_size()); - if let Some(commit_oid) = resp.commit_oid { - meta = meta.with_version(commit_oid); + + let meta = Metadata::default().with_content_length(file_info.file_size()); + + if core.repo.repo_type == 
RepoType::Bucket { + let xet_hash = file_info.hash().to_string(); + let operation = BucketOperation::AddFile { + path: path.clone(), + xet_hash, + }; + core.bucket_batch(vec![operation]).await?; + Ok(meta) + } else { + let sha256 = file_info.sha256().ok_or_else(|| { + Error::new( + ErrorKind::Unexpected, + "xet upload did not return sha256 hash", + ) + })?; + let lfs_file = LfsFile { + path: path.clone(), + oid: sha256.to_string(), + algo: "sha256".to_string(), + size: file_info.file_size(), + }; + let resp = core.commit_files(vec![], vec![lfs_file], vec![]).await?; + + if let Some(commit_oid) = resp.commit_oid { + Ok(meta.with_version(commit_oid)) + } else { + Ok(meta) + } } - Ok(meta) } } } @@ -171,7 +204,7 @@ impl oio::Write for HfWriter { mod tests { use super::super::backend::test_utils::testing_operator; #[cfg(feature = "xet")] - use super::super::backend::test_utils::testing_xet_operator; + use super::super::backend::test_utils::{testing_bucket_operator, testing_xet_operator}; use super::*; use base64::Engine; @@ -368,4 +401,66 @@ mod tests { let _ = op.delete(path).await; } + + // --- Bucket tests (require HF_OPENDAL_BUCKET and HF_OPENDAL_TOKEN) --- + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_bucket_write() { + let op = testing_bucket_operator(); + let path = "test-bucket-file.txt"; + let content = b"Hello from bucket!"; + + op.write(path, content.as_slice()) + .await + .expect("bucket write should succeed"); + + let data = op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), content); + + let _ = op.delete(path).await; + } + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_bucket_write_roundtrip() { + let op = testing_bucket_operator(); + let path = "tests/bucket-roundtrip.bin"; + let content = b"Binary content for bucket roundtrip test"; + + op.write(path, content.as_slice()) + .await + .expect("bucket write should succeed"); + + let data = 
op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), content); + + let meta = op.stat(path).await.expect("stat should succeed"); + assert_eq!(meta.content_length(), content.len() as u64); + + let _ = op.delete(path).await; + } + + #[cfg(feature = "xet")] + #[tokio::test] + #[ignore] + async fn test_bucket_overwrite() { + let op = testing_bucket_operator(); + let path = "tests/bucket-overwrite.txt"; + + op.write(path, b"first content".as_slice()) + .await + .expect("first write should succeed"); + + op.write(path, b"second content".as_slice()) + .await + .expect("overwrite should succeed"); + + let data = op.read(path).await.expect("read should succeed"); + assert_eq!(data.to_bytes().as_ref(), b"second content"); + + let _ = op.delete(path).await; + } } From 4a1d0a00dc44e86ac6a878a11072c84eea0f418a Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 13 Feb 2026 14:42:01 +0100 Subject: [PATCH 19/25] build(hf): switch to depend on subxet - a tree-shaken single crate version of xet-core --- core/Cargo.lock | 511 ++++++++------------------------- core/services/hf/Cargo.toml | 8 +- core/services/hf/src/core.rs | 12 +- core/services/hf/src/reader.rs | 8 +- core/services/hf/src/writer.rs | 2 +- 5 files changed, 127 insertions(+), 414 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 574c22dc7b01..56e405baf7d7 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -1549,93 +1549,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "cas_client" -version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "anyhow", - "async-trait", - "axum 0.8.8", - "base64 0.22.1", - "bytes", - "cas_object", - "cas_types", - "chrono", - "clap", - "deduplication", - "error_printer", - "file_utils", - "futures", - "futures-util", - "heed", - "http 1.4.0", - "hyper 1.8.1", - "lazy_static", - "mdb_shard", - "merklehash", - 
"more-asserts", - "progress_tracking", - "rand 0.9.2", - "reqwest 0.13.2", - "reqwest-middleware", - "reqwest-retry", - "serde", - "serde_json", - "statrs", - "tempfile", - "thiserror 2.0.18", - "tokio", - "tokio-retry", - "tower-http", - "tracing", - "tracing-log", - "tracing-subscriber", - "url", - "utils", - "warp", - "web-time", - "xet_runtime", -] - -[[package]] -name = "cas_object" -version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "anyhow", - "blake3", - "bytes", - "clap", - "countio", - "csv", - "deduplication", - "futures", - "half", - "lz4_flex", - "mdb_shard", - "merklehash", - "more-asserts", - "rand 0.9.2", - "serde", - "thiserror 2.0.18", - "tokio", - "tracing", - "utils", - "xet_runtime", -] - -[[package]] -name = "cas_types" -version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "merklehash", - "serde", - "serde_repr", - "thiserror 2.0.18", -] - [[package]] name = "cast" version = "0.3.0" @@ -1897,23 +1810,13 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" -[[package]] -name = "colored" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" -dependencies = [ - "lazy_static", - "windows-sys 0.48.0", -] - [[package]] name = "colored" version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -2618,68 +2521,12 @@ dependencies = [ "parking_lot_core 0.9.12", ] -[[package]] -name = "data" -version = "0.14.5" -source = 
"git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "anyhow", - "async-trait", - "bytes", - "cas_client", - "cas_object", - "cas_types", - "chrono", - "clap", - "deduplication", - "error_printer", - "file_reconstruction", - "futures", - "hub_client", - "lazy_static", - "mdb_shard", - "merklehash", - "more-asserts", - "progress_tracking", - "prometheus 0.14.0", - "rand 0.9.2", - "regex", - "serde", - "serde_json", - "sha2", - "tempfile", - "thiserror 2.0.18", - "tokio", - "tracing", - "ulid", - "utils", - "walkdir", - "xet_runtime", -] - [[package]] name = "data-encoding" version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" -[[package]] -name = "deduplication" -version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "async-trait", - "bytes", - "gearhash", - "lazy_static", - "mdb_shard", - "merklehash", - "more-asserts", - "progress_tracking", - "utils", - "xet_runtime", -] - [[package]] name = "der" version = "0.6.1" @@ -3219,14 +3066,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "error_printer" -version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "tracing", -] - [[package]] name = "escape8259" version = "0.5.3" @@ -3392,40 +3231,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "file_reconstruction" -version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "async-trait", - "bytes", - "cas_client", - "cas_types", - "merklehash", - "more-asserts", - "progress_tracking", - "thiserror 2.0.18", - "tokio", - "tracing", - "utils", - "xet_config", - "xet_runtime", -] - 
-[[package]] -name = "file_utils" -version = "0.14.2" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "colored", - "lazy_static", - "libc", - "rand 0.9.2", - "tracing", - "whoami 2.1.1", - "winapi", -] - [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -3992,7 +3797,7 @@ version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24f8647af4005fa11da47cd56252c6ef030be8fa97bdbf355e7dfb6348f0a82c" dependencies = [ - "approx 0.4.0", + "approx 0.5.1", "num-traits", "rstar 0.10.0", "rstar 0.11.0", @@ -4056,10 +3861,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" dependencies = [ "cfg-if 1.0.4", + "js-sys", "libc", "r-efi", "wasip2", "wasip3", + "wasm-bindgen", ] [[package]] @@ -4623,21 +4430,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" -[[package]] -name = "hub_client" -version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "anyhow", - "async-trait", - "cas_client", - "reqwest 0.13.2", - "reqwest-middleware", - "serde", - "thiserror 2.0.18", - "urlencoding", -] - [[package]] name = "humantime" version = "2.3.0" @@ -4803,7 +4595,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.57.0", + "windows-core 0.62.2", ] [[package]] @@ -5560,7 +5352,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2a4674e549a59eeac8e301584143186c433181bdc5460046a130becedef6a3d" dependencies = [ - "colored 3.1.1", + "colored", "jiff", "logforth-core", ] @@ -5799,36 +5591,6 @@ dependencies = [ "digest", ] -[[package]] -name = "mdb_shard" -version = "0.14.5" -source = 
"git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "anyhow", - "async-trait", - "blake3", - "bytes", - "clap", - "futures", - "futures-util", - "heapify", - "itertools 0.14.0", - "lazy_static", - "merklehash", - "more-asserts", - "rand 0.9.2", - "regex", - "serde", - "static_assertions", - "tempfile", - "thiserror 2.0.18", - "tokio", - "tracing", - "utils", - "uuid", - "xet_runtime", -] - [[package]] name = "mea" version = "0.6.3" @@ -5871,21 +5633,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "merklehash" -version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "base64 0.22.1", - "blake3", - "bytemuck", - "getrandom 0.4.1", - "heed", - "rand 0.9.2", - "safe-transmute", - "serde", -] - [[package]] name = "metrics" version = "0.24.3" @@ -6562,8 +6309,8 @@ dependencies = [ "opendal-service-gridfs", "opendal-service-hdfs", "opendal-service-hdfs-native", - "opendal-service-hf", "opendal-service-http", + "opendal-service-huggingface", "opendal-service-ipfs", "opendal-service-ipmfs", "opendal-service-koofr", @@ -6600,7 +6347,7 @@ dependencies = [ "opendal-service-yandex-disk", "opendal-testkit", "rand 0.8.5", - "reqwest 0.12.24", + "reqwest 0.12.28", "sha2", "size", "tokio", @@ -6653,7 +6400,7 @@ dependencies = [ "pretty_assertions", "quick-xml", "rand 0.8.5", - "reqwest 0.12.24", + "reqwest 0.12.28", "serde", "serde_json", "sha2", @@ -7100,7 +6847,7 @@ dependencies = [ "opendal-core", "quick-xml", "reqsign", - "reqwest 0.12.24", + "reqwest 0.12.28", "serde", "tokio", ] @@ -7217,7 +6964,7 @@ dependencies = [ "percent-encoding", "quick-xml", "reqsign", - "reqwest 0.12.24", + "reqwest 0.12.28", "serde", "serde_json", "tokio", @@ -7307,35 +7054,33 @@ dependencies = [ ] [[package]] -name = "opendal-service-hf" +name = "opendal-service-http" version = "0.55.0" dependencies = [ - 
"async-trait", - "backon", - "base64 0.22.1", - "bytes", - "cas_types", - "data", - "futures", "http 1.4.0", "log", "opendal-core", - "percent-encoding", - "reqwest 0.12.24", "serde", - "serde_json", "tokio", - "utils", ] [[package]] -name = "opendal-service-http" +name = "opendal-service-huggingface" version = "0.55.0" dependencies = [ + "async-trait", + "backon", + "base64 0.22.1", + "bytes", + "futures", "http 1.4.0", "log", "opendal-core", + "percent-encoding", + "reqwest 0.12.28", "serde", + "serde_json", + "subxet", "tokio", ] @@ -7602,7 +7347,7 @@ dependencies = [ "reqsign-core", "reqsign-file-read-tokio", "reqsign-http-send-reqwest", - "reqwest 0.12.24", + "reqwest 0.12.28", "serde", "serde_json", "tokio", @@ -7968,7 +7713,7 @@ dependencies = [ "bytes", "http 1.4.0", "opentelemetry", - "reqwest 0.12.24", + "reqwest 0.12.28", ] [[package]] @@ -7983,7 +7728,7 @@ dependencies = [ "opentelemetry-proto", "opentelemetry_sdk", "prost 0.14.3", - "reqwest", + "reqwest 0.12.28", "thiserror 2.0.18", "tokio", "tonic 0.14.3", @@ -8052,7 +7797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -8612,18 +8357,6 @@ dependencies = [ "hex", ] -[[package]] -name = "progress_tracking" -version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "async-trait", - "merklehash", - "more-asserts", - "tokio", - "utils", -] - [[package]] name = "prometheus" version = "0.13.4" @@ -8715,7 +8448,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.10.5", + "itertools 0.14.0", "log", "multimap", "petgraph 0.8.3", @@ -8749,7 +8482,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.114", @@ -8762,7 +8495,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.114", @@ -9339,7 +9072,7 @@ dependencies = [ "once_cell", "percent-encoding", "rand 0.8.5", - "reqwest 0.12.24", + "reqwest 0.12.28", "rsa", "serde", "serde_json", @@ -9433,7 +9166,7 @@ dependencies = [ "http 1.4.0", "http-body-util", "reqsign-core", - "reqwest 0.12.24", + "reqwest 0.12.28", "wasm-bindgen-futures", ] @@ -9466,7 +9199,6 @@ dependencies = [ "pin-project-lite", "quinn", "rustls 0.23.36", - "rustls-native-certs 0.8.3", "rustls-pki-types", "serde", "serde_json", @@ -9508,7 +9240,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", "rustls-platform-verifier", "serde", @@ -9517,7 +9249,7 @@ dependencies = [ "tokio", "tokio-rustls 0.26.4", "tokio-util", - "tower 0.5.2", + "tower 0.5.3", "tower-http", "tower-service", "url", @@ -9537,7 +9269,7 @@ dependencies = [ "async-trait", "http 1.4.0", "reqwest 0.13.2", - "thiserror 2.0.17", + "thiserror 2.0.18", "tower-service", ] @@ -9556,7 +9288,7 @@ dependencies = [ "reqwest 0.13.2", "reqwest-middleware", "retry-policies", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "wasmtimer 0.4.3", @@ -9983,10 +9715,10 @@ dependencies = [ "jni", "log", "once_cell", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-native-certs 0.8.3", "rustls-platform-verifier-android", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.9", "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs", @@ -10397,16 +10129,6 @@ 
dependencies = [ "cfg-if 1.0.4", "cpufeatures", "digest", - "sha2-asm", -] - -[[package]] -name = "sha2-asm" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b845214d6175804686b2bd482bcffe96651bb2d1200742b712003504a2dac1ab" -dependencies = [ - "cc", ] [[package]] @@ -11008,6 +10730,80 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "subxet" +version = "0.1.0" +source = "git+https://github.com/kszucs/subxet#c7aea507b6848d25ce404cf83a569fe4c1c88352" +dependencies = [ + "anyhow", + "async-trait", + "axum 0.8.8", + "base64 0.22.1", + "bincode", + "blake3", + "bytemuck", + "bytes", + "chrono", + "clap", + "colored", + "const-str", + "countio", + "csv", + "ctor", + "derivative", + "dirs", + "duration-str", + "futures", + "futures-util", + "gearhash", + "getrandom 0.4.1", + "half", + "heapify", + "heed", + "http 1.4.0", + "hyper 1.8.1", + "itertools 0.14.0", + "konst", + "lazy_static", + "libc", + "lz4_flex", + "more-asserts", + "oneshot", + "pin-project", + "prometheus 0.14.0", + "rand 0.9.2", + "regex", + "reqwest 0.13.2", + "reqwest-middleware", + "reqwest-retry", + "safe-transmute", + "serde", + "serde_json", + "serde_repr", + "sha2", + "shellexpand", + "static_assertions", + "statrs", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tokio-retry", + "tokio-util", + "tower-http", + "tracing", + "tracing-log", + "tracing-subscriber", + "ulid", + "url", + "urlencoding", + "uuid", + "walkdir", + "warp", + "web-time", + "whoami 2.1.1", + "winapi", +] + [[package]] name = "suppaftp" version = "6.3.0" @@ -11045,7 +10841,7 @@ dependencies = [ "path-clean", "pharos", "reblessive", - "reqwest 0.12.24", + "reqwest 0.12.28", "revision", "ring", "rust_decimal", @@ -12269,32 +12065,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" -[[package]] -name = "utils" -version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "async-trait", - "bincode", - "bytes", - "ctor", - "derivative", - "duration-str", - "error_printer", - "futures", - "lazy_static", - "merklehash", - "pin-project", - "rand 0.9.2", - "serde", - "shellexpand", - "thiserror 2.0.18", - "tokio", - "tokio-util", - "tracing", - "web-time", -] - [[package]] name = "uuid" version = "1.20.0" @@ -12615,34 +12385,6 @@ dependencies = [ "wasmparser", ] -[[package]] -name = "wasm-bindgen-test-shared" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8145dd1593bf0fb137dbfa85b8be79ec560a447298955877804640e40c2d6ea" - -[[package]] -name = "wasm-encoder" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" -dependencies = [ - "leb128fmt", - "wasmparser", -] - -[[package]] -name = "wasm-metadata" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" -dependencies = [ - "anyhow", - "indexmap 2.12.1", - "wasm-encoder", - "wasmparser", -] - [[package]] name = "wasm-streams" version = "0.4.2" @@ -12835,7 +12577,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -13481,33 +13223,6 @@ dependencies = [ "rustix 1.1.3", ] -[[package]] -name = "xet_config" -version = "0.14.5" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "const-str", - 
"konst", - "utils", -] - -[[package]] -name = "xet_runtime" -version = "0.1.0" -source = "git+https://github.com/kszucs/xet-core.git?branch=download_bytes#c9e8747cba6755c47289e2b0bdcf2e887b76abda" -dependencies = [ - "dirs", - "error_printer", - "libc", - "oneshot", - "reqwest", - "thiserror 2.0.18", - "tokio", - "tracing", - "utils", - "xet_config", -] - [[package]] name = "xml-rs" version = "0.8.28" diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index 2dad87be93fe..b8ac0475ecdc 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -34,9 +34,7 @@ all-features = true default = [] xet = [ "dep:reqwest", - "dep:xet-data", - "dep:cas_types", - "dep:xet-utils", + "dep:subxet", "dep:futures", "dep:async-trait", ] @@ -53,13 +51,11 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } # XET storage protocol support (optional) async-trait = { version = "0.1", optional = true } -cas_types = { git = "https://github.com/kszucs/xet-core.git", branch = "download_bytes", optional = true } futures = { workspace = true, optional = true } reqwest = { version = "0.12", default-features = false, features = [ "rustls-tls", ], optional = true } -xet-data = { package = "data", git = "https://github.com/kszucs/xet-core.git", branch = "download_bytes", optional = true } -xet-utils = { package = "utils", git = "https://github.com/kszucs/xet-core.git", branch = "download_bytes", optional = true } +subxet = { git = "https://github.com/kszucs/subxet", optional = true } [dev-dependencies] futures = { workspace = true } diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index e00769ef7398..a36bac5d3db4 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -28,11 +28,11 @@ use http::header; use serde::Deserialize; #[cfg(feature = "xet")] -use xet_data::XetFileInfo; +use subxet::data::XetFileInfo; #[cfg(feature = "xet")] -use xet_data::streaming::XetClient; +use 
subxet::data::streaming::XetClient; #[cfg(feature = "xet")] -use xet_utils::auth::TokenRefresher; +use subxet::utils::auth::TokenRefresher; use super::error::parse_error; use super::uri::HfRepo; @@ -204,12 +204,14 @@ impl XetTokenRefresher { #[cfg(feature = "xet")] #[async_trait::async_trait] impl TokenRefresher for XetTokenRefresher { - async fn refresh(&self) -> std::result::Result<(String, u64), xet_utils::errors::AuthError> { + async fn refresh( + &self, + ) -> std::result::Result<(String, u64), subxet::utils::errors::AuthError> { let token = self .core .xet_token(self.token_type) .await - .map_err(xet_utils::errors::AuthError::token_refresh_failure)?; + .map_err(subxet::utils::errors::AuthError::token_refresh_failure)?; Ok((token.access_token, token.exp)) } } diff --git a/core/services/hf/src/reader.rs b/core/services/hf/src/reader.rs index 09434ceee12d..c8cb08b59e3d 100644 --- a/core/services/hf/src/reader.rs +++ b/core/services/hf/src/reader.rs @@ -19,10 +19,10 @@ use http::Response; use http::StatusCode; use http::header; -#[cfg(feature = "xet")] -use cas_types::FileRange; #[cfg(feature = "xet")] use futures::StreamExt; +#[cfg(feature = "xet")] +use subxet::cas_types::FileRange; use super::core::HfCore; #[cfg(feature = "xet")] @@ -32,9 +32,9 @@ use super::uri::RepoType; use opendal_core::raw::*; use opendal_core::*; #[cfg(feature = "xet")] -use xet_data::XetFileInfo; +use subxet::data::XetFileInfo; #[cfg(feature = "xet")] -use xet_data::streaming::XetReader; +use subxet::data::streaming::XetReader; pub enum HfReader { Http(HttpBody), diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 10605905bde9..6d144c2d2300 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -29,7 +29,7 @@ use super::uri::RepoType; use opendal_core::raw::*; use opendal_core::*; #[cfg(feature = "xet")] -use xet_data::streaming::XetWriter; +use subxet::data::streaming::XetWriter; /// Writer that handles both regular (small) 
and XET (large) file uploads. pub enum HfWriter { From 8f0d98012a0f9c54e5ba7bb98e83b2cbee126d9b Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 13 Feb 2026 14:44:23 +0100 Subject: [PATCH 20/25] chore(hf): format cargo toml --- core/services/hf/Cargo.toml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index b8ac0475ecdc..49ff46cc879b 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -32,12 +32,7 @@ all-features = true [features] default = [] -xet = [ - "dep:reqwest", - "dep:subxet", - "dep:futures", - "dep:async-trait", -] +xet = ["dep:reqwest", "dep:subxet", "dep:futures", "dep:async-trait"] [dependencies] backon = "1.6" From 0073475971248d6bab019cc92e34a4ced0bd487f Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 18 Feb 2026 11:22:17 +0100 Subject: [PATCH 21/25] chore(hf): adjustments after rebase --- core/Cargo.lock | 24 ++++++++++++------------ core/services/hf/src/backend.rs | 23 +++++++++++------------ core/services/hf/src/config.rs | 9 +++------ core/services/hf/src/lib.rs | 4 ++-- 4 files changed, 28 insertions(+), 32 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 56e405baf7d7..80128e79dbd8 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -6309,8 +6309,8 @@ dependencies = [ "opendal-service-gridfs", "opendal-service-hdfs", "opendal-service-hdfs-native", + "opendal-service-hf", "opendal-service-http", - "opendal-service-huggingface", "opendal-service-ipfs", "opendal-service-ipmfs", "opendal-service-koofr", @@ -7054,33 +7054,33 @@ dependencies = [ ] [[package]] -name = "opendal-service-http" +name = "opendal-service-hf" version = "0.55.0" dependencies = [ + "async-trait", + "backon", + "base64 0.22.1", + "bytes", + "futures", "http 1.4.0", "log", "opendal-core", + "percent-encoding", + "reqwest 0.12.28", "serde", + "serde_json", + "subxet", "tokio", ] [[package]] -name = "opendal-service-huggingface" +name = 
"opendal-service-http" version = "0.55.0" dependencies = [ - "async-trait", - "backon", - "base64 0.22.1", - "bytes", - "futures", "http 1.4.0", "log", "opendal-core", - "percent-encoding", - "reqwest 0.12.28", "serde", - "serde_json", - "subxet", "tokio", ] diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 2927a9763859..41e1a4f6db50 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -191,18 +191,17 @@ impl Builder for HfBuilder { let info: Arc = { let am = AccessorInfo::default(); - am.set_scheme(HF_SCHEME) - .set_native_capability(Capability { - stat: true, - read: true, - write: token.is_some(), - delete: token.is_some(), - delete_max_size: Some(100), - list: true, - list_with_recursive: true, - shared: true, - ..Default::default() - }); + am.set_scheme(HF_SCHEME).set_native_capability(Capability { + stat: true, + read: true, + write: token.is_some(), + delete: token.is_some(), + delete_max_size: Some(100), + list: true, + list_with_recursive: true, + shared: true, + ..Default::default() + }); am.into() }; diff --git a/core/services/hf/src/config.rs b/core/services/hf/src/config.rs index 1bba36f37dc3..951e9c1054a2 100644 --- a/core/services/hf/src/config.rs +++ b/core/services/hf/src/config.rs @@ -15,15 +15,12 @@ // specific language governing permissions and limitations // under the License. -use std::fmt::Debug; - -use serde::Deserialize; -use serde::Serialize; - -use super::HF_SCHEME; use super::backend::HfBuilder; use super::uri::HfUri; use super::uri::RepoType; +use serde::Deserialize; +use serde::Serialize; +use std::fmt::Debug; /// Configuration for Hugging Face service support. 
#[derive(Default, Serialize, Deserialize, Clone, PartialEq, Eq)] diff --git a/core/services/hf/src/lib.rs b/core/services/hf/src/lib.rs index 0d4adbac9f1b..b50185d55d4d 100644 --- a/core/services/hf/src/lib.rs +++ b/core/services/hf/src/lib.rs @@ -30,12 +30,12 @@ pub fn register_hf_service(registry: &opendal_core::OperatorRegistry) { mod backend; mod config; mod core; +mod deleter; mod error; mod lister; mod reader; -mod writer; mod uri; -mod deleter; +mod writer; pub use backend::HfBuilder as Hf; pub use config::HfConfig; From 2c4e74d82a0bec8152fbf19a97a5c635bf85a381 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Sat, 21 Feb 2026 16:29:23 +0100 Subject: [PATCH 22/25] refactor(hf): make xet a mandatory dependency --- core/Cargo.toml | 2 -- core/services/hf/Cargo.toml | 19 ++++--------------- core/services/hf/src/backend.rs | 12 ++---------- core/services/hf/src/core.rs | 29 ++++------------------------- core/services/hf/src/reader.rs | 13 ------------- core/services/hf/src/uri.rs | 4 ---- core/services/hf/src/writer.rs | 20 ++------------------ 7 files changed, 12 insertions(+), 87 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 08dbc67e3e80..93be3579f2e0 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -147,10 +147,8 @@ services-gridfs = ["dep:opendal-service-gridfs"] services-hdfs = ["dep:opendal-service-hdfs"] services-hdfs-native = ["dep:opendal-service-hdfs-native"] services-hf = ["dep:opendal-service-hf"] -services-hf-xet = ["dep:opendal-service-hf", "opendal-service-hf/xet"] services-http = ["dep:opendal-service-http"] services-huggingface = ["services-hf"] -services-huggingface-xet = ["services-hf-xet"] services-ipfs = ["dep:opendal-service-ipfs"] services-ipmfs = ["dep:opendal-service-ipmfs"] services-koofr = ["dep:opendal-service-koofr"] diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index 49ff46cc879b..d95bd19a3291 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -30,35 
+30,24 @@ version = { workspace = true } [package.metadata.docs.rs] all-features = true -[features] -default = [] -xet = ["dep:reqwest", "dep:subxet", "dep:futures", "dep:async-trait"] - [dependencies] +async-trait = "0.1" backon = "1.6" base64 = { workspace = true } bytes = { workspace = true } +futures = { workspace = true } http = { workspace = true } log = { workspace = true } opendal-core = { path = "../../core", version = "0.55.0", default-features = false } percent-encoding = "2" +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } -# XET storage protocol support (optional) -async-trait = { version = "0.1", optional = true } -futures = { workspace = true, optional = true } -reqwest = { version = "0.12", default-features = false, features = [ - "rustls-tls", -], optional = true } -subxet = { git = "https://github.com/kszucs/subxet", optional = true } +subxet = { git = "https://github.com/kszucs/subxet" } [dev-dependencies] -futures = { workspace = true } opendal-core = { path = "../../core", version = "0.55.0", features = [ "reqwest-rustls-tls", ] } -reqwest = { version = "0.12", default-features = false, features = [ - "rustls-tls", -] } serde_json = { workspace = true } tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 41e1a4f6db50..6065fb1e8083 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -123,9 +123,8 @@ impl HfBuilder { /// Enable XET storage protocol for reads. /// - /// When the `xet` feature is compiled in, reads will check for - /// XET-backed files and use the XET protocol for downloading. - /// Default is disabled. + /// When enabled, reads will check for XET-backed files and use the + /// XET protocol for downloading. Default is disabled. 
pub fn enable_xet(mut self) -> Self { self.config.xet = true; self @@ -219,7 +218,6 @@ impl Builder for HfBuilder { token, endpoint, max_retries, - #[cfg(feature = "xet")] self.config.xet, )?), }) @@ -298,7 +296,6 @@ pub(super) mod test_utils { (repo_id, token) } - #[cfg(feature = "xet")] pub fn testing_bucket_credentials() -> (String, String) { let repo_id = std::env::var("HF_OPENDAL_BUCKET").expect("HF_OPENDAL_BUCKET must be set"); let token = std::env::var("HF_OPENDAL_TOKEN").expect("HF_OPENDAL_TOKEN must be set"); @@ -321,7 +318,6 @@ pub(super) mod test_utils { finish_operator(op) } - #[cfg(feature = "xet")] pub fn testing_xet_operator() -> Operator { let (repo_id, token) = testing_credentials(); let op = Operator::new( @@ -339,7 +335,6 @@ pub(super) mod test_utils { /// Operator for a bucket requiring HF_OPENDAL_BUCKET and HF_OPENDAL_TOKEN. /// Buckets always use XET for writes. - #[cfg(feature = "xet")] pub fn testing_bucket_operator() -> Operator { let (repo_id, token) = testing_bucket_credentials(); let op = Operator::new( @@ -377,7 +372,6 @@ pub(super) mod test_utils { finish_operator(op) } - #[cfg(feature = "xet")] pub fn mbpp_xet_operator() -> Operator { let mut builder = HfBuilder::default() .repo_type("dataset") @@ -394,7 +388,6 @@ pub(super) mod test_utils { #[cfg(test)] mod tests { use super::test_utils::mbpp_operator; - #[cfg(feature = "xet")] use super::test_utils::mbpp_xet_operator; use super::*; @@ -466,7 +459,6 @@ mod tests { assert_eq!(&footer.to_vec(), PARQUET_MAGIC); } - #[cfg(feature = "xet")] #[tokio::test] #[ignore = "requires network access"] async fn test_read_parquet_xet() { diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index a36bac5d3db4..85a3c52abcba 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -27,11 +27,8 @@ use http::Response; use http::header; use serde::Deserialize; -#[cfg(feature = "xet")] use subxet::data::XetFileInfo; -#[cfg(feature = "xet")] use 
subxet::data::streaming::XetClient; -#[cfg(feature = "xet")] use subxet::utils::auth::TokenRefresher; use super::error::parse_error; @@ -87,7 +84,6 @@ pub(super) struct DeletedFile { } /// Bucket batch operation payload structures -#[cfg(feature = "xet")] #[derive(Debug, serde::Serialize)] #[serde(tag = "type", rename_all = "camelCase")] pub(super) enum BucketOperation { @@ -176,7 +172,6 @@ pub(super) struct LastCommit { pub date: String, } -#[cfg(feature = "xet")] #[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub(super) struct XetToken { @@ -185,13 +180,11 @@ pub(super) struct XetToken { pub exp: u64, } -#[cfg(feature = "xet")] pub(super) struct XetTokenRefresher { core: HfCore, token_type: &'static str, } -#[cfg(feature = "xet")] impl XetTokenRefresher { pub(super) fn new(core: &HfCore, token_type: &'static str) -> Self { Self { @@ -201,7 +194,6 @@ impl XetTokenRefresher { } } -#[cfg(feature = "xet")] #[async_trait::async_trait] impl TokenRefresher for XetTokenRefresher { async fn refresh( @@ -229,15 +221,13 @@ pub struct HfCore { pub endpoint: String, pub max_retries: usize, - // Whether XET storage protocol is enabled for reads. When true - // and the `xet` feature is compiled in, reads will check for - // XET-backed files and use the XET protocol for downloading. - #[cfg(feature = "xet")] + // Whether XET storage protocol is enabled for reads. When true, + // reads will check for XET-backed files and use the XET protocol + // for downloading. pub xet_enabled: bool, /// HTTP client with redirects disabled, used by XET probes to /// inspect headers on 302 responses. 
- #[cfg(feature = "xet")] pub no_redirect_client: HttpClient, } @@ -247,7 +237,6 @@ impl Debug for HfCore { s.field("repo", &self.repo) .field("root", &self.root) .field("endpoint", &self.endpoint); - #[cfg(feature = "xet")] s.field("xet_enabled", &self.xet_enabled); s.finish_non_exhaustive() } @@ -261,7 +250,7 @@ impl HfCore { token: Option, endpoint: String, max_retries: usize, - #[cfg(feature = "xet")] xet_enabled: bool, + xet_enabled: bool, ) -> Result { // When xet is enabled at runtime, use dedicated reqwest clients instead // of the global one. This avoids "dispatch task is gone" errors when @@ -269,7 +258,6 @@ impl HfCore { // no-redirect client shares the same runtime as the standard client. // When xet is disabled, preserve whatever HTTP client is already set // on `info` (important for mock-based unit tests). - #[cfg(feature = "xet")] let no_redirect_client = if xet_enabled { let standard = HttpClient::with(build_reqwest(reqwest::redirect::Policy::default())?); let no_redirect = HttpClient::with(build_reqwest(reqwest::redirect::Policy::none())?); @@ -286,9 +274,7 @@ impl HfCore { token, endpoint, max_retries, - #[cfg(feature = "xet")] xet_enabled, - #[cfg(feature = "xet")] no_redirect_client, }) } @@ -372,7 +358,6 @@ impl HfCore { Ok(files.remove(0)) } - #[cfg(feature = "xet")] pub(super) async fn xet_token(&self, token_type: &str) -> Result { let url = self.repo.xet_token_url(&self.endpoint, token_type); let req = self @@ -383,7 +368,6 @@ impl HfCore { Ok(token) } - #[cfg(feature = "xet")] pub(super) async fn xet_client(&self, token_type: &'static str) -> Result { let token = self.xet_token(token_type).await?; let refresher = Arc::new(XetTokenRefresher::new(self, token_type)); @@ -402,7 +386,6 @@ impl HfCore { /// /// Uses a dedicated no-redirect HTTP client so we can inspect /// headers (e.g. `X-Xet-Hash`) on the 302 response. 
- #[cfg(feature = "xet")] pub(super) async fn maybe_xet_file(&self, path: &str) -> Result> { let uri = self.uri(path); let url = uri.resolve_url(&self.endpoint); @@ -530,7 +513,6 @@ impl HfCore { /// Upload files to a bucket using the batch API. /// /// Sends operations as JSON lines (one operation per line). - #[cfg(feature = "xet")] pub(super) async fn bucket_batch(&self, operations: Vec) -> Result<()> { let _token = self.token.as_deref().ok_or_else(|| { Error::new( @@ -638,7 +620,6 @@ pub(crate) mod test_utils { None, endpoint.to_string(), 3, - #[cfg(feature = "xet")] false, ) .unwrap(); @@ -734,12 +715,10 @@ mod tests { } } -#[cfg(feature = "xet")] pub(super) fn map_xet_error(err: impl std::error::Error + Send + Sync + 'static) -> Error { Error::new(ErrorKind::Unexpected, "xet operation failed").set_source(err) } -#[cfg(feature = "xet")] fn build_reqwest(policy: reqwest::redirect::Policy) -> Result { reqwest::Client::builder() .redirect(policy) diff --git a/core/services/hf/src/reader.rs b/core/services/hf/src/reader.rs index c8cb08b59e3d..735c80957a26 100644 --- a/core/services/hf/src/reader.rs +++ b/core/services/hf/src/reader.rs @@ -19,26 +19,19 @@ use http::Response; use http::StatusCode; use http::header; -#[cfg(feature = "xet")] use futures::StreamExt; -#[cfg(feature = "xet")] use subxet::cas_types::FileRange; use super::core::HfCore; -#[cfg(feature = "xet")] use super::core::map_xet_error; -#[cfg(feature = "xet")] use super::uri::RepoType; use opendal_core::raw::*; use opendal_core::*; -#[cfg(feature = "xet")] use subxet::data::XetFileInfo; -#[cfg(feature = "xet")] use subxet::data::streaming::XetReader; pub enum HfReader { Http(HttpBody), - #[cfg(feature = "xet")] Xet(XetReader), } @@ -49,7 +42,6 @@ impl HfReader { /// a HEAD request probes for the `X-Xet-Hash` header. Files stored on /// XET are downloaded via the CAS protocol; all others fall back to HTTP GET. 
pub async fn try_new(core: &HfCore, path: &str, range: BytesRange) -> Result { - #[cfg(feature = "xet")] if core.xet_enabled { // Buckets always use XET if core.repo.repo_type == RepoType::Bucket { @@ -97,7 +89,6 @@ impl HfReader { } } - #[cfg(feature = "xet")] async fn try_new_xet( core: &HfCore, file_info: &XetFileInfo, @@ -124,7 +115,6 @@ impl oio::Read for HfReader { async fn read(&mut self) -> Result { match self { Self::Http(body) => body.read().await, - #[cfg(feature = "xet")] Self::Xet(stream) => match stream.next().await { Some(Ok(bytes)) => Ok(Buffer::from(bytes)), Some(Err(e)) => Err(map_xet_error(e)), @@ -136,7 +126,6 @@ impl oio::Read for HfReader { #[cfg(test)] mod tests { - #[cfg(feature = "xet")] use super::super::backend::test_utils::mbpp_xet_operator; use super::super::backend::test_utils::{gpt2_operator, mbpp_operator}; @@ -163,7 +152,6 @@ mod tests { assert_eq!(&data.to_vec(), PARQUET_MAGIC); } - #[cfg(feature = "xet")] #[tokio::test] #[ignore = "requires network access"] async fn test_read_xet_parquet() { @@ -178,7 +166,6 @@ mod tests { assert_eq!(&bytes[bytes.len() - 4..], PARQUET_MAGIC); } - #[cfg(feature = "xet")] #[tokio::test] #[ignore = "requires network access"] async fn test_read_xet_range() { diff --git a/core/services/hf/src/uri.rs b/core/services/hf/src/uri.rs index c29d34431dfc..034438db7b4b 100644 --- a/core/services/hf/src/uri.rs +++ b/core/services/hf/src/uri.rs @@ -119,7 +119,6 @@ impl HfRepo { } /// Build the XET token API URL for this repository. - #[cfg(feature = "xet")] pub fn xet_token_url(&self, endpoint: &str, token_type: &str) -> String { match self.repo_type { RepoType::Bucket => { @@ -142,7 +141,6 @@ impl HfRepo { } /// Build the bucket batch API URL for this repository. 
- #[cfg(feature = "xet")] pub fn bucket_batch_url(&self, endpoint: &str) -> String { format!("{}/api/buckets/{}/batch", endpoint, &self.repo_id) } @@ -582,7 +580,6 @@ mod tests { } #[test] - #[cfg(feature = "xet")] fn test_bucket_xet_token_urls() { let p = resolve("buckets/user/bucket"); let read_url = p.repo.xet_token_url("https://huggingface.co", "read"); @@ -598,7 +595,6 @@ mod tests { } #[test] - #[cfg(feature = "xet")] fn test_bucket_batch_url() { let p = resolve("buckets/user/bucket"); let url = p.repo.bucket_batch_url("https://huggingface.co"); diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 6d144c2d2300..8aef9f17f8d1 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -16,19 +16,14 @@ // under the License. use std::sync::Arc; -#[cfg(feature = "xet")] use std::sync::Mutex; use base64::Engine; -#[cfg(feature = "xet")] -use super::core::{BucketOperation, LfsFile, map_xet_error}; -use super::core::{CommitFile, HfCore}; -#[cfg(feature = "xet")] +use super::core::{BucketOperation, CommitFile, HfCore, LfsFile, map_xet_error}; use super::uri::RepoType; use opendal_core::raw::*; use opendal_core::*; -#[cfg(feature = "xet")] use subxet::data::streaming::XetWriter; /// Writer that handles both regular (small) and XET (large) file uploads. @@ -40,7 +35,6 @@ pub enum HfWriter { buf: Vec, }, /// XET writer for large files using streaming protocol. - #[cfg(feature = "xet")] Xet { core: Arc, path: String, @@ -52,7 +46,6 @@ impl HfWriter { /// Create a new writer by determining the upload mode from the API. 
pub async fn try_new(core: Arc, path: String) -> Result { // Buckets always use XET and don't have a preupload endpoint - #[cfg(feature = "xet")] if core.repo.repo_type == RepoType::Bucket { if !core.xet_enabled { return Err(Error::new( @@ -72,7 +65,6 @@ impl HfWriter { let mode_str = core.determine_upload_mode(&path).await?; if mode_str == "lfs" { - #[cfg(feature = "xet")] if core.xet_enabled { let client = core.xet_client("write").await?; let writer = client.write(None).await.map_err(map_xet_error)?; @@ -84,7 +76,7 @@ impl HfWriter { } return Err(Error::new( ErrorKind::Unsupported, - "file requires LFS; enable the xet feature for large file support", + "file requires LFS; call enable_xet() on the builder for large file support", )); } @@ -112,7 +104,6 @@ impl oio::Write for HfWriter { buf.push(bs); Ok(()) } - #[cfg(feature = "xet")] HfWriter::Xet { writer, .. } => writer .get_mut() .unwrap() @@ -142,7 +133,6 @@ impl oio::Write for HfWriter { } Ok(meta) } - #[cfg(feature = "xet")] HfWriter::Xet { core, path, writer } => { let file_info = writer .get_mut() @@ -191,7 +181,6 @@ impl oio::Write for HfWriter { HfWriter::Regular { buf, .. } => { buf.clear(); } - #[cfg(feature = "xet")] HfWriter::Xet { writer, .. 
} => { let _ = writer.get_mut().unwrap().abort().await; } @@ -203,7 +192,6 @@ impl oio::Write for HfWriter { #[cfg(test)] mod tests { use super::super::backend::test_utils::testing_operator; - #[cfg(feature = "xet")] use super::super::backend::test_utils::{testing_bucket_operator, testing_xet_operator}; use super::*; use base64::Engine; @@ -256,7 +244,6 @@ mod tests { .expect("write with content type should succeed"); } - #[cfg(feature = "xet")] #[tokio::test] #[ignore] async fn test_write_xet() { @@ -404,7 +391,6 @@ mod tests { // --- Bucket tests (require HF_OPENDAL_BUCKET and HF_OPENDAL_TOKEN) --- - #[cfg(feature = "xet")] #[tokio::test] #[ignore] async fn test_bucket_write() { @@ -422,7 +408,6 @@ mod tests { let _ = op.delete(path).await; } - #[cfg(feature = "xet")] #[tokio::test] #[ignore] async fn test_bucket_write_roundtrip() { @@ -443,7 +428,6 @@ mod tests { let _ = op.delete(path).await; } - #[cfg(feature = "xet")] #[tokio::test] #[ignore] async fn test_bucket_overwrite() { From 1064f9b4948d9cf785560063cb50f69119f0bda4 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Sat, 21 Feb 2026 16:37:15 +0100 Subject: [PATCH 23/25] refactor(hf): remove the option to disable xet in runtime --- core/services/hf/src/backend.rs | 50 +----------------------- core/services/hf/src/config.rs | 7 ---- core/services/hf/src/core.rs | 68 +++++++++++++++++---------------- core/services/hf/src/reader.rs | 34 +++++++---------- core/services/hf/src/writer.rs | 30 +++++---------- 5 files changed, 60 insertions(+), 129 deletions(-) diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 6065fb1e8083..14eda4b837e5 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -121,21 +121,6 @@ impl HfBuilder { self } - /// Enable XET storage protocol for reads. - /// - /// When enabled, reads will check for XET-backed files and use the - /// XET protocol for downloading. Default is disabled. 
- pub fn enable_xet(mut self) -> Self { - self.config.xet = true; - self - } - - /// Disable XET storage protocol for reads. - pub fn disable_xet(mut self) -> Self { - self.config.xet = false; - self - } - /// Set the maximum number of retries for commit operations. /// /// Retries on commit conflicts (HTTP 412) and transient server @@ -211,14 +196,13 @@ impl Builder for HfBuilder { debug!("backend max_retries: {}", max_retries); Ok(HfBackend { - core: Arc::new(HfCore::new( + core: Arc::new(HfCore::build( info, repo, root, token, endpoint, max_retries, - self.config.xet, )?), }) } @@ -318,23 +302,7 @@ pub(super) mod test_utils { finish_operator(op) } - pub fn testing_xet_operator() -> Operator { - let (repo_id, token) = testing_credentials(); - let op = Operator::new( - HfBuilder::default() - .repo_type("dataset") - .repo_id(&repo_id) - .token(&token) - .enable_xet() - .max_retries(10), - ) - .unwrap() - .finish(); - finish_operator(op) - } - /// Operator for a bucket requiring HF_OPENDAL_BUCKET and HF_OPENDAL_TOKEN. - /// Buckets always use XET for writes. 
pub fn testing_bucket_operator() -> Operator { let (repo_id, token) = testing_bucket_credentials(); let op = Operator::new( @@ -342,7 +310,6 @@ pub(super) mod test_utils { .repo_type("bucket") .repo_id(&repo_id) .token(&token) - .enable_xet() .max_retries(10), ) .unwrap() @@ -371,24 +338,11 @@ pub(super) mod test_utils { .finish(); finish_operator(op) } - - pub fn mbpp_xet_operator() -> Operator { - let mut builder = HfBuilder::default() - .repo_type("dataset") - .repo_id("google-research-datasets/mbpp") - .enable_xet(); - if let Ok(token) = std::env::var("HF_OPENDAL_TOKEN") { - builder = builder.token(&token); - } - let op = Operator::new(builder).unwrap().finish(); - finish_operator(op) - } } #[cfg(test)] mod tests { use super::test_utils::mbpp_operator; - use super::test_utils::mbpp_xet_operator; use super::*; #[test] @@ -462,7 +416,7 @@ mod tests { #[tokio::test] #[ignore = "requires network access"] async fn test_read_parquet_xet() { - let op = mbpp_xet_operator(); + let op = mbpp_operator(); let path = "full/train-00000-of-00001.parquet"; // Full read via XET and verify parquet magic at both ends diff --git a/core/services/hf/src/config.rs b/core/services/hf/src/config.rs index 951e9c1054a2..7f4e5ac05b8e 100644 --- a/core/services/hf/src/config.rs +++ b/core/services/hf/src/config.rs @@ -52,12 +52,6 @@ pub struct HfConfig { /// /// Default is "https://huggingface.co". pub endpoint: Option, - /// Enable XET storage protocol for reads. - /// - /// When true and the `xet` feature is compiled in, reads will - /// check for XET-backed files and use the XET protocol for - /// downloading. Default is false. - pub xet: bool, /// Maximum number of retries for commit operations. 
/// /// Retries on commit conflicts (HTTP 412) and transient server @@ -125,7 +119,6 @@ impl opendal_core::Configurator for HfConfig { root: opts.get("root").cloned(), token: opts.get("token").cloned(), endpoint: opts.get("endpoint").cloned(), - xet: opts.get("xet").is_some_and(|v| v == "true"), ..Default::default() }) } diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index 85a3c52abcba..efaa9328f8d7 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -221,11 +221,6 @@ pub struct HfCore { pub endpoint: String, pub max_retries: usize, - // Whether XET storage protocol is enabled for reads. When true, - // reads will check for XET-backed files and use the XET protocol - // for downloading. - pub xet_enabled: bool, - /// HTTP client with redirects disabled, used by XET probes to /// inspect headers on 302 responses. pub no_redirect_client: HttpClient, @@ -233,12 +228,11 @@ pub struct HfCore { impl Debug for HfCore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut s = f.debug_struct("HfCore"); - s.field("repo", &self.repo) + f.debug_struct("HfCore") + .field("repo", &self.repo) .field("root", &self.root) - .field("endpoint", &self.endpoint); - s.field("xet_enabled", &self.xet_enabled); - s.finish_non_exhaustive() + .field("endpoint", &self.endpoint) + .finish_non_exhaustive() } } @@ -250,33 +244,44 @@ impl HfCore { token: Option, endpoint: String, max_retries: usize, - xet_enabled: bool, + no_redirect_client: HttpClient, + ) -> Self { + Self { + info, + repo, + root, + token, + endpoint, + max_retries, + no_redirect_client, + } + } + + /// Build HfCore with dedicated reqwest HTTP clients. + /// + /// Uses separate clients for standard and no-redirect requests to + /// avoid "dispatch task is gone" errors with multiple tokio runtimes. 
+ pub fn build( + info: Arc, + repo: HfRepo, + root: String, + token: Option, + endpoint: String, + max_retries: usize, ) -> Result { - // When xet is enabled at runtime, use dedicated reqwest clients instead - // of the global one. This avoids "dispatch task is gone" errors when - // multiple tokio runtimes exist (e.g. in tests) and ensures the - // no-redirect client shares the same runtime as the standard client. - // When xet is disabled, preserve whatever HTTP client is already set - // on `info` (important for mock-based unit tests). - let no_redirect_client = if xet_enabled { - let standard = HttpClient::with(build_reqwest(reqwest::redirect::Policy::default())?); - let no_redirect = HttpClient::with(build_reqwest(reqwest::redirect::Policy::none())?); - info.update_http_client(|_| standard); - no_redirect - } else { - info.http_client() - }; + let standard = HttpClient::with(build_reqwest(reqwest::redirect::Policy::default())?); + let no_redirect = HttpClient::with(build_reqwest(reqwest::redirect::Policy::none())?); + info.update_http_client(|_| standard); - Ok(Self { + Ok(Self::new( info, repo, root, token, endpoint, max_retries, - xet_enabled, - no_redirect_client, - }) + no_redirect, + )) } /// Build an authenticated HTTP request. @@ -620,9 +625,8 @@ pub(crate) mod test_utils { None, endpoint.to_string(), 3, - false, - ) - .unwrap(); + HttpClient::with(mock_client.clone()), + ); (core, mock_client) } diff --git a/core/services/hf/src/reader.rs b/core/services/hf/src/reader.rs index 735c80957a26..5817fd82ed03 100644 --- a/core/services/hf/src/reader.rs +++ b/core/services/hf/src/reader.rs @@ -38,26 +38,19 @@ pub enum HfReader { impl HfReader { /// Create a reader, automatically choosing between XET and HTTP. /// - /// Buckets always use XET. For other repo types, when XET is enabled - /// a HEAD request probes for the `X-Xet-Hash` header. Files stored on - /// XET are downloaded via the CAS protocol; all others fall back to HTTP GET. 
+ /// Buckets always use XET. For other repo types, a HEAD request + /// probes for the `X-Xet-Hash` header. Files stored on XET are + /// downloaded via the CAS protocol; all others fall back to HTTP GET. pub async fn try_new(core: &HfCore, path: &str, range: BytesRange) -> Result { - if core.xet_enabled { - // Buckets always use XET - if core.repo.repo_type == RepoType::Bucket { - if let Some(xet_file) = core.maybe_xet_file(path).await? { - return Self::try_new_xet(core, &xet_file, range).await; - } - return Err(Error::new( - ErrorKind::Unexpected, - "bucket file is missing XET metadata", - )); - } + if let Some(xet_file) = core.maybe_xet_file(path).await? { + return Self::try_new_xet(core, &xet_file, range).await; + } - // For other repos, probe for XET - if let Some(xet_file) = core.maybe_xet_file(path).await? { - return Self::try_new_xet(core, &xet_file, range).await; - } + if core.repo.repo_type == RepoType::Bucket { + return Err(Error::new( + ErrorKind::Unexpected, + "bucket file is missing XET metadata", + )); } Self::try_new_http(core, path, range).await @@ -126,7 +119,6 @@ impl oio::Read for HfReader { #[cfg(test)] mod tests { - use super::super::backend::test_utils::mbpp_xet_operator; use super::super::backend::test_utils::{gpt2_operator, mbpp_operator}; /// Parquet magic bytes: "PAR1" @@ -155,7 +147,7 @@ mod tests { #[tokio::test] #[ignore = "requires network access"] async fn test_read_xet_parquet() { - let op = mbpp_xet_operator(); + let op = mbpp_operator(); let data = op .read("full/train-00000-of-00001.parquet") .await @@ -169,7 +161,7 @@ mod tests { #[tokio::test] #[ignore = "requires network access"] async fn test_read_xet_range() { - let op = mbpp_xet_operator(); + let op = mbpp_operator(); let data = op .read_with("full/train-00000-of-00001.parquet") .range(0..4) diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 8aef9f17f8d1..0afded654031 100644 --- a/core/services/hf/src/writer.rs +++ 
b/core/services/hf/src/writer.rs @@ -47,12 +47,6 @@ impl HfWriter { pub async fn try_new(core: Arc, path: String) -> Result { // Buckets always use XET and don't have a preupload endpoint if core.repo.repo_type == RepoType::Bucket { - if !core.xet_enabled { - return Err(Error::new( - ErrorKind::Unsupported, - "buckets require XET to be enabled", - )); - } let client = core.xet_client("write").await?; let writer = client.write(None).await.map_err(map_xet_error)?; return Ok(HfWriter::Xet { @@ -65,19 +59,13 @@ impl HfWriter { let mode_str = core.determine_upload_mode(&path).await?; if mode_str == "lfs" { - if core.xet_enabled { - let client = core.xet_client("write").await?; - let writer = client.write(None).await.map_err(map_xet_error)?; - return Ok(HfWriter::Xet { - core, - path, - writer: Mutex::new(writer), - }); - } - return Err(Error::new( - ErrorKind::Unsupported, - "file requires LFS; call enable_xet() on the builder for large file support", - )); + let client = core.xet_client("write").await?; + let writer = client.write(None).await.map_err(map_xet_error)?; + return Ok(HfWriter::Xet { + core, + path, + writer: Mutex::new(writer), + }); } Ok(HfWriter::Regular { @@ -191,8 +179,8 @@ impl oio::Write for HfWriter { #[cfg(test)] mod tests { + use super::super::backend::test_utils::testing_bucket_operator; use super::super::backend::test_utils::testing_operator; - use super::super::backend::test_utils::{testing_bucket_operator, testing_xet_operator}; use super::*; use base64::Engine; @@ -247,7 +235,7 @@ mod tests { #[tokio::test] #[ignore] async fn test_write_xet() { - let op = testing_xet_operator(); + let op = testing_operator(); op.write("test-xet.bin", b"Binary data for XET test".as_slice()) .await .expect("xet write should succeed"); From 839455ab2582ce448d0e73c5b27b95c12d51fb6c Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Sat, 21 Feb 2026 18:08:35 +0100 Subject: [PATCH 24/25] chore(hf): more explicit error handling --- core/services/hf/src/core.rs | 8 
++--- core/services/hf/src/reader.rs | 22 +++++++++++-- core/services/hf/src/writer.rs | 60 +++++++++++++++++----------------- 3 files changed, 52 insertions(+), 38 deletions(-) diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index efaa9328f8d7..fb35eaf9552e 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -382,7 +382,9 @@ impl HfCore { Some(refresher), "opendal/1.0".to_string(), ) - .map_err(map_xet_error) + .map_err(|err| { + Error::new(ErrorKind::Unexpected, "failed to create xet client").set_source(err) + }) } /// Issue a HEAD request and extract XET file info (hash and size). @@ -719,10 +721,6 @@ mod tests { } } -pub(super) fn map_xet_error(err: impl std::error::Error + Send + Sync + 'static) -> Error { - Error::new(ErrorKind::Unexpected, "xet operation failed").set_source(err) -} - fn build_reqwest(policy: reqwest::redirect::Policy) -> Result { reqwest::Client::builder() .redirect(policy) diff --git a/core/services/hf/src/reader.rs b/core/services/hf/src/reader.rs index 5817fd82ed03..e2be0af14d78 100644 --- a/core/services/hf/src/reader.rs +++ b/core/services/hf/src/reader.rs @@ -23,11 +23,12 @@ use futures::StreamExt; use subxet::cas_types::FileRange; use super::core::HfCore; -use super::core::map_xet_error; use super::uri::RepoType; use opendal_core::raw::*; use opendal_core::*; +use subxet::cas_client::CasClientError; use subxet::data::XetFileInfo; +use subxet::data::errors::DataProcessingError; use subxet::data::streaming::XetReader; pub enum HfReader { @@ -99,7 +100,9 @@ impl HfReader { let reader = client .read(file_info.clone(), file_range, None, 256) - .map_err(map_xet_error)?; + .map_err(|err| { + Error::new(ErrorKind::Unexpected, "failed to create xet reader").set_source(err) + })?; Ok(Self::Xet(reader)) } } @@ -110,7 +113,20 @@ impl oio::Read for HfReader { Self::Http(body) => body.read().await, Self::Xet(stream) => match stream.next().await { Some(Ok(bytes)) => Ok(Buffer::from(bytes)), - 
Some(Err(e)) => Err(map_xet_error(e)), + Some(Err(e)) => { + let kind = match &e { + DataProcessingError::CasClientError( + CasClientError::FileNotFound(_) | CasClientError::XORBNotFound(_), + ) + | DataProcessingError::HashNotFound => ErrorKind::NotFound, + DataProcessingError::CasClientError(CasClientError::InvalidRange) => { + ErrorKind::RangeNotSatisfied + } + _ => ErrorKind::Unexpected, + }; + let err = Error::new(kind, "xet read error").set_source(e); + Err(err) + } None => Ok(Buffer::new()), }, } diff --git a/core/services/hf/src/writer.rs b/core/services/hf/src/writer.rs index 0afded654031..ec8d7c816f9f 100644 --- a/core/services/hf/src/writer.rs +++ b/core/services/hf/src/writer.rs @@ -20,10 +20,11 @@ use std::sync::Mutex; use base64::Engine; -use super::core::{BucketOperation, CommitFile, HfCore, LfsFile, map_xet_error}; +use super::core::{BucketOperation, CommitFile, HfCore, LfsFile}; use super::uri::RepoType; use opendal_core::raw::*; use opendal_core::*; +use subxet::data::errors::DataProcessingError; use subxet::data::streaming::XetWriter; /// Writer that handles both regular (small) and XET (large) file uploads. @@ -45,34 +46,33 @@ pub enum HfWriter { impl HfWriter { /// Create a new writer by determining the upload mode from the API. pub async fn try_new(core: Arc, path: String) -> Result { - // Buckets always use XET and don't have a preupload endpoint - if core.repo.repo_type == RepoType::Bucket { + // Buckets always use XET and don't have a preupload endpoint; + // other repo types check the preupload API. + let use_xet = core.repo.repo_type == RepoType::Bucket + || core.determine_upload_mode(&path).await? 
== "lfs"; + + let writer = if use_xet { let client = core.xet_client("write").await?; - let writer = client.write(None).await.map_err(map_xet_error)?; - return Ok(HfWriter::Xet { + let writer = client.write(None).await.map_err(|err| { + let kind = match &err { + DataProcessingError::AuthError(_) => ErrorKind::PermissionDenied, + _ => ErrorKind::Unexpected, + }; + Error::new(kind, "failed to create xet writer").set_source(err) + })?; + HfWriter::Xet { core, path, writer: Mutex::new(writer), - }); - } - - let mode_str = core.determine_upload_mode(&path).await?; - - if mode_str == "lfs" { - let client = core.xet_client("write").await?; - let writer = client.write(None).await.map_err(map_xet_error)?; - return Ok(HfWriter::Xet { + } + } else { + HfWriter::Regular { core, path, - writer: Mutex::new(writer), - }); - } - - Ok(HfWriter::Regular { - core, - path, - buf: Vec::new(), - }) + buf: Vec::new(), + } + }; + Ok(writer) } fn prepare_commit_file(path: &str, body: &[u8]) -> CommitFile { @@ -97,7 +97,10 @@ impl oio::Write for HfWriter { .unwrap() .write(bs.to_bytes()) .await - .map_err(map_xet_error), + .map_err(|err| { + Error::new(ErrorKind::Unexpected, "failed to write chunk to xet stream") + .set_source(err) + }), } } @@ -122,12 +125,9 @@ impl oio::Write for HfWriter { Ok(meta) } HfWriter::Xet { core, path, writer } => { - let file_info = writer - .get_mut() - .unwrap() - .close() - .await - .map_err(map_xet_error)?; + let file_info = writer.get_mut().unwrap().close().await.map_err(|err| { + Error::new(ErrorKind::Unexpected, "failed to close xet writer").set_source(err) + })?; let meta = Metadata::default().with_content_length(file_info.file_size()); From 21368c50f9b39dc39086aa4446d25e735b3ce037 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Sat, 21 Feb 2026 20:56:30 +0100 Subject: [PATCH 25/25] chore(hf): remove redundant retry logic --- core/Cargo.lock | 1 - core/services/hf/Cargo.toml | 5 ++- core/services/hf/src/backend.rs | 28 ++------------ 
core/services/hf/src/config.rs | 5 --- core/services/hf/src/core.rs | 65 ++++++--------------------------- 5 files changed, 18 insertions(+), 86 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 80128e79dbd8..2e98a70a6567 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -7058,7 +7058,6 @@ name = "opendal-service-hf" version = "0.55.0" dependencies = [ "async-trait", - "backon", "base64 0.22.1", "bytes", "futures", diff --git a/core/services/hf/Cargo.toml b/core/services/hf/Cargo.toml index d95bd19a3291..b47159839dff 100644 --- a/core/services/hf/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -32,7 +32,6 @@ all-features = true [dependencies] async-trait = "0.1" -backon = "1.6" base64 = { workspace = true } bytes = { workspace = true } futures = { workspace = true } @@ -40,7 +39,9 @@ http = { workspace = true } log = { workspace = true } opendal-core = { path = "../../core", version = "0.55.0", default-features = false } percent-encoding = "2" -reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } +reqwest = { version = "0.12", default-features = false, features = [ + "rustls-tls", +] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } subxet = { git = "https://github.com/kszucs/subxet" } diff --git a/core/services/hf/src/backend.rs b/core/services/hf/src/backend.rs index 14eda4b837e5..2dd41b472555 100644 --- a/core/services/hf/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -120,15 +120,6 @@ impl HfBuilder { } self } - - /// Set the maximum number of retries for commit operations. - /// - /// Retries on commit conflicts (HTTP 412) and transient server - /// errors (HTTP 5xx). Default is 3. 
- pub fn max_retries(mut self, max_retries: usize) -> Self { - self.config.max_retries = Some(max_retries); - self - } } impl Builder for HfBuilder { @@ -192,18 +183,8 @@ impl Builder for HfBuilder { let repo = HfRepo::new(repo_type, repo_id, Some(revision.clone())); debug!("backend repo uri: {:?}", repo.uri(&root, "")); - let max_retries = self.config.max_retries.unwrap_or(3); - debug!("backend max_retries: {}", max_retries); - Ok(HfBackend { - core: Arc::new(HfCore::build( - info, - repo, - root, - token, - endpoint, - max_retries, - )?), + core: Arc::new(HfCore::build(info, repo, root, token, endpoint)?), }) } } @@ -287,15 +268,13 @@ pub(super) mod test_utils { } /// Operator for a private dataset requiring HF_OPENDAL_DATASET and HF_OPENDAL_TOKEN. - /// Uses higher max_retries to tolerate concurrent commit conflicts (412). pub fn testing_operator() -> Operator { let (repo_id, token) = testing_credentials(); let op = Operator::new( HfBuilder::default() .repo_type("dataset") .repo_id(&repo_id) - .token(&token) - .max_retries(10), + .token(&token), ) .unwrap() .finish(); @@ -309,8 +288,7 @@ pub(super) mod test_utils { HfBuilder::default() .repo_type("bucket") .repo_id(&repo_id) - .token(&token) - .max_retries(10), + .token(&token), ) .unwrap() .finish(); diff --git a/core/services/hf/src/config.rs b/core/services/hf/src/config.rs index 7f4e5ac05b8e..8874ea55cfa1 100644 --- a/core/services/hf/src/config.rs +++ b/core/services/hf/src/config.rs @@ -52,11 +52,6 @@ pub struct HfConfig { /// /// Default is "https://huggingface.co". pub endpoint: Option, - /// Maximum number of retries for commit operations. - /// - /// Retries on commit conflicts (HTTP 412) and transient server - /// errors (HTTP 5xx). Default is 3. 
- pub max_retries: Option, } impl Debug for HfConfig { diff --git a/core/services/hf/src/core.rs b/core/services/hf/src/core.rs index fb35eaf9552e..6b139bf839e2 100644 --- a/core/services/hf/src/core.rs +++ b/core/services/hf/src/core.rs @@ -18,8 +18,6 @@ use std::fmt::Debug; use std::sync::Arc; -use backon::ExponentialBuilder; -use backon::Retryable; use bytes::Buf; use bytes::Bytes; use http::Request; @@ -219,7 +217,6 @@ pub struct HfCore { pub root: String, pub token: Option, pub endpoint: String, - pub max_retries: usize, /// HTTP client with redirects disabled, used by XET probes to /// inspect headers on 302 responses. @@ -243,7 +240,6 @@ impl HfCore { root: String, token: Option, endpoint: String, - max_retries: usize, no_redirect_client: HttpClient, ) -> Self { Self { @@ -252,7 +248,6 @@ impl HfCore { root, token, endpoint, - max_retries, no_redirect_client, } } @@ -267,21 +262,12 @@ impl HfCore { root: String, token: Option, endpoint: String, - max_retries: usize, ) -> Result { let standard = HttpClient::with(build_reqwest(reqwest::redirect::Policy::default())?); let no_redirect = HttpClient::with(build_reqwest(reqwest::redirect::Policy::none())?); info.update_http_client(|_| standard); - Ok(Self::new( - info, - repo, - root, - token, - endpoint, - max_retries, - no_redirect, - )) + Ok(Self::new(info, repo, root, token, endpoint, no_redirect)) } /// Build an authenticated HTTP request. @@ -304,30 +290,14 @@ impl HfCore { self.repo.uri(&self.root, path) } - /// Send a request with retries, returning the successful response. - /// - /// Retries on commit conflicts (HTTP 412) and transient server errors - /// (HTTP 5xx) up to `self.max_retries` attempts with exponential backoff. + /// Send a request and return the successful response or a parsed error. 
pub(super) async fn send(&self, req: Request) -> Result> { - let backoff = ExponentialBuilder::default() - .with_min_delay(std::time::Duration::from_millis(200)) - .with_max_delay(std::time::Duration::from_millis(6400)) - .with_max_times(self.max_retries.saturating_sub(1)); - let client = self.info.http_client(); - - let send_once = || async { - let resp = client.send(req.clone()).await?; - if resp.status().is_success() { - Ok(resp) - } else { - Err(parse_error(resp)) - } - }; - - send_once - .retry(backoff) - .when(|e: &Error| e.kind() == ErrorKind::ConditionNotMatch || e.is_temporary()) - .await + let resp = self.info.http_client().send(req).await?; + if resp.status().is_success() { + Ok(resp) + } else { + Err(parse_error(resp)) + } } /// Send a request, check for success, and deserialize the JSON response. @@ -402,16 +372,11 @@ impl HfCore { .body(Buffer::new()) .map_err(new_request_build_error)?; - let mut attempt = 0; - let resp = loop { - let resp = self.no_redirect_client.send(req.clone()).await?; + let resp = self.no_redirect_client.send(req).await?; - attempt += 1; - let retryable = resp.status().is_server_error(); - if attempt >= self.max_retries || !retryable { - break resp; - } - }; + if resp.status().is_client_error() || resp.status().is_server_error() { + return Err(parse_error(resp)); + } let hash = resp .headers() @@ -434,11 +399,6 @@ impl HfCore { Ok(Some(XetFileInfo::new(hash.to_string(), size))) } - /// Commit file changes (uploads and/or deletions) to the repository. - /// - /// Retries on commit conflicts (HTTP 412) and transient server errors - /// (HTTP 5xx), matching the behavior of the official HuggingFace Hub - /// client. /// Determine upload mode by calling the preupload API. /// /// Returns the upload mode string from the API (e.g., "regular" or "lfs"). @@ -626,7 +586,6 @@ pub(crate) mod test_utils { "/".to_string(), None, endpoint.to_string(), - 3, HttpClient::with(mock_client.clone()), );