From b59704fce4b3ddf7f589e12b22c5edeb8f83495e Mon Sep 17 00:00:00 2001 From: am2rican5 Date: Tue, 23 Dec 2025 14:13:27 +0900 Subject: [PATCH 1/3] feat: add parse subcommand to parse URLs into components Add URL parsing feature that extracts scheme, host, port, path, query, fragment, username, and password from URLs. Query parameters are also parsed into key-value pairs. --- Cargo.lock | 257 +++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/tools/url.rs | 145 ++++++++++++++++++++++++-- 3 files changed, 395 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 020711d..06356c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -821,6 +821,17 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "edit" version = "0.1.5" @@ -1262,12 +1273,114 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + 
"zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "image" version = "0.25.8" @@ -1502,6 +1615,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + [[package]] name = "lock_api" version = "0.4.14" @@ -1920,6 +2039,15 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -2627,6 +2755,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "static_assertions" version = "1.1.0" @@ -2695,6 +2829,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "system-deps" version = "6.2.2" @@ -2850,6 +2995,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.10.0" @@ -3162,6 +3317,18 @@ version = 
"0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "url" +version = "2.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + [[package]] name = "urlencoding" version = "2.1.3" @@ -3214,6 +3381,7 @@ dependencies = [ "tracing-subscriber", "tui-textarea", "ulid", + "url", "urlencoding", "uuid", ] @@ -3224,6 +3392,12 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -3645,6 +3819,12 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + [[package]] name = "wyz" version = "0.5.1" @@ -3654,6 +3834,29 @@ dependencies = [ "tap", ] +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.106", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.8.27" @@ -3674,12 +3877,66 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "zune-core" version = "0.4.12" diff --git a/Cargo.toml b/Cargo.toml index 69053c6..63c10a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ serde_json = "1.0.145" sha2 = "0.10" sha1 = "0.10" md-5 = "0.10" +url = "2" urlencoding = "2.1.3" uuid = { version = "1.11.0", features = ["v1", 
"v3", "v4", "v5", "v7"] } ulid = { version = "1.1", features = ["serde", "uuid"] } diff --git a/src/tools/url.rs b/src/tools/url.rs index 59f1185..fa85d48 100644 --- a/src/tools/url.rs +++ b/src/tools/url.rs @@ -2,6 +2,7 @@ use crate::args::StringInput; use crate::tool::{Output, Tool}; use anyhow::Context; use clap::{Command, CommandFactory, Parser, Subcommand}; +use url::Url; #[derive(Parser, Debug)] #[command(name = "url", about = "URL encode and decode utilities")] @@ -22,6 +23,11 @@ enum UrlCommand { /// Text to URL decode (use "-" for stdin) text: StringInput, }, + /// Parse URL into its components + Parse { + /// URL to parse (use "-" for stdin) + url: StringInput, + }, } impl Tool for UrlTool { @@ -30,14 +36,41 @@ impl Tool for UrlTool { } fn execute(&self) -> anyhow::Result<Option<Output>> { - let result = match &self.command { - UrlCommand::Encode { text } => urlencoding::encode(text.as_ref()).into_owned(), - UrlCommand::Decode { text } => urlencoding::decode(text.as_ref()) - .context("Could not decode")? - .into_owned(), - }; - - Ok(Some(Output::JsonValue(serde_json::json!(result)))) + match &self.command { + UrlCommand::Encode { text } => { + let result = urlencoding::encode(text.as_ref()).into_owned(); + Ok(Some(Output::JsonValue(serde_json::json!(result)))) + } + UrlCommand::Decode { text } => { + let result = urlencoding::decode(text.as_ref()) + .context("Could not decode")?
+ .into_owned(); + Ok(Some(Output::JsonValue(serde_json::json!(result)))) + } + UrlCommand::Parse { url } => { + let parsed = Url::parse(url.as_ref()).context("Could not parse URL")?; + + // Build query params as a JSON object + let query_params: serde_json::Map<String, serde_json::Value> = parsed + .query_pairs() + .map(|(k, v)| (k.into_owned(), serde_json::json!(v))) + .collect(); + + let result = serde_json::json!({ + "scheme": parsed.scheme(), + "host": parsed.host_str(), + "port": parsed.port_or_known_default(), + "path": parsed.path(), + "query": parsed.query(), + "query_params": query_params, + "fragment": parsed.fragment(), + "username": parsed.username(), + "password": parsed.password(), + }); + + Ok(Some(Output::JsonValue(result))) + } + } } } @@ -243,4 +276,100 @@ mod tests { }; assert_eq!(val.as_str().unwrap(), original); } + + #[test] + fn test_parse_basic_url() { + let tool = UrlTool { + command: UrlCommand::Parse { + url: StringInput("https://example.com/path".to_string()), + }, + }; + let result = tool.execute().unwrap().unwrap(); + + let Output::JsonValue(val) = result else { + unreachable!() + }; + assert_eq!(val["scheme"], "https"); + assert_eq!(val["host"], "example.com"); + assert_eq!(val["port"], 443); + assert_eq!(val["path"], "/path"); + assert!(val["query"].is_null()); + assert!(val["fragment"].is_null()); + } + + #[test] + fn test_parse_url_with_query_params() { + let tool = UrlTool { + command: UrlCommand::Parse { + url: StringInput("https://example.com/search?key1=value1&key2=value2".to_string()), + }, + }; + let result = tool.execute().unwrap().unwrap(); + + let Output::JsonValue(val) = result else { + unreachable!() + }; + assert_eq!(val["query"], "key1=value1&key2=value2"); + assert_eq!(val["query_params"]["key1"], "value1"); + assert_eq!(val["query_params"]["key2"], "value2"); + } + + #[test] + fn test_parse_url_with_fragment() { + let tool = UrlTool { + command: UrlCommand::Parse { + url: StringInput("https://example.com/page#section".to_string()), + }, + };
+ let result = tool.execute().unwrap().unwrap(); + + let Output::JsonValue(val) = result else { + unreachable!() + }; + assert_eq!(val["fragment"], "section"); + } + + #[test] + fn test_parse_url_with_credentials() { + let tool = UrlTool { + command: UrlCommand::Parse { + url: StringInput("https://user:pass@example.com/".to_string()), + }, + }; + let result = tool.execute().unwrap().unwrap(); + + let Output::JsonValue(val) = result else { + unreachable!() + }; + assert_eq!(val["username"], "user"); + assert_eq!(val["password"], "pass"); + } + + #[test] + fn test_parse_url_with_port() { + let tool = UrlTool { + command: UrlCommand::Parse { + url: StringInput("http://localhost:8080/api".to_string()), + }, + }; + let result = tool.execute().unwrap().unwrap(); + + let Output::JsonValue(val) = result else { + unreachable!() + }; + assert_eq!(val["host"], "localhost"); + assert_eq!(val["port"], 8080); + assert_eq!(val["scheme"], "http"); + } + + #[test] + fn test_parse_invalid_url() { + let tool = UrlTool { + command: UrlCommand::Parse { + url: StringInput("not-a-valid-url".to_string()), + }, + }; + let result = tool.execute(); + assert!(result.is_err()); + } } From 3c94194c5f63e72c1b6c30d965c1e54878f143e7 Mon Sep 17 00:00:00 2001 From: am2rican5 Date: Tue, 23 Dec 2025 14:28:09 +0900 Subject: [PATCH 2/3] fix: handle duplicate query params by grouping values When a URL has duplicate query parameter keys like `?a=1&a=2&b=3`, values are now grouped by key: - Single value: returns string `"b": "3"` - Multiple values: returns array `"a": ["1", "2"]` --- src/tools/url.rs | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/src/tools/url.rs b/src/tools/url.rs index fa85d48..bd1d585 100644 --- a/src/tools/url.rs +++ b/src/tools/url.rs @@ -50,10 +50,27 @@ impl Tool for UrlTool { UrlCommand::Parse { url } => { let parsed = Url::parse(url.as_ref()).context("Could not parse URL")?; - // Build query params as a JSON object -
let query_params: serde_json::Map<String, serde_json::Value> = parsed - .query_pairs() - .map(|(k, v)| (k.into_owned(), serde_json::json!(v))) + // Group query params by key to handle duplicates + let mut grouped: std::collections::HashMap<String, Vec<String>> = + std::collections::HashMap::new(); + for (k, v) in parsed.query_pairs() { + grouped + .entry(k.into_owned()) + .or_default() + .push(v.into_owned()); + } + + // Convert to JSON: single value → string, multiple values → array + let query_params: serde_json::Map<String, serde_json::Value> = grouped + .into_iter() + .map(|(k, v)| { + let value = if v.len() == 1 { + serde_json::json!(v.into_iter().next().unwrap()) + } else { + serde_json::json!(v) + }; + (k, value) + }) .collect(); let result = serde_json::json!({ @@ -314,6 +331,24 @@ mod tests { assert_eq!(val["query_params"]["key2"], "value2"); } + #[test] + fn test_parse_url_with_duplicate_query_params() { + let tool = UrlTool { + command: UrlCommand::Parse { + url: StringInput("https://example.com/search?a=1&a=2&b=3".to_string()), + }, + }; + let result = tool.execute().unwrap().unwrap(); + + let Output::JsonValue(val) = result else { + unreachable!() + }; + // Duplicate key 'a' should be an array + assert_eq!(val["query_params"]["a"], serde_json::json!(["1", "2"])); + // Single key 'b' should be a string + assert_eq!(val["query_params"]["b"], "3"); + } + #[test] fn test_parse_url_with_fragment() { let tool = UrlTool { From b78b0b8050cea4cd9bd39e6da6a8d81607685acb Mon Sep 17 00:00:00 2001 From: am2rican5 Date: Tue, 23 Dec 2025 14:32:45 +0900 Subject: [PATCH 3/3] docs: add url parse subcommand to README --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4c159f3..31ab11b 100644 --- a/README.md +++ b/README.md @@ -109,9 +109,10 @@ After setting up completions, restart your shell or source your configuration fi │ ├── base64 - Base64 encode/decode │ │ ├── encode │ │ └── decode -│ └── url - URL encode/decode +│ └── url - URL encode/decode/parse │ ├── encode -│ └── decode
+│ ├── decode +│ └── parse ├── Hashing & Security │ ├── hash - Cryptographic hash digests │ │ ├── md5 @@ -185,11 +186,12 @@ echo -n "hello world" | ut base64 encode - ``` #### `url` -URL encode and decode text. +URL-encode and URL-decode text, and parse URLs into their components. ```bash ut url encode "hello world" ut url decode "hello%20world" +ut url parse "https://example.com:8080/path?key=value#section" printf "hello world" | ut url encode - ```