diff --git a/doc/modules/ROOT/examples/unit/snippets.cpp b/doc/modules/ROOT/examples/unit/snippets.cpp index 2c3f8fff5..bac96b82a 100644 --- a/doc/modules/ROOT/examples/unit/snippets.cpp +++ b/doc/modules/ROOT/examples/unit/snippets.cpp @@ -1782,6 +1782,40 @@ encoding() } } +void +decoding_helpers() +{ + { + // tag::snippet_decoding_helpers_1[] + boost::core::string_view encoded = "name%3Dboost+url"; + encoding_opts opt; + opt.space_as_plus = true; // plus signs in the input decode to spaces + + auto const needed = decoded_size(encoded).value(); // bytes required for the decoded text + std::string buffer; + buffer.resize(needed); + auto const written = decode(&buffer[0], buffer.size(), encoded, opt).value(); + buffer.resize(written); // keep only the bytes actually written + + assert(buffer == "name=boost url"); + // end::snippet_decoding_helpers_1[] + } + + { + // tag::snippet_decoding_helpers_2[] + encoding_opts opt; + opt.space_as_plus = true; // plus signs in the input decode to spaces + + auto plain = decode(boost::core::string_view("city%3DSan+Jose"), opt).value(); + assert(plain == "city=San Jose"); + + std::string scratch = "prefix:"; + decode(boost::core::string_view("value%2F42"), {}, string_token::append_to(scratch)).value(); // decoded text is appended to scratch + assert(scratch == "prefix:value/42"); + // end::snippet_decoding_helpers_2[] + } +} + void readme_snippets() { @@ -1845,6 +1879,7 @@ class snippets_test normalizing(); decode_with_token(); encoding(); + decoding_helpers(); ignore_unused(&readme_snippets); BOOST_TEST_PASS(); @@ -1854,4 +1889,4 @@ class snippets_test TEST_SUITE(snippets_test, "boost.url.snippets"); } // urls -} // boost \ No newline at end of file +} // boost diff --git a/doc/modules/ROOT/pages/reference.adoc b/doc/modules/ROOT/pages/reference.adoc index bf01ecdd2..185548df4 100644 --- a/doc/modules/ROOT/pages/reference.adoc +++ b/doc/modules/ROOT/pages/reference.adoc @@ -118,6 +118,10 @@ cpp:boost::urls::encode[encode] cpp:boost::urls::encoded_size[encoded_size] +cpp:boost::urls::decode[decode] + +cpp:boost::urls::decoded_size[decoded_size] + cpp:boost::urls::make_pct_string_view[make_pct_string_view] **Types** diff --git 
a/doc/modules/ROOT/pages/urls/percent-encoding.adoc b/doc/modules/ROOT/pages/urls/percent-encoding.adoc index 8f8ac6263..de66d3398 100644 --- a/doc/modules/ROOT/pages/urls/percent-encoding.adoc +++ b/doc/modules/ROOT/pages/urls/percent-encoding.adoc @@ -145,6 +145,7 @@ include::example$unit/snippets.cpp[tag=snippet_encoding_13,indent=0] The member function cpp:pct_string_view::decode[] can be used to decode the data into a buffer. + Like the free-function cpp:encode[], decoding options and the string token can be customized. @@ -154,6 +155,26 @@ cpp:encode[], decoding options and the string token can be customized. include::example$unit/snippets.cpp[tag=snippet_encoding_14,indent=0] ---- +When you need to decode an ad-hoc string, the cpp:decode[] free functions mirror the +cpp:encode[] API so you do not need to construct a cpp:pct_string_view[] or a +cpp:decode_view[]. The cpp:decoded_size[] function reports the exact number of bytes required to store the decoded text, +and the buffer overload of cpp:decode[] writes into a caller-provided buffer. + +// snippet_decoding_helpers_1 +[source,cpp] +---- +include::example$unit/snippets.cpp[tag=snippet_decoding_helpers_1,indent=0] +---- +All overloads take a `core::string_view` parameter and return a +`system::result`, so the functions always validate the input and surface any +percent-encoding errors without throwing. +Just like their encoding counterparts, the string-token overloads let you +reuse storage or append to an existing container without intermediate allocations. 
+// snippet_decoding_helpers_2 +[source,cpp] +---- +include::example$unit/snippets.cpp[tag=snippet_decoding_helpers_2,indent=0] +---- diff --git a/include/boost/url.hpp b/include/boost/url.hpp index 31a828ce0..5d0e19843 100644 --- a/include/boost/url.hpp +++ b/include/boost/url.hpp @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/include/boost/url/decode.hpp b/include/boost/url/decode.hpp new file mode 100644 index 000000000..19e19d991 --- /dev/null +++ b/include/boost/url/decode.hpp @@ -0,0 +1,158 @@ +// +// Copyright (c) 2025 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + +#ifndef BOOST_URL_DECODE_HPP +#define BOOST_URL_DECODE_HPP + +#include +#include +#include +#include +#include + +namespace boost { +namespace urls { + +/** Return the buffer size needed for percent-decoding + + This function returns the exact number of bytes needed + to store the decoded form of the specified string. The + string is validated before the size is computed; + malformed escapes cause the returned result to contain + an error instead. + + @par Example + @code + auto n = decoded_size( "My%20Stuff" ); + assert( n && *n == 8 ); + @endcode + + @par Exception Safety + Throws nothing. Validation errors are reported in the + returned result. + + @return A result containing the decoded size, excluding + any null terminator. + + @param s The string to measure. + + @par Specification + @li 2.1. Percent-Encoding (rfc3986) + + @see + @ref decode, + @ref encoding_opts, + @ref make_pct_string_view. 
+*/ +system::result +decoded_size(core::string_view s) noexcept; + +/** Apply percent-decoding to an arbitrary string + + This function percent-decodes the specified string into + the destination buffer provided by the caller. The input + is validated first; malformed escapes cause the returned + result to hold an error instead of a size. If the buffer + is too small, the output is truncated and the number of + bytes actually written is returned. + + @par Example + @code + char buf[100]; + auto n = decode( buf, sizeof(buf), "Program%20Files" ); + assert( n && *n == 13 ); + @endcode + + @par Exception Safety + Throws nothing. Validation errors are reported in the + returned result. + + @return The number of characters written to the + destination buffer, or an error. + + @param dest The destination buffer to write to. + + @param size The number of writable characters pointed + to by `dest`. If this is less than the decoded size, the + result is truncated. + + @param s The string to decode. + + @param opt The decoding options. If omitted, the + default options are used. + + @par Specification + @li 2.1. Percent-Encoding (rfc3986) + + @see + @ref decoded_size, + @ref encoding_opts, + @ref make_pct_string_view. +*/ +system::result +decode( + char* dest, + std::size_t size, + core::string_view s, + encoding_opts opt = {}) noexcept; + +//------------------------------------------------ + +/** Return a percent-decoded string + + This function percent-decodes the specified string and + returns the result using any @ref string_token. The + string is validated before decoding; malformed escapes + cause the returned result to hold an error. + + @par Example + @code + auto plain = decode( "My%20Stuff" ); + assert( plain && *plain == "My Stuff" ); + @endcode + + @par Exception Safety + Calls to allocate may throw; as this overload is declared + `noexcept`, such an exception calls `std::terminate`. Validation errors are reported in the returned result. + + @return A result containing the decoded string in the + format described by the passed string token. 
+ + @param s The string to decode. + + @param opt The decoding options. If omitted, the + default options are used. + + @param token A string token. + + @par Specification + @li 2.1. Percent-Encoding (rfc3986) + + @see + @ref decode, + @ref decoded_size, + @ref encoding_opts, + @ref string_token::return_string. +*/ +template +system::result +decode( + core::string_view s, + encoding_opts opt = {}, + StringToken&& token = {}) noexcept; + +} // urls +} // boost + +#include + +#endif diff --git a/src/detail/decode.hpp b/include/boost/url/detail/decode.hpp similarity index 59% rename from src/detail/decode.hpp rename to include/boost/url/detail/decode.hpp index 053dd067f..848906af7 100644 --- a/src/detail/decode.hpp +++ b/include/boost/url/detail/decode.hpp @@ -11,7 +11,7 @@ #ifndef BOOST_URL_DETAIL_DECODE_HPP #define BOOST_URL_DETAIL_DECODE_HPP -#include "boost/url/encoding_opts.hpp" +#include #include #include @@ -19,16 +19,24 @@ namespace boost { namespace urls { namespace detail { +// Reads two hex digits without checking bounds or validity; invalid input or +// missing digits produces garbage and may touch bytes past the buffer. BOOST_URL_DECL char decode_one( char const* it) noexcept; +// Counts decoded bytes assuming the caller already validated escapes; a stray +// '%' still makes it skip three characters, so the reported size can be too +// small and lead to overflow when decoding. BOOST_URL_DECL std::size_t decode_bytes_unsafe( core::string_view s) noexcept; +// Writes decoded bytes trusting the buffer is large enough and escapes are +// complete; a short buffer stops decoding early, and a malformed escape zeros +// the remaining space before returning. 
BOOST_URL_DECL std::size_t decode_unsafe( diff --git a/include/boost/url/impl/decode.hpp b/include/boost/url/impl/decode.hpp new file mode 100644 index 000000000..a24a60387 --- /dev/null +++ b/include/boost/url/impl/decode.hpp @@ -0,0 +1,83 @@ +// +// Copyright (c) 2025 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + +#ifndef BOOST_URL_IMPL_DECODE_HPP +#define BOOST_URL_IMPL_DECODE_HPP + +#include +#include +#include +#include +#include + +namespace boost { +namespace urls { + +inline +system::result +decoded_size(core::string_view s) noexcept +{ + auto const rv = make_pct_string_view(s); // validate s before measuring + if(! rv) + return rv.error(); // malformed input: forward the error + return rv->decoded_size(); +} + +inline +system::result +decode( + char* dest, + std::size_t size, + core::string_view s, + encoding_opts opt) noexcept +{ + auto const rv = make_pct_string_view(s); // validate s before decoding + if(! rv) + return rv.error(); // malformed input: forward the error + return detail::decode_unsafe( + dest, + dest + size, // one past the last writable byte + detail::to_sv(rv.value()), + opt); +} + +template< + BOOST_URL_CONSTRAINT(string_token::StringToken) StringToken> +system::result +decode( + core::string_view s, + encoding_opts opt, + StringToken&& token) noexcept +{ + static_assert( + string_token::is_token< + StringToken>::value, + "Type requirements not met"); + + auto const rv = make_pct_string_view(s); // validate s before decoding + if(! rv) + return rv.error(); // malformed input: forward the error + + auto const n = rv->decoded_size(); + auto p = token.prepare(n); // storage for n decoded bytes, provided by the token + // Some tokens might hand back a null/invalid buffer for n == 0, so skip the + // decode call entirely in that case to avoid touching unspecified memory. 
+ if(n > 0) + detail::decode_unsafe( + p, + p + n, + detail::to_sv(rv.value()), + opt); + return token.result(); +} + +} // urls +} // boost + +#endif diff --git a/src/detail/decode.cpp b/src/detail/decode.cpp index d62f76add..6b82af388 100644 --- a/src/detail/decode.cpp +++ b/src/detail/decode.cpp @@ -9,7 +9,7 @@ #include -#include "decode.hpp" +#include #include #include #include @@ -150,4 +150,3 @@ decode_unsafe( } // detail } // urls } // boost - diff --git a/src/detail/normalize.cpp b/src/detail/normalize.cpp index 04212d079..e4ed4fefc 100644 --- a/src/detail/normalize.cpp +++ b/src/detail/normalize.cpp @@ -11,7 +11,7 @@ #include #include -#include "decode.hpp" +#include #include #include #include @@ -961,4 +961,3 @@ segments_compare( } // urls } // boost - diff --git a/src/detail/segments_iter_impl.cpp b/src/detail/segments_iter_impl.cpp index f7fd2376e..af5c31536 100644 --- a/src/detail/segments_iter_impl.cpp +++ b/src/detail/segments_iter_impl.cpp @@ -11,7 +11,7 @@ #include #include "path.hpp" -#include "decode.hpp" +#include #include #include "boost/url/rfc/detail/path_rules.hpp" #include diff --git a/src/pct_string_view.cpp b/src/pct_string_view.cpp index 394a3f9b0..4b2fb7b49 100644 --- a/src/pct_string_view.cpp +++ b/src/pct_string_view.cpp @@ -11,7 +11,7 @@ #include #include #include -#include "detail/decode.hpp" +#include #include #include @@ -91,4 +91,3 @@ make_pct_string_view( } // urls } // boost - diff --git a/src/url_base.cpp b/src/url_base.cpp index 8e66adc6e..149bcb7c0 100644 --- a/src/url_base.cpp +++ b/src/url_base.cpp @@ -18,7 +18,7 @@ #include #include #include -#include "detail/decode.hpp" +#include #include #include #include "detail/normalize.hpp" @@ -3013,4 +3013,3 @@ to_lower_impl(int id) noexcept } // urls } // boost - diff --git a/test/unit/Jamfile b/test/unit/Jamfile index 9c0c97027..09424e4ec 100644 --- a/test/unit/Jamfile +++ b/test/unit/Jamfile @@ -24,6 +24,7 @@ local SOURCES = authority_view.cpp error.cpp error_types.cpp + 
decode.cpp encode.cpp encoding_opts.cpp decode_view.cpp diff --git a/test/unit/decode.cpp b/test/unit/decode.cpp new file mode 100644 index 000000000..45e3ee9eb --- /dev/null +++ b/test/unit/decode.cpp @@ -0,0 +1,192 @@ +// +// Copyright (c) 2025 Alan de Freitas (alandefreitas@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/url +// + +// Test that header file is self-contained. +#include + +#include +#include + +#include "test_suite.hpp" + +namespace boost { +namespace urls { + +class decode_test +{ +public: + void + testDecodedSize() + { + // validated percent-encoding + { + auto const r = decoded_size("Hello%20World"); + BOOST_TEST(r); + if(r) + BOOST_TEST_EQ(*r, 11); + } + + // another well-formed input with a single escape + { + auto const r = decoded_size("alpha%20beta"); + BOOST_TEST(r); + if(r) + BOOST_TEST_EQ(*r, 10); + } + + // malformed escape + { + auto const r = decoded_size("bad%2"); + BOOST_TEST(r.error() == error::incomplete_encoding); + } + } + + void + testDecodeBuffer() + { + // buffer larger than the decoded output + { + core::string_view const encoded = "Program%20Files"; + char buf[32] = {}; + auto const r = decode(buf, sizeof(buf), encoded); + BOOST_TEST(r); + if(r) + { + BOOST_TEST_EQ(*r, 13); + BOOST_TEST_EQ(core::string_view(buf, *r), "Program Files"); + } + } + + // truncated buffer + { + char small[4] = {}; + auto const r = decode(small, sizeof(small), "Program%20Files"); + BOOST_TEST(r); + if(r) + { + BOOST_TEST_EQ(*r, sizeof(small)); + BOOST_TEST_EQ(core::string_view(small, *r), "Prog"); + } + } + + // plus handling + { + encoding_opts opt; + opt.space_as_plus = true; + char plus_buf[8] = {}; + auto const r = decode(plus_buf, sizeof(plus_buf), "a+b", opt); + BOOST_TEST(r); + if(r) + { + BOOST_TEST_EQ(*r, 3); + BOOST_TEST_EQ(core::string_view(plus_buf, *r), "a b"); + } + } + + // plain percent sequence + { + char 
checked[16] = {}; + auto const r = decode(checked, sizeof(checked), "ready%21"); + BOOST_TEST(r); + if(r) + BOOST_TEST_EQ(core::string_view(checked, *r), "ready!"); + } + + // incomplete escape + { + char checked[16] = {}; + auto const r = decode(checked, sizeof(checked), "oops%2"); + BOOST_TEST(r.error() == error::incomplete_encoding); + } + } + + void + testDecodeTokens() + { + // default token + { + auto const r = decode("user%3Dboost"); + BOOST_TEST(r); + if(r) + BOOST_TEST_EQ(*r, "user=boost"); + } + + // token plus handling + { + encoding_opts opt; + opt.space_as_plus = true; + auto const r = decode("a+b", opt); + BOOST_TEST(r); + if(r) + BOOST_TEST_EQ(*r, "a b"); + } + + // explicit std::string token + { + auto r = decode("plan%3Dgold"); + BOOST_TEST(r); + if(r) + BOOST_TEST_EQ(*r, "plan=gold"); + } + + // token error + { + auto r = decode("bad%X"); + BOOST_TEST(! r); + if(! r) + BOOST_TEST(r.error() == error::incomplete_encoding); + } + } + + void + testDocExamples() + { + // docs decoded_size example + { + auto const r = decoded_size("My%20Stuff"); + BOOST_TEST(r); + if(r) + BOOST_TEST_EQ(*r, 8); + } + + // docs buffer example + { + char buf[100]; + auto const r = decode(buf, sizeof(buf), "Program%20Files"); + BOOST_TEST(r); + if(r) + { + BOOST_TEST_EQ(*r, 13); + BOOST_TEST_EQ(core::string_view(buf, *r), "Program Files"); + } + } + + // docs token example + { + auto const r = decode("My%20Stuff"); + BOOST_TEST(r); + if(r) + BOOST_TEST_EQ(*r, "My Stuff"); + } + } + + void + run() + { + testDecodedSize(); + testDecodeBuffer(); + testDecodeTokens(); + testDocExamples(); + } +}; + +TEST_SUITE(decode_test, "boost.url.decode"); + +} // urls +} // boost diff --git a/test/unit/snippets.cpp b/test/unit/snippets.cpp index 4bf70fb63..b4b269f51 100644 --- a/test/unit/snippets.cpp +++ b/test/unit/snippets.cpp @@ -1782,6 +1782,40 @@ encoding() } } +void +decoding_helpers() +{ + { + //[snippet_decoding_helpers_1 + boost::core::string_view encoded = "name%3Dboost+url"; + 
encoding_opts opt; + opt.space_as_plus = true; + + std::size_t const needed = decoded_size(encoded).value(); + std::string buffer; + buffer.resize(needed); + std::size_t const written = decode(&buffer[0], buffer.size(), encoded, opt).value(); + buffer.resize(written); + + assert(buffer == "name=boost url"); + //] + } + + { + //[snippet_decoding_helpers_2 + encoding_opts opt; + opt.space_as_plus = true; + + std::string plain = decode(boost::core::string_view("city%3DSan+Jose"), opt).value(); + assert(plain == "city=San Jose"); + + std::string scratch = "prefix:"; + decode(boost::core::string_view("value%2F42"), {}, string_token::append_to(scratch)).value(); + assert(scratch == "prefix:value/42"); + //] + } +} + void readme_snippets() { @@ -1845,6 +1879,7 @@ class snippets_test normalizing(); decode_with_token(); encoding(); + decoding_helpers(); ignore_unused(&readme_snippets); BOOST_TEST_PASS();