diff --git a/src/rpc/pathutils.cpp b/src/rpc/pathutils.cpp
index 1d6e0454..b6a4ec00 100644
--- a/src/rpc/pathutils.cpp
+++ b/src/rpc/pathutils.cpp
@@ -13,12 +13,18 @@ namespace tremotesf {
     // We can't use QDir::to/fromNativeSeparators because it checks for current OS,
     // and we need it to work regardless of OS we are running on
 
+    // Matches, anchored at the start, a URL prefix: scheme, "://" (extra slashes tolerated), optional
+    // user[:password]@, host (name, IPv4 or [IPv6]), optional :port. Scheme is 2+ chars so 'C:/' never matches
+    static const QRegularExpression schemeUrlRegex(
+        R"(^[a-zA-Z][a-zA-Z0-9+.-]+:/(?:/+)(?:[a-zA-Z0-9._~-]+)?(?::[a-zA-Z0-9._~-]*)?@?(?:[a-zA-Z0-9.-]+|\[[a-fA-F0-9:]+\]):?(?:\d+)?)"_L1
+    );
+
     namespace {
         constexpr auto windowsSeparatorChar = '\\';
         constexpr auto unixSeparatorChar = '/';
         constexpr auto unixSeparatorString = "/"_L1;
 
-        enum class PathType { Unix, WindowsAbsoluteDOSFilePath, WindowsUNCOrDOSDevicePath };
+        enum class PathType { Scheme, Unix, WindowsAbsoluteDOSFilePath, WindowsUNCOrDOSDevicePath };
 
         bool isWindowsUNCOrDOSDevicePath(QStringView path) {
             static const QRegularExpression regex(R"(^(?:\\|//).*$)"_L1);
@@ -26,6 +32,10 @@ namespace tremotesf {
         }
 
         PathType determinePathType(QStringView path, PathOs pathOs) {
+            // Match on the view directly: isSchemeUrl() takes a QString and would force a copy here
+            if (schemeUrlRegex.matchView(path).hasMatch()) {
+                return PathType::Scheme;
+            }
             switch (pathOs) {
             case PathOs::Unix:
                 return PathType::Unix;
@@ -78,8 +88,10 @@ namespace tremotesf {
                     return 3; // e.g. 'C:/'
                 case PathType::WindowsUNCOrDOSDevicePath:
                     return 2; // e.g. '//'
+                case PathType::Scheme:
+                    return 6; // e.g. aa://a
                 }
-                throw std::logic_error("Unknown PathOs value");
+                throw std::logic_error("Unknown PathType value");
             }();
             if (path.size() <= minimumLength) {
                 if (pathType == PathType::WindowsAbsoluteDOSFilePath && path.size() == 2) {
@@ -91,6 +103,21 @@ namespace tremotesf {
                 path.chop(1);
             }
         }
+
+        // Normalizes, in place, the URL prefix matched by schemeUrlRegex:
+        // the scheme is lowercased and any run of slashes after "scheme:" is reduced to exactly "//"
+        void normalizeSchemePrefix(QString& prefix) {
+            // Lowercase the scheme
+            const auto colonPos = prefix.indexOf("://"_L1);
+            if (colonPos != -1) {
+                prefix.replace(0, colonPos, prefix.left(colonPos).toLower());
+            }
+
+            // Collapse multiple / after : to ://
+            while (prefix.contains(":///"_L1)) {
+                prefix.replace(":///"_L1, "://"_L1);
+            }
+        }
     }
 
     bool isAbsoluteWindowsDOSFilePath(QStringView path) {
@@ -98,6 +125,8 @@ namespace tremotesf {
         return regex.matchView(path).hasMatch();
     }
 
+    bool isSchemeUrl(const QString& path) { return schemeUrlRegex.matchView(path).hasMatch(); }
+
     QString normalizePath(const QString& path, PathOs pathOs) {
         if (path.isEmpty()) {
             return path;
@@ -106,15 +135,27 @@ namespace tremotesf {
         if (normalized.isEmpty()) {
             return normalized;
         }
+        // Normalized URL prefix (scheme and authority); stays empty unless the path is a scheme URL
+        QString pathPrefix;
         const auto pathType = determinePathType(normalized, pathOs);
-        if (pathType != PathType::Unix) {
+        if (pathType == PathType::Scheme) {
+            // For scheme URLs, normalize authority and the path part separately
+            const auto match = schemeUrlRegex.match(normalized);
+            if (match.hasMatch()) {
+                const auto originalPrefixLength = match.capturedLength();
+                pathPrefix = match.captured();
+                normalizeSchemePrefix(pathPrefix);
+                normalized = normalized.mid(originalPrefixLength);
+            }
+        }
+        if (pathType != PathType::Unix && pathType != PathType::Scheme) {
             convertFromNativeWindowsSeparators(normalized);
             if (pathType == PathType::WindowsAbsoluteDOSFilePath) {
                 capitalizeWindowsDriveLetter(normalized);
             }
         }
         collapseRepeatingSeparators(normalized, pathType);
-        dropOrAddTrailingSeparator(normalized, pathType);
+        dropOrAddTrailingSeparator(normalized.prepend(pathPrefix), pathType);
         return normalized;
     }
 
diff --git a/src/rpc/pathutils.h b/src/rpc/pathutils.h
index 278584d1..bf701fa9 100644
--- a/src/rpc/pathutils.h
+++ b/src/rpc/pathutils.h
@@ -11,6 +11,8 @@
 
 namespace tremotesf {
     bool isAbsoluteWindowsDOSFilePath(QStringView path);
+    /** Returns true if path begins with a URL prefix such as "smb://host" or "ftp://user:password@host:21" */
+    bool isSchemeUrl(const QString& path);
 
     /**
      * We need to pass PathOs explicitly because we can't determing whether given path is Unix or Windows path from its string alone:
diff --git a/src/rpc/pathutils_test.cpp b/src/rpc/pathutils_test.cpp
index f13d498c..19b3ef87 100644
--- a/src/rpc/pathutils_test.cpp
+++ b/src/rpc/pathutils_test.cpp
@@ -122,6 +122,98 @@ private slots:
                 .inputPath = R"(c::\wtf)",
                 .expectedNormalizedPath = R"(C::/wtf)",
                 .pathOs = PathOs::Windows
+            },
+
+            // URL normalization tests
+
+
+            NormalizeTestCase{
+                .inputPath = "SMB://HOSTNAME/PATH",
+                .expectedNormalizedPath = "smb://HOSTNAME/PATH",
+                .pathOs = PathOs::Unix
+            },
+            NormalizeTestCase{
+                .inputPath = "smb:////hostname/path/to/share",
+                .expectedNormalizedPath = "smb://hostname/path/to/share",
+                .pathOs = PathOs::Unix
+            },
+            NormalizeTestCase{
+                .inputPath = "smb:///hostname/path/to/share",
+                .expectedNormalizedPath = "smb://hostname/path/to/share",
+                .pathOs = PathOs::Unix
+            },
+            NormalizeTestCase{
+                .inputPath = "smb://hostname//path/to/share",
+                .expectedNormalizedPath = "smb://hostname/path/to/share",
+                .pathOs = PathOs::Unix
+            },
+            NormalizeTestCase{
+                .inputPath = "ftp://hostname/path//to/share",
+                .expectedNormalizedPath = "ftp://hostname/path/to/share",
+                .pathOs = PathOs::Unix
+            },
+            NormalizeTestCase{
+                .inputPath = "z://hostname/path/to/share",
+                .expectedNormalizedPath = "Z:/hostname/path/to/share",
+                .pathOs = PathOs::Windows
+            }, // single char before :// is not a scheme url, but windows drive
+
+            // ips - untouched
+            NormalizeTestCase{
+                .inputPath = "smb://192.168.1.100/share",
+                .expectedNormalizedPath = "smb://192.168.1.100/share",
+                .pathOs = PathOs::Unix
+            },
+            NormalizeTestCase{
+                .inputPath = "ftp://[::1]:21/share",
+                .expectedNormalizedPath = "ftp://[::1]:21/share",
+                .pathOs = PathOs::Unix
+            },
+            // local network hostnames and domain names - untouched
+            NormalizeTestCase{
+                .inputPath = "nfs://localhost/share",
+                .expectedNormalizedPath = "nfs://localhost/share",
+                .pathOs = PathOs::Unix
+            },
+            NormalizeTestCase{
+                .inputPath = "smb://example.com/path",
+                .expectedNormalizedPath = "smb://example.com/path",
+                .pathOs = PathOs::Unix
+            },
+            // file protocol - untouched
+            NormalizeTestCase{
+                .inputPath = "file://local/path",
+                .expectedNormalizedPath = "file://local/path",
+                .pathOs = PathOs::Unix
+            },
+            // full RFC example with username, password and port - untouched
+            NormalizeTestCase{
+                .inputPath = "ftp://user:password@example:21/share",
+                .expectedNormalizedPath = "ftp://user:password@example:21/share",
+                .pathOs = PathOs::Unix
+            },
+            // same ipv6 - untouched
+            NormalizeTestCase{
+                .inputPath = "ftp://user:password@[::1]:21/path",
+                .expectedNormalizedPath = "ftp://user:password@[::1]:21/path",
+                .pathOs = PathOs::Unix
+            },
+            // weird paths - untouched
+            NormalizeTestCase{
+                .inputPath = "ftp://:@hostname/path/to/share",
+                .expectedNormalizedPath = "ftp://:@hostname/path/to/share",
+                .pathOs = PathOs::Unix
+            },
+            NormalizeTestCase{
+                .inputPath = "ftp://hostname:/path/to/share",
+                .expectedNormalizedPath = "ftp://hostname:/path/to/share",
+                .pathOs = PathOs::Unix
+            },
+            // weird path - collapse slashes inside
+            NormalizeTestCase{
+                .inputPath = "/path/with/http://inside",
+                .expectedNormalizedPath = "/path/with/http:/inside",
+                .pathOs = PathOs::Unix
             }
         };
 
@@ -130,6 +222,48 @@ private slots:
         }
     }
 
+    void checkSchemeDetection() {
+        // Test cases that should be detected as scheme URLs
+        const std::vector passCases = {
+            "ftp://hostname/",
+            "ftp://hostname.com/",
+            "ftp://@hostname:21/",
+            "ftp://user:@hostname:21/",
+            "ftp://user:@hostname.com:21/",
+            "ftp://user:pass@hostname:21/",
+            "ftp://user:pass@hostname:21/asdasd/asdasdasd/",
+            "ftp://user:pass@hostname:21//asdasd/asdasdasd/",
+            "ftp://user:@hostname://",
+            "ftp://user@192.168.100.1/",
+            "ftp://user:@192.168.100.1:21/",
+            "ftp://user@[::1]/",
+            "ftp://user:@[::1]:21/"
+        };
+
+        for (const auto& url : passCases) {
+            QCOMPARE(isSchemeUrl(url), true);
+        }
+
+        // Test cases that should NOT be detected as scheme URLs
+        const std::vector failCases = {
+            "C:/",
+            "C://",
+            "C:\\",
+            "C:\\\\",
+            "C:/file",
+            "C://file/path",
+            "C:\\file",
+            "C:\\\\file",
+            "//scheme://url/insdie/some/path",
+            "user@hostname",
+            "//user@hostname"
+        };
+
+        for (const auto& input : failCases) {
+            QCOMPARE(isSchemeUrl(input), false);
+        }
+    }
+
     void checkToNativeSeparators() {
         const auto testCases = std::array{
             NativeSeparatorsTestCase{.inputPath = "/", .expectedNativeSeparatorsPath = "/", .pathOs = PathOs::Unix},