-
Notifications
You must be signed in to change notification settings - Fork 93
[perf] Make ASCII checks faster by enabling the compiler to leverage SIMD instructions + Fix a bug #531
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[perf] Make ASCII checks faster by enabling the compiler to leverage SIMD instructions + Fix a bug #531
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,22 @@ | |
| import NIOCore | ||
| import NIOHTTP1 | ||
|
|
||
| #if os(Windows) | ||
| import ucrt | ||
| #elseif canImport(Darwin) | ||
| import Darwin | ||
| #elseif canImport(Glibc) | ||
| @preconcurrency import Glibc | ||
| #elseif canImport(Musl) | ||
| @preconcurrency import Musl | ||
| #elseif canImport(Bionic) | ||
| @preconcurrency import Bionic | ||
| #elseif canImport(WASILibc) | ||
| @preconcurrency import WASILibc | ||
| #else | ||
| #error("The HPACK Header module was unable to identify your C library.") | ||
| #endif | ||
|
|
||
| /// Very similar to `NIOHTTP1.HTTPHeaders`, but with extra data for storing indexing | ||
| /// information. | ||
| public struct HPACKHeaders: ExpressibleByDictionaryLiteral, Sendable { | ||
|
|
@@ -140,7 +156,7 @@ public struct HPACKHeaders: ExpressibleByDictionaryLiteral, Sendable { | |
| /// regard to this header. | ||
| @inlinable | ||
| public mutating func add(name: String, value: String, indexing: HPACKIndexing = .indexable) { | ||
| precondition(!name.utf8.contains(where: { !$0.isASCII }), "name must be ASCII") | ||
| precondition(!name.utf8.isASCII, "name must be ASCII") | ||
| self.headers.append(HPACKHeader(name: name, value: value, indexing: indexing)) | ||
| } | ||
|
|
||
|
|
@@ -638,10 +654,15 @@ extension HPACKHeader { | |
| } | ||
| } | ||
|
|
||
| extension UInt8 { | ||
| extension String.UTF8View { | ||
| /// Determines if this `UTF8View` corresponds to an `String` which contains only ASCII characters. | ||
| @inlinable | ||
| var isASCII: Bool { | ||
| self <= 127 | ||
| var result: UInt8 = 0 | ||
| for byte in self { | ||
| result |= byte | ||
| } | ||
| return result <= 127 | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -732,6 +753,71 @@ extension String.UTF8View { | |
| } | ||
| } | ||
|
|
||
| @usableFromInline | ||
| enum ASCIIAndLowercasedCheckResult { | ||
| case lowercasedASCII | ||
| case ascii | ||
| case notASCII | ||
| } | ||
|
|
||
| extension Sequence<UInt8> { | ||
| /// Determines if this `Sequence` corresponds to a sequence of bytes that can represent an ASCII-only `String`. | ||
| @inlinable | ||
| func checkASCIIAndLowercased() -> ASCIIAndLowercasedCheckResult { | ||
| // Use simple bitwise operations so the compiler can vectorize the loop. | ||
| var orResult: UInt8 = 0 | ||
| var andResult: UInt8 = 0 | ||
| for byte in self { | ||
| orResult |= byte | ||
| andResult &= byte | ||
| } | ||
| let isASCII = orResult <= 127 | ||
|
|
||
| // If the sixth bit is set then this for sure doesn't contain an uppercased letter because | ||
| // those all have the sixth bit turned off. | ||
| // a-z and 0-9 have the sixth bit turned on, so if a string only consists of a-b and 0-9 | ||
| // then this check can be a very fast way to know the bytes are all lowercased. | ||
| let isDefinitelyLowercasedOnly = andResult & 0b100000 == 0b100000 | ||
|
|
||
| switch (isASCII, isDefinitelyLowercasedOnly) { | ||
| case (true, true): | ||
| return .lowercasedASCII | ||
| case (true, false): | ||
| return .ascii | ||
| default: | ||
| return .notASCII | ||
| } | ||
| } | ||
|
|
||
| @inlinable | ||
| func quickCompareASCIIBytes(to other: Self) -> ASCIIAndLowercasedCheckResult { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function feels to me like it has the wrong name: it's not actually comparing the strings, but some property of the strings. That makes it a tempting source of bugs in the future. I'm not entirely convinced that this function makes sense at all (it's pretty safe to inline directly into the usage site), but if we want to keep it as a separate function than it might actually be a combining operation on |
||
| let lhsCheck = self.checkASCIIAndLowercased() | ||
| let rhsCheck = other.checkASCIIAndLowercased() | ||
|
|
||
| switch (lhsCheck, rhsCheck) { | ||
| case (.lowercasedASCII, .lowercasedASCII): | ||
| return .lowercasedASCII | ||
| case (.notASCII, _), (_, .notASCII): | ||
| return .notASCII | ||
| default: | ||
| return .ascii | ||
| } | ||
| } | ||
| } | ||
|
|
||
| extension UInt8 { | ||
| /// Checks if this byte is equal to another byte, ignoring case. | ||
| @inlinable | ||
| func isCaseInsensitivelyEqualAssumingASCII(to other: Self) -> Bool { | ||
| if self == other { return true } | ||
| // Check if the bytes are equal if we ignore casing via masking-off the sixth bit. | ||
| return self & 0xdf == other & 0xdf | ||
| // Check to see if the first byte is a latin letter at all, otherwise | ||
| // case-insensitive comparison is meaningless and can be incorrect. | ||
| && (UInt8(ascii: "A")...UInt8(ascii: "Z")).contains(self & 0xdf) | ||
| } | ||
| } | ||
|
|
||
| extension String.UTF8View { | ||
| /// Compares two UTF8 strings as case insensitive ASCII bytes. | ||
| /// | ||
|
|
@@ -746,13 +832,27 @@ extension String.UTF8View { | |
| return false | ||
| } | ||
|
|
||
| for idx in 0..<lhsBuffer.count { | ||
| // let's hope this gets vectorised ;) | ||
| if lhsBuffer[idx] & 0xdf != rhsBuffer[idx] & 0xdf && lhsBuffer[idx].isASCII { | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This check is incorrect for case-insensitively comparing 2 ASCII bytes, swift -e '
let a = 0b01111110
let b = 0b01011110
print(Unicode.Scalar(a)!, Unicode.Scalar(b)!)
print(a <= 127, b <= 127)
print(a == b)
print(a & 0xdf == b & 0xdf)
'Result: ~ ^
true true
false
trueAs you can see, the 2 bytes are not equal, but after masking-off the sixth bit via |
||
| return false | ||
| switch lhsBuffer.quickCompareASCIIBytes(to: rhsBuffer) { | ||
| case .lowercasedASCII: | ||
| // Make sure the base-address force-unwrap below is safe. | ||
| if lhsBuffer.count == 0 { | ||
| return true | ||
| } | ||
| return memcmp( | ||
| lhsBuffer.baseAddress!, | ||
| rhsBuffer.baseAddress!, | ||
| lhsBuffer.count | ||
| ) == 0 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a place where Swift's lack of good SIMD mask support makes me really sad. Conceptually, we should be able to express this as a natural vector loop ourselves and avoid a double-iteration of the strings. Conceivably we can do the full thing, ASCII checking, memcmp, and lowercase checking, all using a very small number of vector registers at full register size, but doing so requires efficiently projecting a SIMD mask into an integer which Swift couldn't do last time I checked. I do wonder whether a more careful comparison of this autovec to manually writing a SWAR-based approach is worth pursuing. I suspect a careful use of SWAR will get us to a better outcome, where we can at least 8 bytes at a time (16 at a time when UInt128 is available). We can certainly do the is-ASCII check using SWAR. The full case insensitive ASCII comparison may be possible do, though I'd need to investigate the SWAR algorithm for it. |
||
| case .ascii: | ||
| for idx in 0..<lhsBuffer.count { | ||
| if !lhsBuffer[idx].isCaseInsensitivelyEqualAssumingASCII(to: rhsBuffer[idx]) { | ||
| return false | ||
| } | ||
| } | ||
| return true | ||
| case .notASCII: | ||
| return false | ||
| } | ||
| return true | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -766,7 +866,20 @@ extension String.UTF8View { | |
| @inlinable | ||
| @inline(never) | ||
| func _compareCaseInsensitiveASCIIBytesSlowPath(to other: String.UTF8View) -> Bool { | ||
| self.elementsEqual(other, by: { (($0 & 0xdf) == ($1 & 0xdf) && $0.isASCII) }) | ||
| if self.count != other.count { | ||
| return false | ||
| } | ||
|
|
||
| switch self.quickCompareASCIIBytes(to: other) { | ||
| case .lowercasedASCII: | ||
| return self.elementsEqual(other, by: { $0 == $1 }) | ||
| case .ascii: | ||
| return self.elementsEqual(other) { | ||
| $0.isCaseInsensitivelyEqualAssumingASCII(to: $1) | ||
| } | ||
| case .notASCII: | ||
| return false | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will be vectorized.