Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .dictionaries
102 changes: 81 additions & 21 deletions DAWGWizard/DAWGBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@

import Foundation

/// Constants describing the binary layout of a version 2 DAWG file.
enum DAWGFormat {
    /// File signature; written first as a little-endian `UInt32`.
    static let magic: UInt32 = 0x4757_4453
    /// Format revision written directly after the signature.
    static let version: UInt32 = 2
    /// Header size in bytes: six `UInt32` fields.
    static let headerSize = 6 * MemoryLayout<UInt32>.size
    /// Node record size in bytes: one `UInt32` followed by two `UInt16` fields.
    static let nodeSize = MemoryLayout<UInt32>.size + 2 * MemoryLayout<UInt16>.size
    /// Edge record size in bytes: a `UInt16` key followed by a `UInt32` target.
    static let edgeSize = MemoryLayout<UInt16>.size + MemoryLayout<UInt32>.size
}

struct DAWGBuilder {
private var nodes = [BuildNode()]
private var uncheckedEdges = [UncheckedEdge]()
Expand All @@ -28,25 +20,34 @@ struct DAWGBuilder {

func data() throws -> Data {
let compactDAWG = try compact()
let alphabet = try alphabet(for: compactDAWG)
let alphabetByteCount = alphabet.count * MemoryLayout<UInt16>.size
var data = Data()
data.reserveCapacity(DAWGFormat.headerSize + compactDAWG.nodes.count * DAWGFormat.nodeSize + compactDAWG.edges.count * DAWGFormat.edgeSize)
data.reserveCapacity(DAWGFormat.headerSize + alphabetByteCount + compactDAWG.nodes.count * DAWGFormat.nodeSize + compactDAWG.edges.count * DAWGFormat.edgeSize)

data.appendLittleEndianUInt32(DAWGFormat.magic)
data.appendLittleEndianUInt32(DAWGFormat.version)
data.appendLittleEndianUInt32(UInt32(wordCount))
data.appendLittleEndianUInt32(UInt32(compactDAWG.nodes.count))
data.appendLittleEndianUInt32(UInt32(compactDAWG.edges.count))
data.appendLittleEndianUInt32(0)
data.appendLittleEndianUInt32(UInt32(alphabet.count))

compactDAWG.nodes.forEach { node in
alphabet.keys.forEach { data.appendLittleEndianUInt16($0) }

for node in compactDAWG.nodes {
guard node.edgeCount <= DAWGFormat.packedEdgeCountMask else {
throw DAWGBuilderError.tooManyOutgoingEdgesForPackedCount
}
data.appendLittleEndianUInt32(node.firstEdge)
data.appendLittleEndianUInt16(node.edgeCount)
data.appendLittleEndianUInt16(node.isWord ? 1 : 0)
data.appendLittleEndianUInt16(node.packedEdgeCount)
}

compactDAWG.edges.forEach { edge in
data.appendLittleEndianUInt16(edge.key)
data.appendLittleEndianUInt32(edge.target)
for edge in compactDAWG.edges {
guard edge.target <= UInt32.max24 else {
throw DAWGBuilderError.tooManyNodesForUInt24
}
data.appendUInt8(alphabet.indexByKey[edge.key]!)
data.appendLittleEndianUInt24(edge.target)
}

return data
Expand Down Expand Up @@ -126,7 +127,7 @@ struct DAWGBuilder {
for buildIndex in orderedNodes {
let firstEdge = UInt32(compactEdges.count)
guard nodes[buildIndex].edges.count <= Int(UInt16.max) else {
throw DAWGBuilderError.tooManyOutgoingEdges
throw DAWGBuilderError.tooManyOutgoingEdgesForUInt16
}

for edge in nodes[buildIndex].edges {
Expand All @@ -145,18 +146,36 @@ struct DAWGBuilder {

return CompactDAWG(nodes: compactNodes, edges: compactEdges)
}

/// Collects the distinct edge keys of `compactDAWG`, in ascending order, into an `Alphabet`.
///
/// - Parameter compactDAWG: The compacted graph whose edge keys are gathered.
/// - Returns: An `Alphabet` built from the sorted, de-duplicated keys.
/// - Throws: `DAWGBuilderError.tooManyAlphabetScalarsForUInt8` when more than 256 distinct keys exist.
private func alphabet(for compactDAWG: CompactDAWG) throws -> Alphabet {
    // Alphabet positions are stored as `UInt8`, so only 0...255 are addressable.
    let addressableKeyCount = Int(UInt8.max) + 1

    let distinctKeys = compactDAWG.edges.map { $0.key }.uniqued()
    let sortedKeys = distinctKeys.sorted()

    if sortedKeys.count > addressableKeyCount {
        throw DAWGBuilderError.tooManyAlphabetScalarsForUInt8(sortedKeys.count)
    }
    return Alphabet(keys: sortedKeys)
}
}

/// Failures reported by `DAWGBuilder` when a graph exceeds a limit of the binary format.
///
/// NOTE(review): this listing looks like a rendered diff in which removed and added lines are
/// interleaved (`tooManyOutgoingEdges` beside `tooManyOutgoingEdgesForUInt16`, and two message
/// strings under `.unsupportedScalar`) — confirm against the real file before relying on it.
enum DAWGBuilderError: Error, CustomStringConvertible {
case tooManyOutgoingEdges
/// Carries the offending scalar value.
case unsupportedScalar(UInt32)
/// A node has more than `UInt16.max` outgoing edges.
case tooManyOutgoingEdgesForUInt16
/// An edge target is larger than `UInt32.max24`.
case tooManyNodesForUInt24
/// More than 256 distinct edge keys were found; carries the number found.
case tooManyAlphabetScalarsForUInt8(Int)
/// A node's edge count exceeds `DAWGFormat.packedEdgeCountMask`.
case tooManyOutgoingEdgesForPackedCount

/// Human-readable explanation naming the limit that was exceeded.
var description: String {
switch self {
case .tooManyOutgoingEdges:
"DAWG v2 supports at most \(UInt16.max) outgoing edges per node."
case let .unsupportedScalar(scalar):
"DAWG v2 supports Unicode scalars up to \(UInt16.max); unsupported scalar: \(scalar)."
"DAWG supports Unicode scalars up to \(UInt16.max); unsupported scalar: \(scalar)."
case .tooManyOutgoingEdgesForUInt16:
"DAWG supports at most \(UInt16.max) outgoing edges per node."
case .tooManyNodesForUInt24:
// Targets are indices 0...max24, hence max24 + 1 addressable nodes.
"DAWG supports at most \(UInt32.max24 + 1) nodes."
case let .tooManyAlphabetScalarsForUInt8(count):
"DAWG supports at most \(Int(UInt8.max) + 1) distinct scalars; found \(count)."
case .tooManyOutgoingEdgesForPackedCount:
"DAWG supports at most \(DAWGFormat.packedEdgeCountMask) outgoing edges per node."
}
}
}
Expand Down Expand Up @@ -191,19 +210,60 @@ private struct CompactNode {
let firstEdge: UInt32
let edgeCount: UInt16
let isWord: Bool

/// `edgeCount` with `DAWGFormat.wordFlag` OR-ed in when `isWord` is true.
var packedEdgeCount: UInt16 {
    let wordBit: UInt16 = isWord ? DAWGFormat.wordFlag : 0
    return edgeCount | wordBit
}
}

/// One edge of the compacted graph: a key and the value identifying its target.
private struct CompactEdge {
/// Edge label.
let key: UInt16
/// Target value written to the edge table for this edge.
let target: UInt32
}

/// An ordered list of edge keys together with the reverse lookup from key to position.
private struct Alphabet {
    /// Keys in table order.
    let keys: [UInt16]
    /// Position of each key within `keys`.
    let indexByKey: [UInt16: UInt8]

    /// Number of keys in the table.
    var count: Int { keys.count }

    /// Builds the table and its reverse lookup.
    ///
    /// - Parameter keys: Keys in table order. Traps if a position does not fit in `UInt8`
    ///   or if a key occurs more than once.
    init(keys: [UInt16]) {
        let positions = keys.indices.map { UInt8($0) }
        self.keys = keys
        indexByKey = Dictionary(uniqueKeysWithValues: zip(keys, positions))
    }
}

/// Helpers for appending fixed-width integers in little-endian byte order.
private extension Data {
    /// Appends a single byte.
    mutating func appendUInt8(_ value: UInt8) {
        append(value)
    }

    /// Appends `value` as two bytes, least significant byte first.
    mutating func appendLittleEndianUInt16(_ value: UInt16) {
        Swift.withUnsafeBytes(of: value.littleEndian) { append(contentsOf: $0) }
    }

    /// Appends the low 24 bits of `value` as three bytes, least significant byte first.
    ///
    /// - Precondition: `value` fits in 24 bits. The top byte is never written, so a larger
    ///   value would be stored as a different number; debug builds trap instead.
    mutating func appendLittleEndianUInt24(_ value: UInt32) {
        assert(value >> 24 == 0, "Value \(value) does not fit in 24 bits.")
        append(contentsOf: [
            UInt8(truncatingIfNeeded: value),
            UInt8(truncatingIfNeeded: value >> 8),
            UInt8(truncatingIfNeeded: value >> 16),
        ])
    }

    /// Appends `value` as four bytes, least significant byte first.
    mutating func appendLittleEndianUInt32(_ value: UInt32) {
        Swift.withUnsafeBytes(of: value.littleEndian) { append(contentsOf: $0) }
    }
}

private extension Sequence where Element: Hashable {
    /// Returns the elements with repeated values removed, keeping each value's first occurrence
    /// in its original position.
    func uniqued() -> [Element] {
        var encountered = Set<Element>()
        var firstOccurrences = [Element]()
        for element in self where encountered.insert(element).inserted {
            firstOccurrences.append(element)
        }
        return firstOccurrences
    }
}

private extension UInt32 {
    /// Largest value representable in 24 bits.
    static let max24: Self = (1 << 24) - 1
}
15 changes: 15 additions & 0 deletions DAWGWizard/DAWGFormat.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//
// DAWGWizard
// Copyright © 2026 Piotr Sochalewski.
// Licensed under the Apache License, Version 2.0.
//

/// Constants describing the binary layout of a version 3 DAWG file.
enum DAWGFormat {
    /// File signature stored in the header as a `UInt32`.
    static let magic: UInt32 = 0x4757_4453
    /// Format revision stored in the header.
    static let version: UInt32 = 3
    /// Header size in bytes: six `UInt32` fields.
    static let headerSize = 6 * MemoryLayout<UInt32>.size
    /// Node record size in bytes: a `UInt32` first-edge index plus a packed `UInt16` edge count.
    static let nodeSize = MemoryLayout<UInt32>.size + MemoryLayout<UInt16>.size
    /// Edge record size in bytes: a `UInt8` alphabet index plus a 3-byte target.
    static let edgeSize = MemoryLayout<UInt8>.size + 3
    /// High bit of the packed edge count; marks a word-terminating node.
    static let wordFlag: UInt16 = 1 << 15
    /// Bits of the packed edge count that hold the count itself (everything except `wordFlag`).
    static let packedEdgeCountMask: UInt16 = ~wordFlag
}
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,18 @@ The generator in `DAWGWizard/` works broadly like this:
4. Minimize completed branches by reusing previously seen equivalent nodes.
5. Write a compact little-endian binary file with:
- a header containing magic/version/counts,
- a small alphabet table,
- a node table,
- an edge table.

The app loads these generated `.dawg` files with memory-mapped `Data` when possible. Validation walks graph edges for an exact word. Tile search performs a depth-first traversal while consuming available letters. Pattern search treats `?` as a single-character wildcard.
The app loads generated `.dawg` files with memory-mapped `Data` when possible. Validation walks graph edges for an exact word. Tile search performs a depth-first traversal while consuming available letters. Pattern search treats `?` as a single-character wildcard.

At a high level, the DAWG v3 binary layout is:

- header: magic, version, word count, node count, edge count, and alphabet count,
- alphabet table: the distinct `UInt16` Unicode scalar values used by edge labels,
- node table: each node stores `firstEdge` as `UInt32` and a packed `UInt16` edge count, with the high bit reserved as the word-terminating flag,
- edge table: each edge stores a `UInt8` alphabet index and a 24-bit little-endian target node index.

The binary format constants are defined in `DAWGWizard/DAWGFormat.swift`; files are written by `DAWGWizard/DAWGBuilder.swift` and read by `Scrabbdict/Helpers/DAWG.swift`.

Expand Down
12 changes: 12 additions & 0 deletions Scrabbdict.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
isa = PBXFileSystemSynchronizedBuildFileExceptionSet;
membershipExceptions = (
DAWGBuilder.swift,
DAWGFormat.swift,
main.swift,
);
target = AE62F7202024EA8C00A57DAE /* DAWGWizard */;
Expand All @@ -66,6 +67,15 @@
);
target = AE78C2771FFD267600874685 /* ScrabbdictTests */;
};
AECC64D22FCD10000032633A /* Exceptions for "DAWGWizard" folder in "Scrabbdict" target */ = {
isa = PBXFileSystemSynchronizedBuildFileExceptionSet;
membershipExceptions = (
DAWGBuilder.swift,
Files,
main.swift,
);
target = AEF8AC811EBA02AA005996D5 /* Scrabbdict */;
};
/* End PBXFileSystemSynchronizedBuildFileExceptionSet section */

/* Begin PBXFileSystemSynchronizedRootGroup section */
Expand All @@ -77,6 +87,7 @@
AECC64C82FB9CCA20032633A /* DAWGWizard */ = {
isa = PBXFileSystemSynchronizedRootGroup;
exceptions = (
AECC64D22FCD10000032633A /* Exceptions for "DAWGWizard" folder in "Scrabbdict" target */,
AECC64D12FB9D6A10032633A /* Exceptions for "DAWGWizard" folder in "ScrabbdictTests" target */,
AECC64CA2FB9CCA30032633A /* Exceptions for "DAWGWizard" folder in "DAWGWizard" target */,
);
Expand Down Expand Up @@ -200,6 +211,7 @@
);
fileSystemSynchronizedGroups = (
AECC64A12FB9CC920032633A /* Scrabbdict */,
AECC64C82FB9CCA20032633A /* DAWGWizard */,
);
name = Scrabbdict;
productName = Scrabbdict;
Expand Down
Loading