diff --git a/benchmarks/concurrent/01_socket_throughput/bench.cht b/benchmarks/concurrent/01_socket_throughput/bench.cht index d053e2be9..b3c475194 100644 --- a/benchmarks/concurrent/01_socket_throughput/bench.cht +++ b/benchmarks/concurrent/01_socket_throughput/bench.cht @@ -28,36 +28,36 @@ # `client` (reader fd) is owned by main; its defer fires on RETURN. FN main() RETURNS Void -> - server = TCPServer::listen(14537); - conn = TCPClient::connect("127.0.0.1", 14537); - client = accept(server); + server = TCPServer::listen(14_537); + conn = TCPClient::connect("127.0.0.1", 14_537); + client = server.accept(); - # Writer fiber: sends 100 000 × 256-byte messages. - # `conn` is an i32, captured by value into the BG closure. - # The fiber runs concurrently with the reader loop below. - msg = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; - w: ~Void = BG { - MUTABLE wi = 0; - WHILE wi < 100000 DO - tcpWrite(conn, msg); - wi += 1; - END - }; - - # Reader loop: reads until all 25 600 000 bytes received. - # TCP coalesces writes so each tcpRead may return more than 256 bytes. - # Timer covers only the read loop (same scope as C/Rust BENCH_RESULT). - t0 = timestampMs(); - MUTABLE total_bytes = 0; - WHILE total_bytes < 25600000 DO - data = tcpRead(client); - total_bytes = total_bytes + data.length(); + # Writer fiber: sends 100 000 × 256-byte messages. + # `conn` is an i32, captured by value into the BG closure. + # The fiber runs concurrently with the reader loop below. 
+ msg = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; + w: ~Void = BG { + MUTABLE wi = 0; + WHILE wi < 100_000 DO + conn.tcpWrite(msg); + wi += 1; END - elapsed = timestampMs() - t0; + }; + + # Reader loop: reads until all 25 600 000 bytes received. + # TCP coalesces writes so each tcpRead may return more than 256 bytes. + # Timer covers only the read loop (same scope as C/Rust BENCH_RESULT). + t0 = timestampMs(); + MUTABLE total_bytes = 0; + WHILE total_bytes < 25_600_000 DO + data = client.tcpRead(); + total_bytes = total_bytes + data.length(); + END + elapsed = timestampMs() - t0; - print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("BENCH_RESULT: ${elapsed.toString()} ms"); - # Wait for writer to finish, then let defers close both fds. - NEXT w; - RETURN; + # Wait for writer to finish, then let defers close both fds. + NEXT w; + RETURN; END diff --git a/benchmarks/concurrent/02_concurrent_search/bench.cht b/benchmarks/concurrent/02_concurrent_search/bench.cht index 9dd2116d4..ba7d419c1 100644 --- a/benchmarks/concurrent/02_concurrent_search/bench.cht +++ b/benchmarks/concurrent/02_concurrent_search/bench.cht @@ -21,74 +21,81 @@ # zig build-exe zig/bench.zig zig/switch.S zig/onRoot.S \ # --name bench_clear -O ReleaseFast -lc -STRUCT SearchResult { file_idx: Int64, count: Int64 } +STRUCT SearchResult { + file_idx: Int64, + count: Int64 +} # Search a batch of files concurrently. Returns heap-promoted count list. # Isolating the promise list here prevents per-iteration frame rewind in main. 
FN search_batch!(files: String[], start: Int64, end: Int64, data_dir: String, needle: String) RETURNS !Int64[] -> - MUTABLE futures: ~Int64[]@list = []; - FOR i IN (start ..< end) DO - filepath = "${data_dir}/${files[i]}"; - futures.append(BG { - content = readFile(filepath); - countOccurrences(content, needle); - }); - END - counts: Int64[]@list = NEXT futures; - RETURN counts; + MUTABLE futures: ~Int64[]@list = []; + FOR i IN (start ..< end) DO + filepath = "${data_dir}/${files[i]}"; + futures.append( + BG { + content = readFile(filepath); + content.countOccurrences(needle); + } + ); + END + counts: Int64[]@list = NEXT futures; + RETURN counts; END FN main() RETURNS Void -> - t0 = timestampMs(); - needle = "the"; - data_dir = "benchmarks/10_concurrent_search/data"; - - # List all files in the data directory (filenames only, not full paths). - files = listDir(data_dir); - n: Int64 = files.length(); - - # Process files in batches to bound io_uring SQ pressure. - # io_uring SQ capacity = 256; BATCH_SIZE = 128 keeps headroom. - batch_size: Int64 = 128; - MUTABLE results: SearchResult[]@list = []; - MUTABLE batch_start: Int64 = 0; + t0 = timestampMs(); + needle = "the"; + data_dir = "benchmarks/10_concurrent_search/data"; - WHILE batch_start < n DO - MUTABLE batch_end: Int64 = batch_start + batch_size; - IF batch_end > n THEN batch_end = n; END + # List all files in the data directory (filenames only, not full paths). + files = data_dir.listDir(); + n = files.length(); - counts = search_batch!(files, batch_start, batch_end, data_dir, needle); - FOR k IN (0 ..< counts.length()) DO - results.append(SearchResult{ file_idx: batch_start + k, count: counts[k] }); - END + # Process files in batches to bound io_uring SQ pressure. + # io_uring SQ capacity = 256; BATCH_SIZE = 128 keeps headroom. 
+ batch_size = 128; + MUTABLE results: SearchResult[]@list = []; + MUTABLE batch_start = 0; - batch_start = batch_end; + WHILE batch_start < n DO + MUTABLE batch_end = batch_start + batch_size; + IF batch_end > n THEN + batch_end = n; END - # Selection sort: find top-10 by swapping into the first 10 positions. - FOR j IN (0 ..< 10) DO - MUTABLE best: Int64 = j; - FOR m IN (j + 1 ..< n) DO - IF results[m].count > results[best].count THEN - best = m; - END - END - IF best != j THEN - tmp = results[j]; - results[j] = results[best]; - results[best] = tmp; - END + counts = search_batch!(files, batch_start, batch_end, data_dir, needle); + FOR k IN (0 ..< counts.length()) DO + results.append(SearchResult{ file_idx: batch_start + k, count: counts[k] }); END - # Print top 10. - print("Top 10 files by '", needle, "' count:"); - FOR p IN (0_i64 ..< 10) DO - r = results[p]; - print(" ", files[r.file_idx], " ", r.count); + batch_start = batch_end; + END + + # Selection sort: find top-10 by swapping into the first 10 positions. + FOR j IN (0 ..< 10) DO + MUTABLE best = j; + FOR m IN (j + 1 ..< n) DO + IF results[m].count > results[best].count THEN + best = m; + END + END + IF best != j THEN + tmp = results[j]; + results[j] = results[best]; + results[best] = tmp; END + END + + # Print top 10. 
+ print("Top 10 files by '", needle, "' count:"); + FOR p IN (0 ..< 10) DO + r = results[p]; + print(" ", files[r.file_idx], " ", r.count); + END - elapsed = timestampMs() - t0; - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Time: ${elapsed.toString()} ms"); - RETURN; + elapsed = timestampMs() - t0; + print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("Time: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/concurrent/03_atomic_contention/bench.cht b/benchmarks/concurrent/03_atomic_contention/bench.cht index 43d8be2a7..41e4f3506 100644 --- a/benchmarks/concurrent/03_atomic_contention/bench.cht +++ b/benchmarks/concurrent/03_atomic_contention/bench.cht @@ -12,24 +12,28 @@ # Compare to Go's atomic.AddInt64 which bounces the cache line across # cores under M:N scheduling (~15ns/op on 2 cores, worse on more). -STRUCT Counter { value: Int64 } +STRUCT Counter { + value: Int64 +} FN main() RETURNS Void -> - MUTABLE c = Counter{ value: 0 } @local; + MUTABLE c = Counter{ value: 0 } @local; - t0 = timestampMs(); - MUTABLE futures: ~Void[]@list = []; - FOR i IN (0_i64 ..< 1024) DO - futures.append(BG { - FOR j IN (0_i64 ..< 10000) -> c.value += 1; - }); - END + t0 = timestampMs(); + MUTABLE futures: ~Void[]@list = []; + FOR i IN (0 ..< 1024) DO + futures.append( + BG { + FOR j IN (0 ..< 10_000) -> c.value += 1; + } + ); + END - FOR k IN (0_i64 ..< 1024) -> NEXT futures[k]; - elapsed = timestampMs() - t0; + FOR k IN (0 ..< 1024) -> NEXT futures[k]; + elapsed = timestampMs() - t0; - print("Counter: ${c.value.toString()}"); - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Time: ${elapsed.toString()} ms"); - RETURN; + print("Counter: ${c.value.toString()}"); + print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("Time: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/concurrent/04_fanout_fanin/bench.cht b/benchmarks/concurrent/04_fanout_fanin/bench.cht index 6a88abd1e..131860550 100644 --- 
a/benchmarks/concurrent/04_fanout_fanin/bench.cht +++ b/benchmarks/concurrent/04_fanout_fanin/bench.cht @@ -11,34 +11,36 @@ # Compare to Go (10K goroutines + WaitGroup) and Rust (10K Tokio tasks # + JoinSet). -STRUCT Work { seed: Int64 } +STRUCT Work { + seed: Int64 +} FN doWork(seed: Int64) RETURNS Int64 -> - MUTABLE x: Int64 = seed; - FOR i IN (0_i64 ..< 100000) -> x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; - RETURN x; + MUTABLE x = seed; + FOR i IN (0 ..< 100_000) -> x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + RETURN x; END FN main() RETURNS Void -> - t0 = timestampMs(); - # Build work items - MUTABLE items: Work[] = []; - FOR i IN (0_i64 ..< 10000) -> items.append(Work{ seed: i }); + t0 = timestampMs(); + # Build work items + MUTABLE items: Work[] = []; + FOR i IN (0 ..< 10_000) -> items.append(Work{ seed: i }); - # Fan-out: scale workers with core count, distributed across schedulers - results = items |> CONCURRENT(parallel: TRUE) SELECT doWork(_.seed); + # Fan-out: scale workers with core count, distributed across schedulers + results = items |> CONCURRENT(parallel: TRUE) SELECT doWork(_.seed); - # Fan-in: sum results - MUTABLE total: Int64 = 0; - FOR j IN (0_i64 ..< 10000) -> total = total %+ results[j]; + # Fan-in: sum results + MUTABLE total = 0; + FOR j IN (0 ..< 10_000) -> total = total %+ results[j]; - checksum = total MOD 1000000000; - print("Checksum:", checksum); - print("Workers: 10000"); - print("Iterations: 100000"); + checksum = total MOD 1_000_000_000; + print("Checksum:", checksum); + print("Workers: 10000"); + print("Iterations: 100000"); - elapsed = timestampMs() - t0; - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Time: ${elapsed.toString()} ms"); - RETURN; + elapsed = timestampMs() - t0; + print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("Time: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/concurrent/05_backpressure/bench.cht 
b/benchmarks/concurrent/05_backpressure/bench.cht index 834eca5f2..1ba9533af 100644 --- a/benchmarks/concurrent/05_backpressure/bench.cht +++ b/benchmarks/concurrent/05_backpressure/bench.cht @@ -8,42 +8,44 @@ # @shared:locked struct — no result list is ever materialized. # Peak memory: O(capacity), not O(N). -STRUCT Acc { value: Int64 } +STRUCT Acc { + value: Int64 +} FN processItem(val: Int64) RETURNS Int64 -> - MUTABLE x: Int64 = val; - FOR i IN (0_i64 ..< 5000) -> x = x %* 6364136223846793005 %+ 1442695040888963407; - RETURN x; + MUTABLE x = val; + FOR i IN (0 ..< 5000) -> x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + RETURN x; END FN main() RETURNS Void -> - acc = Acc{ value: 0 } @shared:locked; - t0 = timestampMs(); + acc = Acc{ value: 0 } @shared:locked; + t0 = timestampMs(); - gen: ~?Int64[] = BG STREAM { - MUTABLE i: Int64 = 0; - WHILE i < 100000 DO - YIELD i; - i = i + 1; - END - }; + gen: ~?Int64[] = BG STREAM { + MUTABLE i = 0; + WHILE i < 100_000 DO + YIELD i; + i = i + 1; + END + }; - gen |> CONCURRENT(capacity: 64) EACH { - h = processItem(_); - WITH EXCLUSIVE acc AS a { - a.value = a.value %+ h; - } - }; - - WITH acc AS a { - checksum = a.value MOD 1000000000; - print("Checksum:", checksum); + gen |> CONCURRENT(capacity: 64) EACH { + h = processItem(_); + WITH EXCLUSIVE acc AS a { + a.value = a.value %+ h; } + }; + + WITH acc AS a { + checksum = a.value MOD 1_000_000_000; + print("Checksum:", checksum); + } - elapsed = timestampMs() - t0; - print("Items: 100000"); - print("Channel capacity: 64"); - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Time: ${elapsed.toString()} ms"); - RETURN; + elapsed = timestampMs() - t0; + print("Items: 100000"); + print("Channel capacity: 64"); + print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("Time: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/concurrent/06_dynamic_spawn/bench.cht b/benchmarks/concurrent/06_dynamic_spawn/bench.cht index 
6fc7bbabf..8e0efed9a 100644 --- a/benchmarks/concurrent/06_dynamic_spawn/bench.cht +++ b/benchmarks/concurrent/06_dynamic_spawn/bench.cht @@ -9,28 +9,28 @@ # CONCURRENT(workers: N) which avoids per-item spawning. FN doWork(seed: Int64) RETURNS Int64 -> - MUTABLE x: Int64 = seed; - FOR i IN (0_i64 ..< 10000) -> x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; - RETURN x; + MUTABLE x = seed; + FOR i IN (0 ..< 10_000) -> x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + RETURN x; END FN main() RETURNS Void -> - t0 = timestampMs(); - MUTABLE futures: ~Int64[]@list = []; - FOR i IN (0_i64 ..< 100000) -> futures.append(BG { doWork(i); }); + t0 = timestampMs(); + MUTABLE futures: ~Int64[]@list = []; + FOR i IN (0 ..< 100_000) -> futures.append(BG {doWork(i);}); - MUTABLE total: Int64 = 0; - FOR j IN (0_i64 ..< 100000) DO - result = NEXT futures[j]; - total = total %+ result; - END - elapsed = timestampMs() - t0; + MUTABLE total = 0; + FOR j IN (0 ..< 100_000) DO + result = NEXT futures[j]; + total = total %+ result; + END + elapsed = timestampMs() - t0; - checksum = total MOD 1000000000; - print("Checksum: ${checksum.toString()}"); - print("Tasks: 100000"); - print("Iterations: 10000"); - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Time: ${elapsed.toString()} ms"); - RETURN; + checksum = total MOD 1_000_000_000; + print("Checksum: ${checksum.toString()}"); + print("Tasks: 100000"); + print("Iterations: 10000"); + print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("Time: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/concurrent/07_stream_merge/bench.cht b/benchmarks/concurrent/07_stream_merge/bench.cht index 1fb841049..299eddbba 100644 --- a/benchmarks/concurrent/07_stream_merge/bench.cht +++ b/benchmarks/concurrent/07_stream_merge/bench.cht @@ -10,36 +10,60 @@ # Zero channel overhead, zero lock contention. 
FN main() RETURNS Void -> - t0 = timestampMs(); - # Create 8 producer streams (infinite — consumer controls iteration count) - s0: ~Int64[INF] = BG STREAM { MUTABLE x: Int64 = 1; WHILE TRUE DO x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; YIELD x; END }; - s1: ~Int64[INF] = BG STREAM { MUTABLE x: Int64 = 2; WHILE TRUE DO x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; YIELD x; END }; - s2: ~Int64[INF] = BG STREAM { MUTABLE x: Int64 = 3; WHILE TRUE DO x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; YIELD x; END }; - s3: ~Int64[INF] = BG STREAM { MUTABLE x: Int64 = 4; WHILE TRUE DO x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; YIELD x; END }; - s4: ~Int64[INF] = BG STREAM { MUTABLE x: Int64 = 5; WHILE TRUE DO x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; YIELD x; END }; - s5: ~Int64[INF] = BG STREAM { MUTABLE x: Int64 = 6; WHILE TRUE DO x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; YIELD x; END }; - s6: ~Int64[INF] = BG STREAM { MUTABLE x: Int64 = 7; WHILE TRUE DO x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; YIELD x; END }; - s7: ~Int64[INF] = BG STREAM { MUTABLE x: Int64 = 8; WHILE TRUE DO x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; YIELD x; END }; + t0 = timestampMs(); + # Create 8 producer streams (infinite — consumer controls iteration count) + s0: ~Int64[INF] = BG STREAM {MUTABLE x = 1; WHILE TRUE DO + x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + YIELD x; + END}; + s1: ~Int64[INF] = BG STREAM {MUTABLE x = 2; WHILE TRUE DO + x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + YIELD x; + END}; + s2: ~Int64[INF] = BG STREAM {MUTABLE x = 3; WHILE TRUE DO + x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + YIELD x; + END}; + s3: ~Int64[INF] = BG STREAM {MUTABLE x = 4; WHILE TRUE DO + x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + YIELD x; + END}; + s4: ~Int64[INF] = BG STREAM 
{MUTABLE x = 5; WHILE TRUE DO + x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + YIELD x; + END}; + s5: ~Int64[INF] = BG STREAM {MUTABLE x = 6; WHILE TRUE DO + x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + YIELD x; + END}; + s6: ~Int64[INF] = BG STREAM {MUTABLE x = 7; WHILE TRUE DO + x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + YIELD x; + END}; + s7: ~Int64[INF] = BG STREAM {MUTABLE x = 8; WHILE TRUE DO + x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + YIELD x; + END}; - # Consumer: round-robin merge from all 8 streams - MUTABLE total: Int64 = 0; - MUTABLE j: Int64 = 0; - WHILE j < 100000 DO - v0: Int64 = NEXT s0; v1: Int64 = NEXT s1; - v2: Int64 = NEXT s2; v3: Int64 = NEXT s3; - v4: Int64 = NEXT s4; v5: Int64 = NEXT s5; - v6: Int64 = NEXT s6; v7: Int64 = NEXT s7; - total = total %+ v0 %+ v1 %+ v2 %+ v3 %+ v4 %+ v5 %+ v6 %+ v7; - j += 1; - END + # Consumer: round-robin merge from all 8 streams + MUTABLE total = 0; + MUTABLE j = 0; + WHILE j < 100_000 DO + v0 = NEXT s0; v1 = NEXT s1; + v2 = NEXT s2; v3 = NEXT s3; + v4 = NEXT s4; v5 = NEXT s5; + v6 = NEXT s6; v7 = NEXT s7; + total = total %+ v0 %+ v1 %+ v2 %+ v3 %+ v4 %+ v5 %+ v6 %+ v7; + j += 1; + END - checksum = total MOD 1000000000; - print("Checksum:", checksum); - print("Producers: 8"); - print("Items per producer: 100000"); + checksum = total MOD 1_000_000_000; + print("Checksum:", checksum); + print("Producers: 8"); + print("Items per producer: 100000"); - elapsed = timestampMs() - t0; - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Time: ${elapsed.toString()} ms"); - RETURN; + elapsed = timestampMs() - t0; + print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("Time: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/concurrent/08_pubsub/bench.cht b/benchmarks/concurrent/08_pubsub/bench.cht index 40b63c4f9..cde66b802 100644 --- a/benchmarks/concurrent/08_pubsub/bench.cht +++ 
b/benchmarks/concurrent/08_pubsub/bench.cht @@ -9,46 +9,51 @@ # Each subscriber CLONEs an independent cursor — zero message copying. # Compare to Go/Rust: publisher sends to 64 separate channels (N copies). -STRUCT Msg { seed: Int64 } +STRUCT Msg { + seed: Int64 +} FN processMessage(seed: Int64) RETURNS Int64 -> - MUTABLE x: Int64 = seed; - FOR i IN (0_i64 ..< 2000) -> x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; - RETURN x; + MUTABLE x = seed; + FOR i IN (0 ..< 2000) -> x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + RETURN x; END FN main() RETURNS Void -> - t0 = timestampMs(); + t0 = timestampMs(); - msgs: ~?Msg[] @split = BG STREAM { - FOR i IN (0 ..< 10000) -> YIELD Msg{ seed: i }; - }; + msgs: ~?Msg[]@split = BG STREAM { + FOR i IN (0 ..< 10_000) -> YIELD Msg{ seed: i }; + }; - MUTABLE futures: ~Int64[]@list = []; - FOR i IN (0 ..< 64) DO - subscriber_msgs: ~?Msg[] @split = CLONE msgs; - futures.append(BG { - @parallel -> - MUTABLE total: Int64 = 0; + MUTABLE futures: ~Int64[]@list = []; + FOR i IN (0 ..< 64) DO + subscriber_msgs: ~?Msg[]@split = CLONE msgs; + futures + .append( + BG { + @parallel -> + MUTABLE total = 0; WHILE NEXT subscriber_msgs AS m DO - total = total %+ processMessage(m.seed); + total = total %+ processMessage(m.seed); END total; - }); - END + } + ); + END - MUTABLE total: Int64 = 0; - FOR j IN (0 ..< 64) DO - result = NEXT futures[j]; - total = total %+ result; - END - elapsed = timestampMs() - t0; + MUTABLE total = 0; + FOR j IN (0 ..< 64) DO + result = NEXT futures[j]; + total = total %+ result; + END + elapsed = timestampMs() - t0; - checksum = total MOD 1000000000; - print("Checksum: ${checksum.toString()}"); - print("Messages: 10000"); - print("Subscribers: 64"); - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Time: ${elapsed.toString()} ms"); - RETURN; + checksum = total MOD 1_000_000_000; + print("Checksum: ${checksum.toString()}"); + print("Messages: 10000"); + print("Subscribers: 
64"); + print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("Time: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/concurrent/09_kvstore/bench.cht b/benchmarks/concurrent/09_kvstore/bench.cht index 24a03b103..f4768d9c7 100644 --- a/benchmarks/concurrent/09_kvstore/bench.cht +++ b/benchmarks/concurrent/09_kvstore/bench.cht @@ -28,37 +28,47 @@ # ========================================================================= FN hFunc(x: Float64, s: Float64) RETURNS Float64 -> - RETURN exp(0.0 - s * log(x)); + RETURN exp(0.0 - s * log(x)); END FN hInt(x: Float64, s: Float64) RETURNS Float64 -> - t = 1.0 - s; - IF abs(t) > 0.00000001 -> RETURN (x ** t - 1.0) / t; - RETURN log(x); + t = 1.0 - s; + IF abs(t) > 0.000_000_01 -> RETURN (x ** t - 1.0) / t; + RETURN log(x); END FN hIntInv(x: Float64, s: Float64) RETURNS Float64 -> - t = 1.0 - s; - IF abs(t) > 0.00000001 -> RETURN (t * x + 1.0) ** (1.0 / t); - RETURN exp(x); + t = 1.0 - s; + IF abs(t) > 0.000_000_01 -> RETURN (t * x + 1.0) ** (1.0 / t); + RETURN exp(x); END FN zipfNext!(MUTABLE state: Int64, n: Int64, s: Float64, hIntegral: Float64, hFraction: Float64) RETURNS Int64 -> - hIntHalf = hInt(0.5, s); - WHILE TRUE DO - state = state %* 6364136223846793005_i64 %+ 1442695040888963407_i64; - MUTABLE uBits = state; - IF uBits < 0 THEN uBits = 0 - uBits; END - MUTABLE u = (uBits MOD 1000000000) / 1000000000.0; - u = hIntegral + u * (hIntHalf - hIntegral); - x = hIntInv(u, s); - MUTABLE k = toInt(x + 0.5); - IF k < 1 THEN k = 1; END - IF k > n THEN k = n; END - IF k - x <= hFraction THEN RETURN k - 1; END - IF u >= hInt(k + 0.5, s) - hFunc(k, s) THEN RETURN k - 1; END + hIntHalf = hInt(0.5, s); + WHILE TRUE DO + state = state %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + MUTABLE uBits = state; + IF uBits < 0 THEN + uBits = 0 - uBits; END - RETURN 0; + MUTABLE u = (uBits MOD 1_000_000_000) / 1_000_000_000.0; + u = hIntegral + u * (hIntHalf - hIntegral); + x = hIntInv(u, s); + MUTABLE k = (x + 
0.5).toInt(); + IF k < 1 THEN + k = 1; + END + IF k > n THEN + k = n; + END + IF k - x <= hFraction THEN + RETURN k - 1; + END + IF u >= hInt(k + 0.5, s) - hFunc(k, s) THEN + RETURN k - 1; + END + END + RETURN 0; END # ========================================================================= @@ -66,122 +76,147 @@ END # ========================================================================= FN main() RETURNS Void -> - MUTABLE map: HashMap@shared:sharded(128):locked = {}; - n = 1000000; - workers = threadCount(); - chunk = n / workers; - zipfSkew = 1.0; + MUTABLE map: HashMap @shared:sharded(128):locked = {}; + n = 1_000_000; + workers = threadCount(); + chunk = n / workers; + zipfSkew = 1.0; - # Workload 1: Uniform SET - t0 = timestampMs(); - MUTABLE set_futures: ~Void[]@list = []; - MUTABLE wi: Int64 = 0; - WHILE wi < workers DO - start = wi * chunk; - cnt = chunk; - set_futures.append(BG { @parallel -> + # Workload 1: Uniform SET + t0 = timestampMs(); + MUTABLE set_futures: ~Void[]@list = []; + MUTABLE wi = 0; + WHILE wi < workers DO + start = wi * chunk; + cnt = chunk; + set_futures + .append( + BG { + @parallel -> FOR idx IN (start ..< start + cnt) DO - map["key:${idx.toString()}"] = "value-${idx.toString()}"; + map["key:${idx.toString()}"] = "value-${idx.toString()}"; END - }); - wi += 1; - END - FOR si IN (0_i64 ..< workers) -> NEXT set_futures[si]; - setMs = timestampMs() - t0; - # Workload 2: Uniform GET - t1 = timestampMs(); - MUTABLE get_futures: ~Int64[]@list = []; - wi = 0; - WHILE wi < workers DO - start = wi * chunk; - cnt = chunk; - get_futures.append(BG { @parallel -> - MUTABLE hits: Int64 = 0; + } + ); + wi += 1; + END + FOR si IN (0 ..< workers) -> NEXT set_futures[si]; + setMs = timestampMs() - t0; + + # Workload 2: Uniform GET + t1 = timestampMs(); + MUTABLE get_futures: ~Int64[]@list = []; + wi = 0; + WHILE wi < workers DO + start = wi * chunk; + cnt = chunk; + get_futures + .append( + BG { + @parallel -> + MUTABLE hits = 0; FOR idx IN (start 
..< start + cnt) DO - got = map["key:${idx.toString()}"] OR ""; - IF got.length() > 0 THEN hits += 1; END + got = map["key:${idx.toString()}"] OR ""; + IF got.any?() THEN + hits += 1; + END END hits; - }); - wi += 1; - END - MUTABLE total_hits: Int64 = 0; - FOR gi IN (0_i64 ..< workers) -> total_hits += NEXT get_futures[gi]; - getMs = timestampMs() - t1; + } + ); + wi += 1; + END + MUTABLE total_hits = 0; + FOR gi IN (0 ..< workers) -> total_hits += NEXT get_futures[gi]; + getMs = timestampMs() - t1; - # Workload 3: Zipfian GET - hIntegral = hInt(n + 0.5, zipfSkew); - hFraction = hFunc(1.5, zipfSkew) - 1.0; - t2 = timestampMs(); - MUTABLE zipf_futures: ~Int64[]@list = []; - wi = 0; - WHILE wi < workers DO - seed = wi + 42; - cnt = chunk; - zipf_futures.append(BG { @parallel -> - MUTABLE hits: Int64 = 0; - MUTABLE state: Int64 = seed; - FOR idx IN (0_i64 ..< cnt) DO - k = zipfNext!(state, n, zipfSkew, hIntegral, hFraction); - state = state %* 6364136223846793005_i64 %+ 1442695040888963407_i64; - got = map["key:${k.toString()}"] OR ""; - IF got.length() > 0 THEN hits += 1; END + # Workload 3: Zipfian GET + hIntegral = hInt(n + 0.5, zipfSkew); + hFraction = hFunc(1.5, zipfSkew) - 1.0; + t2 = timestampMs(); + MUTABLE zipf_futures: ~Int64[]@list = []; + wi = 0; + WHILE wi < workers DO + seed = wi + 42; + cnt = chunk; + zipf_futures + .append( + BG { + @parallel -> + MUTABLE hits = 0; + MUTABLE state = seed; + FOR idx IN (0 ..< cnt) DO + k = zipfNext!(state, n, zipfSkew, hIntegral, hFraction); + state = state %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + got = map["key:${k.toString()}"] OR ""; + IF got.any?() THEN + hits += 1; + END END hits; - }); - wi += 1; - END - MUTABLE zipf_hits: Int64 = 0; - FOR zi IN (0_i64 ..< workers) -> zipf_hits += NEXT zipf_futures[zi]; - zipfMs = timestampMs() - t2; + } + ); + wi += 1; + END + MUTABLE zipf_hits = 0; + FOR zi IN (0 ..< workers) -> zipf_hits += NEXT zipf_futures[zi]; + zipfMs = timestampMs() - t2; - # Workload 
4: Mixed 80/20 (Zipfian) - t3 = timestampMs(); - MUTABLE mix_futures: ~Int64[]@list = []; - wi = 0; - WHILE wi < workers DO - seed = wi + 99; - cnt = chunk; - mix_futures.append(BG { @parallel -> - MUTABLE hits: Int64 = 0; - MUTABLE state: Int64 = seed; + # Workload 4: Mixed 80/20 (Zipfian) + t3 = timestampMs(); + MUTABLE mix_futures: ~Int64[]@list = []; + wi = 0; + WHILE wi < workers DO + seed = wi + 99; + cnt = chunk; + mix_futures + .append( + BG { + @parallel -> + MUTABLE hits = 0; + MUTABLE state = seed; MUTABLE val: String = ""; - FOR idx IN (0_i64 ..< cnt) DO - k = zipfNext!(state, n, zipfSkew, hIntegral, hFraction); - state = state %* 6364136223846793005_i64 %+ 1442695040888963407_i64; - MUTABLE decision = state MOD 100; - IF decision < 0 THEN decision = 0 - decision; END - IF decision < 80 THEN - val = map["key:${k.toString()}"] OR ""; - IF val.length() > 0 THEN hits += 1; END - ELSE - map["key:${k.toString()}"] = "updated-${idx.toString()}"; + FOR idx IN (0 ..< cnt) DO + k = zipfNext!(state, n, zipfSkew, hIntegral, hFraction); + state = state %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + MUTABLE decision = state MOD 100; + IF decision < 0 THEN + decision = 0 - decision; + END + IF decision < 80 THEN + val = map["key:${k.toString()}"] OR ""; + IF val.any?() THEN + hits += 1; END + ELSE + map["key:${k.toString()}"] = "updated-${idx.toString()}"; + END END hits; - }); - wi += 1; - END - MUTABLE mix_hits: Int64 = 0; - FOR mi IN (0_i64 ..< workers) -> mix_hits += NEXT mix_futures[mi]; - mixMs = timestampMs() - t3; + } + ); + wi += 1; + END + MUTABLE mix_hits = 0; + FOR mi IN (0 ..< workers) -> mix_hits += NEXT mix_futures[mi]; + mixMs = timestampMs() - t3; - # Assert against workers*chunk, not n: integer division may drop a - # remainder when n doesn't evenly divide workers (e.g. @leak n=1000 with - # nproc=32 → chunk=31, 32*31=992 keys actually written, not 1000). 
- ASSERT total_hits == workers * chunk, "GET hits must equal SET count"; - ASSERT zipf_hits > 0, "Zipf hits must be > 0"; - ASSERT mix_hits > 0, "Mixed hits must be > 0"; + # Assert against workers*chunk, not n: integer division may drop a + # remainder when n doesn't evenly divide workers (e.g. @leak n=1000 with + # nproc=32 → chunk=31, 32*31=992 keys actually written, not 1000). + ASSERT total_hits == workers * chunk, "GET hits must equal SET count"; + ASSERT zipf_hits > 0, "Zipf hits must be > 0"; + ASSERT mix_hits > 0, "Mixed hits must be > 0"; - totalMs = setMs + getMs + zipfMs + mixMs; - print("BENCH_RESULT: ${totalMs.toString()} ms"); - print("Keys: ${n.toString()}"); - print("Workers: ${workers.toString()}"); - print("Set: ${setMs.toString()} ms"); - print("Get: ${getMs.toString()} ms"); - print("Zipf: ${zipfMs.toString()} ms"); - print("Mixed: ${mixMs.toString()} ms"); - print("Verified: yes"); - RETURN; + totalMs = setMs + getMs + zipfMs + mixMs; + print("BENCH_RESULT: ${totalMs.toString()} ms"); + print("Keys: ${n.toString()}"); + print("Workers: ${workers.toString()}"); + print("Set: ${setMs.toString()} ms"); + print("Get: ${getMs.toString()} ms"); + print("Zipf: ${zipfMs.toString()} ms"); + print("Mixed: ${mixMs.toString()} ms"); + print("Verified: yes"); + RETURN; END diff --git a/benchmarks/concurrent/10_shard_vs_locked/bench.cht b/benchmarks/concurrent/10_shard_vs_locked/bench.cht index e71cbc295..0349ac34c 100644 --- a/benchmarks/concurrent/10_shard_vs_locked/bench.cht +++ b/benchmarks/concurrent/10_shard_vs_locked/bench.cht @@ -18,40 +18,40 @@ # Compare to Go (sync.Map) and Rust (dashmap). FN main() RETURNS Void -> - MUTABLE map: HashMap@sharded(32) = {}; - n = 10000000; + MUTABLE map: HashMap @sharded(32) = {}; + n = 10_000_000; - # Workload 1: Uniform SET (1M keys, partitioned) - t0 = timestampMs(); - (0.. 
SHARD("key:" + toString(_), map) |> CONCURRENT(batch: 128) EACH { - map[_] = "value"; - }; - setMs = timestampMs() - t0; + # Workload 1: Uniform SET (1M keys, partitioned) + t0 = timestampMs(); + (0 ..< n) + |> SHARD("key:" + _.toString(), map) + |> CONCURRENT(batch: 128) EACH {map[_] = "value";}; + setMs = timestampMs() - t0; - # Workload 2: Uniform GET (1M keys, all local, 100% hit) - t1 = timestampMs(); - (0.. SHARD("key:" + toString(_), map) |> CONCURRENT(batch: 128) EACH { - got = map[_] OR ""; - }; - getMs = timestampMs() - t1; + # Workload 2: Uniform GET (1M keys, all local, 100% hit) + t1 = timestampMs(); + (0 ..< n) + |> SHARD("key:" + _.toString(), map) + |> CONCURRENT(batch: 128) EACH {got = map[_] OR "";}; + getMs = timestampMs() - t1; - # Workload 3: Mixed — 200K SET (overwrite) + 800K GET - t2 = timestampMs(); - (0..<(n / 5)) |> SHARD("key:" + toString(_), map) |> CONCURRENT(batch: 128) EACH { - map[_] = "updated"; - }; - (0..<((n / 5) * 4)) |> SHARD("key:" + toString(_), map) |> CONCURRENT(batch: 128) EACH { - got = map[_] OR ""; - }; - mixMs = timestampMs() - t2; + # Workload 3: Mixed — 200K SET (overwrite) + 800K GET + t2 = timestampMs(); + (0 ..< (n / 5)) + |> SHARD("key:" + _.toString(), map) + |> CONCURRENT(batch: 128) EACH {map[_] = "updated";}; + (0 ..< ((n / 5) * 4)) + |> SHARD("key:" + _.toString(), map) + |> CONCURRENT(batch: 128) EACH {got = map[_] OR "";}; + mixMs = timestampMs() - t2; - ASSERT map.count() > 0; - totalMs = setMs + getMs + mixMs; - print("BENCH_RESULT: ${totalMs.toString()} ms"); - print("Keys: ${n.toString()}"); - print("Shards: 32"); - print("Set: ${setMs.toString()} ms"); - print("Get: ${getMs.toString()} ms"); - print("Mixed: ${mixMs.toString()} ms"); - print("Verified: yes"); + ASSERT map.count() > 0; + totalMs = setMs + getMs + mixMs; + print("BENCH_RESULT: ${totalMs.toString()} ms"); + print("Keys: ${n.toString()}"); + print("Shards: 32"); + print("Set: ${setMs.toString()} ms"); + print("Get: ${getMs.toString()} 
ms"); + print("Mixed: ${mixMs.toString()} ms"); + print("Verified: yes"); END diff --git a/benchmarks/concurrent/10_shard_vs_locked/bench_compare.cht b/benchmarks/concurrent/10_shard_vs_locked/bench_compare.cht index 1a41febe9..cdd53dcb6 100644 --- a/benchmarks/concurrent/10_shard_vs_locked/bench_compare.cht +++ b/benchmarks/concurrent/10_shard_vs_locked/bench_compare.cht @@ -8,92 +8,88 @@ # The workload: 1M SET + 1M GET + 200K SET + 800K GET = 3M total ops. FN main() RETURNS Void -> - n = 1000000; - workers = 8; - chunk = n / workers; + n = 1_000_000; + workers = 8; + chunk = n / workers; - # ═══════════════════════════════════════════════════ - # Strategy 1: @sharded(8):locked (RwLock per shard) - # ═══════════════════════════════════════════════════ + # ═══════════════════════════════════════════════════ + # Strategy 1: @sharded(8):locked (RwLock per shard) + # ═══════════════════════════════════════════════════ - MUTABLE locked_map: HashMap@sharded(8):locked = {}; + MUTABLE locked_map: HashMap @sharded(8):locked = {}; - # SET 1M - MUTABLE set_futures: ~Void[]@list = []; - MUTABLE wi: Int64 = 0; - WHILE wi < workers DO - start = wi * chunk; - set_futures.append(BG { - FOR i IN (start ..< start + chunk) -> locked_map["key:" + toString(i)] = "value"; - }); - wi += 1; - END - FOR si IN (0_i64 ..< workers) -> NEXT set_futures[si]; + # SET 1M + MUTABLE set_futures: ~Void[]@list = []; + MUTABLE wi = 0; + WHILE wi < workers DO + start = wi * chunk; + set_futures + .append(BG {FOR i IN (start ..< start + chunk) -> locked_map["key:" + i.toString()] = "value";}); + wi += 1; + END + FOR si IN (0 ..< workers) -> NEXT set_futures[si]; - # GET 1M - MUTABLE get_futures: ~Void[]@list = []; - wi = 0; - WHILE wi < workers DO - start = wi * chunk; - get_futures.append(BG { - FOR i IN (start ..< start + chunk) -> MUTABLE got = locked_map["key:" + toString(i)] OR ""; - }); - wi += 1; - END - FOR gi IN (0_i64 ..< workers) -> NEXT get_futures[gi]; + # GET 1M + MUTABLE get_futures: 
~Void[]@list = []; + wi = 0; + WHILE wi < workers DO + start = wi * chunk; + get_futures + .append(BG {FOR i IN (start ..< start + chunk) -> MUTABLE got = locked_map["key:" + i.toString()] OR "";}); + wi += 1; + END + FOR gi IN (0 ..< workers) -> NEXT get_futures[gi]; - # Mixed: 200K SET + 800K GET - MUTABLE mix_set_futures: ~Void[]@list = []; - wi = 0; - MUTABLE mix_chunk = (n / 5) / workers; - WHILE wi < workers DO - start = wi * mix_chunk; - mix_set_futures.append(BG { - FOR i IN (start ..< start + mix_chunk) -> locked_map["key:" + toString(i)] = "updated"; - }); - wi += 1; - END - FOR msi IN (0_i64 ..< workers) -> NEXT mix_set_futures[msi]; + # Mixed: 200K SET + 800K GET + MUTABLE mix_set_futures: ~Void[]@list = []; + wi = 0; + MUTABLE mix_chunk = (n / 5) / workers; + WHILE wi < workers DO + start = wi * mix_chunk; + mix_set_futures + .append(BG {FOR i IN (start ..< start + mix_chunk) -> locked_map["key:" + i.toString()] = "updated";}); + wi += 1; + END + FOR msi IN (0 ..< workers) -> NEXT mix_set_futures[msi]; - MUTABLE mix_get_futures: ~Void[]@list = []; - wi = 0; - MUTABLE mix_get_chunk = ((n / 5) * 4) / workers; - WHILE wi < workers DO - start = wi * mix_get_chunk; - mix_get_futures.append(BG { - FOR i IN (start ..< start + mix_get_chunk) -> MUTABLE got = locked_map["key:" + toString(i)] OR ""; - }); - wi += 1; - END - FOR mgi IN (0_i64 ..< workers) -> NEXT mix_get_futures[mgi]; + MUTABLE mix_get_futures: ~Void[]@list = []; + wi = 0; + MUTABLE mix_get_chunk = ((n / 5) * 4) / workers; + WHILE wi < workers DO + start = wi * mix_get_chunk; + mix_get_futures + .append(BG {FOR i IN (start ..< start + mix_get_chunk) -> MUTABLE got = locked_map["key:" + i.toString()] OR "";}); + wi += 1; + END + FOR mgi IN (0 ..< workers) -> NEXT mix_get_futures[mgi]; - ASSERT locked_map.count() == n; - print("locked: OK"); + ASSERT locked_map.count() == n; + print("locked: OK"); - # ═══════════════════════════════════════════════════ - # Strategy 2: @sharded(8) + SHARD 
(shared-nothing) - # ═══════════════════════════════════════════════════ + # ═══════════════════════════════════════════════════ + # Strategy 2: @sharded(8) + SHARD (shared-nothing) + # ═══════════════════════════════════════════════════ - MUTABLE sn_map: HashMap@sharded(8) = {}; + MUTABLE sn_map: HashMap @sharded(8) = {}; - # SET 1M - (0..<n) |> SHARD("key:" + toString(_), sn_map) |> CONCURRENT EACH { - sn_map[_] = "value"; - }; + # SET 1M + (0 ..< n) + |> SHARD("key:" + _.toString(), sn_map) + |> CONCURRENT EACH {sn_map[_] = "value";}; - # GET 1M - (0..<n) |> SHARD("key:" + toString(_), sn_map) |> CONCURRENT EACH { - got = sn_map[_] OR ""; - }; + # GET 1M + (0 ..< n) + |> SHARD("key:" + _.toString(), sn_map) + |> CONCURRENT EACH {got = sn_map[_] OR "";}; - # Mixed: 200K SET + 800K GET - (0..<(n / 5)) |> SHARD("key:" + toString(_), sn_map) |> CONCURRENT EACH { - sn_map[_] = "updated"; - }; - (0..<((n / 5) * 4)) |> SHARD("key:" + toString(_), sn_map) |> CONCURRENT EACH { - got = sn_map[_] OR ""; - }; + # Mixed: 200K SET + 800K GET + (0 ..< (n / 5)) + |> SHARD("key:" + _.toString(), sn_map) + |> CONCURRENT EACH {sn_map[_] = "updated";}; + (0 ..< ((n / 5) * 4)) + |> SHARD("key:" + _.toString(), sn_map) + |> CONCURRENT EACH {got = sn_map[_] OR "";}; - ASSERT sn_map.count() == n; - print("shared-nothing: OK"); + ASSERT sn_map.count() == n; + print("shared-nothing: OK"); END diff --git a/benchmarks/concurrent/11_parallel_aggregation/bench.cht b/benchmarks/concurrent/11_parallel_aggregation/bench.cht index f63d70bfb..6a45ef880 100644 --- a/benchmarks/concurrent/11_parallel_aggregation/bench.cht +++ b/benchmarks/concurrent/11_parallel_aggregation/bench.cht @@ -15,54 +15,56 @@ # No I/O — pure parallel compute. 
FN lcg(state: Int64) RETURNS Int64 -> - RETURN state %* 6364136223846793005_i64 %+ 1442695040888963407_i64; + RETURN state %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; END FN absInt(x: Int64) RETURNS Int64 -> - IF x < 0_i64 THEN RETURN 0_i64 - x; END - RETURN x; + IF x < 0 THEN + RETURN 0 - x; + END + RETURN x; END FN main() RETURNS Void -> - n = 10000000; - buckets = 1000; + n = 10_000_000; + buckets = 1000; - # Pre-compute seeds (LCG is sequential so we must pre-compute). - MUTABLE seeds: Int64[]@list = List[]; - MUTABLE seed: Int64 = 42; - FOR i IN (0_i64 ..< n) DO - seed = lcg(seed); - seeds.append(seed); - END + # Pre-compute seeds (LCG is sequential so we must pre-compute). + MUTABLE seeds: Int64[]@list = List[]; + MUTABLE seed = 42; + FOR i IN (0 ..< n) DO + seed = lcg(seed); + seeds.append(seed); + END - # Phase 1: Build histogram via SHARD with integer keys — zero string allocation. - t0 = timestampMs(); - MUTABLE counts: HashMap@sharded(32) = {}; - (0..<n) |> SHARD(absInt(seeds[_]) MOD buckets, counts) |> CONCURRENT(batch: 512) EACH { - counts[_] = (counts[_] OR 0_i64) + 1_i64; - }; - shardMs = timestampMs() - t0; + # Phase 1: Build histogram via SHARD with integer keys — zero string allocation. 
+ t0 = timestampMs(); + MUTABLE counts: HashMap @sharded(32) = {}; + (0 ..< n) + |> SHARD(absInt(seeds[_]) MOD buckets, counts) + |> CONCURRENT(batch: 512) EACH {counts[_] = (counts[_] OR 0) + 1;}; + shardMs = timestampMs() - t0; - # Phase 2: Stats over histogram values (partial aggregation) - t1 = timestampMs(); - vals = counts.values(); - total = vals |> CONCURRENT SUM toFloat(_); - highest = vals |> CONCURRENT MAX toFloat(_); - lowest = vals |> CONCURRENT MIN toFloat(_); - average = vals |> CONCURRENT AVERAGE toFloat(_); - statsMs = timestampMs() - t1; + # Phase 2: Stats over histogram values (partial aggregation) + t1 = timestampMs(); + vals = counts.values(); + total = vals |> CONCURRENT SUM _.toFloat(); + highest = vals |> CONCURRENT MAX _.toFloat(); + lowest = vals |> CONCURRENT MIN _.toFloat(); + average = vals |> CONCURRENT AVERAGE _.toFloat(); + statsMs = timestampMs() - t1; - ASSERT total == toFloat(n), "total mismatch"; + ASSERT total == n.toFloat(), "total mismatch"; - totalMs = shardMs + statsMs; - print("BENCH_RESULT: ${totalMs.toString()} ms"); - print("Events: ${n.toString()}"); - print("Buckets: ${buckets.toString()}"); - print("Shard histogram: ${shardMs.toString()} ms"); - print("Aggregation: ${statsMs.toString()} ms"); - print("Total: ${total.toString()}"); - print("Max: ${highest.toString()}"); - print("Min: ${lowest.toString()}"); - print("Avg: ${average.toString()}"); - print("Verified: yes"); + totalMs = shardMs + statsMs; + print("BENCH_RESULT: ${totalMs.toString()} ms"); + print("Events: ${n.toString()}"); + print("Buckets: ${buckets.toString()}"); + print("Shard histogram: ${shardMs.toString()} ms"); + print("Aggregation: ${statsMs.toString()} ms"); + print("Total: ${total.toString()}"); + print("Max: ${highest.toString()}"); + print("Min: ${lowest.toString()}"); + print("Avg: ${average.toString()}"); + print("Verified: yes"); END diff --git a/benchmarks/concurrent/12_false_sharing/bench.cht 
b/benchmarks/concurrent/12_false_sharing/bench.cht index 778f2ac87..b700f6835 100644 --- a/benchmarks/concurrent/12_false_sharing/bench.cht +++ b/benchmarks/concurrent/12_false_sharing/bench.cht @@ -12,51 +12,55 @@ # value is its own heap allocation with a control block, guaranteeing # cache line isolation without manual padding. -STRUCT Counter { value: Int64 } +STRUCT Counter { + value: Int64 +} FN main() RETURNS Void -> - workers = threadCount(); - totalWork = 40000000_i64; - increments = totalWork / workers; - - # Each worker gets its own @shared:locked counter (separate heap alloc) - MUTABLE counters: Counter@shared:locked[]@list = []; - FOR i IN (0_i64 ..< workers) DO - counters.append(Counter{ value: 0 } @shared:locked); - END - - t0 = timestampMs(); - - MUTABLE futures: ~Void[]@list = []; - MUTABLE wi = 0_i64; - WHILE wi < workers DO - ref = counters[wi]; - futures.append(BG { @standard:@parallel -> - FOR j IN (0_i64 ..< increments) DO - WITH EXCLUSIVE ref AS inner { - inner.value = inner.value + 1; - } - END - }); - wi += 1; - END - - FOR i IN (0_i64 ..< workers) -> NEXT futures[i]; - elapsed = timestampMs() - t0; - - # Verify - MUTABLE total = 0_i64; - FOR i IN (0_i64 ..< workers) DO - ref = counters[i]; - WITH ref AS inner { - total = total + inner.value; - } - END - expected = workers * increments; - ASSERT total == expected, "total mismatch"; - - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("False-sharing (${workers.toString()} threads x ${increments.toString()} iters)"); - print(" @shared:locked (no false sharing): ${elapsed.toString()} ms"); - print(" Total: ${total.toString()}"); + workers = threadCount(); + totalWork = 40_000_000; + increments = totalWork / workers; + + # Each worker gets its own @shared:locked counter (separate heap alloc) + MUTABLE counters: Counter@shared:locked[]@list = []; + FOR i IN (0 ..< workers) DO + counters.append(Counter{ value: 0 } @shared:locked); + END + + t0 = timestampMs(); + + MUTABLE futures: 
~Void[]@list = []; + MUTABLE wi = 0; + WHILE wi < workers DO + ref = counters[wi]; + futures + .append( + BG { + @standard:@parallel -> FOR j IN (0 ..< increments) DO + WITH EXCLUSIVE ref AS inner {inner.value = inner.value + 1;} + END + + } + ); + wi += 1; +END + +FOR i IN (0 ..< workers) -> NEXT futures[i]; +elapsed = timestampMs() - t0; + +# Verify +MUTABLE total = 0; +FOR i IN (0 ..< workers) DO + ref = counters[i]; + WITH ref AS inner { + total = total + inner.value; + } +END +expected = workers * increments; +ASSERT total == expected, "total mismatch"; + +print("BENCH_RESULT: ${elapsed.toString()} ms"); +print("False-sharing (${workers.toString()} threads x ${increments.toString()} iters)"); +print(" @shared:locked (no false sharing): ${elapsed.toString()} ms"); +print(" Total: ${total.toString()}"); END diff --git a/benchmarks/concurrent/13_rwlock_starvation/bench.cht b/benchmarks/concurrent/13_rwlock_starvation/bench.cht index 6829600c4..a09164253 100644 --- a/benchmarks/concurrent/13_rwlock_starvation/bench.cht +++ b/benchmarks/concurrent/13_rwlock_starvation/bench.cht @@ -11,82 +11,96 @@ # CLEAR uses pthread_rwlock_t with PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP. # Writer-preferring: new readers block once a writer is waiting. No starvation. 
-STRUCT Counter { value: Int64 } -STRUCT WriterResult { doneMs: Int64, avgWaitMs: Int64, maxWaitMs: Int64 } +STRUCT Counter { + value: Int64 +} +STRUCT WriterResult { + doneMs: Int64, + avgWaitMs: Int64, + maxWaitMs: Int64 +} FN busyWork(n: Int64) RETURNS Int64 -> - MUTABLE acc = 0_i64; - MUTABLE i = 0_i64; - WHILE i < n DO - acc = acc + i; - i += 1; - END - RETURN acc; + MUTABLE acc = 0; + MUTABLE i = 0; + WHILE i < n DO + acc = acc + i; + i += 1; + END + RETURN acc; END FN main() RETURNS Void -> - nCPU = threadCount(); - MUTABLE nReaders = nCPU - 1; - IF nReaders < 1 THEN nReaders = 1; END + nCPU = threadCount(); + MUTABLE nReaders = nCPU - 1; + IF nReaders < 1 THEN + nReaders = 1; + END - readIters = 2000000_i64; - writeIters = 1000_i64; - workPerOp = 100_i64; + readIters = 2_000_000; + writeIters = 1000; + workPerOp = 100; - c = Counter{ value: 0 } @shared:writeLocked; + c = Counter{ value: 0 } @shared:writeLocked; - t0 = timestampMs(); + t0 = timestampMs(); - # Spawn readers - MUTABLE readers: ~Void[]@list = []; - FOR r IN (0_i64 ..< nReaders) DO - readers.append(BG { @parallel -> - MUTABLE localSink = 0_i64; - FOR i IN (0_i64 ..< readIters) DO - WITH c AS inner { - localSink = localSink + inner.value; - localSink = localSink + busyWork(workPerOp); - } - END - }); - END + # Spawn readers + MUTABLE readers: ~Void[]@list = []; + FOR r IN (0 ..< nReaders) DO + readers + .append( + BG { + @parallel -> MUTABLE localSink = 0; + FOR i IN (0 ..< readIters) DO + WITH c AS inner {localSink = localSink + inner.value; localSink = localSink + busyWork( + workPerOp + );} + END - # Spawn writer - measure per-write latency - writer = BG { @parallel -> - MUTABLE maxWaitMs = 0_i64; - MUTABLE totalWaitMs = 0_i64; - MUTABLE localSink = 0_i64; - FOR i IN (0_i64 ..< writeIters) DO - wt0 = timestampMs(); - WITH EXCLUSIVE c AS inner { - waited = timestampMs() - wt0; - IF waited > maxWaitMs THEN maxWaitMs = waited; END - totalWaitMs = totalWaitMs + waited; - inner.value = inner.value 
+ 1; - localSink = localSink + busyWork(workPerOp); - } + } + ); +END + +# Spawn writer - measure per-write latency +writer = BG { + @parallel -> + MUTABLE maxWaitMs = 0; + MUTABLE totalWaitMs = 0; + MUTABLE localSink = 0; + FOR i IN (0 ..< writeIters) DO + wt0 = timestampMs(); + WITH EXCLUSIVE c AS inner { + waited = timestampMs() - wt0; + IF waited > maxWaitMs THEN + maxWaitMs = waited; END - writerDone = timestampMs() - t0; - avgWaitMs = totalWaitMs / writeIters; - WriterResult{ doneMs: writerDone, avgWaitMs: avgWaitMs, maxWaitMs: maxWaitMs }; - }; + totalWaitMs = totalWaitMs + waited; + inner.value = inner.value + 1; + localSink = localSink + busyWork(workPerOp); + } + END + writerDone = timestampMs() - t0; + avgWaitMs = totalWaitMs / writeIters; + WriterResult{ doneMs: writerDone, avgWaitMs: avgWaitMs, maxWaitMs: maxWaitMs }; +}; - # Wait for all - FOR r IN (0_i64 ..< nReaders) -> NEXT readers[r]; - result: WriterResult = NEXT writer; - elapsed = timestampMs() - t0; +# Wait for all +FOR r IN (0 ..< nReaders) -> NEXT readers[r]; +result = NEXT writer; +elapsed = timestampMs() - t0; - # Read final value - MUTABLE finalVal = 0_i64; - WITH c AS inner { - finalVal = inner.value; - } +# Read final value +MUTABLE finalVal = 0; +WITH c AS inner { + finalVal = inner.value; +} - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("RwLock starvation (${nReaders.toString()} readers x ${readIters.toString()} iters)"); - print(" Total time: ${elapsed.toString()} ms"); - print(" Writer done: ${result.doneMs.toString()} ms"); - print(" Avg write wait: ${result.avgWaitMs.toString()} ms"); - print(" Max write wait: ${result.maxWaitMs.toString()} ms"); - print(" Final value: ${finalVal.toString()}"); +print("BENCH_RESULT: ${elapsed.toString()} ms"); +print("RwLock starvation (${nReaders.toString()} readers x ${readIters.toString()} iters)"); +print(" Total time: ${elapsed.toString()} ms"); +print(" Writer done: ${result.doneMs.toString()} ms"); +print(" Avg write wait: 
${result.avgWaitMs.toString()} ms"); +print(" Max write wait: ${result.maxWaitMs.toString()} ms"); +print(" Final value: ${finalVal.toString()}"); END diff --git a/benchmarks/concurrent/14_nested_lock/bench.cht b/benchmarks/concurrent/14_nested_lock/bench.cht index 96a6f441b..0621e792f 100644 --- a/benchmarks/concurrent/14_nested_lock/bench.cht +++ b/benchmarks/concurrent/14_nested_lock/bench.cht @@ -8,85 +8,103 @@ # across fibers (read lock only -- list is immutable after setup). # Individual accounts are @shared:locked (Arc) for transfer mutations. -STRUCT Account { balance: Int64 } -STRUCT Bank { accounts: Account@shared:locked[]@list } +STRUCT Account { + balance: Int64 +} +STRUCT Bank { + accounts: Account@shared:locked[]@list +} FN main() RETURNS Void -> - MUTABLE nWorkers = threadCount(); - IF nWorkers < 2 THEN nWorkers = 2; END - numAccounts = 64_i64; - opsPerWorker = 500000_i64; + MUTABLE nWorkers = threadCount(); + IF nWorkers < 2 THEN + nWorkers = 2; + END + numAccounts = 64; + opsPerWorker = 500_000; - MUTABLE accts: Account@shared:locked[]@list = []; - FOR i IN (0_i64 ..< numAccounts) DO - accts.append(Account{ balance: 1000 } @shared:locked); - END - bank = Bank{ accounts: accts } @shared:writeLocked; + MUTABLE accts: Account@shared:locked[]@list = []; + FOR i IN (0 ..< numAccounts) DO + accts.append(Account{ balance: 1000 } @shared:locked); + END + bank = Bank{ accounts: accts } @shared:writeLocked; - t0 = timestampMs(); + t0 = timestampMs(); - MUTABLE futures: ~Void[]@list = []; - MUTABLE w = 0_i64; - WHILE w < nWorkers DO - futures.append(BG { @parallel -> - FOR i IN (0_i64 ..< opsPerWorker) DO - MUTABLE a = randomInt(numAccounts); - MUTABLE b = randomInt(numAccounts); + MUTABLE futures: ~Void[]@list = []; + MUTABLE w = 0; + WHILE w < nWorkers DO + futures + .append( + BG { + @parallel -> + FOR i IN (0 ..< opsPerWorker) DO + MUTABLE a = randomInt(numAccounts); + MUTABLE b = randomInt(numAccounts); - IF a == b THEN b = (a + 1) MOD numAccounts; END + 
IF a == b THEN + b = (a + 1) MOD numAccounts; + END - # Order by index to prevent deadlock - MUTABLE lo = a; - MUTABLE hi = b; - IF a > b THEN lo = b; hi = a; END + # Order by index to prevent deadlock + MUTABLE lo = a; + MUTABLE hi = b; + IF a > b THEN + lo = b; + hi = a; + END - # Read lock on bank to access accounts list, - # then exclusive locks on both accounts for transfer. - WITH bank AS bk { - loAcct = bk.accounts[lo]; - hiAcct = bk.accounts[hi]; - WITH EXCLUSIVE loAcct AS la, EXCLUSIVE hiAcct AS ha { - IF la.balance > 0 THEN - la.balance = la.balance - 1; - ha.balance = ha.balance + 1; - END - } - } + # Read lock on bank to access accounts list, + # then exclusive locks on both accounts for transfer. + WITH bank AS bk { + loAcct = bk.accounts[lo]; + hiAcct = bk.accounts[hi]; + WITH + EXCLUSIVE loAcct AS la, + EXCLUSIVE hiAcct AS ha { + IF la.balance > 0 THEN + la.balance = la.balance - 1; + ha.balance = ha.balance + 1; + END + } + } END - }); - w += 1; - END - FOR i IN (0_i64 ..< nWorkers) -> NEXT futures[i]; - elapsed = timestampMs() - t0; + } + ); + w += 1; + END + + FOR i IN (0 ..< nWorkers) -> NEXT futures[i]; + elapsed = timestampMs() - t0; - # Verify conservation. Snapshot the per-account Arc handles out of - # the bank's read lock first so the per-account WITH at the bottom - # isn't lexically nested under WITH bank. (The naked-nested-WITH - # lint conservatively flags any single-binding inner WITH while a - # different binding is held; in this verification path workers have - # already joined so no symmetric caller exists, but the lint can't - # prove that. The snapshot pattern sidesteps the warning while - # preserving the read-then-per-element-lock semantics.) - acct_refs: Account@shared:locked[]@list = []; - WITH bank AS bk { - FOR i IN (0_i64 ..< numAccounts) -> acct_refs.append(bk.accounts[i]); + # Verify conservation. 
Snapshot the per-account Arc handles out of + # the bank's read lock first so the per-account WITH at the bottom + # isn't lexically nested under WITH bank. (The naked-nested-WITH + # lint conservatively flags any single-binding inner WITH while a + # different binding is held; in this verification path workers have + # already joined so no symmetric caller exists, but the lint can't + # prove that. The snapshot pattern sidesteps the warning while + # preserving the read-then-per-element-lock semantics.) + acct_refs: Account@shared:locked[]@list = []; + WITH bank AS bk { + FOR i IN (0 ..< numAccounts) -> acct_refs.append(bk.accounts[i]); + } + MUTABLE total = 0; + FOR i IN (0 ..< numAccounts) DO + a = acct_refs[i]; + WITH a AS inner { + total = total + inner.balance; } - MUTABLE total = 0_i64; - FOR i IN (0_i64 ..< numAccounts) DO - a = acct_refs[i]; - WITH a AS inner { - total = total + inner.balance; - } - END - expected = numAccounts * 1000; - ASSERT total == expected, "balance mismatch"; - totalOps = nWorkers * opsPerWorker; - opsPerSec = totalOps * 1000 / elapsed; + END + expected = numAccounts * 1000; + ASSERT total == expected, "balance mismatch"; + totalOps = nWorkers * opsPerWorker; + opsPerSec = totalOps * 1000 / elapsed; - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Nested lock (${nWorkers.toString()} workers x ${opsPerWorker.toString()} ops)"); - print(" Total time: ${elapsed.toString()} ms"); - print(" Ops/sec: ${opsPerSec.toString()}"); - print(" Balance: ${total.toString()} (expected ${expected.toString()})"); + print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("Nested lock (${nWorkers.toString()} workers x ${opsPerWorker.toString()} ops)"); + print(" Total time: ${elapsed.toString()} ms"); + print(" Ops/sec: ${opsPerSec.toString()}"); + print(" Balance: ${total.toString()} (expected ${expected.toString()})"); END diff --git a/benchmarks/concurrent/16_observables/bench.cht b/benchmarks/concurrent/16_observables/bench.cht index 
d04ee88e9..482e21f87 100644 --- a/benchmarks/concurrent/16_observables/bench.cht +++ b/benchmarks/concurrent/16_observables/bench.cht @@ -13,28 +13,28 @@ # one producer, one consumer sum, and a join. FN main() RETURNS Void -> - n_writes: Int64 = 2_000_000_i64; - expected: Int64 = (n_writes * (n_writes - 1_i64)) / 2_i64; + n_writes = 2_000_000; + expected = (n_writes * (n_writes - 1)) / 2; - gen: ~?Int64[] = BG STREAM { - MUTABLE i: Int64 = 0_i64; - WHILE i < n_writes DO - YIELD i; - i = i + 1_i64; - END - }; + gen: ~?Int64[] = BG STREAM { + MUTABLE i = 0; + WHILE i < n_writes DO + YIELD i; + i = i + 1; + END + }; - t0 = timestampMs(); - running: ~Int64@observable = gen |> SUM _; - final = NEXT running; - elapsed = timestampMs() - t0; - checksum: Int64 = final + (n_writes * 131_i64); - expected_checksum: Int64 = expected + (n_writes * 131_i64); + t0 = timestampMs(); + running: ~Int64@observable = gen |> SUM _; + final = NEXT running; + elapsed = timestampMs() - t0; + checksum = final + (n_writes * 131); + expected_checksum = expected + (n_writes * 131); - ASSERT final == expected, "final mismatch"; - ASSERT checksum == expected_checksum, "checksum mismatch"; - print("CLEAR observable: ", final, " (sum 0..N-1) in ", elapsed, " ms"); - print("BENCH_INFO: CLEAR stream_sum final=", final, " checksum=", checksum, " n=", n_writes); - print("BENCH_RESULT: ", elapsed, " ms"); - RETURN; + ASSERT final == expected, "final mismatch"; + ASSERT checksum == expected_checksum, "checksum mismatch"; + print("CLEAR observable: ", final, " (sum 0..N-1) in ", elapsed, " ms"); + print("BENCH_INFO: CLEAR stream_sum final=", final, " checksum=", checksum, " n=", n_writes); + print("BENCH_RESULT: ", elapsed, " ms"); + RETURN; END diff --git a/benchmarks/concurrent/18_atomic_counter/bench.cht b/benchmarks/concurrent/18_atomic_counter/bench.cht index 500004f68..92e2768be 100644 --- a/benchmarks/concurrent/18_atomic_counter/bench.cht +++ b/benchmarks/concurrent/18_atomic_counter/bench.cht @@ 
-21,41 +21,44 @@ # should NOT yield, but if the compiler injects a yield-budget # check the cost shows up here). -FN bump_loop!(MUTABLE c: Int64, n: Int64) RETURNS Void REQUIRES c: ATOMIC -> - MUTABLE i: Int64 = 0; - WHILE i < n DO - c += 1; - i = i + 1; - END - RETURN; +FN bump_loop!(MUTABLE c: Int64, n: Int64) + RETURNS Void + REQUIRES c: ATOMIC +-> + MUTABLE i = 0; + WHILE i < n DO + c += 1; + i = i + 1; + END + RETURN; END FN main() RETURNS Void -> - MUTABLE c: Int64 = 0 @shared:atomic; + MUTABLE c: Int64 = 0 @shared:atomic; - t0 = timestampMs(); - bg1: ~Void = BG { bump_loop!(c, 1000000_i64); }; - bg2: ~Void = BG { bump_loop!(c, 1000000_i64); }; - bg3: ~Void = BG { bump_loop!(c, 1000000_i64); }; - bg4: ~Void = BG { bump_loop!(c, 1000000_i64); }; - bg5: ~Void = BG { bump_loop!(c, 1000000_i64); }; - bg6: ~Void = BG { bump_loop!(c, 1000000_i64); }; - bg7: ~Void = BG { bump_loop!(c, 1000000_i64); }; - bg8: ~Void = BG { bump_loop!(c, 1000000_i64); }; - NEXT bg1; - NEXT bg2; - NEXT bg3; - NEXT bg4; - NEXT bg5; - NEXT bg6; - NEXT bg7; - NEXT bg8; - elapsed = timestampMs() - t0; + t0 = timestampMs(); + bg1: ~Void = BG {bump_loop!(c, 1_000_000);}; + bg2: ~Void = BG {bump_loop!(c, 1_000_000);}; + bg3: ~Void = BG {bump_loop!(c, 1_000_000);}; + bg4: ~Void = BG {bump_loop!(c, 1_000_000);}; + bg5: ~Void = BG {bump_loop!(c, 1_000_000);}; + bg6: ~Void = BG {bump_loop!(c, 1_000_000);}; + bg7: ~Void = BG {bump_loop!(c, 1_000_000);}; + bg8: ~Void = BG {bump_loop!(c, 1_000_000);}; + NEXT bg1; + NEXT bg2; + NEXT bg3; + NEXT bg4; + NEXT bg5; + NEXT bg6; + NEXT bg7; + NEXT bg8; + elapsed = timestampMs() - t0; - final = c; - ASSERT final == 8000000_i64, "expected 8000000 (no lost updates)"; - print("Counter: ${final.toString()}"); - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Time: ${elapsed.toString()} ms"); - RETURN; + final = c; + ASSERT final == 8_000_000, "expected 8000000 (no lost updates)"; + print("Counter: ${final.toString()}"); + print("BENCH_RESULT: 
${elapsed.toString()} ms"); + print("Time: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/concurrent/19_atomic_ptr/bench.cht b/benchmarks/concurrent/19_atomic_ptr/bench.cht index b88bf6dc2..713af2a05 100644 --- a/benchmarks/concurrent/19_atomic_ptr/bench.cht +++ b/benchmarks/concurrent/19_atomic_ptr/bench.cht @@ -28,134 +28,162 @@ # ballpark as Versioned.update (which retries up to MAX_UPDATE_RETRIES # before surfacing Conflict; the inner retry loop is identical shape). -STRUCT Counter { a: Int64, b: Int64 } +STRUCT Counter { + a: Int64, + b: Int64 +} FN main() RETURNS Void -> - n_readers: Int64 = 16_i64; - reads_per: Int64 = 50000_i64; - writes: Int64 = 5000_i64; - - # ───── Variant A: @shared:writeLocked (RwLock) ───── - MUTABLE c_rw = Counter{ a: 0_i64, b: 0_i64 } @shared:writeLocked; - - t0_rw = timestampMs(); - MUTABLE rfutures_rw: ~Int64[]@list = []; - FOR i IN (0_i64 ..< n_readers) DO - rfutures_rw.append(BG { @parallel -> - MUTABLE s: Int64 = 0_i64; - MUTABLE j: Int64 = 0_i64; - MUTABLE viol: Int64 = 0_i64; + n_readers = 16; + reads_per = 50_000; + writes = 5000; + + # ───── Variant A: @shared:writeLocked (RwLock) ───── + MUTABLE c_rw = Counter{ a: 0, b: 0 } @shared:writeLocked; + + t0_rw = timestampMs(); + MUTABLE rfutures_rw: ~Int64[]@list = []; + FOR i IN (0 ..< n_readers) DO + rfutures_rw + .append( + BG { + @parallel -> + MUTABLE s = 0; + MUTABLE j = 0; + MUTABLE viol = 0; WHILE j < reads_per DO - WITH c_rw AS v { - s = s + v.a; - IF v.b != v.a * 2_i64 THEN viol = viol + 1_i64; END - } - j = j + 1_i64; + WITH c_rw AS v { + s = s + v.a; + IF v.b != v.a * 2 THEN + viol = viol + 1; + END + } + j = j + 1; END viol; - }); - END - - w_rw: ~Void = BG { @parallel -> - MUTABLE k: Int64 = 0_i64; - WHILE k < writes DO - WITH EXCLUSIVE c_rw AS v { - v.a = v.a + 1_i64; - v.b = (v.a) * 2_i64; - } - k = k + 1_i64; - END - }; - - NEXT w_rw; - MUTABLE total_viol_rw: Int64 = 0_i64; - FOR i IN (0_i64 ..< n_readers) DO - total_viol_rw = total_viol_rw + 
NEXT rfutures_rw[i]; - END - elapsed_rw = timestampMs() - t0_rw; - - # ───── Variant B: @shared:versioned (MVCC) ───── - MUTABLE c_v = Counter{ a: 0_i64, b: 0_i64 } @shared:versioned; - - t0_v = timestampMs(); - MUTABLE rfutures_v: ~Int64[]@list = []; - FOR i IN (0_i64 ..< n_readers) DO - rfutures_v.append(BG { @parallel -> - MUTABLE s: Int64 = 0_i64; - MUTABLE j: Int64 = 0_i64; - MUTABLE viol: Int64 = 0_i64; + } + ); + END + + w_rw: ~Void = BG { + @parallel -> + MUTABLE k = 0; + WHILE k < writes DO + WITH EXCLUSIVE c_rw AS v { + v.a = v.a + 1; + v.b = (v.a) * 2; + } + k = k + 1; + END + + }; + + NEXT w_rw; + MUTABLE total_viol_rw = 0; + FOR i IN (0 ..< n_readers) DO + total_viol_rw = total_viol_rw + NEXT rfutures_rw[i]; + END + elapsed_rw = timestampMs() - t0_rw; + + # ───── Variant B: @shared:versioned (MVCC) ───── + MUTABLE c_v = Counter{ a: 0, b: 0 } @shared:versioned; + + t0_v = timestampMs(); + MUTABLE rfutures_v: ~Int64[]@list = []; + FOR i IN (0 ..< n_readers) DO + rfutures_v + .append( + BG { + @parallel -> + MUTABLE s = 0; + MUTABLE j = 0; + MUTABLE viol = 0; WHILE j < reads_per DO - WITH SNAPSHOT c_v AS v { - s = s + v.a; - IF v.b != v.a * 2_i64 THEN viol = viol + 1_i64; END - } - j = j + 1_i64; + WITH SNAPSHOT c_v AS v { + s = s + v.a; + IF v.b != v.a * 2 THEN + viol = viol + 1; + END + } + j = j + 1; END viol; - }); - END - - w_v: ~Void = BG { @parallel -> - MUTABLE k: Int64 = 0_i64; - WHILE k < writes DO - WITH SNAPSHOT c_v AS MUTABLE v { - v.a = v.a + 1_i64; - v.b = (v.a) * 2_i64; - } ON MvccConflict RAISE - k = k + 1_i64; - END - }; - - NEXT w_v; - MUTABLE total_viol_v: Int64 = 0_i64; - FOR i IN (0_i64 ..< n_readers) DO - total_viol_v = total_viol_v + NEXT rfutures_v[i]; - END - elapsed_v = timestampMs() - t0_v; - - # ───── Variant C: @indirect:atomic (M3 AtomicPtr) ───── - MUTABLE c_ap = Counter{ a: 0_i64, b: 0_i64 } @indirect:atomic; - - t0_ap = timestampMs(); - MUTABLE rfutures_ap: ~Int64[]@list = []; - FOR i IN (0_i64 ..< n_readers) DO - 
rfutures_ap.append(BG { @parallel -> - MUTABLE s: Int64 = 0_i64; - MUTABLE j: Int64 = 0_i64; - MUTABLE viol: Int64 = 0_i64; + } + ); + END + + w_v: ~Void = BG { + @parallel -> + MUTABLE k = 0; + WHILE k < writes DO + WITH SNAPSHOT c_v AS MUTABLE v { + v.a = v.a + 1; + v.b = (v.a) * 2; + } + ON MvccConflict RAISE + k = k + 1; + END + + }; + + NEXT w_v; + MUTABLE total_viol_v = 0; + FOR i IN (0 ..< n_readers) DO + total_viol_v = total_viol_v + NEXT rfutures_v[i]; + END + elapsed_v = timestampMs() - t0_v; + + # ───── Variant C: @indirect:atomic (M3 AtomicPtr) ───── + MUTABLE c_ap = Counter{ a: 0, b: 0 } @indirect:atomic; + + t0_ap = timestampMs(); + MUTABLE rfutures_ap: ~Int64[]@list = []; + FOR i IN (0 ..< n_readers) DO + rfutures_ap + .append( + BG { + @parallel -> + MUTABLE s = 0; + MUTABLE j = 0; + MUTABLE viol = 0; WHILE j < reads_per DO - WITH SNAPSHOT c_ap AS v { - s = s + v.a; - IF v.b != v.a * 2_i64 THEN viol = viol + 1_i64; END - } - j = j + 1_i64; + WITH SNAPSHOT c_ap AS v { + s = s + v.a; + IF v.b != v.a * 2 THEN + viol = viol + 1; + END + } + j = j + 1; END viol; - }); - END - - w_ap: ~Void = BG { @parallel -> - MUTABLE k: Int64 = 0_i64; - WHILE k < writes DO - WITH SNAPSHOT c_ap AS MUTABLE v { - v.a = v.a + 1_i64; - v.b = (v.a) * 2_i64; - } - k = k + 1_i64; - END - }; - - NEXT w_ap; - MUTABLE total_viol_ap: Int64 = 0_i64; - FOR i IN (0_i64 ..< n_readers) DO - total_viol_ap = total_viol_ap + NEXT rfutures_ap[i]; - END - elapsed_ap = timestampMs() - t0_ap; - - # ───── Report ───── - print("RwLock : " + elapsed_rw.toString() + " ms (violations: " + total_viol_rw.toString() + ")"); - print("MVCC : " + elapsed_v.toString() + " ms (violations: " + total_viol_v.toString() + ")"); - print("AtomicPtr : " + elapsed_ap.toString() + " ms (violations: " + total_viol_ap.toString() + ")"); - print("BENCH_RESULT: " + elapsed_ap.toString() + " ms"); - RETURN; + } + ); + END + + w_ap: ~Void = BG { + @parallel -> + MUTABLE k = 0; + WHILE k < writes DO + WITH SNAPSHOT c_ap 
AS MUTABLE v { + v.a = v.a + 1; + v.b = (v.a) * 2; + } + k = k + 1; + END + + }; + + NEXT w_ap; + MUTABLE total_viol_ap = 0; + FOR i IN (0 ..< n_readers) DO + total_viol_ap = total_viol_ap + NEXT rfutures_ap[i]; + END + elapsed_ap = timestampMs() - t0_ap; + + # ───── Report ───── + print("RwLock : " + elapsed_rw.toString() + " ms (violations: " + total_viol_rw.toString() + ")"); + print("MVCC : " + elapsed_v.toString() + " ms (violations: " + total_viol_v.toString() + ")"); + print("AtomicPtr : " + elapsed_ap.toString() + " ms (violations: " + total_viol_ap.toString() + ")"); + print("BENCH_RESULT: " + elapsed_ap.toString() + " ms"); + RETURN; END diff --git a/benchmarks/sequential/02_sroa/bench.cht b/benchmarks/sequential/02_sroa/bench.cht index 8f607c4d6..a51a0b0a5 100644 --- a/benchmarks/sequential/02_sroa/bench.cht +++ b/benchmarks/sequential/02_sroa/bench.cht @@ -20,81 +20,288 @@ # N = 100 000 000 iterations. STRUCT BigVec { - x1: Int64, x2: Int64, x3: Int64, x4: Int64, x5: Int64, - x6: Int64, x7: Int64, x8: Int64, x9: Int64, x10: Int64, - x11: Int64, x12: Int64, x13: Int64, x14: Int64, x15: Int64, - x16: Int64, x17: Int64, x18: Int64, x19: Int64, x20: Int64, - x21: Int64, x22: Int64, x23: Int64, x24: Int64, x25: Int64, - x26: Int64, x27: Int64, x28: Int64, x29: Int64, x30: Int64, - x31: Int64, x32: Int64, x33: Int64, x34: Int64, x35: Int64, - x36: Int64, x37: Int64, x38: Int64, x39: Int64, x40: Int64, - x41: Int64, x42: Int64, x43: Int64, x44: Int64, x45: Int64, - x46: Int64, x47: Int64, x48: Int64, x49: Int64, x50: Int64, - x51: Int64, x52: Int64, x53: Int64, x54: Int64, x55: Int64, - x56: Int64, x57: Int64, x58: Int64, x59: Int64, x60: Int64, - x61: Int64, x62: Int64, x63: Int64, x64: Int64, x65: Int64, - x66: Int64, x67: Int64, x68: Int64, x69: Int64, x70: Int64, - x71: Int64, x72: Int64, x73: Int64, x74: Int64, x75: Int64, - x76: Int64, x77: Int64, x78: Int64, x79: Int64, x80: Int64, - x81: Int64, x82: Int64, x83: Int64, x84: Int64, x85: Int64, - x86: 
Int64, x87: Int64, x88: Int64, x89: Int64, x90: Int64, - x91: Int64, x92: Int64, x93: Int64, x94: Int64, x95: Int64, - x96: Int64, x97: Int64, x98: Int64, x99: Int64, x100: Int64, - x101: Int64, x102: Int64, x103: Int64, x104: Int64, x105: Int64, - x106: Int64, x107: Int64, x108: Int64, x109: Int64, x110: Int64, - x111: Int64, x112: Int64, x113: Int64, x114: Int64, x115: Int64, - x116: Int64, x117: Int64, x118: Int64, x119: Int64, x120: Int64, - x121: Int64, x122: Int64, x123: Int64, x124: Int64, x125: Int64, - x126: Int64, x127: Int64, x128: Int64, x129: Int64, x130: Int64 + x1: Int64, + x2: Int64, + x3: Int64, + x4: Int64, + x5: Int64, + x6: Int64, + x7: Int64, + x8: Int64, + x9: Int64, + x10: Int64, + x11: Int64, + x12: Int64, + x13: Int64, + x14: Int64, + x15: Int64, + x16: Int64, + x17: Int64, + x18: Int64, + x19: Int64, + x20: Int64, + x21: Int64, + x22: Int64, + x23: Int64, + x24: Int64, + x25: Int64, + x26: Int64, + x27: Int64, + x28: Int64, + x29: Int64, + x30: Int64, + x31: Int64, + x32: Int64, + x33: Int64, + x34: Int64, + x35: Int64, + x36: Int64, + x37: Int64, + x38: Int64, + x39: Int64, + x40: Int64, + x41: Int64, + x42: Int64, + x43: Int64, + x44: Int64, + x45: Int64, + x46: Int64, + x47: Int64, + x48: Int64, + x49: Int64, + x50: Int64, + x51: Int64, + x52: Int64, + x53: Int64, + x54: Int64, + x55: Int64, + x56: Int64, + x57: Int64, + x58: Int64, + x59: Int64, + x60: Int64, + x61: Int64, + x62: Int64, + x63: Int64, + x64: Int64, + x65: Int64, + x66: Int64, + x67: Int64, + x68: Int64, + x69: Int64, + x70: Int64, + x71: Int64, + x72: Int64, + x73: Int64, + x74: Int64, + x75: Int64, + x76: Int64, + x77: Int64, + x78: Int64, + x79: Int64, + x80: Int64, + x81: Int64, + x82: Int64, + x83: Int64, + x84: Int64, + x85: Int64, + x86: Int64, + x87: Int64, + x88: Int64, + x89: Int64, + x90: Int64, + x91: Int64, + x92: Int64, + x93: Int64, + x94: Int64, + x95: Int64, + x96: Int64, + x97: Int64, + x98: Int64, + x99: Int64, + x100: Int64, + x101: Int64, + x102: 
Int64, + x103: Int64, + x104: Int64, + x105: Int64, + x106: Int64, + x107: Int64, + x108: Int64, + x109: Int64, + x110: Int64, + x111: Int64, + x112: Int64, + x113: Int64, + x114: Int64, + x115: Int64, + x116: Int64, + x117: Int64, + x118: Int64, + x119: Int64, + x120: Int64, + x121: Int64, + x122: Int64, + x123: Int64, + x124: Int64, + x125: Int64, + x126: Int64, + x127: Int64, + x128: Int64, + x129: Int64, + x130: Int64 } # Only reads x1, x2, x3. The other 127 fields are dead. FN sum3(v: BigVec) RETURNS Int64 -> - RETURN v.x1 + v.x2 + v.x3; + RETURN v.x1 + v.x2 + v.x3; END FN main() RETURNS Void -> - t0 = timestampMs(); - MUTABLE i = 0; - MUTABLE acc: Int64 = 1; + t0 = timestampMs(); + MUTABLE i = 0; + MUTABLE acc = 1; - TIGHT WHILE i < 100000000 DO - bv = BigVec{ - x1: acc, x2: acc+1, x3: acc+2, - x4: 0, x5: 0, x6: 0, x7: 0, x8: 0, - x9: 0, x10: 0, x11: 0, x12: 0, x13: 0, - x14: 0, x15: 0, x16: 0, x17: 0, x18: 0, - x19: 0, x20: 0, x21: 0, x22: 0, x23: 0, - x24: 0, x25: 0, x26: 0, x27: 0, x28: 0, - x29: 0, x30: 0, x31: 0, x32: 0, x33: 0, - x34: 0, x35: 0, x36: 0, x37: 0, x38: 0, - x39: 0, x40: 0, x41: 0, x42: 0, x43: 0, - x44: 0, x45: 0, x46: 0, x47: 0, x48: 0, - x49: 0, x50: 0, x51: 0, x52: 0, x53: 0, - x54: 0, x55: 0, x56: 0, x57: 0, x58: 0, - x59: 0, x60: 0, x61: 0, x62: 0, x63: 0, - x64: 0, x65: 0, x66: 0, x67: 0, x68: 0, - x69: 0, x70: 0, x71: 0, x72: 0, x73: 0, - x74: 0, x75: 0, x76: 0, x77: 0, x78: 0, - x79: 0, x80: 0, x81: 0, x82: 0, x83: 0, - x84: 0, x85: 0, x86: 0, x87: 0, x88: 0, - x89: 0, x90: 0, x91: 0, x92: 0, x93: 0, - x94: 0, x95: 0, x96: 0, x97: 0, x98: 0, - x99: 0, x100: 0, x101: 0, x102: 0, x103: 0, - x104: 0, x105: 0, x106: 0, x107: 0, x108: 0, - x109: 0, x110: 0, x111: 0, x112: 0, x113: 0, - x114: 0, x115: 0, x116: 0, x117: 0, x118: 0, - x119: 0, x120: 0, x121: 0, x122: 0, x123: 0, - x124: 0, x125: 0, x126: 0, x127: 0, x128: 0, - x129: 0, x130: 0 - }; - acc = sum3(bv) MOD 1000000007; - i += 1; - END + TIGHT WHILE i < 100_000_000 DO + bv = 
BigVec{ + x1: acc, + x2: acc + 1, + x3: acc + 2, + x4: 0, + x5: 0, + x6: 0, + x7: 0, + x8: 0, + x9: 0, + x10: 0, + x11: 0, + x12: 0, + x13: 0, + x14: 0, + x15: 0, + x16: 0, + x17: 0, + x18: 0, + x19: 0, + x20: 0, + x21: 0, + x22: 0, + x23: 0, + x24: 0, + x25: 0, + x26: 0, + x27: 0, + x28: 0, + x29: 0, + x30: 0, + x31: 0, + x32: 0, + x33: 0, + x34: 0, + x35: 0, + x36: 0, + x37: 0, + x38: 0, + x39: 0, + x40: 0, + x41: 0, + x42: 0, + x43: 0, + x44: 0, + x45: 0, + x46: 0, + x47: 0, + x48: 0, + x49: 0, + x50: 0, + x51: 0, + x52: 0, + x53: 0, + x54: 0, + x55: 0, + x56: 0, + x57: 0, + x58: 0, + x59: 0, + x60: 0, + x61: 0, + x62: 0, + x63: 0, + x64: 0, + x65: 0, + x66: 0, + x67: 0, + x68: 0, + x69: 0, + x70: 0, + x71: 0, + x72: 0, + x73: 0, + x74: 0, + x75: 0, + x76: 0, + x77: 0, + x78: 0, + x79: 0, + x80: 0, + x81: 0, + x82: 0, + x83: 0, + x84: 0, + x85: 0, + x86: 0, + x87: 0, + x88: 0, + x89: 0, + x90: 0, + x91: 0, + x92: 0, + x93: 0, + x94: 0, + x95: 0, + x96: 0, + x97: 0, + x98: 0, + x99: 0, + x100: 0, + x101: 0, + x102: 0, + x103: 0, + x104: 0, + x105: 0, + x106: 0, + x107: 0, + x108: 0, + x109: 0, + x110: 0, + x111: 0, + x112: 0, + x113: 0, + x114: 0, + x115: 0, + x116: 0, + x117: 0, + x118: 0, + x119: 0, + x120: 0, + x121: 0, + x122: 0, + x123: 0, + x124: 0, + x125: 0, + x126: 0, + x127: 0, + x128: 0, + x129: 0, + x130: 0 + }; + acc = sum3(bv) MOD 1_000_000_007; + i += 1; + END - ASSERT acc > 0, "acc must be positive"; - elapsed = timestampMs() - t0; - print("BENCH_RESULT: ${elapsed.toString()} ms"); - print("Time: ${elapsed.toString()} ms"); - RETURN; + ASSERT acc > 0, "acc must be positive"; + elapsed = timestampMs() - t0; + print("BENCH_RESULT: ${elapsed.toString()} ms"); + print("Time: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/sequential/03_alloc_throughput/bench.cht b/benchmarks/sequential/03_alloc_throughput/bench.cht index d445830d0..70bc13954 100644 --- a/benchmarks/sequential/03_alloc_throughput/bench.cht +++ 
b/benchmarks/sequential/03_alloc_throughput/bench.cht @@ -12,56 +12,62 @@ # Regular FOR is printed as CLEAR overhead reference only. FN benchRegular(n: Int64) RETURNS !Float64 -> - MUTABLE total: Float64 = 0.0; - FOR outer IN (0_i64 ..< n) DO - MUTABLE vals: Float64[10000]@list = []; - FOR inner IN (0_i64 ..< n) -> vals.append(inner + 0.0); - MUTABLE s: Float64 = 0.0; - FOR inner IN (0_i64 ..< vals.length()) -> s += vals[inner]; - total = total + s; - END - RETURN total; + MUTABLE total = 0.0; + FOR outer IN (0 ..< n) DO + MUTABLE vals: Float64[10_000]@list = []; + FOR inner IN (0 ..< n) -> vals.append(inner + 0.0); + MUTABLE s = 0.0; + FOR inner IN (0 ..< vals.length()) -> s += vals[inner]; + total = total + s; + END + RETURN total; END FN benchTight(n: Int64) RETURNS !Float64 -> - MUTABLE total: Float64 = 0.0; - FOR outer IN (0_i64 ..< n) DO - MUTABLE vals: Float64[10000]@list = []; - MUTABLE inner: Int64 = 0; - TIGHT WHILE inner < n DO vals.append(inner + 0.0); inner += 1; END - MUTABLE s: Float64 = 0.0; - inner = 0; - TIGHT WHILE inner < n DO s += vals[inner]; inner += 1; END - total = total + s; + MUTABLE total = 0.0; + FOR outer IN (0 ..< n) DO + MUTABLE vals: Float64[10_000]@list = []; + MUTABLE inner = 0; + TIGHT WHILE inner < n DO + vals.append(inner + 0.0); + inner += 1; + END + MUTABLE s = 0.0; + inner = 0; + TIGHT WHILE inner < n DO + s += vals[inner]; + inner += 1; END - RETURN total; + total = total + s; + END + RETURN total; END FN main() RETURNS Void -> - n = 10000; + n = 10_000; - # Warmup - warmR = benchRegular(100); - warmT = benchTight(100); + # Warmup + warmR = benchRegular(100); + warmT = benchTight(100); - # Primary measurement: TIGHT WHILE (apples-to-apples vs C malloc + Rust Vec) - t0 = timestampMs(); - totalT = benchTight(n); - tightMs = timestampMs() - t0; + # Primary measurement: TIGHT WHILE (apples-to-apples vs C malloc + Rust Vec) + t0 = timestampMs(); + totalT = benchTight(n); + tightMs = timestampMs() - t0; - # Declare the primary 
result for runner.rb - print("BENCH_RESULT: ${tightMs.toString()} ms"); + # Declare the primary result for runner.rb + print("BENCH_RESULT: ${tightMs.toString()} ms"); - # Regular FOR loops (CLEAR overhead reference, not cross-language comparison) - t1 = timestampMs(); - totalR = benchRegular(n); - regularMs = timestampMs() - t1; + # Regular FOR loops (CLEAR overhead reference, not cross-language comparison) + t1 = timestampMs(); + totalR = benchRegular(n); + regularMs = timestampMs() - t1; - ASSERT totalT == totalR, "results must match"; - ASSERT totalT > 0.0, "total must be positive"; + ASSERT totalT == totalR, "results must match"; + ASSERT totalT > 0.0, "total must be positive"; - print("total = ${totalT.toString()}"); - print("TIGHT WHILE: ${tightMs.toString()} ms (vs C/Rust)"); - print("Regular FOR: ${regularMs.toString()} ms (CLEAR overhead reference, not measured by runner)"); - RETURN; + print("total = ${totalT.toString()}"); + print("TIGHT WHILE: ${tightMs.toString()} ms (vs C/Rust)"); + print("Regular FOR: ${regularMs.toString()} ms (CLEAR overhead reference, not measured by runner)"); + RETURN; END diff --git a/benchmarks/sequential/04_hashmap/bench.cht b/benchmarks/sequential/04_hashmap/bench.cht index ff7c8b8e2..a19e03c8e 100644 --- a/benchmarks/sequential/04_hashmap/bench.cht +++ b/benchmarks/sequential/04_hashmap/bench.cht @@ -6,22 +6,22 @@ # C uses calloc/free for the bucket array. Rust uses heap via with_capacity. 
FN main() RETURNS Void -> - MUTABLE m: HashMap = {}; + MUTABLE m: HashMap = {}; - # Insert phase: 1M integer key-value pairs - t0 = timestampMs(); - FOR i IN (0_i64 ..< 1000000) -> m[i] = i + 0.0; - insertMs = timestampMs() - t0; + # Insert phase: 1M integer key-value pairs + t0 = timestampMs(); + TIGHT FOR i IN (0 ..< 1_000_000) -> m[i] = i + 0.0; + insertMs = timestampMs() - t0; - # Lookup phase: sum all values to prevent dead-code elimination - t1 = timestampMs(); - MUTABLE total = 0.0; - FOR j IN (0_i64 ..< 1000000) -> total = total + (m[j] OR 0.0); - lookupMs = timestampMs() - t1; + # Lookup phase: sum all values to prevent dead-code elimination + t1 = timestampMs(); + MUTABLE total = 0.0; + TIGHT FOR j IN (0 ..< 1_000_000) -> total = total + (m[j] OR 0.0); + lookupMs = timestampMs() - t1; - ASSERT total > 0.0, "total must be positive"; - totalMs = insertMs + lookupMs; - print("BENCH_RESULT: ${totalMs.toString()} ms"); - print("Insert: ${insertMs.toString()} ms | Lookup: ${lookupMs.toString()} ms | Total: ${totalMs.toString()} ms"); - RETURN; + ASSERT total > 0.0, "total must be positive"; + totalMs = insertMs + lookupMs; + print("BENCH_RESULT: ${totalMs.toString()} ms"); + print("Insert: ${insertMs.toString()} ms | Lookup: ${lookupMs.toString()} ms | Total: ${totalMs.toString()} ms"); + RETURN; END diff --git a/benchmarks/sequential/06_simd/bench.cht b/benchmarks/sequential/06_simd/bench.cht index abdc8c868..343907a33 100644 --- a/benchmarks/sequential/06_simd/bench.cht +++ b/benchmarks/sequential/06_simd/bench.cht @@ -18,28 +18,31 @@ # Result: CLEAR (LLVM auto-vec) is faster than C (GCC explicit AVX2). STRUCT Vec4 { - x: Float64, - y: Float64, - z: Float64, - w: Float64 + x: Float64, + y: Float64, + z: Float64, + w: Float64 } # Scalar dot product: 4 fmul + 3 fadd. # No SIMD equivalent exists in CLEAR today. 
-FN dot4(a: Vec4, b: Vec4) RETURNS Float64 EFFECTS REENTRANT -> - RETURN a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; +FN dot4(a: Vec4, b: Vec4) + RETURNS Float64 + EFFECTS REENTRANT +-> + RETURN a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; END FN main() RETURNS Void -> - t0 = timestampMs(); - MUTABLE sum: Float64 = 0.0; - FOR i IN (0_i64 ..< 100000000) DO - a = Vec4{ x: i+0.0, y: i+1.0, z: i+2.0, w: i+3.0 }; - b = Vec4{ x: 1.0, y: 2.0, z: 3.0, w: 4.0 }; - sum = sum + dot4(a, b); - END - ASSERT sum > 0.0, "sum must be positive"; - elapsed = timestampMs() - t0; - print("BENCH_RESULT: ${elapsed.toString()} ms"); - RETURN; + t0 = timestampMs(); + MUTABLE sum = 0.0; + FOR i IN (0 ..< 100_000_000) DO + a = Vec4{ x: i + 0.0, y: i + 1.0, z: i + 2.0, w: i + 3.0 }; + b = Vec4{ x: 1.0, y: 2.0, z: 3.0, w: 4.0 }; + sum = sum + dot4(a, b); + END + ASSERT sum > 0.0, "sum must be positive"; + elapsed = timestampMs() - t0; + print("BENCH_RESULT: ${elapsed.toString()} ms"); + RETURN; END diff --git a/benchmarks/sequential/07_pointer_chase/bench.cht b/benchmarks/sequential/07_pointer_chase/bench.cht index db8e10c76..e12676eea 100644 --- a/benchmarks/sequential/07_pointer_chase/bench.cht +++ b/benchmarks/sequential/07_pointer_chase/bench.cht @@ -37,38 +37,40 @@ # chasing in the memory-bound regime. The cost is memory: pool slots are # 2× larger than raw C structs, and the scheduler adds fixed overhead. -STRUCT Node { val: Int64 } +STRUCT Node { + val: Int64 +} FN main() RETURNS Void -> - t0 = timestampMs(); - n_nodes = 2000000_i64; - step = 999983_i64; + t0 = timestampMs(); + n_nodes = 2_000_000; + step = 999_983; - MUTABLE pool: Node[2000000]@pool = []; - MUTABLE id_list: Id[2000000]@list = []; + pool: Node[2_000_000]@pool = []; + MUTABLE id_list: Id [2_000_000]@list = []; - # Phase 1: insert N nodes, record IDs in insertion order. - FOR i IN (0_i64 ..< n_nodes) DO - id = pool.insert(Node{ val: i }); - id_list.append(id); - END + # Phase 1: insert N nodes, record IDs in insertion order. 
+ TIGHT FOR i IN (0 ..< n_nodes) DO + id = pool.insert(Node{ val: i }); + id_list.append(id); + END - # Phase 2: walk N steps via arithmetic permutation, accumulate sum. - # Time only this phase (mirrors C walk phase). - MUTABLE cur_idx: Int64 = 0; - MUTABLE sum = 0; - t1 = timestampMs(); - FOR steps IN (0_i64 ..< n_nodes) DO - IF pool[id_list[cur_idx]] AS n THEN - sum = sum + n.val; - END - cur_idx = (cur_idx + step) MOD n_nodes; + # Phase 2: walk N steps via arithmetic permutation, accumulate sum. + # Time only this phase (mirrors C walk phase). + MUTABLE cur_idx = 0; + MUTABLE sum = 0; + t1 = timestampMs(); + TIGHT FOR steps IN (0 ..< n_nodes) DO + IF pool[id_list[cur_idx]] AS n THEN + sum = sum + n.val; END - walkMs = timestampMs() - t1; + cur_idx = (cur_idx + step) MOD n_nodes; + END + walkMs = timestampMs() - t1; - ASSERT sum > 0, "sum must be positive"; - totalMs = timestampMs() - t0; - print("BENCH_RESULT: ${walkMs.toString()} ms"); - print("Walk: ${walkMs.toString()} ms | Total: ${totalMs.toString()} ms"); - RETURN; + ASSERT sum > 0, "sum must be positive"; + totalMs = timestampMs() - t0; + print("BENCH_RESULT: ${walkMs.toString()} ms"); + print("Walk: ${walkMs.toString()} ms | Total: ${totalMs.toString()} ms"); + RETURN; END diff --git a/benchmarks/sequential/08_sort/bench.cht b/benchmarks/sequential/08_sort/bench.cht index 412431a97..54dc12e6b 100644 --- a/benchmarks/sequential/08_sort/bench.cht +++ b/benchmarks/sequential/08_sort/bench.cht @@ -24,7 +24,7 @@ FN qsort!(MUTABLE arr: Float64[], n: Int64) RETURNS !Void -> MUTABLE lo_stk: Int64[]@list = []; MUTABLE hi_stk: Int64[]@list = []; # Fill loop uses append → cannot be TIGHT (append calls rt.frameAlloc). 
- FOR fill_i IN (0_i64 ..< 48) DO + TIGHT FOR fill_i IN (0 ..< 48) DO lo_stk.append(0); hi_stk.append(0); END @@ -54,14 +54,14 @@ FN qsort!(MUTABLE arr: Float64[], n: Int64) RETURNS !Void -> tmp = arr[i]; arr[i] = arr[hi]; arr[hi] = tmp; pi = i; - left_size = pi - lo; + left_size = pi - lo; right_size = hi - pi; IF left_size > right_size THEN - lo_stk[sp] = lo; hi_stk[sp] = pi - 1; sp += 1; - lo_stk[sp] = pi + 1; hi_stk[sp] = hi; sp += 1; + lo_stk[sp] = lo; hi_stk[sp] = pi - 1; sp += 1; + lo_stk[sp] = pi + 1; hi_stk[sp] = hi; sp += 1; ELSE - lo_stk[sp] = pi + 1; hi_stk[sp] = hi; sp += 1; - lo_stk[sp] = lo; hi_stk[sp] = pi - 1; sp += 1; + lo_stk[sp] = pi + 1; hi_stk[sp] = hi; sp += 1; + lo_stk[sp] = lo; hi_stk[sp] = pi - 1; sp += 1; END END END @@ -77,9 +77,13 @@ FN msort!(MUTABLE arr: Float64[], MUTABLE tmp: Float64[], n: Int64) RETURNS Void MUTABLE lo = 0; TIGHT WHILE lo < n DO MUTABLE mid = lo + width; - IF mid > n THEN mid = n; END + IF mid > n THEN + mid = n; + END MUTABLE hi = lo + width + width; - IF hi > n THEN hi = n; END + IF hi > n THEN + hi = n; + END IF mid < hi THEN MUTABLE i = lo; @@ -110,13 +114,13 @@ FN msort!(MUTABLE arr: Float64[], MUTABLE tmp: Float64[], n: Int64) RETURNS Void END FN main() RETURNS Void -> - n = 1000000_i64; + n = 1_000_000; # Build data: permutation of [0, N-1] via Knuth multiplicative hash MUTABLE arr: Float64[]@list = []; MUTABLE seed_i = 0; - WHILE seed_i < n DO - v = seed_i * 2654435761_i64 MOD n + 0.0; + TIGHT WHILE seed_i < n DO + v = seed_i * 2_654_435_761 MOD n + 0.0; arr.append(v); seed_i += 1; END @@ -131,14 +135,14 @@ FN main() RETURNS Void -> # Refill for mergesort seed_i = 0; TIGHT WHILE seed_i < n DO - v = seed_i * 2654435761_i64 MOD n + 0.0; + v = seed_i * 2_654_435_761 MOD n + 0.0; arr[seed_i] = v; seed_i += 1; END # Mergesort (needs a temp buffer of the same size) MUTABLE tmp_arr: Float64[]@list = []; - FOR ti IN (0_i64 ..< n) -> tmp_arr.append(0.0); + TIGHT FOR ti IN (0 ..< n) -> tmp_arr.append(0.0); t1 = 
timestampMs(); msort!(arr, tmp_arr, n); msortMs = timestampMs() - t1; diff --git a/benchmarks/sequential/09_frame_vs_heap/bench.cht b/benchmarks/sequential/09_frame_vs_heap/bench.cht index 41e744450..63c8d3cec 100644 --- a/benchmarks/sequential/09_frame_vs_heap/bench.cht +++ b/benchmarks/sequential/09_frame_vs_heap/bench.cht @@ -14,60 +14,62 @@ # The frame mark rewinds at the end of each loop iteration, so all # string data is freed in O(1) — zero GPA calls. FN benchFrame(n: Int64) RETURNS !Int64 -> - MUTABLE total: Int64 = 0; - FOR i IN (0_i64 ..< n) DO - s = "item-${i.toString()}-value"; - total += s.length(); - END - RETURN total; + MUTABLE total = 0; + FOR i IN (0 ..< n) DO + s = "item-${i.toString()}-value"; + total += s.length(); + END + RETURN total; END # Helper that returns a heap-promoted string (escape path). FN makeString(i: Int64) RETURNS !String -> - result = "item-${i.toString()}-value"; - RETURN result; + result = "item-${i.toString()}-value"; + RETURN result; END # Variant B: Heap-promoted strings. Each iteration calls makeString # which promotes the string to heap on return. The caller frees it # via defer. Every iteration pays GPA alloc + free. 
FN benchHeap(n: Int64) RETURNS !Int64 -> - MUTABLE total: Int64 = 0; - FOR i IN (0_i64 ..< n) DO - s = makeString(i); - total += s.length(); - END - RETURN total; + MUTABLE total = 0; + FOR i IN (0 ..< n) DO + s = makeString(i); + total += s.length(); + END + RETURN total; END FN main() RETURNS Void -> - n = 1000000; + n = 1_000_000; - # Warm up - benchFrame(1000); - benchHeap(1000); + # Warm up + benchFrame(1000); + benchHeap(1000); - # Benchmark Frame (fast path) - t0 = timestampMs(); - frameTotal = benchFrame(n); - frameMs = timestampMs() - t0; - frameRssKb = currentMemoryKb(); + # Benchmark Frame (fast path) + t0 = timestampMs(); + frameTotal = benchFrame(n); + frameMs = timestampMs() - t0; + frameRssKb = currentMemoryKb(); - # Benchmark Heap (escape path) - t1 = timestampMs(); - heapTotal = benchHeap(n); - heapMs = timestampMs() - t1; - heapRssKb = currentMemoryKb(); + # Benchmark Heap (escape path) + t1 = timestampMs(); + heapTotal = benchHeap(n); + heapMs = timestampMs() - t1; + heapRssKb = currentMemoryKb(); - ASSERT frameTotal == heapTotal, "both variants must produce same result"; + ASSERT frameTotal == heapTotal, "both variants must produce same result"; - # BENCH_RESULT reports the frame (no-malloc) path — the primary metric. - # Heap variant is secondary: shows escape overhead for reference. - print("BENCH_RESULT: ${frameMs.toString()} ms"); - print("Frame vs Heap Escape (${n.toString()} iterations)"); - print(" Frame (no escape): ${frameMs.toString()} ms RSS ${frameRssKb.toString()} KB"); - print(" Heap (promoted): ${heapMs.toString()} ms RSS ${heapRssKb.toString()} KB"); - print(" Heap overhead: ${(heapMs - frameMs).toString()} ms (${((heapMs * 100 / frameMs) - 100).toString()}% slower)"); - print(" Peak RSS (VmHWM): ${peakMemoryKb().toString()} KB"); - RETURN; + # BENCH_RESULT reports the frame (no-malloc) path — the primary metric. + # Heap variant is secondary: shows escape overhead for reference. 
+ print("BENCH_RESULT: ${frameMs.toString()} ms"); + print("Frame vs Heap Escape (${n.toString()} iterations)"); + print(" Frame (no escape): ${frameMs.toString()} ms RSS ${frameRssKb.toString()} KB"); + print(" Heap (promoted): ${heapMs.toString()} ms RSS ${heapRssKb.toString()} KB"); + print( + " Heap overhead: ${(heapMs - frameMs).toString()} ms (${((heapMs * 100 / frameMs) - 100).toString()}% slower)" + ); + print(" Peak RSS (VmHWM): ${peakMemoryKb().toString()} KB"); + RETURN; END diff --git a/benchmarks/sequential/10_pool_vs_multiowned/bench.cht b/benchmarks/sequential/10_pool_vs_multiowned/bench.cht index 734d42273..f41f59e46 100644 --- a/benchmarks/sequential/10_pool_vs_multiowned/bench.cht +++ b/benchmarks/sequential/10_pool_vs_multiowned/bench.cht @@ -8,56 +8,60 @@ # Measures the overhead of Pool's generational bookkeeping vs # List's simpler dense array. -STRUCT Entity { x: Int64, y: Int64, health: Int64 } +STRUCT Entity { + x: Int64, + y: Int64, + health: Int64 +} FN benchPool(n: Int64) RETURNS !Int64 -> - MUTABLE pool: Entity[5000000]@pool = []; + pool: Entity[5_000_000]@pool = []; - FOR i IN (0_i64 ..< n) DO - pool.insert(Entity{ x: i, y: i * 2, health: 100 }); - END + TIGHT FOR i IN (0 ..< n) DO + pool.insert(Entity{ x: i, y: i * 2, health: 100 }); + END - MUTABLE sum = 0_i64; - FOR entity IN pool DO - sum += entity.health; - END - RETURN sum; + MUTABLE sum = 0; + TIGHT FOR entity IN pool DO + sum += entity.health; + END + RETURN sum; END FN benchList(n: Int64) RETURNS !Int64 -> - # Pre-allocate capacity to match C (malloc N*sizeof) and Go (make([]T, 0, N)) - MUTABLE items: Entity[5000000]@list = []; + # Pre-allocate capacity to match C (malloc N*sizeof) and Go (make([]T, 0, N)) + MUTABLE items: Entity[5_000_000]@list = []; - FOR i IN (0_i64 ..< n) -> items.append(Entity{ x: i, y: i * 2, health: 100 }); + TIGHT FOR i IN (0 ..< n) -> items.append(Entity{ x: i, y: i * 2, health: 100 }); - MUTABLE sum = 0_i64; - FOR item IN items DO - sum += 
item.health; - END - RETURN sum; + MUTABLE sum = 0; + TIGHT FOR item IN items DO + sum += item.health; + END + RETURN sum; END FN main() RETURNS Void -> - n = 5000000; - - t0 = timestampMs(); - listSum = benchList(n); - listMs = timestampMs() - t0; - listRssKb = currentMemoryKb(); - - t1 = timestampMs(); - poolSum = benchPool(n); - poolMs = timestampMs() - t1; - poolRssKb = currentMemoryKb(); - - ASSERT listSum == poolSum, "both must produce same health sum"; - - # BENCH_RESULT = List (dense array, fast path) - print("BENCH_RESULT: ${listMs.toString()} ms"); - print("Pool vs List (${n.toString()} entities, insert + sum health)"); - print(" List (dense): ${listMs.toString()} ms RSS ${listRssKb.toString()} KB"); - print(" Pool (handles): ${poolMs.toString()} ms RSS ${poolRssKb.toString()} KB"); - print(" Pool overhead: ${(poolMs - listMs).toString()} ms"); - print(" Peak RSS (VmHWM): ${peakMemoryKb().toString()} KB"); - RETURN; + n = 5_000_000; + + t0 = timestampMs(); + listSum = benchList(n); + listMs = timestampMs() - t0; + listRssKb = currentMemoryKb(); + + t1 = timestampMs(); + poolSum = benchPool(n); + poolMs = timestampMs() - t1; + poolRssKb = currentMemoryKb(); + + ASSERT listSum == poolSum, "both must produce same health sum"; + + # BENCH_RESULT = List (dense array, fast path) + print("BENCH_RESULT: ${listMs.toString()} ms"); + print("Pool vs List (${n.toString()} entities, insert + sum health)"); + print(" List (dense): ${listMs.toString()} ms RSS ${listRssKb.toString()} KB"); + print(" Pool (handles): ${poolMs.toString()} ms RSS ${poolRssKb.toString()} KB"); + print(" Pool overhead: ${(poolMs - listMs).toString()} ms"); + print(" Peak RSS (VmHWM): ${peakMemoryKb().toString()} KB"); + RETURN; END diff --git a/benchmarks/sequential/11_pipeline_overhead/bench.cht b/benchmarks/sequential/11_pipeline_overhead/bench.cht index 703dfec87..51b924470 100644 --- a/benchmarks/sequential/11_pipeline_overhead/bench.cht +++ 
b/benchmarks/sequential/11_pipeline_overhead/bench.cht @@ -29,17 +29,18 @@ # "natural" form) blocks LLVM vectorization and gives 962ms instead of 338ms. FN buildData(n: Int64) RETURNS !Float64[]@list -> - MUTABLE data: Float64[]@list = []; - data.reserve(n); # TODO: replace with Float64[n]@list once T[expr]@list supports runtime capacity - - MUTABLE state: Int64 = 42; - FOR i IN (0_i64 ..< n) DO - state = (state %* 6364136223846793005_i64 %+ (i %+ 1442695040888963407_i64)); - MUTABLE val = toFloat(state MOD 1000); - IF val < 0.0 -> val = 0.0 - val; - data.append(val); - END - RETURN data; + MUTABLE data: Float64[]@list = []; + data.reserve(n); + # TODO: replace with Float64[n]@list once T[expr]@list supports runtime capacity + + MUTABLE state = 42; + FOR i IN (0 ..< n) DO + state = (state %* 6_364_136_223_846_793_005 %+ (i %+ 1_442_695_040_888_963_407)); + MUTABLE val = (state MOD 1000).toFloat(); + IF val < 0.0 -> val = 0.0 - val; + data.append(val); + END + RETURN data; END # ============================================================ @@ -47,13 +48,13 @@ END # ============================================================ FN sumLoop(data: Float64[]) RETURNS Float64 -> - MUTABLE sum: Float64 = 0.0; - FOR i IN (0_i64 ..< data.length()) -> sum += data[i]; - RETURN sum; + MUTABLE sum = 0.0; + FOR i IN (0 ..< data.length()) -> sum += data[i]; + RETURN sum; END FN sumPipeline(data: Float64[]) RETURNS Float64 -> - RETURN data |> SUM _; + RETURN data |> SUM _; END # ============================================================ @@ -61,16 +62,19 @@ END # ============================================================ FN fusedLoop(data: Float64[]) RETURNS Float64 -> - MUTABLE sum: Float64 = 0.0; - FOR i IN (0_i64 ..< data.length()) DO - v = data[i]; - IF v > 500.0 -> sum += v * v; - END - RETURN sum; + MUTABLE sum = 0.0; + FOR i IN (0 ..< data.length()) DO + v = data[i]; + IF v > 500.0 -> sum += v * v; + END + RETURN sum; END FN chainedPipeline(data: Float64[]) RETURNS !Float64 
-> - RETURN data |> WHERE _ > 500.0 |> SELECT _ * _ |> SUM _; + RETURN data + |> WHERE _ > 500.0 + |> SELECT _ * _ + |> SUM _; END # ============================================================ @@ -78,84 +82,90 @@ END # ============================================================ FN longFusedLoop(data: Float64[]) RETURNS Float64 -> - MUTABLE sum: Float64 = 0.0; - FOR i IN (0_i64 ..< data.length()) DO - v = data[i]; - IF v > 200.0 && v * 2.0 < 1500.0 THEN - sum += v * 2.0; - END + MUTABLE sum = 0.0; + FOR i IN (0 ..< data.length()) DO + v = data[i]; + IF v > 200.0 && v * 2.0 < 1500.0 THEN + sum += v * 2.0; END - RETURN sum; + END + RETURN sum; END FN longChainedPipeline(data: Float64[]) RETURNS !Float64 -> - RETURN data |> WHERE _ > 200.0 |> SELECT _ * 2.0 |> WHERE _ < 1500.0 |> SUM _; + RETURN data + |> WHERE _ > 200.0 + |> SELECT _ * 2.0 + |> WHERE _ < 1500.0 + |> SUM _; END FN main() RETURNS Void -> - n = 10000000; - iters = 20; - data = buildData(n); - - # ---- Test 1: SUM only (run multiple iters to accumulate measurable time) ---- - warmS1 = sumLoop(data); - warmS2 = sumPipeline(data); - ASSERT warmS1 == warmS2, "sum results must match"; - - MUTABLE accum: Float64 = 0.0; - - t0 = timestampMs(); - FOR r IN (0_i64 ..< iters) -> accum += sumLoop(data); - sumLoopMs = timestampMs() - t0; - - t1 = timestampMs(); - FOR r IN (0_i64 ..< iters) -> accum += sumPipeline(data); - sumPipeMs = timestampMs() - t1; - - # Memory snapshot after SUM tests (no intermediate allocs expected) - sumRssKb = currentMemoryKb(); - - # ---- Test 2: WHERE + SELECT + SUM ---- - warmF1 = fusedLoop(data); - warmF2 = chainedPipeline(data); - ASSERT warmF1 == warmF2, "fused results must match"; - - t2 = timestampMs(); - FOR r IN (0_i64 ..< iters) -> accum += fusedLoop(data); - fusedMs = timestampMs() - t2; - fusedRssKb = currentMemoryKb(); - - t3 = timestampMs(); - FOR r IN (0_i64 ..< iters) -> accum += chainedPipeline(data); - chainMs = timestampMs() - t3; - chainRssKb = currentMemoryKb(); - - 
# ---- Test 3: Long chain (4-stage) ---- - warmL1 = longFusedLoop(data); - warmL2 = longChainedPipeline(data); - ASSERT warmL1 == warmL2, "long chain results must match"; - - t4 = timestampMs(); - FOR r IN (0_i64 ..< iters) -> accum += longFusedLoop(data); - longFusedMs = timestampMs() - t4; - - t5 = timestampMs(); - FOR r IN (0_i64 ..< iters) -> accum += longChainedPipeline(data); - longChainMs = timestampMs() - t5; - - ASSERT accum != 0.0, "prevent dead code elimination"; - - # ---- Report ---- - # BENCH_RESULT = handwritten sum loop (cross-language baseline) - print("BENCH_RESULT: ${sumLoopMs.toString()} ms"); - print("Pipeline overhead (${n.toString()} elements x ${iters.toString()} iters)"); - print(" Sum loop (handwritten): ${sumLoopMs.toString()} ms"); - print(" Sum pipeline (|> SUM _): ${sumPipeMs.toString()} ms overhead: ${(sumPipeMs - sumLoopMs).toString()} ms"); - print(" Fused loop (2-stage): ${fusedMs.toString()} ms"); - print(" Chained pipeline (2-stage): ${chainMs.toString()} ms overhead: ${(chainMs - fusedMs).toString()} ms"); - print(" Fused loop (4-stage): ${longFusedMs.toString()} ms"); - print(" Chained pipeline (4-stage): ${longChainMs.toString()} ms overhead: ${(longChainMs - longFusedMs).toString()} ms"); - print(" Peak RSS: ${peakMemoryKb().toString()} KB"); - - RETURN; + n = 10_000_000; + iters = 20; + data = buildData(n); + + # ---- Test 1: SUM only (run multiple iters to accumulate measurable time) ---- + warmS1 = sumLoop(data); + warmS2 = sumPipeline(data); + ASSERT warmS1 == warmS2, "sum results must match"; + + MUTABLE accum = 0.0; + + t0 = timestampMs(); + FOR r IN (0 ..< iters) -> accum += sumLoop(data); + sumLoopMs = timestampMs() - t0; + + t1 = timestampMs(); + FOR r IN (0 ..< iters) -> accum += sumPipeline(data); + sumPipeMs = timestampMs() - t1; + + # Memory snapshot after SUM tests (no intermediate allocs expected) + sumRssKb = currentMemoryKb(); + + # ---- Test 2: WHERE + SELECT + SUM ---- + warmF1 = fusedLoop(data); + warmF2 = 
chainedPipeline(data); + ASSERT warmF1 == warmF2, "fused results must match"; + + t2 = timestampMs(); + FOR r IN (0 ..< iters) -> accum += fusedLoop(data); + fusedMs = timestampMs() - t2; + fusedRssKb = currentMemoryKb(); + + t3 = timestampMs(); + FOR r IN (0 ..< iters) -> accum += chainedPipeline(data); + chainMs = timestampMs() - t3; + chainRssKb = currentMemoryKb(); + + # ---- Test 3: Long chain (4-stage) ---- + warmL1 = longFusedLoop(data); + warmL2 = longChainedPipeline(data); + ASSERT warmL1 == warmL2, "long chain results must match"; + + t4 = timestampMs(); + FOR r IN (0 ..< iters) -> accum += longFusedLoop(data); + longFusedMs = timestampMs() - t4; + + t5 = timestampMs(); + FOR r IN (0 ..< iters) -> accum += longChainedPipeline(data); + longChainMs = timestampMs() - t5; + + ASSERT accum != 0.0, "prevent dead code elimination"; + + # ---- Report ---- + # BENCH_RESULT = handwritten sum loop (cross-language baseline) + print("BENCH_RESULT: ${sumLoopMs.toString()} ms"); + print("Pipeline overhead (${n.toString()} elements x ${iters.toString()} iters)"); + print(" Sum loop (handwritten): ${sumLoopMs.toString()} ms"); + print(" Sum pipeline (|> SUM _): ${sumPipeMs.toString()} ms overhead: ${(sumPipeMs - sumLoopMs).toString()} ms"); + print(" Fused loop (2-stage): ${fusedMs.toString()} ms"); + print(" Chained pipeline (2-stage): ${chainMs.toString()} ms overhead: ${(chainMs - fusedMs).toString()} ms"); + print(" Fused loop (4-stage): ${longFusedMs.toString()} ms"); + print( + " Chained pipeline (4-stage): ${longChainMs.toString()} ms overhead: ${(longChainMs - longFusedMs).toString()} ms" + ); + print(" Peak RSS: ${peakMemoryKb().toString()} KB"); + + RETURN; END diff --git a/benchmarks/sequential/12_weak_ref_graph/bench.cht b/benchmarks/sequential/12_weak_ref_graph/bench.cht index 6ed78bbb2..5722130fc 100644 --- a/benchmarks/sequential/12_weak_ref_graph/bench.cht +++ b/benchmarks/sequential/12_weak_ref_graph/bench.cht @@ -19,58 +19,58 @@ # N = 200,000 nodes. 
STRUCT Node { - id: Int64 + id: Int64 } FN main() RETURNS Void -> - n_nodes = 200000_i64; + n_nodes = 200_000; - # Phase 1: BUILD - t0 = timestampMs(); + # Phase 1: BUILD + t0 = timestampMs(); - # Allocate N nodes as @multiowned (Rc) in a list - MUTABLE nodes: Node@multiowned[]@list = []; - FOR i IN (0_i64 ..< n_nodes) DO - nodes.append(Node{ id: i } @multiowned); - END + # Allocate N nodes as @multiowned (Rc) in a list + MUTABLE nodes: Node@multiowned[]@list = []; + TIGHT FOR i IN (0 ..< n_nodes) DO + nodes.append(Node{ id: i } @multiowned); + END - # Create @link (WeakRc) back-pointers: one per node - # parents[0] = dummy link to self (root has no parent) - # parents[i] = LINK to nodes[(i-1)/2] for i > 0 - MUTABLE parents: Node@link[]@list = []; - parents.append(LINK nodes[0]); - FOR i IN (1_i64 ..< n_nodes) DO - parent_idx = (i - 1_i64) / 2_i64; - parents.append(LINK nodes[parent_idx]); - END + # Create @link (WeakRc) back-pointers: one per node + # parents[0] = dummy link to self (root has no parent) + # parents[i] = LINK to nodes[(i-1)/2] for i > 0 + MUTABLE parents: Node@link[]@list = []; + parents.append(LINK nodes[0]); + TIGHT FOR i IN (1 ..< n_nodes) DO + parent_idx = (i - 1) / 2; + parents.append(LINK nodes[parent_idx]); + END - t1 = timestampMs(); + t1 = timestampMs(); - # Phase 2: WALK -- resolve every parent back-pointer - MUTABLE checksum = 0_i64; - FOR i IN (0_i64 ..< n_nodes) DO - checksum = checksum + nodes[i].id; + # Phase 2: WALK -- resolve every parent back-pointer + MUTABLE checksum = 0; + TIGHT FOR i IN (0 ..< n_nodes) DO + checksum = checksum + nodes[i].id; - IF i > 0_i64 THEN - IF RESOLVE parents[i] AS r THEN - checksum = checksum + r.id; - END - END + IF i > 0 THEN + IF RESOLVE parents[i] AS r THEN + checksum = checksum + r.id; + END END + END - t2 = timestampMs(); + t2 = timestampMs(); - build_ms = t1 - t0; - walk_ms = t2 - t1; - total_ms = t2 - t0; + build_ms = t1 - t0; + walk_ms = t2 - t1; + total_ms = t2 - t0; - ASSERT checksum > 0_i64, 
"checksum must be positive"; + ASSERT checksum > 0, "checksum must be positive"; - print("BENCH_RESULT: ${total_ms.toString()} ms"); - print("Weak-reference graph (${n_nodes.toString()} nodes)"); - print(" build: ${build_ms.toString()} ms"); - print(" walk: ${walk_ms.toString()} ms"); - print(" total: ${total_ms.toString()} ms"); + print("BENCH_RESULT: ${total_ms.toString()} ms"); + print("Weak-reference graph (${n_nodes.toString()} nodes)"); + print(" build: ${build_ms.toString()} ms"); + print(" walk: ${walk_ms.toString()} ms"); + print(" total: ${total_ms.toString()} ms"); - RETURN; + RETURN; END diff --git a/benchmarks/sequential/13_soa_layout/bench.cht b/benchmarks/sequential/13_soa_layout/bench.cht index 668854e8d..0a42479f2 100644 --- a/benchmarks/sequential/13_soa_layout/bench.cht +++ b/benchmarks/sequential/13_soa_layout/bench.cht @@ -11,62 +11,159 @@ # transposes the data automatically. No manual struct-splitting needed. STRUCT Particle { - x: Float64, y: Float64, z: Float64, - vx: Float64, vy: Float64, vz: Float64, - mass: Float64, radius: Float64, charge: Float64, - ax: Float64, ay: Float64, az: Float64, - age: Float64, energy: Float64, temperature: Float64, pressure: Float64, - r01: Float64, r02: Float64, r03: Float64, r04: Float64, - r05: Float64, r06: Float64, r07: Float64, r08: Float64, - r09: Float64, r10: Float64, r11: Float64, r12: Float64, - r13: Float64, r14: Float64, r15: Float64, r16: Float64, - r17: Float64, r18: Float64, r19: Float64, r20: Float64, - r21: Float64, r22: Float64, r23: Float64, r24: Float64, - r25: Float64, r26: Float64, r27: Float64, r28: Float64, - r29: Float64, r30: Float64, r31: Float64, r32: Float64, - r33: Float64, r34: Float64, r35: Float64, r36: Float64, - r37: Float64, r38: Float64, r39: Float64, r40: Float64, - r41: Float64, r42: Float64, r43: Float64, r44: Float64, - r45: Float64, r46: Float64, r47: Float64, r48: Float64 + x: Float64, + y: Float64, + z: Float64, + vx: Float64, + vy: Float64, + vz: Float64, + mass: 
Float64, + radius: Float64, + charge: Float64, + ax: Float64, + ay: Float64, + az: Float64, + age: Float64, + energy: Float64, + temperature: Float64, + pressure: Float64, + r01: Float64, + r02: Float64, + r03: Float64, + r04: Float64, + r05: Float64, + r06: Float64, + r07: Float64, + r08: Float64, + r09: Float64, + r10: Float64, + r11: Float64, + r12: Float64, + r13: Float64, + r14: Float64, + r15: Float64, + r16: Float64, + r17: Float64, + r18: Float64, + r19: Float64, + r20: Float64, + r21: Float64, + r22: Float64, + r23: Float64, + r24: Float64, + r25: Float64, + r26: Float64, + r27: Float64, + r28: Float64, + r29: Float64, + r30: Float64, + r31: Float64, + r32: Float64, + r33: Float64, + r34: Float64, + r35: Float64, + r36: Float64, + r37: Float64, + r38: Float64, + r39: Float64, + r40: Float64, + r41: Float64, + r42: Float64, + r43: Float64, + r44: Float64, + r45: Float64, + r46: Float64, + r47: Float64, + r48: Float64 } FN main() RETURNS Void -> - n = 100000_i64; - iterations = 100_i64; + n = 100_000; + iterations = 100; - # SOA layout via T[N]@soa (fixed-size SOA array) - MUTABLE soa: Particle[100000]@soa = []; - FOR i IN (0_i64 ..< n) DO - soa.append(Particle{ - x: toFloat(i), y: toFloat(i) * 2.0, z: 0.0, - vx: 1.0, vy: 0.5, vz: 0.0, - mass: 1.0, radius: 0.1, charge: 0.0, - ax: 0.0, ay: 0.0, az: 0.0, - age: 0.0, energy: 0.0, temperature: 0.0, pressure: 0.0, - r01: 0.0, r02: 0.0, r03: 0.0, r04: 0.0, - r05: 0.0, r06: 0.0, r07: 0.0, r08: 0.0, - r09: 0.0, r10: 0.0, r11: 0.0, r12: 0.0, - r13: 0.0, r14: 0.0, r15: 0.0, r16: 0.0, - r17: 0.0, r18: 0.0, r19: 0.0, r20: 0.0, - r21: 0.0, r22: 0.0, r23: 0.0, r24: 0.0, - r25: 0.0, r26: 0.0, r27: 0.0, r28: 0.0, - r29: 0.0, r30: 0.0, r31: 0.0, r32: 0.0, - r33: 0.0, r34: 0.0, r35: 0.0, r36: 0.0, - r37: 0.0, r38: 0.0, r39: 0.0, r40: 0.0, - r41: 0.0, r42: 0.0, r43: 0.0, r44: 0.0, - r45: 0.0, r46: 0.0, r47: 0.0, r48: 0.0 - }); - END + # SOA layout via T[N]@soa (fixed-size SOA array) + MUTABLE soa: Particle[100_000]@soa = []; + 
TIGHT FOR i IN (0 ..< n) DO + soa + .append( + Particle{ + x: i.toFloat(), + y: i.toFloat() * 2.0, + z: 0.0, + vx: 1.0, + vy: 0.5, + vz: 0.0, + mass: 1.0, + radius: 0.1, + charge: 0.0, + ax: 0.0, + ay: 0.0, + az: 0.0, + age: 0.0, + energy: 0.0, + temperature: 0.0, + pressure: 0.0, + r01: 0.0, + r02: 0.0, + r03: 0.0, + r04: 0.0, + r05: 0.0, + r06: 0.0, + r07: 0.0, + r08: 0.0, + r09: 0.0, + r10: 0.0, + r11: 0.0, + r12: 0.0, + r13: 0.0, + r14: 0.0, + r15: 0.0, + r16: 0.0, + r17: 0.0, + r18: 0.0, + r19: 0.0, + r20: 0.0, + r21: 0.0, + r22: 0.0, + r23: 0.0, + r24: 0.0, + r25: 0.0, + r26: 0.0, + r27: 0.0, + r28: 0.0, + r29: 0.0, + r30: 0.0, + r31: 0.0, + r32: 0.0, + r33: 0.0, + r34: 0.0, + r35: 0.0, + r36: 0.0, + r37: 0.0, + r38: 0.0, + r39: 0.0, + r40: 0.0, + r41: 0.0, + r42: 0.0, + r43: 0.0, + r44: 0.0, + r45: 0.0, + r46: 0.0, + r47: 0.0, + r48: 0.0 + } + ); + END - t0 = timestampMs(); - FOR iter IN (0_i64 ..< iterations) DO - soa |> EACH { _.x = _.x + _.vx; _.y = _.y + _.vy; }; - END - soaMs = timestampMs() - t0; + t0 = timestampMs(); + TIGHT FOR iter IN (0 ..< iterations) DO + soa |> EACH {_.x = _.x + _.vx; _.y = _.y + _.vy;}; + END + soaMs = timestampMs() - t0; - checksum = soa |> SUM _.x + _.y; - print("BENCH_RESULT: ${soaMs.toString()} ms"); - print("SOA layout (${n.toString()} particles x ${iterations.toString()} iters)"); - print(" SOA: ${soaMs.toString()} ms"); - print(" checksum: ${checksum.toString()}"); + checksum = soa |> SUM _.x + _.y; + print("BENCH_RESULT: ${soaMs.toString()} ms"); + print("SOA layout (${n.toString()} particles x ${iterations.toString()} iters)"); + print(" SOA: ${soaMs.toString()} ms"); + print(" checksum: ${checksum.toString()}"); END diff --git a/benchmarks/sequential/14_iterator/bench.cht b/benchmarks/sequential/14_iterator/bench.cht index 556617541..b960f0585 100644 --- a/benchmarks/sequential/14_iterator/bench.cht +++ b/benchmarks/sequential/14_iterator/bench.cht @@ -9,77 +9,77 @@ # This is the borrow system's performance stress 
test. STRUCT SliceIter { - source: BORROWED Int64[], - pos: Int64, - len: Int64 + source: BORROWED Int64[], + pos: Int64, + len: Int64 } -FN hasNext(iter: SliceIter) RETURNS Bool -> - RETURN iter.pos < iter.len; +METHOD hasNext(iter: SliceIter) RETURNS Bool -> + RETURN iter.pos < iter.len; END -FN currentVal(iter: SliceIter) RETURNS Int64 -> - RETURN iter.source[iter.pos]; +METHOD currentVal(iter: SliceIter) RETURNS Int64 -> + RETURN iter.source[iter.pos]; END -FN advance(iter: SliceIter) RETURNS SliceIter -> - RETURN SliceIter{ source: iter.source, pos: iter.pos + 1, len: iter.len }; +METHOD advance(iter: SliceIter) RETURNS SliceIter -> + RETURN SliceIter{ source: iter.source, pos: iter.pos + 1, len: iter.len }; END # Benchmark 1: Borrowed iterator (zero-copy) FN benchBorrowed(data: Int64[], n: Int64, iters: Int64) RETURNS Int64 -> - MUTABLE total: Int64 = 0; - FOR outer IN (0_i64 ..< iters) DO - WITH BORROWED data AS ref { - MUTABLE iter = SliceIter{ source: ref, pos: 0, len: n }; - WHILE hasNext(iter) DO - total = total %+ currentVal(iter); - iter = advance(iter); - END - } - END - RETURN total; + MUTABLE total = 0; + TIGHT FOR outer IN (0 ..< iters) DO + WITH BORROWED data AS ref { + MUTABLE iter = SliceIter{ source: ref, pos: 0, len: n }; + TIGHT WHILE iter.hasNext() DO + total = total %+ iter.currentVal(); + iter = iter.advance(); + END + } + END + RETURN total; END # Benchmark 2: Raw indexed loop (baseline) FN benchRawLoop(data: Int64[], n: Int64, iters: Int64) RETURNS Int64 -> - MUTABLE total: Int64 = 0; - FOR outer IN (0_i64 ..< iters) DO - FOR i IN (0_i64 ..< n) DO - total = total %+ data[i]; - END + MUTABLE total = 0; + TIGHT FOR outer IN (0 ..< iters) DO + TIGHT FOR i IN (0 ..< n) DO + total = total %+ data[i]; END - RETURN total; + END + RETURN total; END FN main() RETURNS Void -> - n: Int64 = 10000; - iters: Int64 = 1000; + n = 10_000; + iters = 1000; - # Build data array - MUTABLE data: Int64[10000]@list = []; - FOR i IN (0_i64 ..< n) -> 
data.append(i %* 7 %+ 13); + # Build data array + MUTABLE data: Int64[10_000]@list = []; + TIGHT FOR i IN (0 ..< n) -> data.append(i %* 7 %+ 13); - # Warm up - x = benchRawLoop(data, n, 1); + # Warm up + x = benchRawLoop(data, n, 1); - # Benchmark raw loop - t0 = timestampMs(); - result1 = benchRawLoop(data, n, iters); - t1 = timestampMs(); - rawMs = t1 - t0; + # Benchmark raw loop + t0 = timestampMs(); + result1 = benchRawLoop(data, n, iters); + t1 = timestampMs(); + rawMs = t1 - t0; - # Benchmark borrowed iterator - t2 = timestampMs(); - result2 = benchBorrowed(data, n, iters); - t3 = timestampMs(); - iterMs = t3 - t2; + # Benchmark borrowed iterator + t2 = timestampMs(); + result2 = benchBorrowed(data, n, iters); + t3 = timestampMs(); + iterMs = t3 - t2; - ASSERT result1 == result2; + ASSERT result1 == result2; - print("BENCH_RESULT: ${iterMs.toString()} ms"); - print("Iterator benchmark (${n.toString()} elements x ${iters.toString()} iters)"); - print(" Iterator: ${iterMs.toString()} ms"); - print(" Raw loop: ${rawMs.toString()} ms"); - RETURN; + print("BENCH_RESULT: ${iterMs.toString()} ms"); + print("Iterator benchmark (${n.toString()} elements x ${iters.toString()} iters)"); + print(" Iterator: ${iterMs.toString()} ms"); + print(" Raw loop: ${rawMs.toString()} ms"); + RETURN; END diff --git a/benchmarks/server/01_tcp_kvstore/server.cht b/benchmarks/server/01_tcp_kvstore/server.cht index c0c9b9c62..93345765b 100644 --- a/benchmarks/server/01_tcp_kvstore/server.cht +++ b/benchmarks/server/01_tcp_kvstore/server.cht @@ -9,150 +9,155 @@ # Bench: redis-benchmark -p 6390 -t set,get,incr -n 100000 -c 50 -P 16 FN handleClient!(client: TCPClient, MUTABLE store: HashMap, MUTABLE counters: HashMap) RETURNS !Void -> - MUTABLE running: Bool = TRUE; - WHILE running DO - data: String@raw = tcpRead(client); - IF data.length() == 0 THEN - running = FALSE; - ELSE - # Batch all responses into one string, write once at the end. 
- MUTABLE resp = ""; - MUTABLE pos: Int64 = 0; - WHILE pos < data.length() && running DO - MUTABLE ch = charAt(data, pos); - IF ch == "*" THEN - # RESP array: *N\r\n$L\r\nBULK\r\n... - pos += 1; - MUTABLE crPos = pos; - WHILE crPos < data.length() && charAt(data, crPos) != "\r" DO - crPos += 1; - END - MUTABLE countStr = substr(data, pos, crPos - pos); - MUTABLE argCount = toInt(toNumber(countStr) OR (0.0 - 1.0)); - pos = crPos + 2; + MUTABLE running = TRUE; + WHILE running DO + data: String@raw = client.tcpRead(); + IF data.empty?() THEN + running = FALSE; + ELSE + # Batch all responses into one string, write once at the end. + MUTABLE resp = ""; + MUTABLE pos = 0; + WHILE pos < data.length() && running DO + ch = data.charAt(pos); + IF ch == "*" THEN + # RESP array: *N\r\n$L\r\nBULK\r\n... + pos += 1; + MUTABLE crPos = pos; + WHILE crPos < data.length() && data.charAt(crPos) != "\r" DO + crPos += 1; + END + MUTABLE countStr = data.substr(pos, crPos - pos); + MUTABLE argCount = (countStr.toNumber() OR (0.0 - 1.0)).toInt(); + pos = crPos + 2; - MUTABLE arg0 = ""; - MUTABLE arg1 = ""; - MUTABLE arg2 = ""; - MUTABLE ai: Int64 = 0; - WHILE ai < argCount && pos < data.length() DO - IF charAt(data, pos) == "$" THEN - pos += 1; - MUTABLE crPos2 = pos; - WHILE crPos2 < data.length() && charAt(data, crPos2) != "\r" DO - crPos2 += 1; - END - MUTABLE lenStr = substr(data, pos, crPos2 - pos); - MUTABLE len = toInt(toNumber(lenStr) OR (0.0 - 1.0)); - pos = crPos2 + 2; - MUTABLE val = substr(data, pos, len); - pos += len; - pos += 2; - IF ai == 0 THEN arg0 = val; - ELSE_IF ai == 1 THEN arg1 = val; - ELSE_IF ai == 2 THEN arg2 = val; - END - END - ai += 1; - END + MUTABLE arg0 = ""; + MUTABLE arg1 = ""; + MUTABLE arg2 = ""; + MUTABLE ai = 0; + WHILE ai < argCount && pos < data.length() DO + IF data.charAt(pos) == "$" THEN + pos += 1; + MUTABLE crPos2 = pos; + WHILE crPos2 < data.length() && data.charAt(crPos2) != "\r" DO + crPos2 += 1; + END + MUTABLE lenStr = data.substr(pos, 
crPos2 - pos); + MUTABLE len = (lenStr.toNumber() OR (0.0 - 1.0)).toInt(); + pos = crPos2 + 2; + MUTABLE val = data.substr(pos, len); + pos += len; + pos += 2; + IF ai == 0 THEN + arg0 = val; + ELSE_IF ai == 1 THEN + arg1 = val; + ELSE_IF ai == 2 THEN + arg2 = val; + END + END + ai += 1; + END - # Dispatch command — append to response buffer - IF arg0 == "SET" || arg0 == "set" THEN - store[arg1] = arg2; - resp = resp + "+OK\r\n"; - ELSE_IF arg0 == "GET" || arg0 == "get" THEN - MUTABLE result = store[arg1] OR ""; - IF result.length() > 0 THEN - resp = resp + "$${result.length().toString()}\r\n${result}\r\n"; - ELSE - resp = resp + "$-1\r\n"; - END - ELSE_IF arg0 == "INCR" || arg0 == "incr" THEN - IF arg1.length() > 0 THEN - newVal = (counters[arg1] OR 0_i64) + 1; - counters[arg1] = newVal; - resp = resp + ":${newVal.toString()}\r\n"; - ELSE - resp = resp + "-ERR wrong number of arguments for 'INCR'\r\n"; - END - ELSE_IF arg0 == "DECR" || arg0 == "decr" THEN - IF arg1.length() > 0 THEN - newVal = (counters[arg1] OR 0_i64) - 1; - counters[arg1] = newVal; - resp = resp + ":${newVal.toString()}\r\n"; - ELSE - resp = resp + "-ERR wrong number of arguments for 'DECR'\r\n"; - END - ELSE_IF arg0 == "PING" || arg0 == "ping" THEN - resp = resp + "+PONG\r\n"; - ELSE_IF arg0 == "COMMAND" || arg0 == "command" THEN - resp = resp + "*0\r\n"; - ELSE_IF arg0 == "CONFIG" || arg0 == "config" THEN - # CONFIG GET returns empty list; CONFIG SET returns OK - IF arg1 == "SET" || arg1 == "set" THEN - resp = resp + "+OK\r\n"; - ELSE - resp = resp + "*0\r\n"; - END - ELSE_IF arg0 == "CLIENT" || arg0 == "client" THEN - resp = resp + "+OK\r\n"; - ELSE_IF arg0 == "FLUSHALL" || arg0 == "flushall" THEN - resp = resp + "+OK\r\n"; - ELSE_IF arg0 == "FLUSHDB" || arg0 == "flushdb" THEN - resp = resp + "+OK\r\n"; - ELSE_IF arg0 == "DBSIZE" || arg0 == "dbsize" THEN - resp = resp + ":0\r\n"; - ELSE_IF arg0 == "QUIT" || arg0 == "quit" THEN - resp = resp + "+OK\r\n"; - running = FALSE; - ELSE - resp = resp 
+ "-ERR unknown command '${arg0}'\r\n"; - END - ELSE_IF ch == "\r" || ch == "\n" THEN - pos += 1; - ELSE - # Inline command: scan to \r or \n, extract with substr - MUTABLE cmdEnd = pos; - WHILE cmdEnd < data.length() && charAt(data, cmdEnd) != "\r" && charAt(data, cmdEnd) != "\n" DO - cmdEnd += 1; - END - MUTABLE cmd = substr(data, pos, cmdEnd - pos); - pos = cmdEnd; - WHILE pos < data.length() && (charAt(data, pos) == "\r" || charAt(data, pos) == "\n") DO - pos += 1; - END - IF cmd == "PING" || cmd == "ping" THEN - resp = resp + "+PONG\r\n"; - ELSE_IF cmd == "READY?" THEN - resp = resp + "+READY\r\n"; - ELSE_IF cmd == "QUIT" || cmd == "quit" THEN - resp = resp + "+OK\r\n"; - running = FALSE; - ELSE - resp = resp + "-ERR unknown command '${cmd}'\r\n"; - END - END + # Dispatch command — append to response buffer + IF arg0 == "SET" || arg0 == "set" THEN + store[arg1] = arg2; + resp = resp + "+OK\r\n"; + ELSE_IF arg0 == "GET" || arg0 == "get" THEN + MUTABLE result = store[arg1] OR ""; + IF result.any?() THEN + resp = resp + "$${result.length().toString()}\r\n${result}\r\n"; + ELSE + resp = resp + "$-1\r\n"; + END + ELSE_IF arg0 == "INCR" || arg0 == "incr" THEN + IF arg1.any?() THEN + newVal = (counters[arg1] OR 0) + 1; + counters[arg1] = newVal; + resp = resp + ":${newVal.toString()}\r\n"; + ELSE + resp = resp + "-ERR wrong number of arguments for 'INCR'\r\n"; + END + ELSE_IF arg0 == "DECR" || arg0 == "decr" THEN + IF arg1.any?() THEN + newVal = (counters[arg1] OR 0) - 1; + counters[arg1] = newVal; + resp = resp + ":${newVal.toString()}\r\n"; + ELSE + resp = resp + "-ERR wrong number of arguments for 'DECR'\r\n"; END - # Single write for ALL responses in this batch - IF resp.length() > 0 THEN - tcpWrite(client, resp); + ELSE_IF arg0 == "PING" || arg0 == "ping" THEN + resp = resp + "+PONG\r\n"; + ELSE_IF arg0 == "COMMAND" || arg0 == "command" THEN + resp = resp + "*0\r\n"; + ELSE_IF arg0 == "CONFIG" || arg0 == "config" THEN + # CONFIG GET returns empty list; CONFIG 
SET returns OK + IF arg1 == "SET" || arg1 == "set" THEN + resp = resp + "+OK\r\n"; + ELSE + resp = resp + "*0\r\n"; END + ELSE_IF arg0 == "CLIENT" || arg0 == "client" THEN + resp = resp + "+OK\r\n"; + ELSE_IF arg0 == "FLUSHALL" || arg0 == "flushall" THEN + resp = resp + "+OK\r\n"; + ELSE_IF arg0 == "FLUSHDB" || arg0 == "flushdb" THEN + resp = resp + "+OK\r\n"; + ELSE_IF arg0 == "DBSIZE" || arg0 == "dbsize" THEN + resp = resp + ":0\r\n"; + ELSE_IF arg0 == "QUIT" || arg0 == "quit" THEN + resp = resp + "+OK\r\n"; + running = FALSE; + ELSE + resp = resp + "-ERR unknown command '${arg0}'\r\n"; + END + ELSE_IF ch == "\r" || ch == "\n" THEN + pos += 1; + ELSE + # Inline command: scan to \r or \n, extract with substr + MUTABLE cmdEnd = pos; + WHILE cmdEnd < data.length() && data.charAt(cmdEnd) != "\r" && data.charAt(cmdEnd) != "\n" DO + cmdEnd += 1; + END + MUTABLE cmd = data.substr(pos, cmdEnd - pos); + pos = cmdEnd; + WHILE pos < data.length() && (data.charAt(pos) == "\r" || data.charAt(pos) == "\n") DO + pos += 1; + END + IF cmd == "PING" || cmd == "ping" THEN + resp = resp + "+PONG\r\n"; + ELSE_IF cmd == "READY?" 
THEN + resp = resp + "+READY\r\n"; + ELSE_IF cmd == "QUIT" || cmd == "quit" THEN + resp = resp + "+OK\r\n"; + running = FALSE; + ELSE + resp = resp + "-ERR unknown command '${cmd}'\r\n"; + END END + END + # Single write for ALL responses in this batch + IF resp.any?() THEN + client.tcpWrite(resp); + END END + END END FN main() RETURNS Void -> - MUTABLE store: HashMap@sharded(128):locked = {}; - MUTABLE counters: HashMap@sharded(128):locked = {}; + MUTABLE store: HashMap @sharded(128):locked = {}; + MUTABLE counters: HashMap @sharded(128):locked = {}; - server = TCPServer::listen(6390); - print("CLEAR kvstore listening on port 6390"); + server = TCPServer::listen(6390); + print("CLEAR kvstore listening on port 6390"); - MUTABLE tasks: ~Void[]@list = []; - WHILE TRUE DO - client = accept(server); - tasks.append(BG { - handleClient!(client, store, counters); - }); - END + MUTABLE tasks: ~Void[]@list = []; + WHILE TRUE DO + client = server.accept(); + tasks.append( + BG { + handleClient!(client, store, counters); + } + ); + END END diff --git a/benchmarks/server/02_json_api/server.cht b/benchmarks/server/02_json_api/server.cht index cc084fec4..9b2744d3c 100644 --- a/benchmarks/server/02_json_api/server.cht +++ b/benchmarks/server/02_json_api/server.cht @@ -11,99 +11,107 @@ # # Pure FFI: all native Zig operations declared via EXTERN, no shim files. 
-EXTERN STRUCT ParseOptions {} FROM "std.json"; -EXTERN STRUCT JsonRecord { id: Int64, data: Int64[] }; -EXTERN STRUCT Parsed { value: JsonRecord } CLOSE "deinit" AS "Parsed(JsonRecord)" FROM "std.json"; -EXTERN FN parseFromSlice(comptime: T, content: String, options: ParseOptions) RETURNS !Parsed EFFECTS :alloc:heap FROM "std.json"; +EXTERN STRUCT ParseOptions { +} FROM "std.json"; +EXTERN STRUCT JsonRecord { + id: Int64, + data: Int64[] +}; +EXTERN STRUCT Parsed { + value: JsonRecord +} CLOSE "deinit" AS "Parsed(JsonRecord)" FROM "std.json"; +EXTERN FN parseFromSlice < T > (comptime: T, content: String, options: ParseOptions) RETURNS !Parsed EFFECTS: alloc: heap FROM "std.json"; EXTERN FN mkdir(path: String, mode: UInt32) RETURNS Int64 FROM "std.c"; FN sizeForId(id: Int64) RETURNS Int64 -> - RETURN ((id * 7 + 13) MOD 997) + 10; + RETURN ((id * 7 + 13) MOD 997) + 10; END FN generateJson(id: Int64) RETURNS !String -> - sz = sizeForId(id); - MUTABLE parts: String[]@list = []; - FOR i IN (1_i64 ..= sz) -> parts.append(i.toString()); - RETURN "{\"id\":${id.toString()},\"data\":[${join(parts, ",")}]}"; + sz = sizeForId(id); + MUTABLE parts: String[]@list = []; + FOR i IN (1 ..= sz) -> parts.append(i.toString()); + RETURN "{\"id\":${id.toString()},\"data\":[${join(parts, ",")}]}"; END FN parseJsonArraySum(content: String) RETURNS !Int64 -> - parsed = parseFromSlice(JsonRecord, content, ParseOptions{}) OR RAISE; - MUTABLE sum: Int64 = 0; - FOR i IN (0_i64 ..< parsed.value.data.length()) DO - sum += parsed.value.data[i]; - END - RETURN sum; + parsed = parseFromSlice(JsonRecord, content, ParseOptions{}) OR RAISE; + MUTABLE sum = 0; + FOR i IN (0 ..< parsed.value.data.length()) DO + sum += parsed.value.data[i]; + END + RETURN sum; END FN handleClient!(client: TCPClient) RETURNS !Void -> - MUTABLE running: Bool = TRUE; - WHILE running DO - data: String@raw = tcpRead(client); - IF data.length() == 0 THEN - running = FALSE; - ELSE - MUTABLE resp = ""; - MUTABLE pos: Int64 = 0; 
- WHILE pos < data.length() DO - # Find end of line - MUTABLE eol = pos; - WHILE eol < data.length() && charAt(data, eol) != "\r" && charAt(data, eol) != "\n" DO - eol += 1; - END - line = substr(data, pos, eol - pos); + MUTABLE running = TRUE; + WHILE running DO + data: String@raw = client.tcpRead(); + IF data.empty?() THEN + running = FALSE; + ELSE + MUTABLE resp = ""; + MUTABLE pos = 0; + WHILE pos < data.length() DO + # Find end of line + MUTABLE eol = pos; + WHILE eol < data.length() && data.charAt(eol) != "\r" && data.charAt(eol) != "\n" DO + eol += 1; + END + line = data.substr(pos, eol - pos); - # Skip \r\n - pos = eol; - WHILE pos < data.length() && (charAt(data, pos) == "\r" || charAt(data, pos) == "\n") DO - pos += 1; - END + # Skip \r\n + pos = eol; + WHILE pos < data.length() && (data.charAt(pos) == "\r" || data.charAt(pos) == "\n") DO + pos += 1; + END - IF line.length() == 0 THEN - # skip empty lines - ELSE_IF startsWith?(line, "SET:") THEN - idStr = substr(line, 4, line.length() - 4); - id = toInt(toNumber(idStr) OR 0.0); - json = generateJson(id); - writeFile("data/${id.toString()}.json", json); - resp = resp + "+OK\r\n"; - ELSE_IF startsWith?(line, "GET:") THEN - idStr = substr(line, 4, line.length() - 4); - id = toInt(toNumber(idStr) OR 0.0); - content = readFile("data/${id.toString()}.json"); - sum = parseJsonArraySum(content) OR RAISE; - resp = resp + ":${sum.toString()}\r\n"; - ELSE_IF line == "QUIT" THEN - resp = resp + "+OK\r\n"; - running = FALSE; - ELSE_IF line == "READY?" 
THEN - resp = resp + "+READY\r\n"; - ELSE - resp = resp + "-ERR unknown command\r\n"; - END - END - IF resp.length() > 0 -> tcpWrite(client, resp); + IF line.empty?() THEN + # skip empty lines + ELSE_IF line.startsWith?("SET:") THEN + idStr = line.substr(4, line.length() - 4); + id = (idStr.toNumber() OR 0.0).toInt(); + json = generateJson(id); + ("data/${id.toString()}.json").writeFile(json); + resp = resp + "+OK\r\n"; + ELSE_IF line.startsWith?("GET:") THEN + idStr = line.substr(4, line.length() - 4); + id = (idStr.toNumber() OR 0.0).toInt(); + content = ("data/${id.toString()}.json").readFile(); + sum = parseJsonArraySum(content) OR RAISE; + resp = resp + ":${sum.toString()}\r\n"; + ELSE_IF line == "QUIT" THEN + resp = resp + "+OK\r\n"; + running = FALSE; + ELSE_IF line == "READY?" THEN + resp = resp + "+READY\r\n"; + ELSE + resp = resp + "-ERR unknown command\r\n"; END + END + IF resp.any?() -> client.tcpWrite(resp); END + END END FN main() RETURNS Void -> - mkdir("data", 0o755_u32); - server = TCPServer::listen(6390); - print("CLEAR json-api listening on port 6390"); + mkdir("data", 0o755_u32); + server = TCPServer::listen(6390); + print("CLEAR json-api listening on port 6390"); - # Cache pins BG fibers to the main scheduler, preventing fiber stealing - # which corrupts epoll fd registration. - MUTABLE cache: HashMap@sharded(32):locked = {}; + # Cache pins BG fibers to the main scheduler, preventing fiber stealing + # which corrupts epoll fd registration. 
+ MUTABLE cache: HashMap @sharded(32):locked = {}; - MUTABLE tasks: ~Void[]@list = []; - WHILE TRUE DO - client = accept(server); - tasks.append(BG { - n = cache.count(); - handleClient!(client); - }); - END + MUTABLE tasks: ~Void[]@list = []; + WHILE TRUE DO + client = server.accept(); + tasks.append( + BG { + n = cache.count(); + handleClient!(client); + } + ); + END END diff --git a/benchmarks/server/03_pathological/server.cht b/benchmarks/server/03_pathological/server.cht index 52fe184a8..9e357c0cc 100644 --- a/benchmarks/server/03_pathological/server.cht +++ b/benchmarks/server/03_pathological/server.cht @@ -13,78 +13,82 @@ # Pure compute (no FFI, no onRootStack). Tests scheduler quality directly. FN heavyCompute(seed: Int64, n: Int64) RETURNS Int64 -> - MUTABLE x: Int64 = seed; - FOR i IN (0_i64 ..< n) DO - x = x %* 6364136223846793005_i64 %+ 1442695040888963407_i64; - x = x %* x %+ 1_i64; - END - IF x < 0 -> x = 0 - x; - RETURN x MOD 1000000000; + MUTABLE x = seed; + FOR i IN (0 ..< n) DO + x = x %* 6_364_136_223_846_793_005 %+ 1_442_695_040_888_963_407; + x = x %* x %+ 1; + END + IF x < 0 -> x = 0 - x; + RETURN x MOD 1_000_000_000; END FN handleClient!(client: TCPClient) RETURNS !Void -> - MUTABLE running: Bool = TRUE; - WHILE running DO - data: String@raw = tcpRead(client); - IF data.length() == 0 THEN - running = FALSE; - ELSE - MUTABLE resp = ""; - MUTABLE pos: Int64 = 0; - WHILE pos < data.length() DO - MUTABLE eol = pos; - WHILE eol < data.length() && charAt(data, eol) != "\r" && charAt(data, eol) != "\n" DO - eol += 1; - END - line = substr(data, pos, eol - pos); - pos = eol; - WHILE pos < data.length() && (charAt(data, pos) == "\r" || charAt(data, pos) == "\n") DO - pos += 1; - END + MUTABLE running = TRUE; + WHILE running DO + data: String@raw = client.tcpRead(); + IF data.empty?() THEN + running = FALSE; + ELSE + MUTABLE resp = ""; + MUTABLE pos = 0; + WHILE pos < data.length() DO + MUTABLE eol = pos; + WHILE eol < data.length() && data.charAt(eol) 
!= "\r" && data.charAt(eol) != "\n" DO + eol += 1; + END + line = data.substr(pos, eol - pos); + pos = eol; + WHILE pos < data.length() && (data.charAt(pos) == "\r" || data.charAt(pos) == "\n") DO + pos += 1; + END - IF line.length() == 0 THEN - # skip - ELSE_IF startsWith?(line, "WORK:") THEN - rest: String@raw = substr(line, 5_i64, line.length() - 5); - MUTABLE colonPos: Int64 = 0; - WHILE colonPos < rest.length() && charAt(rest, colonPos) != ":" DO - colonPos += 1; - END - idStr = substr(rest, 0_i64, colonPos); - nStr = substr(rest, colonPos + 1, rest.length() - colonPos - 1); - MUTABLE id = toInt(toNumber(idStr) OR 0.0); - MUTABLE n = toInt(toNumber(nStr) OR 0.0); - IF n < 1 THEN n = 1; END - result = heavyCompute(id, n); - resp = resp + ":${result.toString()}\r\n"; - ELSE_IF line == "QUIT" THEN - resp = resp + "+OK\r\n"; - running = FALSE; - ELSE_IF line == "READY?" THEN - resp = resp + "+READY\r\n"; - ELSE - resp = resp + "-ERR unknown command\r\n"; - END - END - IF resp.length() > 0 -> tcpWrite(client, resp); + IF line.empty?() THEN + # skip + ELSE_IF line.startsWith?("WORK:") THEN + rest: String@raw = line.substr(5, line.length() - 5); + MUTABLE colonPos = 0; + WHILE colonPos < rest.length() && rest.charAt(colonPos) != ":" DO + colonPos += 1; + END + idStr = rest.substr(0, colonPos); + nStr = rest.substr(colonPos + 1, rest.length() - colonPos - 1); + MUTABLE id = (idStr.toNumber() OR 0.0).toInt(); + MUTABLE n = (nStr.toNumber() OR 0.0).toInt(); + IF n < 1 THEN + n = 1; + END + result = heavyCompute(id, n); + resp = resp + ":${result.toString()}\r\n"; + ELSE_IF line == "QUIT" THEN + resp = resp + "+OK\r\n"; + running = FALSE; + ELSE_IF line == "READY?" 
THEN + resp = resp + "+READY\r\n"; + ELSE + resp = resp + "-ERR unknown command\r\n"; END + END + IF resp.any?() -> client.tcpWrite(resp); END + END END FN main() RETURNS Void -> - server = TCPServer::listen(6390); - print("CLEAR pathological server listening on port 6390"); + server = TCPServer::listen(6390); + print("CLEAR pathological server listening on port 6390"); - # Pin BG fibers to the main scheduler to prevent work-stealing - # which corrupts epoll fd registration. - MUTABLE pin: HashMap@sharded(32):locked = {}; + # Pin BG fibers to the main scheduler to prevent work-stealing + # which corrupts epoll fd registration. + MUTABLE pin: HashMap @sharded(32):locked = {}; - MUTABLE tasks: ~Void[]@list = []; - WHILE TRUE DO - client = accept(server); - tasks.append(BG { - n = pin.count(); - handleClient!(client); - }); - END + MUTABLE tasks: ~Void[]@list = []; + WHILE TRUE DO + client = server.accept(); + tasks.append( + BG { + n = pin.count(); + handleClient!(client); + } + ); + END END diff --git a/clear b/clear index e29c2066e..49a441b81 100755 --- a/clear +++ b/clear @@ -1157,6 +1157,7 @@ when 'fix' require_relative 'src/ast/fixable_error' require_relative 'src/ast/syntax_typo_scanner' require_relative 'src/tools/predicate_rewriter' + require_relative 'src/tools/multi_statement_linter' require_relative 'src/annotator' args = ARGV[1..] || [] @@ -1192,6 +1193,7 @@ when 'fix' begin SyntaxTypoScanner.scan!(src) PredicateRewriter.lint!(src) + MultiStatementLinter.lint!(src) begin tokens = Lexer.new(src).tokenize ast = Parser.new(tokens, src).parse @@ -1408,6 +1410,7 @@ when 'fmt', 'format' args = ARGV[1..] 
|| [] check_only = false to_stdout = false + verify_mode = false paths = [] warn_width = true @@ -1415,13 +1418,37 @@ when 'fmt', 'format' case a when '--check' then check_only = true when '--stdout' then to_stdout = true + when '--verify' then verify_mode = true when '--no-warn' then warn_width = false when /\A-/ then error "Unknown flag for fmt: #{a}" else paths << a end end - error "Usage: clear fmt [--check|--stdout] ..." if paths.empty? + error "Usage: clear fmt [--check|--stdout|--verify] ..." if paths.empty? + + # --verify: confirm fmt is semantics-preserving by transpiling the + # original AND the formatted source to Zig and comparing byte-for-byte. + # Useful for sweeping benchmarks/examples before applying fmt in bulk. + # Returns exit status = number of files where output differed (or + # transpilation raised). Does not write files. + if verify_mode + require_relative 'src/tools/fmt_verifier' + files = [] + paths.each do |p| + if File.directory?(p) + files.concat(Dir.glob(File.join(p, '**', '*.cht')).sort) + elsif File.file?(p) + files << p + else + error "No such file or directory: #{p}" + end + end + error "No .cht files found" if files.empty? + results = files.map { |f| FmtVerifier.verify(f) } + fail_count = FmtVerifier.report(results) + exit(fail_count.zero? ? 0 : 3) + end files = [] paths.each do |p| diff --git a/examples/concurrent_reports/reports.cht b/examples/concurrent_reports/reports.cht index a9743d90b..01024aaf6 100644 --- a/examples/concurrent_reports/reports.cht +++ b/examples/concurrent_reports/reports.cht @@ -12,88 +12,88 @@ # This is the highest-risk untested intersection in the compiler. STRUCT Report { - name: String, - itemCount: Int64, - total: Int64 + name: String, + itemCount: Int64, + total: Int64 } # Build a report: sum values from a delimited string. # Exercises: string ops (split, toInt), frame allocation, struct return. 
FN buildReport(name: String, data: String) RETURNS !Report -> - parts = split(data, ","); - MUTABLE sum: Int64 = 0; - FOR i IN (0_i64 ..< parts.length()) DO - val = toInt(parts[i]); - sum += val; - END - RETURN Report{ name: COPY name, itemCount: parts.length(), total: sum }; + parts = data.split(","); + MUTABLE sum = 0; + FOR i IN (0 ..< parts.length()) DO + val = parts[i].toInt(); + sum += val; + END + RETURN Report{ name: COPY name, itemCount: parts.length(), total: sum }; END # Build a report that can fail. # If data is empty, raises an error. Tests error propagation through BG. FN buildReportOrFail(name: String, data: String) RETURNS !Report -> - IF data.length() == 0 -> RAISE "empty data for report: ${name}"; - parts = split(data, ","); - MUTABLE sum: Int64 = 0; - FOR i IN (0_i64 ..< parts.length()) DO - val = toInt(parts[i]); - sum += val; - END - RETURN Report{ name: COPY name, itemCount: parts.length(), total: sum }; + IF data.empty?() -> RAISE "empty data for report: ${name}"; + parts = data.split(","); + MUTABLE sum = 0; + FOR i IN (0 ..< parts.length()) DO + val = parts[i].toInt(); + sum += val; + END + RETURN Report{ name: COPY name, itemCount: parts.length(), total: sum }; END FN main() RETURNS Void -> - # ------------------------------------------------------- - # Test 1: Concurrent struct returns with string fields - # ------------------------------------------------------- - r1: ~Report = BG { @large -> - buildReport("alpha", "10,20,30"); - }; - r2: ~Report = BG { @large -> - buildReport("beta", "5,15,25,35"); - }; - r3: ~Report = BG { @large -> - buildReport("gamma", "100"); - }; + # ------------------------------------------------------- + # Test 1: Concurrent struct returns with string fields + # ------------------------------------------------------- + r1: ~Report = BG {@large -> + buildReport("alpha", "10,20,30"); + }; + r2: ~Report = BG {@large -> + buildReport("beta", "5,15,25,35"); + }; + r3: ~Report = BG {@large -> + buildReport("gamma", 
"100"); + }; - report1: Report = NEXT r1; - report2: Report = NEXT r2; - report3: Report = NEXT r3; + report1 = NEXT r1; + report2 = NEXT r2; + report3 = NEXT r3; - ASSERT report1.name == "alpha", "report1 name"; - ASSERT report1.itemCount == 3, "report1 items"; - ASSERT report1.total == 60, "report1 total"; + ASSERT report1.name == "alpha", "report1 name"; + ASSERT report1.itemCount == 3, "report1 items"; + ASSERT report1.total == 60, "report1 total"; - ASSERT report2.name == "beta", "report2 name"; - ASSERT report2.itemCount == 4, "report2 items"; - ASSERT report2.total == 80, "report2 total"; + ASSERT report2.name == "beta", "report2 name"; + ASSERT report2.itemCount == 4, "report2 items"; + ASSERT report2.total == 80, "report2 total"; - ASSERT report3.name == "gamma", "report3 name"; - ASSERT report3.itemCount == 1, "report3 items"; - ASSERT report3.total == 100, "report3 total"; + ASSERT report3.name == "gamma", "report3 name"; + ASSERT report3.itemCount == 1, "report3 items"; + ASSERT report3.total == 100, "report3 total"; - # ------------------------------------------------------- - # Test 2: Error propagation through BG fiber - # ------------------------------------------------------- - # Launch a fiber that will error (empty data). - errFiber: ~Report = BG { @large -> - buildReportOrFail("bad", "") OR Report{ name: "fallback", itemCount: 0, total: 0 }; - }; + # ------------------------------------------------------- + # Test 2: Error propagation through BG fiber + # ------------------------------------------------------- + # Launch a fiber that will error (empty data). 
+ errFiber: ~Report = BG {@large -> + buildReportOrFail("bad", "") OR Report{ name: "fallback", itemCount: 0, total: 0 }; + }; - errReport: Report = NEXT errFiber; - ASSERT errReport.name == "fallback", "error fiber should use OR fallback"; - ASSERT errReport.total == 0, "error fiber fallback total"; + errReport = NEXT errFiber; + ASSERT errReport.name == "fallback", "error fiber should use OR fallback"; + ASSERT errReport.total == 0, "error fiber fallback total"; - # ------------------------------------------------------- - # Test 3: Mix of success and fallback fibers - # ------------------------------------------------------- - ok: ~Report = BG { @large -> - buildReportOrFail("ok", "1,2,3") OR Report{ name: "err", itemCount: 0, total: 0 }; - }; + # ------------------------------------------------------- + # Test 3: Mix of success and fallback fibers + # ------------------------------------------------------- + ok: ~Report = BG {@large -> + buildReportOrFail("ok", "1,2,3") OR Report{ name: "err", itemCount: 0, total: 0 }; + }; - okReport: Report = NEXT ok; - ASSERT okReport.name == "ok", "success fiber returns real data"; - ASSERT okReport.total == 6, "success fiber total"; + okReport = NEXT ok; + ASSERT okReport.name == "ok", "success fiber returns real data"; + ASSERT okReport.total == 6, "success fiber total"; - print("All concurrent report tests passed!"); + print("All concurrent report tests passed!"); END diff --git a/examples/data_pipeline/pipeline.cht b/examples/data_pipeline/pipeline.cht index 616044e35..3b384717d 100644 --- a/examples/data_pipeline/pipeline.cht +++ b/examples/data_pipeline/pipeline.cht @@ -5,83 +5,84 @@ # aggregation operators on real data. 
FN generateReadings(n: Int64, seed: Int64) RETURNS !Float64[]@list -> - MUTABLE readings: Float64[]@list = []; - MUTABLE state: Int64 = seed; - FOR i IN (0_i64 ..< n) DO - state = (state * 13_i64 + 7_i64) MOD 10007; - modVal = state MOD 1000; - MUTABLE raw = toFloat(modVal); - IF raw < 0.0 -> raw = 0.0 - raw; - readings.append(raw); - END - RETURN readings; + MUTABLE readings: Float64[]@list = []; + MUTABLE state = seed; + FOR i IN (0 ..< n) DO + state = (state * 13 + 7) MOD 10_007; + modVal = state MOD 1000; + MUTABLE raw = modVal.toFloat(); + IF raw < 0.0 -> raw = 0.0 - raw; + readings.append(raw); + END + RETURN readings; END FN main() RETURNS Void -> - readings = generateReadings(1000, 42); - ASSERT readings.length() == 1000, "generated 1000 readings"; + readings = generateReadings(1000, 42); + ASSERT readings.length() == 1000, "generated 1000 readings"; - # ------------------------------------------------------- - # Test 1: SELECT — scale all readings by 0.1 - # ------------------------------------------------------- - scaled = readings |> SELECT _ * 0.1; - ASSERT scaled.length() == 1000, "SELECT preserves length"; + # ------------------------------------------------------- + # Test 1: SELECT — scale all readings by 0.1 + # ------------------------------------------------------- + scaled = readings |> SELECT _ * 0.1; + ASSERT scaled.length() == 1000, "SELECT preserves length"; - # ------------------------------------------------------- - # Test 2: WHERE — filter readings above threshold - # ------------------------------------------------------- - high = readings |> WHERE _ > 500.0; - low = readings |> WHERE _ <= 500.0; - ASSERT high.length() + low.length() == 1000, "WHERE partitions correctly"; + # ------------------------------------------------------- + # Test 2: WHERE — filter readings above threshold + # ------------------------------------------------------- + high = readings |> WHERE _ > 500.0; + low = readings |> WHERE _ <= 500.0; + ASSERT 
high.length() + low.length() == 1000, "WHERE partitions correctly"; - # ------------------------------------------------------- - # Test 3: Chained pipeline — SELECT then WHERE in one expression - # ------------------------------------------------------- - filtered = readings |> SELECT _ * 2.0 |> WHERE _ > 500.0; - ASSERT filtered.length() > 0, "chained SELECT then WHERE"; + # ------------------------------------------------------- + # Test 3: Chained pipeline — SELECT then WHERE in one expression + # ------------------------------------------------------- + filtered = readings + |> SELECT _ * 2.0 + |> WHERE _ > 500.0; + ASSERT filtered.any?(), "chained SELECT then WHERE"; - # ------------------------------------------------------- - # Test 3b: REDUCE — fold to sum - # ------------------------------------------------------- - total = readings |> REDUCE(0.0) acc + _; - ASSERT total > 0.0, "REDUCE sum is positive"; + # ------------------------------------------------------- + # Test 3b: REDUCE — fold to sum + # ------------------------------------------------------- + total = readings |> REDUCE(0.0) acc + _; + ASSERT total > 0.0, "REDUCE sum is positive"; - # ------------------------------------------------------- - # Test 4: Aggregation operators - # ------------------------------------------------------- - s = readings |> SUM _; - ASSERT s > 0.0, "SUM is positive"; - avg = readings |> AVERAGE _; - ASSERT avg > 0.0, "AVERAGE is positive"; - mn = readings |> MIN _; - mx = readings |> MAX _; - ASSERT mn <= avg, "MIN <= AVERAGE"; - ASSERT mx >= avg, "MAX >= AVERAGE"; + # ------------------------------------------------------- + # Test 4: Aggregation operators + # ------------------------------------------------------- + s = readings |> SUM _; + ASSERT s > 0.0, "SUM is positive"; + avg = readings |> AVERAGE _; + ASSERT avg > 0.0, "AVERAGE is positive"; + mn = readings |> MIN _; + mx = readings |> MAX _; + ASSERT mn <= avg, "MIN <= AVERAGE"; + ASSERT mx >= avg, "MAX >= 
AVERAGE"; - # ------------------------------------------------------- - # Test 5: CONCURRENT SELECT — parallel transform - # ------------------------------------------------------- - small = generateReadings(100, 7); - doubled = small |> CONCURRENT(workers: 4) SELECT _ * 2.0; - ASSERT doubled.length() == 100, "CONCURRENT SELECT preserves length"; + # ------------------------------------------------------- + # Test 5: CONCURRENT SELECT — parallel transform + # ------------------------------------------------------- + small = generateReadings(100, 7); + doubled = small |> CONCURRENT(workers: 4) SELECT _ * 2.0; + ASSERT doubled.length() == 100, "CONCURRENT SELECT preserves length"; - # ------------------------------------------------------- - # Test 6: CONCURRENT WHERE — parallel filter - # ------------------------------------------------------- - filtered2 = small |> CONCURRENT(workers: 4) WHERE _ > 300.0; - ASSERT filtered2.length() > 0, "CONCURRENT WHERE produces results"; - ASSERT filtered2.length() < 100, "CONCURRENT WHERE filters some items"; + # ------------------------------------------------------- + # Test 6: CONCURRENT WHERE — parallel filter + # ------------------------------------------------------- + filtered2 = small |> CONCURRENT(workers: 4) WHERE _ > 300.0; + ASSERT filtered2.any?(), "CONCURRENT WHERE produces results"; + ASSERT filtered2.length() < 100, "CONCURRENT WHERE filters some items"; - # ------------------------------------------------------- - # Test 7: SHARD pipeline — partitioned map operations - # ------------------------------------------------------- - MUTABLE buckets: HashMap@sharded(4) = {}; - n = 200_i64; - (0.. 
SHARD("b:${toString(_ MOD 10)}", buckets) |> CONCURRENT EACH { - cur = buckets[_] OR 0_i64; - buckets[_] = cur + 1; - }; - ASSERT buckets.count() == 10, "SHARD created 10 buckets"; + # ------------------------------------------------------- + # Test 7: SHARD pipeline — partitioned map operations + # ------------------------------------------------------- + MUTABLE buckets: HashMap @sharded(4) = {}; + n = 200; + (0 ..< n) + |> SHARD("b:${toString(_ MOD 10)}", buckets) + |> CONCURRENT EACH {cur = buckets[_] OR 0; buckets[_] = cur + 1;}; + ASSERT buckets.count() == 10, "SHARD created 10 buckets"; - print("All data pipeline tests passed!"); + print("All data pipeline tests passed!"); END diff --git a/examples/footguns/01_use_after_free/main.cht b/examples/footguns/01_use_after_free/main.cht index c6977f253..adb0413de 100644 --- a/examples/footguns/01_use_after_free/main.cht +++ b/examples/footguns/01_use_after_free/main.cht @@ -10,30 +10,35 @@ # stored in a longer-lived container), the compiler promotes it to the # heap at declaration time — no dangling frame reference is possible. -STRUCT Player { name: String, score: Int64 } +STRUCT Player { + name: String, + score: Int64 +} # Takes ownership of the player; it is freed when this function returns. 
FN consume(TAKES p: Player) RETURNS Void -> - RETURN; + RETURN; END FN main() RETURNS Void -> - p = Player{ name: "Alice", score: 100 }; - print(p.name); # OK: p is still owned here + p = Player{ name: "Alice", score: 100 }; + print(p.name); + # OK: p is still owned here - consume(GIVE p); # ownership transferred; p is gone + consume(GIVE p); + # ownership transferred; p is gone - # print(p.name); -- ERROR: 'p' was moved on the line above - # -- "variable 'p' used after GIVE" + # print(p.name); -- ERROR: 'p' was moved on the line above + # -- "variable 'p' used after GIVE" - # ---------------------------------------------------------------- - # Frame escape: CLEAR detects when a frame value outlives its scope - # and automatically promotes it to the heap. No dangling pointers. + # ---------------------------------------------------------------- + # Frame escape: CLEAR detects when a frame value outlives its scope + # and automatically promotes it to the heap. No dangling pointers. - items: []Int64 = [10, 20, 30]; - # Returning 'items' from this scope is safe: escape analysis sees - # it must outlive the frame and allocates it on the heap directly. - # The "frame then promote" pattern that causes UAF in C never occurs. + items: Int64[] = [10, 20, 30]; + # Returning 'items' from this scope is safe: escape analysis sees + # it must outlive the frame and allocates it on the heap directly. + # The "frame then promote" pattern that causes UAF in C never occurs. - print("use_after_free: CLEAR prevents this at compile time"); + print("use_after_free: CLEAR prevents this at compile time"); END diff --git a/examples/footguns/02_data_race/main.cht b/examples/footguns/02_data_race/main.cht index 100ced4f7..2abb3b2d8 100644 --- a/examples/footguns/02_data_race/main.cht +++ b/examples/footguns/02_data_race/main.cht @@ -8,36 +8,38 @@ # A raw mutable local cannot be captured by concurrent BG fibers at all: # the escape analysis sees the concurrent capture and requires @shared. 
-STRUCT Counter { value: Int64 } +STRUCT Counter { + value: Int64 +} FN main() RETURNS Void -> - # @shared:locked = Arc>. Mutation requires WITH EXCLUSIVE. - c = Counter{ value: 0 } @shared:locked; + # @shared:locked = Arc>. Mutation requires WITH EXCLUSIVE. + c = Counter{ value: 0 } @shared:locked; - p1 = BG { - REPEAT 1000000 TIMES - WITH EXCLUSIVE c AS inner { - inner.value = inner.value + 1; - } - END - }; + p1 = BG { + FOR i IN (0 ..< 1_000_000) DO + WITH EXCLUSIVE c AS inner { + inner.value = inner.value + 1; + } + END + }; - p2 = BG { - REPEAT 1000000 TIMES - WITH EXCLUSIVE c AS inner { - inner.value = inner.value + 1; - } - END - }; + p2 = BG { + FOR i IN (0 ..< 1_000_000) DO + WITH EXCLUSIVE c AS inner { + inner.value = inner.value + 1; + } + END + }; - NEXT p1; - NEXT p2; + NEXT p1; + NEXT p2; - WITH c AS inner { - # Always 2000000: WITH EXCLUSIVE serializes all mutations. - ASSERT inner.value == 2000000, "no data race"; - print(inner.value.str()); - } + WITH c AS inner { + # Always 2000000: WITH EXCLUSIVE serializes all mutations. + ASSERT inner.value == 2_000_000, "no data race"; + print(inner.value.str()); + } END # The broken version does not compile: diff --git a/examples/footguns/03_memory_leak/main.cht b/examples/footguns/03_memory_leak/main.cht index 34d20462a..3c5cf3825 100644 --- a/examples/footguns/03_memory_leak/main.cht +++ b/examples/footguns/03_memory_leak/main.cht @@ -7,45 +7,48 @@ # The C pattern — allocate, then return early without freeing — is caught # before the binary is produced. No valgrind needed. -FN process(input: String) RETURNS String OR RAISE -> - buf: String = "processed: " + input; # heap allocation +FN process(input: String) RETURNS !String -> + buf: String = "processed: " + input; + # heap allocation - IF input.startsWith("!") THEN - RAISE "bad input"; - # ERROR (if buf had no cleanup): MIRChecker: LEAK - # 'buf' allocated on line above has no cleanup on this path. 
- # CLEAR inserts the cleanup automatically (like defer in Go), - # so in practice this compiles and buf is freed on the error path. - END + IF input.startsWith("!") THEN + RAISE "bad input"; + # ERROR (if buf had no cleanup): MIRChecker: LEAK + # 'buf' allocated on line above has no cleanup on this path. + # CLEAR inserts the cleanup automatically (like defer in Go), + # so in practice this compiles and buf is freed on the error path. + END - RETURN buf; # ownership transferred to caller; buf not freed here + RETURN buf; + # ownership transferred to caller; buf not freed here END -FN main() RETURNS Void -> - # Happy path: buf ownership moves to result, freed when result goes - # out of scope. - result = process("hello") OR RAISE; - print(result); +FN main() RETURNS !Void -> + # Happy path: buf ownership moves to result, freed when result goes + # out of scope. + result = process("hello") OR RAISE; + print(result); - # Error path: buf inside process() is freed automatically before - # the error propagates. No leak. - process("!bad") OR { - print("error (no leak: CLEAR freed buf on the error path)"); - }; + # Error path (commented out — would propagate the error up): buf + # inside process() is freed automatically before the error + # propagates. No leak. CLEAR's `OR EXIT` / `OR RAISE` are the + # modern shapes; there is no `OR { handler }` block form. + # + # process("!bad") OR RAISE; - # ---------------------------------------------------------------- - # The broken version — what MIRChecker catches: - # - # FN broken(input: String) RETURNS String OR RAISE -> - # buf: String = concat("processed: ", input); -- heap alloc - # IF input.startsWith("!") THEN - # RAISE "bad"; -- ERROR: LEAK — buf has no cleanup here - # END -- MIRChecker: alloc at line N has no - # RETURN buf; -- matching Drop on the RAISE path - # END - # - # This is the same shape as the C bug. CLEAR rejects it at compile - # time; C silently leaks 256 bytes. 
+ # ---------------------------------------------------------------- + # The broken version — what MIRChecker catches: + # + # FN broken(input: String) RETURNS !String -> + # buf: String = concat("processed: ", input); -- heap alloc + # IF input.startsWith("!") THEN + # RAISE "bad"; -- ERROR: LEAK — buf has no cleanup here + # END -- MIRChecker: alloc at line N has no + # RETURN buf; -- matching Drop on the RAISE path + # END + # + # This is the same shape as the C bug. CLEAR rejects it at compile + # time; C silently leaks 256 bytes. - print("memory_leak: CLEAR catches missing cleanup at compile time"); + print("memory_leak: CLEAR catches missing cleanup at compile time"); END diff --git a/examples/footguns/04_task_leak/main.cht b/examples/footguns/04_task_leak/main.cht index 22e3f2739..dbb2ef37f 100644 --- a/examples/footguns/04_task_leak/main.cht +++ b/examples/footguns/04_task_leak/main.cht @@ -10,36 +10,37 @@ # The scheduler also has a built-in timeout mechanism for BG blocks. FN fetch_result() RETURNS String -> - # Simulates work that completes and returns a value. - # In CLEAR, every BG block must terminate; there is no detach. - RETURN "result"; + # Simulates work that completes and returns a value. + # In CLEAR, every BG block must terminate; there is no detach. + RETURN "result"; END FN handle_request(id: Int64) RETURNS Void -> - # BG returns a promise. The promise MUST be awaited (NEXT). - # Dropping it without NEXT is a compile error: - # "BG promise 'p' is never awaited — possible task leak" - p = BG { fetch_result() }; - result = NEXT p; # structured: scope waits here until fiber done - print("request " + id.str() + ": " + result); + # BG returns a promise. The promise MUST be awaited (NEXT). 
+ # Dropping it without NEXT is a compile error: + # "BG promise 'p' is never awaited — possible task leak" + p = BG {fetch_result();}; + result = NEXT p; + # structured: scope waits here until fiber done + print("request " + id.str() + ": " + result); END FN main() RETURNS Void -> - REPEAT 5 TIMES - handle_request(1); - END + FOR i IN (0 ..< 5) DO + handle_request(1); + END - # ---------------------------------------------------------------- - # The broken version does not compile: - # - # FN leaky() RETURNS Void -> - # BG { long_running_work() }; -- ERROR: unawaited BG promise - # END -- "BG result is never used; - # -- did you forget NEXT?" - # - # Even if you ignore the promise, the scheduler waits for the fiber - # before the enclosing function can return. Task leak is structurally - # impossible: the scope is the lifetime of all fibers in it. + # ---------------------------------------------------------------- + # The broken version does not compile: + # + # FN leaky() RETURNS Void -> + # BG { long_running_work() }; -- ERROR: unawaited BG promise + # END -- "BG result is never used; + # -- did you forget NEXT?" + # + # Even if you ignore the promise, the scheduler waits for the fiber + # before the enclosing function can return. Task leak is structurally + # impossible: the scope is the lifetime of all fibers in it. - print("task_leak: CLEAR's structured concurrency prevents this"); + print("task_leak: CLEAR's structured concurrency prevents this"); END diff --git a/examples/footguns/05_toctou/main.cht b/examples/footguns/05_toctou/main.cht index 0670e8327..2ca7883e6 100644 --- a/examples/footguns/05_toctou/main.cht +++ b/examples/footguns/05_toctou/main.cht @@ -6,60 +6,62 @@ # one atomic region by construction, making the correct pattern the # natural one and the broken pattern visibly wrong. 
-STRUCT Account { balance: Int64 } +STRUCT Account { + balance: Int64 +} FN main() RETURNS Void -> - acct = Account{ balance: 100 } @shared:locked; + acct = Account{ balance: 100 } @shared:locked; - # ---------------------------------------------------------------- - # BROKEN: check in one WITH, act in a separate WITH. - # The lock is released between the two blocks — a concurrent fiber - # can modify balance in the window, exactly as in C/Go/Rust. - # - # WITH acct AS a { - # IF a.balance >= 80 THEN - # -- lock released here; window opens - # END - # } - # WITH EXCLUSIVE acct AS a { - # a.balance = a.balance - 80; -- acting on a stale check - # } - # ---------------------------------------------------------------- + # ---------------------------------------------------------------- + # BROKEN: check in one WITH, act in a separate WITH. + # The lock is released between the two blocks — a concurrent fiber + # can modify balance in the window, exactly as in C/Go/Rust. + # + # WITH acct AS a { + # IF a.balance >= 80 THEN + # -- lock released here; window opens + # END + # } + # WITH EXCLUSIVE acct AS a { + # a.balance = a.balance - 80; -- acting on a stale check + # } + # ---------------------------------------------------------------- - # CORRECT: check and act inside one WITH EXCLUSIVE block. - # The lock is held for the entire check+act, closing the window. - # This is the natural way to write it in CLEAR; the block structure - # makes the atomic scope explicit. + # CORRECT: check and act inside one WITH EXCLUSIVE block. + # The lock is held for the entire check+act, closing the window. + # This is the natural way to write it in CLEAR; the block structure + # makes the atomic scope explicit. 
- p1 = BG { - WITH EXCLUSIVE acct AS a { - IF a.balance >= 80 THEN - a.balance = a.balance - 80; - print("fiber 1 withdrew 80"); - ELSE - print("fiber 1: insufficient funds"); - END - } - }; + p1 = BG { + WITH EXCLUSIVE acct AS a { + IF a.balance >= 80 THEN + a.balance = a.balance - 80; + print("fiber 1 withdrew 80"); + ELSE + print("fiber 1: insufficient funds"); + END + } + }; - p2 = BG { - WITH EXCLUSIVE acct AS a { - IF a.balance >= 80 THEN - a.balance = a.balance - 80; - print("fiber 2 withdrew 80"); - ELSE - print("fiber 2: insufficient funds"); - END - } - }; + p2 = BG { + WITH EXCLUSIVE acct AS a { + IF a.balance >= 80 THEN + a.balance = a.balance - 80; + print("fiber 2 withdrew 80"); + ELSE + print("fiber 2: insufficient funds"); + END + } + }; - NEXT p1; - NEXT p2; + NEXT p1; + NEXT p2; - WITH acct AS a { - # Always 20: exactly one withdrawal succeeded. - print("balance = " + a.balance.str()); - } + WITH acct AS a { + # Always 20: exactly one withdrawal succeeded. + print("balance = " + a.balance.str()); + } END # Note: CLEAR cannot prevent TOCTOU in general (e.g. filesystem races diff --git a/examples/footguns/06_memory_ordering/main.cht b/examples/footguns/06_memory_ordering/main.cht index ae5d8d033..54b1a0efb 100644 --- a/examples/footguns/06_memory_ordering/main.cht +++ b/examples/footguns/06_memory_ordering/main.cht @@ -12,31 +12,34 @@ # category of "wrong memory ordering" bugs is eliminated from the # language surface. -STRUCT State { message: String } +STRUCT State { + message: String +} FN main() RETURNS Void -> - # @shared:locked = Arc>. All accesses are SC by - # construction. There is no ordering annotation to get wrong. - s = State{ message: "" } @shared:locked; + # @shared:locked = Arc>. All accesses are SC by + # construction. There is no ordering annotation to get wrong. 
+ s = State{ message: "" } @shared:locked; - producer = BG { - WITH EXCLUSIVE s AS inner { - inner.message = "hello from producer"; - # Mutex release here emits a full barrier. The consumer is - # guaranteed to see this write when it acquires the lock. - } - }; + producer = BG { + WITH EXCLUSIVE s AS inner { + inner.message = "hello from producer"; + # Mutex release here emits a full barrier. The consumer is + # guaranteed to see this write when it acquires the lock. + } + }; - consumer = BG { - NEXT producer; # wait for producer to finish - WITH s AS inner { - # Mutex acquire here emits a full barrier. Always sees the - # producer's write regardless of hardware memory model. - print(inner.message); - } - }; + consumer = BG { + NEXT producer; + # wait for producer to finish + WITH s AS inner { + # Mutex acquire here emits a full barrier. Always sees the + # producer's write regardless of hardware memory model. + print(inner.message); + } + }; - NEXT consumer; + NEXT consumer; END # Summary of memory ordering exposure per language: diff --git a/examples/footguns/07_causal_ordering/main.cht b/examples/footguns/07_causal_ordering/main.cht index cd98ec582..ed309a7dc 100644 --- a/examples/footguns/07_causal_ordering/main.cht +++ b/examples/footguns/07_causal_ordering/main.cht @@ -22,40 +22,42 @@ # from two separate WITH blocks, with another fiber modifying it in between. # This is the TOCTOU pattern (see 05_toctou), not a memory ordering issue. -STRUCT Payload { data: String } +STRUCT Payload { + data: String +} FN main() RETURNS Void -> - result = Payload{ data: "" } @shared:locked; + result = Payload{ data: "" } @shared:locked; - # Producer: writes data under the lock. - producer = BG { - WITH EXCLUSIVE result AS r { - r.data = "important result"; - # Mutex release here is a full barrier on all architectures. - } - }; + # Producer: writes data under the lock. 
+ producer = BG { + WITH EXCLUSIVE result AS r { + r.data = "important result"; + # Mutex release here is a full barrier on all architectures. + } + }; - # Relay: waits for producer, then signals consumer. - # NEXT producer ensures all of producer's writes are visible before - # this fiber proceeds. The happens-before chain is preserved because - # NEXT is a synchronization point, not a plain channel forward. - relay = BG { - NEXT producer; - # Nothing to forward: the data lives in 'result'. Consumer - # will see it because WITH EXCLUSIVE issues a mutex acquire. - }; + # Relay: waits for producer, then signals consumer. + # NEXT producer ensures all of producer's writes are visible before + # this fiber proceeds. The happens-before chain is preserved because + # NEXT is a synchronization point, not a plain channel forward. + relay = BG { + NEXT producer; + # Nothing to forward: the data lives in 'result'. Consumer + # will see it because WITH EXCLUSIVE issues a mutex acquire. + }; - # Consumer: waits for relay, then reads data. - consumer = BG { - NEXT relay; - WITH result AS r { - # Mutex acquire here is a full barrier. Guaranteed to see - # the producer's write regardless of hardware memory model. - print("consumer saw: " + r.data); - } - }; + # Consumer: waits for relay, then reads data. + consumer = BG { + NEXT relay; + WITH result AS r { + # Mutex acquire here is a full barrier. Guaranteed to see + # the producer's write regardless of hardware memory model. + print("consumer saw: " + r.data); + } + }; - NEXT consumer; + NEXT consumer; END # Summary: diff --git a/examples/footguns/08_buffer_overflow/main.cht b/examples/footguns/08_buffer_overflow/main.cht index 50fc1547e..51cf21b6d 100644 --- a/examples/footguns/08_buffer_overflow/main.cht +++ b/examples/footguns/08_buffer_overflow/main.cht @@ -14,38 +14,43 @@ # char*. String indexing operates on codepoints, not raw bytes, and is # bounds-checked. There is no strncpy, strncat, or sprintf. 
-STRUCT ByteBuffer { data: [Int] } +STRUCT ByteBuffer { + data: Int64[] +} FN safe_access() RETURNS Void -> - buf = [0, 0, 0, 0]; # 4-element list + MUTABLE buf: Int64[] = [0, 0, 0, 0]; + # 4-element list - # Out-of-bounds access: - # val = buf[10]; -- runtime panic: index 10 out of bounds (len 4) + # Out-of-bounds access: + # val = buf[10]; -- runtime panic: index 10 out of bounds (len 4) - # Correct: iterate within bounds - i = 0; - WHILE i < buf.len() { - buf[i]! = i; - i = i + 1; - } + # Correct: iterate within bounds + MUTABLE i = 0; + WHILE i < buf.length() DO + buf[i] = i; + i = i + 1; + END - print("buf[0]=" + buf[0].toString()); - print("buf[3]=" + buf[3].toString()); + print("buf[0]=" + buf[0].toString()); + print("buf[3]=" + buf[3].toString()); END FN off_by_one() RETURNS Void -> - arr = [0, 1, 2, 3, 4, 5, 6, 7]; # 8 elements (indices 0-7) + arr: Int64[] = [0, 1, 2, 3, 4, 5, 6, 7]; + # 8 elements (indices 0-7) - # arr[8] would panic: index 8 out of bounds (len 8) - # The C footgun (i <= 8 instead of i < 8) cannot silently succeed. + # arr[8] would panic: index 8 out of bounds (len 8) + # The C footgun (i <= 8 instead of i < 8) cannot silently succeed. - last = arr[arr.len() - 1]; # safe: always the last element - print("arr[7]=" + last.toString()); + last = arr[arr.length() - 1]; + # safe: always the last element + print("arr[7]=" + last.toString()); END FN main() RETURNS Void -> - safe_access(); - off_by_one(); + safe_access(); + off_by_one(); END # Summary: diff --git a/examples/footguns/09_uninitialized_read/main.cht b/examples/footguns/09_uninitialized_read/main.cht index 29242bf81..39a4df8b2 100644 --- a/examples/footguns/09_uninitialized_read/main.cht +++ b/examples/footguns/09_uninitialized_read/main.cht @@ -13,34 +13,38 @@ # (unlike Go). It enforces initialization through static analysis instead # of runtime overhead. The result is the same safety property at lower cost. 
-STRUCT Triple { a: Int, b: Int, c: Int } +STRUCT Triple { + a: Int64, + b: Int64, + c: Int64 +} -FN conditional_sum(n: Int) RETURNS Int -> - # sum must be assigned on every path. - # In CLEAR there is no "declare without assign" — the assignment IS - # the declaration. A conditional that may not execute cannot leave - # sum uninitialized because sum doesn't exist until it's assigned. - sum = 0; - i = 0; - WHILE i < n { - sum = sum + i; - i = i + 1; - } - RETURN sum; +FN conditional_sum(n: Int64) RETURNS Int64 -> + # sum must be assigned on every path. + # In CLEAR there is no "declare without assign" — the assignment IS + # the declaration. A conditional that may not execute cannot leave + # sum uninitialized because sum doesn't exist until it's assigned. + MUTABLE sum = 0; + MUTABLE i = 0; + WHILE i < n DO + sum = sum + i; + i = i + 1; + END + RETURN sum; END FN init_struct() RETURNS Triple -> - # Every field must be provided in the struct literal. - # Omitting a field is a compile-time error. - RETURN Triple{ a: 1, b: 2, c: 0 }; + # Every field must be provided in the struct literal. + # Omitting a field is a compile-time error. + RETURN Triple{ a: 1, b: 2, c: 0 }; END FN main() RETURNS Void -> - result = conditional_sum(0); - print("conditional_sum(0) = " + result.toString()); + result = conditional_sum(0); + print("conditional_sum(0) = " + result.toString()); - t = init_struct(); - print("t.c = " + t.c.toString()); + t = init_struct(); + print("t.c = " + t.c.toString()); END # Summary: diff --git a/examples/footguns/10_alias_mutation/main.cht b/examples/footguns/10_alias_mutation/main.cht index dc4b08afd..65e01eab6 100644 --- a/examples/footguns/10_alias_mutation/main.cht +++ b/examples/footguns/10_alias_mutation/main.cht @@ -15,52 +15,59 @@ # The C "restrict lie" pattern cannot be expressed: the compiler never # assumes non-aliasing, and the language does not expose raw pointers. 
-STRUCT Point { x: Int, y: Int } +STRUCT Point { + x: Int, + y: Int +} FN demonstrate_ownership() RETURNS Void -> - p1 = Point{ x: 1, y: 2 }; + p1 = Point{ x: 1, y: 2 }; - # GIVE transfers ownership; p1 is no longer valid after this. - # Using p1 after GIVE is a compile-time error. - # p2 = GIVE p1; - # print(p1.x.toString()); -- compile error: p1 was moved + # GIVE transfers ownership; p1 is no longer valid after this. + # Using p1 after GIVE is a compile-time error. + # p2 = GIVE p1; + # print(p1.x.toString()); -- compile error: p1 was moved - # Without GIVE, structs in CLEAR are implicitly copied for Copy types. - # Point has only Int fields (Copy), so assignment copies the value. - p2 = p1; # copy: p2 is a fresh copy, p1 is still valid - # p2.x = 99; -- mutation requires MUTABLE declaration + # Without GIVE, structs in CLEAR are implicitly copied for Copy types. + # Point has only Int fields (Copy), so assignment copies the value. + p2 = p1; + # copy: p2 is a fresh copy, p1 is still valid + # p2.x = 99; -- mutation requires MUTABLE declaration - MUTABLE p3 = p1; # mutable copy - p3.x = 99; - print("p1.x after p3.x=99: " + p1.x.toString()); # still 1, copy semantics - print("p3.x = " + p3.x.toString()); # 99 + MUTABLE p3 = p1; + # mutable copy + p3.x = 99; + print("p1.x after p3.x=99: " + p1.x.toString()); + # still 1, copy semantics + print("p3.x = " + p3.x.toString()); + # 99 END FN shared_mutation() RETURNS Void -> - # To share mutable state, declare it @shared:locked. - # Access requires WITH EXCLUSIVE (write) or WITH (read). - # You cannot hold a reference AND mutate simultaneously: - # the mutex enforces that at most one accessor holds the lock. - counter = 0 @shared:locked; + # To share mutable state, declare it @shared:locked. + # Access requires WITH EXCLUSIVE (write) or WITH (read). + # You cannot hold a reference AND mutate simultaneously: + # the mutex enforces that at most one accessor holds the lock. 
+ counter = 0 @shared:locked; - writer = BG { - WITH EXCLUSIVE counter AS c { - c! = 42; - # Lock released here. No other accessor can read/write c - # while this block executes. - } - }; + writer = BG { + WITH EXCLUSIVE counter AS c { + c! = 42; + # Lock released here. No other accessor can read/write c + # while this block executes. + } + }; - NEXT writer; + NEXT writer; - WITH counter AS c { - print("counter = " + c.toString()); # 42 - } + WITH counter AS c { + print("counter = " + c.toString()); # 42 + } END FN main() RETURNS Void -> - demonstrate_ownership(); - shared_mutation(); + demonstrate_ownership(); + shared_mutation(); END # Summary: diff --git a/examples/footguns/11_iterator_invalidation/main.cht b/examples/footguns/11_iterator_invalidation/main.cht index 16e38b904..782535a9e 100644 --- a/examples/footguns/11_iterator_invalidation/main.cht +++ b/examples/footguns/11_iterator_invalidation/main.cht @@ -3,8 +3,8 @@ # CLEAR's pipeline-based iteration makes iterator invalidation structurally # impossible for the common cases: # -# 1. Pipeline consumption: `list | filter(...) | map(...)` consumes the -# collection immutably. There is no separate iterator object to +# 1. Pipeline consumption: `list |> WHERE ... |> SELECT ...` consumes +# the collection immutably. There is no separate iterator object to # invalidate — the pipeline is a single expression. # # 2. Ownership prevents concurrent mutation: if you are iterating a list @@ -20,47 +20,49 @@ # produce new collections rather than mutating the source in-place. FN safe_filter() RETURNS Void -> - numbers = [1, 2, 3, 4, 5]; + numbers: Int64[] = [1, 2, 3, 4, 5]; - # Pipeline iteration: produces a new list, never modifies source. - # numbers is not mutated; odds is a fresh allocation. - odds = numbers | filter($n -> n % 2 != 0) | collect(); - print("odds: " + odds.toString()); + # Pipeline iteration: produces a new list, never modifies source. + # numbers is not mutated; odd_count is a fresh value. 
+ odd_count = numbers + |> WHERE (_ MOD 2) != 0 + |> COUNT _; + print("odd_count: " + odd_count.toString()); - # The source list is unchanged. - print("numbers still intact: " + numbers.toString()); + # The source list is unchanged. + print("numbers length: " + numbers.length().toString()); END FN shared_list_access() RETURNS Void -> - items = [10, 20, 30, 40] @shared:locked; + items: Int64[]@list:shared:locked = [10, 20, 30, 40]; - # Reader: iterates while holding the lock. - reader = BG { - WITH items AS list { - # list is locked for read for the duration of this block. - # No other fiber can acquire EXCLUSIVE during this WITH. - sum = list | reduce(0, $acc $n -> acc + n); - print("sum = " + sum.toString()); - } - # Lock released here. - }; + # Reader: iterates while holding the lock. + reader = BG { + WITH items AS list { + # list is locked for read for the duration of this block. + # No other fiber can acquire EXCLUSIVE during this WITH. + sum = list |> REDUCE(0) acc + _; + print("sum = " + sum.toString()); + } + # Lock released here. + }; - # Writer: must wait for reader to release lock before it can modify. - writer = BG { - WITH EXCLUSIVE items AS list { - # Only runs after reader releases lock. - list.append!(50); - print("appended 50"); - } - }; + # Writer: must wait for reader to release lock before it can modify. + writer = BG { + WITH EXCLUSIVE items AS list { + # Only runs after reader releases lock. + list.append(50); + print("appended 50"); + } + }; - NEXT reader; - NEXT writer; + NEXT reader; + NEXT writer; END FN main() RETURNS Void -> - safe_filter(); - shared_list_access(); + safe_filter(); + shared_list_access(); END # Summary: diff --git a/examples/footguns/12_deadlock/main.cht b/examples/footguns/12_deadlock/main.cht index cb42b4fef..36bc9096d 100644 --- a/examples/footguns/12_deadlock/main.cht +++ b/examples/footguns/12_deadlock/main.cht @@ -22,8 +22,12 @@ # CLEAR does NOT yet enforce lock-ordering or re-entrancy at compile time. 
# That is planned for STRICT EXTREME (v0.3 roadmap). -STRUCT State { value: Int64 } -STRUCT Account { balance: Int64 } +STRUCT State { + value: Int64 +} +STRUCT Account { + balance: Int64 +} # ------------------------------------------------------------------------- # BROKEN 1: Re-entrant locking @@ -71,80 +75,81 @@ STRUCT Account { balance: Int64 } # CORRECT: Resolve promises before acquiring locks # ------------------------------------------------------------------------- FN correct_next_before_lock() RETURNS Void -> - s = State{ value: 0 } @locked; + s = State{ value: 0 } @locked; - producer = BG { - WITH EXCLUSIVE s AS inner { - inner.value = 42; - } - }; + producer = BG { + WITH EXCLUSIVE s AS inner { + inner.value = 42; + } + }; - NEXT producer; # wait for producer BEFORE acquiring the lock + NEXT producer; + # wait for producer BEFORE acquiring the lock - WITH s AS inner { - print("value = " + inner.value.toString()); # 42 - } + WITH s AS inner { + print("value = " + inner.value.toString()); # 42 + } END # ------------------------------------------------------------------------- # CORRECT: Pass already-locked value to helpers (avoid re-entrancy) # ------------------------------------------------------------------------- FN update_value(inner: State) RETURNS Void -> - # Receives the already-unlocked struct value — no lock needed. - inner.value = 2; + # Receives the already-unlocked struct value — no lock needed. 
+ inner.value = 2; END FN correct_no_reentrant() RETURNS Void -> - s = State{ value: 0 } @locked; + s = State{ value: 0 } @locked; - t = BG { - WITH EXCLUSIVE s AS inner { - inner.value = 1; - update_value(inner); # pass the value, not the lock - } - }; + t = BG { + WITH EXCLUSIVE s AS inner { + inner.value = 1; + update_value(inner); # pass the value, not the lock + } + }; - NEXT t; + NEXT t; - WITH s AS inner { - print("final value = " + inner.value.toString()); - } + WITH s AS inner { + print("final value = " + inner.value.toString()); + } END # ------------------------------------------------------------------------- # CORRECT: Sequential locks — never hold two at once # ------------------------------------------------------------------------- FN sequential_transfer() RETURNS Void -> - a = Account{ balance: 100 } @locked; - b = Account{ balance: 200 } @locked; - - t = BG { - amount = 0; - WITH EXCLUSIVE a AS ra { - amount = ra.balance / 2; - ra.balance = ra.balance - amount; - } - # 'a' is unlocked here — no two locks held simultaneously - WITH EXCLUSIVE b AS rb { - rb.balance = rb.balance + amount; - } - }; - - NEXT t; - - WITH a AS ra { print("a = " + ra.balance.toString()); } - WITH b AS rb { print("b = " + rb.balance.toString()); } + a = Account{ balance: 100 } @locked; + b = Account{ balance: 200 } @locked; + + t = BG { + amount = 0; + WITH EXCLUSIVE a AS ra { + amount = ra.balance / 2; + ra.balance = ra.balance - amount; + } + # 'a' is unlocked here — no two locks held simultaneously + WITH EXCLUSIVE b AS rb { + rb.balance = rb.balance + amount; + } + }; + + NEXT t; + + WITH a AS ra {print("a = " + ra.balance.toString());} + WITH b AS rb {print("b = " + rb.balance.toString());} END FN main() RETURNS Void -> - print("--- correct: NEXT before lock ---"); - correct_next_before_lock(); + print("--- correct: NEXT before lock ---"); + correct_next_before_lock(); - print("--- correct: no re-entrancy ---"); - correct_no_reentrant(); + print("--- correct: no 
re-entrancy ---"); + correct_no_reentrant(); - print("--- correct: sequential locks ---"); - sequential_transfer(); + print("--- correct: sequential locks ---"); + sequential_transfer(); END # Summary: diff --git a/examples/graphdb/graph.cht b/examples/graphdb/graph.cht index 6d78de8b0..5236cee9e 100644 --- a/examples/graphdb/graph.cht +++ b/examples/graphdb/graph.cht @@ -8,272 +8,275 @@ # Node ID is the string key. Edges stored inside each node. STRUCT Edge { - target: String, - weight: Float64, + target: String, + weight: Float64, } STRUCT Node { - label: String, - edges: Edge[]@list, + label: String, + edges: Edge[]@list, } # Add a node with the given ID and label. FN addNode!(MUTABLE nodes: HashMap, id: String, label: String) RETURNS !Void -> - nodes[id] = Node{ label: COPY label, edges: [] }; - RETURN; + nodes[id] = Node{ label: COPY label, edges: [] }; + RETURN; END # Add a directed edge from -> to with a weight. FN addEdge!(MUTABLE nodes: HashMap, from: String, to: String, weight: Float64) RETURNS !Void -> - node = nodes[from] OR Node{ label: "", edges: [] }; - # Build new edges list (workaround: COPY on list-from-struct is broken) - MUTABLE edges: Edge[]@list = []; - FOR i IN (0_i64 ..< node.edges.length()) DO - edges.append(Edge{ target: COPY node.edges[i].target, weight: node.edges[i].weight }); - END - edges.append(Edge{ target: COPY to, weight: weight }); - nodes[from] = Node{ label: COPY node.label, edges: edges }; - RETURN; + node = nodes[from] OR Node{ label: "", edges: [] }; + # Build new edges list (workaround: COPY on list-from-struct is broken) + MUTABLE edges: Edge[]@list = []; + FOR i IN (0 ..< node.edges.length()) DO + edges.append(Edge{ target: COPY node.edges[i].target, weight: node.edges[i].weight }); + END + edges.append(Edge{ target: COPY to, weight: weight }); + nodes[from] = Node{ label: COPY node.label, edges: edges }; + RETURN; END # Get neighbor labels for a given node. 
FN neighbors!(nodes: HashMap, id: String) RETURNS !String[]@list -> - node = nodes[id] OR Node{ label: "", edges: [] }; - MUTABLE result: String[]@list = []; - FOR i IN (0_i64 ..< node.edges.length()) DO - targetId = node.edges[i].target; - targetNode = nodes[targetId] OR Node{ label: "?", edges: [] }; - result.append(COPY targetNode.label); - END - RETURN result; + node = nodes[id] OR Node{ label: "", edges: [] }; + MUTABLE result: String[]@list = []; + FOR i IN (0 ..< node.edges.length()) DO + targetId = node.edges[i].target; + targetNode = nodes[targetId] OR Node{ label: "?", edges: [] }; + result.append(COPY targetNode.label); + END + RETURN result; END # Get the label of a node by ID. FN getLabel(nodes: HashMap, id: String) RETURNS !String -> - node = nodes[id] OR Node{ label: "", edges: [] }; - RETURN node.label; + node = nodes[id] OR Node{ label: "", edges: [] }; + RETURN node.label; END # Check if a node exists. FN hasNode(nodes: HashMap, id: String) RETURNS !Bool -> - node = nodes[id] OR Node{ label: "", edges: [] }; - RETURN node.label != ""; + node = nodes[id] OR Node{ label: "", edges: [] }; + RETURN node.label != ""; END # BFS shortest path from source to target. Returns list of node IDs. # Returns empty list if no path exists or nodes not found. 
FN shortestPath!(nodes: HashMap, source: String, target: String) RETURNS !String[]@list -> - IF hasNode(nodes, source) == FALSE THEN - MUTABLE empty: String[]@list = []; - RETURN empty; - END - IF hasNode(nodes, target) == FALSE THEN - MUTABLE empty: String[]@list = []; - RETURN empty; - END - IF source == target THEN - MUTABLE path: String[]@list = []; - path.append(COPY source); - RETURN path; - END - - # BFS using index into queue (avoids shift/reassign issues with @list) - MUTABLE queue: String[]@list = []; - MUTABLE visited: HashMap = {}; - MUTABLE parent: HashMap = {}; - - queue.append(COPY source); - visited[source] = "1"; - - MUTABLE found = FALSE; - MUTABLE head: Int64 = 0; - MUTABLE iters: Int64 = 0; - WHILE iters < 20 && head < queue.length() && found == FALSE DO - iters = iters + 1; - current = COPY queue[head]; - head = head + 1; - - bfsNode = nodes[current] OR Node{ label: "", edges: [] }; - FOR i IN (0_i64 ..< bfsNode.edges.length()) DO - neighbor = COPY bfsNode.edges[i].target; - alreadyVisited = visited[neighbor] OR ""; - IF alreadyVisited == "" THEN - visited[neighbor] = "1"; - parent[neighbor] = COPY current; - IF neighbor == target THEN - found = TRUE; - ELSE - queue.append(COPY neighbor); - END - END - END - END - - IF found == FALSE THEN - MUTABLE noPath: String[]@list = []; - RETURN noPath; - END - - # Reconstruct path from target to source + IF hasNode(nodes, source) == FALSE THEN + MUTABLE empty: String[]@list = []; + RETURN empty; + END + IF hasNode(nodes, target) == FALSE THEN + MUTABLE empty: String[]@list = []; + RETURN empty; + END + IF source == target THEN MUTABLE path: String[]@list = []; - MUTABLE cur = COPY target; - WHILE cur != source DO - nextCur = parent[cur] OR ""; - path.append(COPY cur); - cur = COPY nextCur; - END path.append(COPY source); - - # Reverse path - MUTABLE reversed: String[]@list = []; - FOR ri IN (0_i64 ..< path.length()) DO - reversed.append(COPY path[path.length() - 1 - ri]); + RETURN path; + END + + # BFS 
using index into queue (avoids shift/reassign issues with @list) + MUTABLE queue: String[]@list = []; + MUTABLE visited: HashMap = {}; + MUTABLE parent: HashMap = {}; + + queue.append(COPY source); + visited[source] = "1"; + + MUTABLE found = FALSE; + MUTABLE head = 0; + MUTABLE iters = 0; + WHILE iters < 20 && head < queue.length() && found == FALSE DO + iters = iters + 1; + current = COPY queue[head]; + head = head + 1; + + bfsNode = nodes[current] OR Node{ label: "", edges: [] }; + FOR i IN (0 ..< bfsNode.edges.length()) DO + neighbor = COPY bfsNode.edges[i].target; + alreadyVisited = visited[neighbor] OR ""; + IF alreadyVisited == "" THEN + visited[neighbor] = "1"; + parent[neighbor] = COPY current; + IF neighbor == target THEN + found = TRUE; + ELSE + queue.append(COPY neighbor); + END + END END - RETURN reversed; + END + + IF found == FALSE THEN + MUTABLE noPath: String[]@list = []; + RETURN noPath; + END + + # Reconstruct path from target to source + MUTABLE path: String[]@list = []; + MUTABLE cur = COPY target; + WHILE cur != source DO + nextCur = parent[cur] OR ""; + path.append(COPY cur); + cur = COPY nextCur; + END + path.append(COPY source); + + # Reverse path + MUTABLE reversed: String[]@list = []; + FOR ri IN (0 ..< path.length()) DO + reversed.append(COPY path[path.length() - 1 - ri]); + END + RETURN reversed; END # DFS traversal from a start node. Returns visited node IDs in DFS order. 
FN dfs!(nodes: HashMap, start: String) RETURNS !String[]@list -> - MUTABLE stack: String[]@list = []; - MUTABLE visited: HashMap = {}; - MUTABLE result: String[]@list = []; - - stack.append(COPY start); - - MUTABLE iters: Int64 = 0; - WHILE iters < 100 && stack.length() > 0 DO - iters = iters + 1; - # Pop from end of stack - top = COPY stack[stack.length() - 1]; - # Shrink stack by rebuilding without last element - MUTABLE newStack: String[]@list = []; - FOR si IN (0_i64 ..< stack.length() - 1) DO - newStack.append(COPY stack[si]); - END - stack = newStack; - - alreadySeen = visited[top] OR ""; - IF alreadySeen == "" THEN - visited[top] = "1"; - result.append(COPY top); - - # Push neighbors in reverse order for consistent DFS ordering - dfsNode = nodes[top] OR Node{ label: "", edges: [] }; - FOR i IN (0_i64 ..< dfsNode.edges.length()) DO - idx = dfsNode.edges.length() - 1 - i; - neighbor = COPY dfsNode.edges[idx].target; - neighborSeen = visited[neighbor] OR ""; - IF neighborSeen == "" THEN - stack.append(COPY neighbor); - END - END + MUTABLE stack: String[]@list = []; + MUTABLE visited: HashMap = {}; + MUTABLE result: String[]@list = []; + + stack.append(COPY start); + + MUTABLE iters = 0; + WHILE iters < 100 && stack.any?() DO + iters = iters + 1; + # Pop from end of stack + top = COPY stack[stack.length() - 1]; + # Shrink stack by rebuilding without last element + MUTABLE newStack: String[]@list = []; + FOR si IN (0 ..< stack.length() - 1) DO + newStack.append(COPY stack[si]); + END + stack = newStack; + + alreadySeen = visited[top] OR ""; + IF alreadySeen == "" THEN + visited[top] = "1"; + result.append(COPY top); + + # Push neighbors in reverse order for consistent DFS ordering + dfsNode = nodes[top] OR Node{ label: "", edges: [] }; + FOR i IN (0 ..< dfsNode.edges.length()) DO + idx = dfsNode.edges.length() - 1 - i; + neighbor = COPY dfsNode.edges[idx].target; + neighborSeen = visited[neighbor] OR ""; + IF neighborSeen == "" THEN + stack.append(COPY neighbor); 
END + END END - RETURN result; + END + RETURN result; END # Count nodes reachable from a start node (connected component size). FN componentSize!(nodes: HashMap, start: String) RETURNS !Int64 -> - reachable: String[]@list = dfs!(nodes, start); - RETURN reachable.length(); + reachable: String[]@list = dfs!(nodes, start); + RETURN reachable.length(); END FN main() RETURNS Void -> - MUTABLE nodes: HashMap = {}; - - # Add nodes - addNode!(nodes, "a", "Alice"); - addNode!(nodes, "b", "Bob"); - addNode!(nodes, "c", "Carol"); - - # Add edges: Alice -> Bob, Alice -> Carol, Bob -> Carol - addEdge!(nodes, "a", "b", 1.0); - addEdge!(nodes, "a", "c", 2.0); - addEdge!(nodes, "b", "c", 1.5); - - # Check labels - ASSERT getLabel(nodes, "a") == "Alice", "node a is Alice"; - ASSERT getLabel(nodes, "b") == "Bob", "node b is Bob"; - ASSERT getLabel(nodes, "c") == "Carol", "node c is Carol"; - - # Check neighbors of Alice - aliceNeighbors = neighbors!(nodes, "a"); - ASSERT aliceNeighbors.length() == 2, "Alice has 2 neighbors"; - - # Check neighbors of Bob - bobNeighbors = neighbors!(nodes, "b"); - ASSERT bobNeighbors.length() == 1, "Bob has 1 neighbor"; - - # Check neighbors of Carol (none) - carolNeighbors = neighbors!(nodes, "c"); - ASSERT carolNeighbors.length() == 0, "Carol has 0 neighbors"; - - # Shortest path tests - pathAC: String[]@list = shortestPath!(nodes, "a", "c"); - ASSERT pathAC.length() == 2, "path a->c has 2 nodes"; - ASSERT pathAC[0] == "a", "path starts at a"; - ASSERT pathAC[1] == "c", "path ends at c"; - - pathAB: String[]@list = shortestPath!(nodes, "a", "b"); - ASSERT pathAB.length() == 2, "path a->b has 2 nodes"; - - # Self-path - pathAA: String[]@list = shortestPath!(nodes, "a", "a"); - ASSERT pathAA.length() == 1, "self-path has 1 node"; - ASSERT pathAA[0] == "a", "self-path is source"; - - # No reverse path (c has no outgoing edges) - pathCA: String[]@list = shortestPath!(nodes, "c", "a"); - ASSERT pathCA.length() == 0, "no path c->a"; - - # DFS traversal 
tests - dfsFromA: String[]@list = dfs!(nodes, "a"); - ASSERT dfsFromA.length() == 3, "DFS from a visits 3 nodes"; - ASSERT dfsFromA[0] == "a", "DFS starts at a"; - - dfsFromC: String[]@list = dfs!(nodes, "c"); - ASSERT dfsFromC.length() == 1, "DFS from c visits 1 node (no outgoing)"; - - # Component size - ASSERT componentSize!(nodes, "a") == 3, "component from a has 3 nodes"; - ASSERT componentSize!(nodes, "c") == 1, "component from c has 1 node"; - - print("graphdb: sequential tests PASSED"); - - # ================================================================ - # Concurrent tests: BG fibers add nodes/edges simultaneously - # ================================================================ - MUTABLE cNodes: HashMap@sharded(4):writeLocked = {}; - - # Concurrent node creation: 3 BG fibers each add a node - p1 = BG { addNode!(cNodes, "x", "Xena"); }; - p2 = BG { addNode!(cNodes, "y", "Yara"); }; - p3 = BG { addNode!(cNodes, "z", "Zane"); }; - NEXT p1; NEXT p2; NEXT p3; - - ASSERT getLabel(cNodes, "x") == "Xena", "concurrent: x is Xena"; - ASSERT getLabel(cNodes, "y") == "Yara", "concurrent: y is Yara"; - ASSERT getLabel(cNodes, "z") == "Zane", "concurrent: z is Zane"; - - # Concurrent edge creation - p4 = BG { addEdge!(cNodes, "x", "y", 1.0); }; - p5 = BG { addEdge!(cNodes, "x", "z", 2.0); }; - p6 = BG { addEdge!(cNodes, "y", "z", 3.0); }; - NEXT p4; NEXT p5; NEXT p6; - - # Verify edges - xNeighbors = neighbors!(cNodes, "x"); - ASSERT xNeighbors.length() == 2, "concurrent: x has 2 neighbors"; - - yNeighbors = neighbors!(cNodes, "y"); - ASSERT yNeighbors.length() == 1, "concurrent: y has 1 neighbor"; - - # Concurrent read while structure is stable - pathXZ: String[]@list = shortestPath!(cNodes, "x", "z"); - ASSERT pathXZ.length() == 2, "concurrent: path x->z has 2 nodes"; - - print("graphdb: concurrent tests PASSED"); - - print("graphdb: concurrent tests PASSED"); + MUTABLE nodes: HashMap = {}; + + # Add nodes + addNode!(nodes, "a", "Alice"); + addNode!(nodes, "b", 
"Bob"); + addNode!(nodes, "c", "Carol"); + + # Add edges: Alice -> Bob, Alice -> Carol, Bob -> Carol + addEdge!(nodes, "a", "b", 1.0); + addEdge!(nodes, "a", "c", 2.0); + addEdge!(nodes, "b", "c", 1.5); + + # Check labels + ASSERT getLabel(nodes, "a") == "Alice", "node a is Alice"; + ASSERT getLabel(nodes, "b") == "Bob", "node b is Bob"; + ASSERT getLabel(nodes, "c") == "Carol", "node c is Carol"; + + # Check neighbors of Alice + aliceNeighbors = neighbors!(nodes, "a"); + ASSERT aliceNeighbors.length() == 2, "Alice has 2 neighbors"; + + # Check neighbors of Bob + bobNeighbors = neighbors!(nodes, "b"); + ASSERT bobNeighbors.length() == 1, "Bob has 1 neighbor"; + + # Check neighbors of Carol (none) + carolNeighbors = neighbors!(nodes, "c"); + ASSERT carolNeighbors.empty?(), "Carol has 0 neighbors"; + + # Shortest path tests + pathAC: String[]@list = shortestPath!(nodes, "a", "c"); + ASSERT pathAC.length() == 2, "path a->c has 2 nodes"; + ASSERT pathAC[0] == "a", "path starts at a"; + ASSERT pathAC[1] == "c", "path ends at c"; + + pathAB: String[]@list = shortestPath!(nodes, "a", "b"); + ASSERT pathAB.length() == 2, "path a->b has 2 nodes"; + + # Self-path + pathAA: String[]@list = shortestPath!(nodes, "a", "a"); + ASSERT pathAA.length() == 1, "self-path has 1 node"; + ASSERT pathAA[0] == "a", "self-path is source"; + + # No reverse path (c has no outgoing edges) + pathCA: String[]@list = shortestPath!(nodes, "c", "a"); + ASSERT pathCA.empty?(), "no path c->a"; + + # DFS traversal tests + dfsFromA: String[]@list = dfs!(nodes, "a"); + ASSERT dfsFromA.length() == 3, "DFS from a visits 3 nodes"; + ASSERT dfsFromA[0] == "a", "DFS starts at a"; + + dfsFromC: String[]@list = dfs!(nodes, "c"); + ASSERT dfsFromC.length() == 1, "DFS from c visits 1 node (no outgoing)"; + + # Component size + ASSERT componentSize!(nodes, "a") == 3, "component from a has 3 nodes"; + ASSERT componentSize!(nodes, "c") == 1, "component from c has 1 node"; + + print("graphdb: sequential tests 
PASSED"); + + # ================================================================ + # Concurrent tests: BG fibers add nodes/edges simultaneously + # ================================================================ + MUTABLE cNodes: HashMap @sharded(4):writeLocked = {}; + + # Concurrent node creation: 3 BG fibers each add a node + p1 = BG {addNode!(cNodes, "x", "Xena");}; + p2 = BG {addNode!(cNodes, "y", "Yara");}; + p3 = BG {addNode!(cNodes, "z", "Zane");}; + NEXT p1; + NEXT p2; + NEXT p3; + + ASSERT getLabel(cNodes, "x") == "Xena", "concurrent: x is Xena"; + ASSERT getLabel(cNodes, "y") == "Yara", "concurrent: y is Yara"; + ASSERT getLabel(cNodes, "z") == "Zane", "concurrent: z is Zane"; + + # Concurrent edge creation + p4 = BG {addEdge!(cNodes, "x", "y", 1.0);}; + p5 = BG {addEdge!(cNodes, "x", "z", 2.0);}; + p6 = BG {addEdge!(cNodes, "y", "z", 3.0);}; + NEXT p4; + NEXT p5; + NEXT p6; + + # Verify edges + xNeighbors = neighbors!(cNodes, "x"); + ASSERT xNeighbors.length() == 2, "concurrent: x has 2 neighbors"; + + yNeighbors = neighbors!(cNodes, "y"); + ASSERT yNeighbors.length() == 1, "concurrent: y has 1 neighbor"; + + # Concurrent read while structure is stable + pathXZ: String[]@list = shortestPath!(cNodes, "x", "z"); + ASSERT pathXZ.length() == 2, "concurrent: path x->z has 2 nodes"; + + print("graphdb: concurrent tests PASSED"); + + print("graphdb: concurrent tests PASSED"); END - diff --git a/examples/json_parser/json.cht b/examples/json_parser/json.cht index 26350ef24..6b96d2d9f 100644 --- a/examples/json_parser/json.cht +++ b/examples/json_parser/json.cht @@ -11,38 +11,49 @@ # cd zig && zig build-exe interp.zig -lc switch.S onRoot.S && ./interp FN isDigit?(c: String) RETURNS Bool -> - IF c == "0" || c == "1" || c == "2" || c == "3" || c == "4" || c == "5" || c == "6" || c == "7" || c == "8" || c == "9" -> RETURN TRUE; - RETURN FALSE; + IF c == "0" || c == "1" || c == "2" || c == "3" || c == "4" || c == "5" || c == "6" || c == "7" || c == "8" || c == "9" -> 
RETURN TRUE; + RETURN FALSE; END UNION JsonValue { - Null, JBool: Bool, JNum: Float64, JStr: String, - JArray: JsonValue[], - JObj: HashMap + Null, + JBool: Bool, + JNum: Float64, + JStr: String, + JArray: JsonValue[], + JObj: HashMap } # ========================================================================= # Printer (for test output) # ========================================================================= -FN jsonToString(v: JsonValue) RETURNS !String EFFECTS REENTRANT -> - PARTIAL MATCH v START - JsonValue.Null -> RETURN "null";, - JsonValue.JBool AS b -> IF b THEN RETURN "true"; END RETURN "false";, - JsonValue.JNum AS n -> RETURN toInt(n).toString();, - JsonValue.JStr AS s -> RETURN "\"" + s + "\"";, - JsonValue.JArray AS items -> - MUTABLE out = "["; - FOR i IN (0_i64 ..< items.length()) DO - IF i > 0 THEN out = out + ","; END - out = out + jsonToString(items[i]); - END - RETURN out + "]";, - JsonValue.JObj -> - RETURN "{object}";, - DEFAULT -> RETURN "?"; - END - RETURN "?"; +FN jsonToString(v: JsonValue) + RETURNS !String + EFFECTS REENTRANT +-> + PARTIAL MATCH v START + JsonValue.Null -> RETURN "null";, + JsonValue.JBool AS b -> + IF b THEN + RETURN "true"; + END + RETURN "false";, + JsonValue.JNum AS n -> RETURN n.toInt().toString();, + JsonValue.JStr AS s -> RETURN "\"" + s + "\"";, + JsonValue.JArray AS items -> + MUTABLE out = "["; + FOR i IN (0 ..< items.length()) DO + IF i > 0 THEN + out = out + ","; + END + out = out + jsonToString(items[i]); + END + RETURN out + "]";, + JsonValue.JObj -> RETURN "{object}";, + DEFAULT -> RETURN "?"; + END + RETURN "?"; END # ========================================================================= @@ -50,166 +61,187 @@ END # ========================================================================= FN skipWhitespace(json: String, pos: Int64) RETURNS !Int64 -> - MUTABLE p: Int64 = pos; - WHILE p < json.length() DO - c = charAt(json, p); - IF c == " " || c == "\n" || c == "\r" || c == "\t" THEN - p += 1; - ELSE - 
RETURN p; - END + MUTABLE p = pos; + WHILE p < json.length() DO + c = json.charAt(p); + IF c == " " || c == "\n" || c == "\r" || c == "\t" THEN + p += 1; + ELSE + RETURN p; END - RETURN p; + END + RETURN p; END FN parseString!(json: String, pos: Int64, MUTABLE penv: HashMap) RETURNS !String -> - # pos points at the opening " - MUTABLE p: Int64 = pos + 1; - MUTABLE result = ""; - WHILE p < json.length() DO - c = charAt(json, p); - IF c == "\"" THEN - penv["__jp"] = p + 1; - RETURN result; - ELSE_IF c == "\\" THEN - p += 1; - esc = charAt(json, p); - IF esc == "\"" THEN result = result + "\""; - ELSE_IF esc == "\\" THEN result = result + "\\"; - ELSE_IF esc == "n" THEN result = result + "\n"; - ELSE_IF esc == "r" THEN result = result + "\r"; - ELSE_IF esc == "t" THEN result = result + "\t"; - ELSE_IF esc == "/" THEN result = result + "/"; - ELSE result = result + esc; - END - p += 1; - ELSE - result = result + c; - p += 1; - END + # pos points at the opening " + MUTABLE p = pos + 1; + MUTABLE result = ""; + WHILE p < json.length() DO + c = json.charAt(p); + IF c == "\"" THEN + penv["__jp"] = p + 1; + RETURN result; + ELSE_IF c == "\\" THEN + p += 1; + esc = json.charAt(p); + IF esc == "\"" THEN + result = result + "\""; + ELSE_IF esc == "\\" THEN + result = result + "\\"; + ELSE_IF esc == "n" THEN + result = result + "\n"; + ELSE_IF esc == "r" THEN + result = result + "\r"; + ELSE_IF esc == "t" THEN + result = result + "\t"; + ELSE_IF esc == "/" THEN + result = result + "/"; + ELSE + result = result + esc; + END + p += 1; + ELSE + result = result + c; + p += 1; END - penv["__jp"] = p; - RETURN result; + END + penv["__jp"] = p; + RETURN result; END -FN parseValue!(json: String, pos: Int64, MUTABLE penv: HashMap, depth: Int64) RETURNS !JsonValue EFFECTS REENTRANT -> - MUTABLE p = skipWhitespace(json, pos); - - IF p >= json.length() THEN - penv["__jp"] = p; - RETURN JsonValue.Null; - END - - c = charAt(json, p); - - # String - IF c == "\"" THEN - s = parseString!(json, 
p, penv); - RETURN JsonValue{ JStr: COPY s }; +FN parseValue!( + json: String, + pos: Int64, + MUTABLE penv: HashMap, + depth: Int64 +) + RETURNS !JsonValue + EFFECTS REENTRANT +-> + MUTABLE p = skipWhitespace(json, pos); + + IF p >= json.length() THEN + penv["__jp"] = p; + RETURN JsonValue.Null; + END + + c = json.charAt(p); + + # String + IF c == "\"" THEN + s = parseString!(json, p, penv); + RETURN JsonValue{ JStr: COPY s }; + END + + # Number (simple: digits, optional minus, optional dot) + IF c == "-" || isDigit?(c) THEN + MUTABLE numStr = ""; + MUTABLE inNum = TRUE; + WHILE p < json.length() && inNum DO + nc = json.charAt(p); + IF nc == "-" || nc == "." || nc == "+" || nc == "e" || nc == "E" || isDigit?(nc) THEN + numStr = numStr + nc; + p += 1; + ELSE + inNum = FALSE; + END END - - # Number (simple: digits, optional minus, optional dot) - IF c == "-" || isDigit?(c) THEN - MUTABLE numStr = ""; - MUTABLE inNum: Bool = TRUE; - WHILE p < json.length() && inNum DO - nc = charAt(json, p); - IF nc == "-" || nc == "." 
|| nc == "+" || nc == "e" || nc == "E" || isDigit?(nc) THEN - numStr = numStr + nc; - p += 1; - ELSE - inNum = FALSE; - END - END - penv["__jp"] = p; - n = toNumber(numStr) OR 0.0; - RETURN JsonValue{ JNum: n }; + penv["__jp"] = p; + n = numStr.toNumber() OR 0.0; + RETURN JsonValue{ JNum: n }; + END + + # true + IF c == "t" THEN + penv["__jp"] = p + 4; + RETURN JsonValue{ JBool: TRUE }; + END + + # false + IF c == "f" THEN + penv["__jp"] = p + 5; + RETURN JsonValue{ JBool: FALSE }; + END + + # null + IF c == "n" THEN + penv["__jp"] = p + 4; + RETURN JsonValue.Null; + END + + # Array + IF c == "[" THEN + p += 1; + MUTABLE items: JsonValue[]@list = List[]; + p = skipWhitespace(json, p); + IF p < json.length() && json.charAt(p) == "]" THEN + penv["__jp"] = p + 1; + RETURN JsonValue{ JArray: items }; END - - # true - IF c == "t" THEN - penv["__jp"] = p + 4; - RETURN JsonValue{ JBool: TRUE }; + MUTABLE parsing = TRUE; + WHILE parsing && p < json.length() DO + item = parseValue!(json, p, penv, depth + 1); + items.append(item); + p = penv["__jp"] OR 0; + p = skipWhitespace(json, p); + IF p < json.length() && json.charAt(p) == "," THEN + p += 1; + ELSE + parsing = FALSE; + END END - - # false - IF c == "f" THEN - penv["__jp"] = p + 5; - RETURN JsonValue{ JBool: FALSE }; + p = skipWhitespace(json, p); + IF p < json.length() && json.charAt(p) == "]" THEN + p += 1; END - - # null - IF c == "n" THEN - penv["__jp"] = p + 4; - RETURN JsonValue.Null; + penv["__jp"] = p; + RETURN JsonValue{ JArray: items }; + END + + # Object + IF c == "{" THEN + p += 1; + MUTABLE obj: HashMap = {}; + p = skipWhitespace(json, p); + IF p < json.length() && json.charAt(p) == "}" THEN + penv["__jp"] = p + 1; + RETURN JsonValue{ JObj: obj }; END - - # Array - IF c == "[" THEN + MUTABLE parsing = TRUE; + WHILE parsing && p < json.length() DO + # Parse key + p = skipWhitespace(json, p); + key = parseString!(json, p, penv); + p = penv["__jp"] OR 0; + # Skip : + p = skipWhitespace(json, p); + IF p < 
json.length() && json.charAt(p) == ":" THEN p += 1; - MUTABLE items: JsonValue[]@list = List[]; - p = skipWhitespace(json, p); - IF p < json.length() && charAt(json, p) == "]" THEN - penv["__jp"] = p + 1; - RETURN JsonValue{ JArray: items }; - END - MUTABLE parsing: Bool = TRUE; - WHILE parsing && p < json.length() DO - item = parseValue!(json, p, penv, depth + 1); - items.append(item); - p = penv["__jp"] OR 0; - p = skipWhitespace(json, p); - IF p < json.length() && charAt(json, p) == "," THEN - p += 1; - ELSE - parsing = FALSE; - END - END - p = skipWhitespace(json, p); - IF p < json.length() && charAt(json, p) == "]" THEN p += 1; END - penv["__jp"] = p; - RETURN JsonValue{ JArray: items }; - END - - # Object - IF c == "{" THEN + END + # Parse value + val = parseValue!(json, p, penv, depth + 1); + p = penv["__jp"] OR 0; + obj[key] = val; + # Skip comma or end + p = skipWhitespace(json, p); + IF p < json.length() && json.charAt(p) == "," THEN p += 1; - MUTABLE obj: HashMap = {}; - p = skipWhitespace(json, p); - IF p < json.length() && charAt(json, p) == "}" THEN - penv["__jp"] = p + 1; - RETURN JsonValue{ JObj: obj }; - END - MUTABLE parsing: Bool = TRUE; - WHILE parsing && p < json.length() DO - # Parse key - p = skipWhitespace(json, p); - key = parseString!(json, p, penv); - p = penv["__jp"] OR 0; - # Skip : - p = skipWhitespace(json, p); - IF p < json.length() && charAt(json, p) == ":" THEN p += 1; END - # Parse value - val = parseValue!(json, p, penv, depth + 1); - p = penv["__jp"] OR 0; - obj[key] = val; - # Skip comma or end - p = skipWhitespace(json, p); - IF p < json.length() && charAt(json, p) == "," THEN - p += 1; - ELSE - parsing = FALSE; - END - END - p = skipWhitespace(json, p); - IF p < json.length() && charAt(json, p) == "}" THEN p += 1; END - penv["__jp"] = p; - RETURN JsonValue{ JObj: obj }; + ELSE + parsing = FALSE; + END + END + p = skipWhitespace(json, p); + IF p < json.length() && json.charAt(p) == "}" THEN + p += 1; END + penv["__jp"] = p; + 
RETURN JsonValue{ JObj: obj }; + END - # Unknown - penv["__jp"] = p + 1; - RETURN JsonValue.Null; + # Unknown + penv["__jp"] = p + 1; + RETURN JsonValue.Null; END # ========================================================================= @@ -217,22 +249,24 @@ END # ========================================================================= FN jsonGet!(MUTABLE v: JsonValue, key: String) RETURNS !JsonValue -> - PARTIAL MATCH v START - JsonValue.JObj AS map -> - raw = map[key] OR JsonValue.Null; - # COPY string values so caller owns an independent copy. - # Without COPY, both the map and the returned value share the same - # heap string pointer, causing a double-free when both are cleaned up. - PARTIAL MATCH raw START - JsonValue.JStr AS s -> RETURN JsonValue{ JStr: COPY s };, - JsonValue.JNum AS n -> RETURN JsonValue{ JNum: n };, - JsonValue.JBool AS b -> RETURN JsonValue{ JBool: b };, - JsonValue.Null -> RETURN JsonValue.Null;, - DEFAULT -> RETURN JsonValue.Null; - END, - DEFAULT -> RETURN JsonValue.Null; - END - RETURN JsonValue.Null; + PARTIAL MATCH v START + JsonValue.JObj AS map -> + raw = map[key] OR JsonValue.Null; + # COPY string values so caller owns an independent copy. + # Without COPY, both the map and the returned value share the same + # heap string pointer, causing a double-free when both are cleaned up. 
+ PARTIAL MATCH raw START + JsonValue.JStr AS s -> RETURN JsonValue{ JStr: COPY s };, + JsonValue.JNum AS n -> RETURN JsonValue{ JNum: n };, + JsonValue.JBool AS b -> RETURN JsonValue{ JBool: b };, + JsonValue.Null -> RETURN JsonValue.Null;, + + DEFAULT -> RETURN JsonValue.Null; + END + , + DEFAULT -> RETURN JsonValue.Null; + END + RETURN JsonValue.Null; END # ========================================================================= @@ -240,75 +274,258 @@ END # ========================================================================= FN main() RETURNS Void -> - MUTABLE penv: HashMap = {}; - MUTABLE passed: Int64 = 0; - MUTABLE failed: Int64 = 0; - - # Test 1: null - r1 = parseValue!("null", 0, penv, 0); - PARTIAL MATCH r1 START JsonValue.Null -> passed += 1; print("PASS: null");, DEFAULT -> failed += 1; print("FAIL: null"); END - - # Test 2: true - r2 = parseValue!("true", 0, penv, 0); - PARTIAL MATCH r2 START JsonValue.JBool AS b -> IF b THEN passed += 1; print("PASS: true"); ELSE failed += 1; print("FAIL: true"); END, DEFAULT -> failed += 1; print("FAIL: true"); END - - # Test 3: false - r3 = parseValue!("false", 0, penv, 0); - PARTIAL MATCH r3 START JsonValue.JBool AS b -> IF b == FALSE THEN passed += 1; print("PASS: false"); ELSE failed += 1; print("FAIL: false"); END, DEFAULT -> failed += 1; print("FAIL: false"); END - - # Test 4: number - r4 = parseValue!("42", 0, penv, 0); - PARTIAL MATCH r4 START JsonValue.JNum AS n -> IF n == 42.0 THEN passed += 1; print("PASS: number 42"); ELSE failed += 1; print("FAIL: number 42"); END, DEFAULT -> failed += 1; print("FAIL: number 42"); END - - # Test 5: negative number - r5 = parseValue!("-7", 0, penv, 0); - PARTIAL MATCH r5 START JsonValue.JNum AS n -> IF n == 0.0 - 7.0 THEN passed += 1; print("PASS: number -7"); ELSE failed += 1; print("FAIL: number -7"); END, DEFAULT -> failed += 1; print("FAIL: number -7"); END - - # Test 6: string - r6 = parseValue!("\"hello\"", 0, penv, 0); - PARTIAL MATCH r6 START JsonValue.JStr 
AS s -> IF s == "hello" THEN passed += 1; print("PASS: string hello"); ELSE failed += 1; print("FAIL: string hello got " + s); END, DEFAULT -> failed += 1; print("FAIL: string hello"); END - - # Test 7: string with escapes - r7 = parseValue!("\"line1\\nline2\"", 0, penv, 0); - PARTIAL MATCH r7 START JsonValue.JStr AS s -> IF s.length() == 11 THEN passed += 1; print("PASS: string escapes"); ELSE failed += 1; print("FAIL: string escapes len=" + s.length().toString()); END, DEFAULT -> failed += 1; print("FAIL: string escapes"); END - - # Test 8: empty array - r8 = parseValue!("[]", 0, penv, 0); - PARTIAL MATCH r8 START JsonValue.JArray AS items -> IF items.length() == 0 THEN passed += 1; print("PASS: empty array"); ELSE failed += 1; print("FAIL: empty array"); END, DEFAULT -> failed += 1; print("FAIL: empty array"); END - - # Test 9: array of numbers - r9 = parseValue!("[1, 2, 3]", 0, penv, 0); - PARTIAL MATCH r9 START JsonValue.JArray AS items -> IF items.length() == 3 THEN passed += 1; print("PASS: array [1,2,3]"); ELSE failed += 1; print("FAIL: array [1,2,3] len=" + items.length().toString()); END, DEFAULT -> failed += 1; print("FAIL: array [1,2,3]"); END - - # Test 10: nested array - r10 = parseValue!("[[1], [2, 3]]", 0, penv, 0); - PARTIAL MATCH r10 START JsonValue.JArray AS items -> IF items.length() == 2 THEN passed += 1; print("PASS: nested array"); ELSE failed += 1; print("FAIL: nested array"); END, DEFAULT -> failed += 1; print("FAIL: nested array"); END - - # Test 11: empty object - r11 = parseValue!("{}", 0, penv, 0); - PARTIAL MATCH r11 START JsonValue.JObj -> passed += 1; print("PASS: empty object");, DEFAULT -> failed += 1; print("FAIL: empty object"); END - - # Test 12: simple object - MUTABLE r12 = parseValue!("{\"name\": \"CLEAR\", \"version\": 1}", 0, penv, 0); - nameVal = jsonGet!(r12, "name"); - PARTIAL MATCH nameVal START JsonValue.JStr AS s -> IF s == "CLEAR" THEN passed += 1; print("PASS: object.name"); ELSE failed += 1; print("FAIL: 
object.name got " + s); END, DEFAULT -> failed += 1; print("FAIL: object.name"); END - - # Test 13: bool in object - MUTABLE r13 = parseValue!("{\"ok\": true}", 0, penv, 0); - boolVal = jsonGet!(r13, "ok"); - PARTIAL MATCH boolVal START JsonValue.JBool AS b -> IF b THEN passed += 1; print("PASS: bool in object"); ELSE failed += 1; print("FAIL: bool in object"); END, DEFAULT -> failed += 1; print("FAIL: bool in object"); END - - # Test 14: mixed types in array - r14 = parseValue!("[1, \"two\", true, null]", 0, penv, 0); - PARTIAL MATCH r14 START JsonValue.JArray AS items -> IF items.length() == 4 THEN passed += 1; print("PASS: mixed array"); ELSE failed += 1; print("FAIL: mixed array"); END, DEFAULT -> failed += 1; print("FAIL: mixed array"); END - - # Test 15: whitespace handling - MUTABLE r15 = parseValue!(" { \"key\" : \"value\" } ", 0, penv, 0); - wsVal = jsonGet!(r15, "key"); - PARTIAL MATCH wsVal START JsonValue.JStr AS s -> IF s == "value" THEN passed += 1; print("PASS: whitespace"); ELSE failed += 1; print("FAIL: whitespace"); END, DEFAULT -> failed += 1; print("FAIL: whitespace"); END - - # Summary - print("---"); - print("JSON Parser: ${passed.toString()} passed, ${failed.toString()} failed"); - ASSERT failed == 0; + MUTABLE penv: HashMap = {}; + MUTABLE passed = 0; + MUTABLE failed = 0; + + # Test 1: null + r1 = parseValue!("null", 0, penv, 0); + PARTIAL MATCH r1 START + JsonValue.Null -> + passed += 1; + print("PASS: null");, + DEFAULT -> + failed += 1; + print("FAIL: null"); + END + + # Test 2: true + r2 = parseValue!("true", 0, penv, 0); + PARTIAL MATCH r2 START + JsonValue.JBool AS b -> + IF b THEN + passed += 1; + print("PASS: true"); + ELSE + failed += 1; + print("FAIL: true"); + END + , + DEFAULT -> + failed += 1; + print("FAIL: true"); + END + + # Test 3: false + r3 = parseValue!("false", 0, penv, 0); + PARTIAL MATCH r3 START + JsonValue.JBool AS b -> + IF b == FALSE THEN + passed += 1; + print("PASS: false"); + ELSE + failed += 1; + print("FAIL: 
false"); + END + , + DEFAULT -> + failed += 1; + print("FAIL: false"); + END + + # Test 4: number + r4 = parseValue!("42", 0, penv, 0); + PARTIAL MATCH r4 START + JsonValue.JNum AS n -> + IF n == 42.0 THEN + passed += 1; + print("PASS: number 42"); + ELSE + failed += 1; + print("FAIL: number 42"); + END + , + DEFAULT -> + failed += 1; + print("FAIL: number 42"); + END + + # Test 5: negative number + r5 = parseValue!("-7", 0, penv, 0); + PARTIAL MATCH r5 START + JsonValue.JNum AS n -> + IF n == 0.0 - 7.0 THEN + passed += 1; + print("PASS: number -7"); + ELSE + failed += 1; + print("FAIL: number -7"); + END + , + DEFAULT -> + failed += 1; + print("FAIL: number -7"); + END + + # Test 6: string + r6 = parseValue!("\"hello\"", 0, penv, 0); + PARTIAL MATCH r6 START + JsonValue.JStr AS s -> + IF s == "hello" THEN + passed += 1; + print("PASS: string hello"); + ELSE + failed += 1; + print("FAIL: string hello got " + s); + END + , + DEFAULT -> + failed += 1; + print("FAIL: string hello"); + END + + # Test 7: string with escapes + r7 = parseValue!("\"line1\\nline2\"", 0, penv, 0); + PARTIAL MATCH r7 START + JsonValue.JStr AS s -> + IF s.length() == 11 THEN + passed += 1; + print("PASS: string escapes"); + ELSE + failed += 1; + print("FAIL: string escapes len=" + s.length().toString()); + END + , + DEFAULT -> + failed += 1; + print("FAIL: string escapes"); + END + + # Test 8: empty array + r8 = parseValue!("[]", 0, penv, 0); + PARTIAL MATCH r8 START + JsonValue.JArray AS items -> + IF items.empty?() THEN + passed += 1; + print("PASS: empty array"); + ELSE + failed += 1; + print("FAIL: empty array"); + END + , + DEFAULT -> + failed += 1; + print("FAIL: empty array"); + END + + # Test 9: array of numbers + r9 = parseValue!("[1, 2, 3]", 0, penv, 0); + PARTIAL MATCH r9 START + JsonValue.JArray AS items -> + IF items.length() == 3 THEN + passed += 1; + print("PASS: array [1,2,3]"); + ELSE + failed += 1; + print("FAIL: array [1,2,3] len=" + items.length().toString()); + END + , + 
DEFAULT -> + failed += 1; + print("FAIL: array [1,2,3]"); + END + + # Test 10: nested array + r10 = parseValue!("[[1], [2, 3]]", 0, penv, 0); + PARTIAL MATCH r10 START + JsonValue.JArray AS items -> + IF items.length() == 2 THEN + passed += 1; + print("PASS: nested array"); + ELSE + failed += 1; + print("FAIL: nested array"); + END + , + DEFAULT -> + failed += 1; + print("FAIL: nested array"); + END + + # Test 11: empty object + r11 = parseValue!("{}", 0, penv, 0); + PARTIAL MATCH r11 START + JsonValue.JObj -> + passed += 1; + print("PASS: empty object");, + DEFAULT -> + failed += 1; + print("FAIL: empty object"); + END + + # Test 12: simple object + MUTABLE r12 = parseValue!("{\"name\": \"CLEAR\", \"version\": 1}", 0, penv, 0); + nameVal = jsonGet!(r12, "name"); + PARTIAL MATCH nameVal START + JsonValue.JStr AS s -> + IF s == "CLEAR" THEN + passed += 1; + print("PASS: object.name"); + ELSE + failed += 1; + print("FAIL: object.name got " + s); + END + , + DEFAULT -> + failed += 1; + print("FAIL: object.name"); + END + + # Test 13: bool in object + MUTABLE r13 = parseValue!("{\"ok\": true}", 0, penv, 0); + boolVal = jsonGet!(r13, "ok"); + PARTIAL MATCH boolVal START + JsonValue.JBool AS b -> + IF b THEN + passed += 1; + print("PASS: bool in object"); + ELSE + failed += 1; + print("FAIL: bool in object"); + END + , + DEFAULT -> + failed += 1; + print("FAIL: bool in object"); + END + + # Test 14: mixed types in array + r14 = parseValue!("[1, \"two\", true, null]", 0, penv, 0); + PARTIAL MATCH r14 START + JsonValue.JArray AS items -> + IF items.length() == 4 THEN + passed += 1; + print("PASS: mixed array"); + ELSE + failed += 1; + print("FAIL: mixed array"); + END + , + DEFAULT -> + failed += 1; + print("FAIL: mixed array"); + END + + # Test 15: whitespace handling + MUTABLE r15 = parseValue!(" { \"key\" : \"value\" } ", 0, penv, 0); + wsVal = jsonGet!(r15, "key"); + PARTIAL MATCH wsVal START + JsonValue.JStr AS s -> + IF s == "value" THEN + passed += 1; + print("PASS: 
whitespace"); + ELSE + failed += 1; + print("FAIL: whitespace"); + END + , + DEFAULT -> + failed += 1; + print("FAIL: whitespace"); + END + + # Summary + print("---"); + print("JSON Parser: ${passed.toString()} passed, ${failed.toString()} failed"); + ASSERT failed == 0; END diff --git a/examples/litedb/litedb.cht b/examples/litedb/litedb.cht index 6443afc8e..4ea13dbfa 100644 --- a/examples/litedb/litedb.cht +++ b/examples/litedb/litedb.cht @@ -1,205 +1,235 @@ # LiteDB: Concurrent B-Tree Key-Value Store # Compiler stress test: @shared:locked, lambdas, REDUCE, DO blocks, ENUM. -ENUM Op { Get, Put, Delete, Scan } +ENUM Op { + Get, + Put, + Delete, + Scan +} STRUCT Node { - keys: Int64[]@list, - vals: String[]@list, - numChildren: Int64, - isLeaf: Bool + keys: Int64[]@list, + vals: String[]@list, + numChildren: Int64, + isLeaf: Bool +} + +STRUCT TreeStats { + insertCount: Int64, + searchCount: Int64 +} +STRUCT ResultSet { + keys: Int64[]@list, + count: Int64 } -STRUCT TreeStats { insertCount: Int64, searchCount: Int64 } -STRUCT ResultSet { keys: Int64[]@list, count: Int64 } - -FN makeResultSet(count: Int64) RETURNS !ResultSet @multiowned -> - MUTABLE ks: Int64[]@list = []; - MUTABLE i: Int64 = 0; - WHILE i < count DO - ks.append((i + 1) * 10); - i += 1; - END - RETURN ResultSet{ keys: ks, count: count } @multiowned; +FN makeResultSet(count: Int64) RETURNS !ResultSet@multiowned -> + MUTABLE ks: Int64[]@list = []; + MUTABLE i = 0; + WHILE i < count DO + ks.append((i + 1) * 10); + i += 1; + END + RETURN ResultSet{ keys: ks, count: count } @multiowned; END FN childKey(parentIdx: Int64, pos: Int64) RETURNS !String -> - RETURN "${parentIdx.toString()}:${pos.toString()}"; + RETURN "${parentIdx.toString()}:${pos.toString()}"; END FN findKeyIndex(keys: Int64[]@list, key: Int64) RETURNS Int64 -> - MUTABLE i: Int64 = 0; - WHILE i < keys.length() DO - IF keys[i] >= key -> RETURN i; - i += 1; - END - RETURN i; + MUTABLE i = 0; + WHILE i < keys.length() DO + IF keys[i] >= key -> 
RETURN i; + i += 1; + END + RETURN i; END -FN scanWithFilter(keys: Int64[]@list, filter: FN(Int64) -> Bool) RETURNS !Int64 -> - MUTABLE count: Int64 = 0; - MUTABLE i: Int64 = 0; - WHILE i < keys.length() DO - IF filter(keys[i]) -> count += 1; - i += 1; - END - RETURN count; +FN scanWithFilter(keys: Int64[]@list, filter: FN (Int64) -> Bool) RETURNS !Int64 -> + MUTABLE count = 0; + MUTABLE i = 0; + WHILE i < keys.length() DO + IF filter(keys[i]) -> count += 1; + i += 1; + END + RETURN count; END FN main() RETURNS Void -> - MUTABLE nodes: Node[]@list = []; - MUTABLE children: HashMap = {}; - - # Build B-tree: root(30) -> left(10,20), right(40) - MUTABLE lk0: Int64[]@list = []; - MUTABLE lv0: String[]@list = []; - lk0.append(10); - lk0.append(20); - lv0.append("10"); - lv0.append("20"); - nodes.append(Node{ keys: lk0, vals: lv0, numChildren: 0, isLeaf: TRUE }); - MUTABLE rk0: Int64[]@list = []; - MUTABLE rv0: String[]@list = []; - rk0.append(40); - rv0.append("40"); - nodes.append(Node{ keys: rk0, vals: rv0, numChildren: 0, isLeaf: TRUE }); - MUTABLE rootK: Int64[]@list = []; - MUTABLE rootV: String[]@list = []; - rootK.append(30); - rootV.append("30"); - nodes.append(Node{ keys: rootK, vals: rootV, numChildren: 2, isLeaf: FALSE }); - children[childKey(2, 0)] = 0; - children[childKey(2, 1)] = 1; - MUTABLE root: Int64 = 2; - - # ENUM dispatch - MUTABLE opName = ""; - PARTIAL MATCH Op.Scan START - Op.Get -> opName = "get";, - Op.Put -> opName = "put";, - Op.Delete -> opName = "delete";, - Op.Scan -> opName = "scan"; - END - ASSERT opName == "scan", "ENUM dispatch"; - - # Tree structure - ASSERT nodes[root].isLeaf == FALSE, "root is internal"; - ASSERT nodes[root].keys[0] == 30, "root key 30"; - ASSERT nodes[0].keys[1] == 20, "left key 1 = 20"; - ASSERT nodes[1].keys[0] == 40, "right key 0 = 40"; - - # Tree search: descend from root to find key 20 - cp = findKeyIndex(nodes[root].keys, 20); - leafIdx = children[childKey(root, cp)] OR (0 - 1); - ASSERT 
nodes[leafIdx].keys[1] == 20, "tree search: key 20"; - - # Collect all keys for pipeline queries - MUTABLE allKeys: Int64[]@list = []; - allKeys.append(10); - allKeys.append(20); - allKeys.append(40); - - # Lambda filter - evenCount = scanWithFilter(allKeys, %(k: Int64) -> k MOD 2 == 0); - ASSERT evenCount == 3, "lambda: 3 even (10,20,40)"; - - # REDUCE pipeline - keySum = allKeys |> REDUCE(0_i64) acc + _; - ASSERT keySum == 70, "REDUCE sum = 70"; - - # WHERE + REDUCE - evenSum = allKeys |> WHERE _ MOD 2 == 0 |> REDUCE(0_i64) acc + _; - ASSERT evenSum == 70, "WHERE even REDUCE = 70 (all even)"; - - # WHERE + SELECT + REDUCE: keys > 15, doubled - bigDoubled = allKeys |> WHERE _ > 15 |> SELECT _ * 2 |> REDUCE(0_i64) acc + _; - ASSERT bigDoubled == 120, "WHERE>15 + SELECT*2 + REDUCE = 120 (20*2+40*2)"; - - # @shared:locked concurrent access - stats = TreeStats{ insertCount: 0, searchCount: 0 } @shared:locked; - - # BG fibers with WITH EXCLUSIVE - MUTABLE futures: ~Void[]@list = []; - MUTABLE fi: Int64 = 0; - WHILE fi < 4 DO - futures.append(BG { - WITH EXCLUSIVE stats AS s { s.insertCount = s.insertCount + 10; } - }); - fi += 1; - END - fi = 0; - WHILE fi < 4 DO - NEXT futures[fi]; - fi += 1; - END - WITH stats AS s { - ASSERT s.insertCount == 40, "4 BG fibers x 10 = 40"; - } - - # DO block: 4 concurrent branches - DO { - WITH EXCLUSIVE stats AS s { s.searchCount = s.searchCount + 5; }, - WITH EXCLUSIVE stats AS s { s.searchCount = s.searchCount + 5; }, - WITH EXCLUSIVE stats AS s { s.searchCount = s.searchCount + 5; }, - WITH EXCLUSIVE stats AS s { s.searchCount = s.searchCount + 5; } - } - WITH stats AS s { - ASSERT s.searchCount == 20, "DO: 4 x 5 = 20"; - } - - # ================================================================ - # Commit 5: @multiowned results + nested BG - # ================================================================ - - # @multiowned: Rc-wrapped struct with list fields - rs = makeResultSet(3); - ASSERT rs.count == 3, "multiowned: 3 items"; - 
# Rc clone: both rs and rs2 share the same ResultSet - rs2 = rs; - ASSERT rs2.count == 3, "multiowned clone shares data"; - - # Nested BG: outer BG spawns inner BG - outerP: ~Int64 = BG { - innerP: ~Int64 = BG { 42; }; - innerResult = NEXT innerP; - innerResult + 1; - }; - outerResult = NEXT outerP; - ASSERT outerResult == 43, "nested BG: 42 + 1 = 43"; - - # BG returning String (previously misclassified as BoundedStream) - strP: ~String = BG { "litedb"; }; - strResult = NEXT strP; - ASSERT strResult == "litedb", "BG returns string"; - - # ================================================================ - # Commit 6: Verification summary - # ================================================================ - # All features exercised: - # ENUM Op with PARTIAL MATCH dispatch - # STRUCT Node with Int64[]@list + String[]@list fields - # Node[]@list (list of structs with nested lists) - # Mutation through list[idx].field (needs_mut_ref) - # Struct literal with @list fields (direct ArrayList move) - # Field reassignment with pre-cleanup of old value - # HashMap for children index mapping - # findKeyIndex helper (read-only list param) - # scanWithFilter with FN(Int64)->Bool lambda parameter - # Pipeline: REDUCE, WHERE+REDUCE, WHERE+SELECT+REDUCE - # @shared:locked TreeStats (Arc>) - # BG fibers with WITH EXCLUSIVE + promise list join - # DO block with 4 concurrent branches - # @multiowned ResultSet (Rc with list fields) - # Nested BG (outer spawns inner) - # BG returning String (~String promise) - - # ENUM exhaustive check - MUTABLE opCount: Int64 = 0; - PARTIAL MATCH Op.Get START Op.Get -> opCount += 1;, DEFAULT -> opCount += 0; END - PARTIAL MATCH Op.Put START Op.Put -> opCount += 1;, DEFAULT -> opCount += 0; END - PARTIAL MATCH Op.Delete START Op.Delete -> opCount += 1;, DEFAULT -> opCount += 0; END - PARTIAL MATCH Op.Scan START Op.Scan -> opCount += 1;, DEFAULT -> opCount += 0; END - ASSERT opCount == 4, "ENUM: all 4 variants"; - - print("LiteDB: all tests PASSED (17 
compiler features verified)"); + MUTABLE nodes: Node[]@list = []; + MUTABLE children: HashMap = {}; + + # Build B-tree: root(30) -> left(10,20), right(40) + MUTABLE lk0: Int64[]@list = []; + MUTABLE lv0: String[]@list = []; + lk0.append(10); + lk0.append(20); + lv0.append("10"); + lv0.append("20"); + nodes.append(Node{ keys: lk0, vals: lv0, numChildren: 0, isLeaf: TRUE }); + MUTABLE rk0: Int64[]@list = []; + MUTABLE rv0: String[]@list = []; + rk0.append(40); + rv0.append("40"); + nodes.append(Node{ keys: rk0, vals: rv0, numChildren: 0, isLeaf: TRUE }); + MUTABLE rootK: Int64[]@list = []; + MUTABLE rootV: String[]@list = []; + rootK.append(30); + rootV.append("30"); + nodes.append(Node{ keys: rootK, vals: rootV, numChildren: 2, isLeaf: FALSE }); + children[childKey(2, 0)] = 0; + children[childKey(2, 1)] = 1; + root = 2; + + # ENUM dispatch + MUTABLE opName = ""; + PARTIAL MATCH Op.Scan START + Op.Get -> opName = "get";, + Op.Put -> opName = "put";, + Op.Delete -> opName = "delete";, + Op.Scan -> opName = "scan"; + END + ASSERT opName == "scan", "ENUM dispatch"; + + # Tree structure + ASSERT nodes[root].isLeaf == FALSE, "root is internal"; + ASSERT nodes[root].keys[0] == 30, "root key 30"; + ASSERT nodes[0].keys[1] == 20, "left key 1 = 20"; + ASSERT nodes[1].keys[0] == 40, "right key 0 = 40"; + + # Tree search: descend from root to find key 20 + cp = findKeyIndex(nodes[root].keys, 20); + leafIdx = children[childKey(root, cp)] OR (0 - 1); + ASSERT nodes[leafIdx].keys[1] == 20, "tree search: key 20"; + + # Collect all keys for pipeline queries + MUTABLE allKeys: Int64[]@list = []; + allKeys.append(10); + allKeys.append(20); + allKeys.append(40); + + # Lambda filter + evenCount = scanWithFilter(allKeys, %(k: Int64) -> k MOD 2 == 0); + ASSERT evenCount == 3, "lambda: 3 even (10,20,40)"; + + # REDUCE pipeline + keySum = allKeys |> REDUCE(0) acc + _; + ASSERT keySum == 70, "REDUCE sum = 70"; + + # WHERE + REDUCE + evenSum = allKeys + |> WHERE _ MOD 2 == 0 + |> REDUCE(0) 
acc + _; + ASSERT evenSum == 70, "WHERE even REDUCE = 70 (all even)"; + + # WHERE + SELECT + REDUCE: keys > 15, doubled + bigDoubled = allKeys + |> WHERE _ > 15 + |> SELECT _ * 2 + |> REDUCE(0) acc + _; + ASSERT bigDoubled == 120, "WHERE>15 + SELECT*2 + REDUCE = 120 (20*2+40*2)"; + + # @shared:locked concurrent access + stats = TreeStats{ insertCount: 0, searchCount: 0 } @shared:locked; + + # BG fibers with WITH EXCLUSIVE + MUTABLE futures: ~Void[]@list = []; + MUTABLE fi = 0; + WHILE fi < 4 DO + futures.append( + BG { + WITH EXCLUSIVE stats AS s {s.insertCount = s.insertCount + 10;} + } + ); + fi += 1; + END + fi = 0; + WHILE fi < 4 DO + NEXT futures[fi]; + fi += 1; + END + WITH stats AS s { + ASSERT s.insertCount == 40, "4 BG fibers x 10 = 40"; + } + + # DO block: 4 concurrent branches + DO { + WITH EXCLUSIVE stats AS s {s.searchCount = s.searchCount + 5;}, + WITH EXCLUSIVE stats AS s {s.searchCount = s.searchCount + 5;}, + WITH EXCLUSIVE stats AS s {s.searchCount = s.searchCount + 5;}, + WITH EXCLUSIVE stats AS s {s.searchCount = s.searchCount + 5;} + } + WITH stats AS s { + ASSERT s.searchCount == 20, "DO: 4 x 5 = 20"; + } + + # ================================================================ + # Commit 5: @multiowned results + nested BG + # ================================================================ + + # @multiowned: Rc-wrapped struct with list fields + rs = makeResultSet(3); + ASSERT rs.count == 3, "multiowned: 3 items"; + # Rc clone: both rs and rs2 share the same ResultSet + rs2 = rs; + ASSERT rs2.count == 3, "multiowned clone shares data"; + + # Nested BG: outer BG spawns inner BG + outerP: ~Int64 = BG { + innerP: ~Int64 = BG {42;}; + innerResult = NEXT innerP; + innerResult + 1; + }; + outerResult = NEXT outerP; + ASSERT outerResult == 43, "nested BG: 42 + 1 = 43"; + + # BG returning String (previously misclassified as BoundedStream) + strP: ~String = BG {"litedb";}; + strResult = NEXT strP; + ASSERT strResult == "litedb", "BG returns string"; + + # 
================================================================ + # Commit 6: Verification summary + # ================================================================ + # All features exercised: + # ENUM Op with PARTIAL MATCH dispatch + # STRUCT Node with Int64[]@list + String[]@list fields + # Node[]@list (list of structs with nested lists) + # Mutation through list[idx].field (needs_mut_ref) + # Struct literal with @list fields (direct ArrayList move) + # Field reassignment with pre-cleanup of old value + # HashMap for children index mapping + # findKeyIndex helper (read-only list param) + # scanWithFilter with FN(Int64)->Bool lambda parameter + # Pipeline: REDUCE, WHERE+REDUCE, WHERE+SELECT+REDUCE + # @shared:locked TreeStats (Arc>) + # BG fibers with WITH EXCLUSIVE + promise list join + # DO block with 4 concurrent branches + # @multiowned ResultSet (Rc with list fields) + # Nested BG (outer spawns inner) + # BG returning String (~String promise) + + # ENUM exhaustive check + MUTABLE opCount = 0; + PARTIAL MATCH Op.Get START + Op.Get -> opCount += 1;, + DEFAULT -> opCount += 0; + END + PARTIAL MATCH Op.Put START + Op.Put -> opCount += 1;, + DEFAULT -> opCount += 0; + END + PARTIAL MATCH Op.Delete START + Op.Delete -> opCount += 1;, + DEFAULT -> opCount += 0; + END + PARTIAL MATCH Op.Scan START + Op.Scan -> opCount += 1;, + DEFAULT -> opCount += 0; + END + ASSERT opCount == 4, "ENUM: all 4 variants"; + + print("LiteDB: all tests PASSED (17 compiler features verified)"); END diff --git a/examples/parallel_du/du.cht b/examples/parallel_du/du.cht index 534dc57df..332a2e0f2 100644 --- a/examples/parallel_du/du.cht +++ b/examples/parallel_du/du.cht @@ -10,11 +10,11 @@ FN entrySize(entry: String) RETURNS !Int64 EFFECTS REENTRANT -> - fullPath = substr(entry, 2, entry.length() - 2); - prefix = substr(entry, 0, 2); + fullPath = entry.substr(2, entry.length() - 2); + prefix = entry.substr(0, 2); IF prefix == "f:" THEN - RETURN fileSize(fullPath); + RETURN 
fullPath.fileSize(); ELSE_IF prefix == "d:" THEN RETURN scanDir(fullPath); END @@ -26,14 +26,14 @@ FN scanDir(path: String) RETURNS !Int64 EFFECTS REENTRANT -> - entries = listAll(path) + entries = path.listAll() |> SELECT "${substr(_, 0, 2)}${path}/${substr(_, 2, _.length() - 2)}"; children = entries |> CONCURRENT SELECT entrySize(_) |> REDUCE(0) acc + _; - RETURN fileSize(path) + children; + RETURN path.fileSize() + children; END FN main() RETURNS Void -> diff --git a/examples/testing/basic_test.cht b/examples/testing/basic_test.cht index a89c3eacb..944bb8730 100644 --- a/examples/testing/basic_test.cht +++ b/examples/testing/basic_test.cht @@ -4,18 +4,20 @@ # Run: clear test examples/testing/basic_test.cht FN add(a: Float64, b: Float64) RETURNS Float64 -> - RETURN a + b; + RETURN a + b; END FN multiply(a: Float64, b: Float64) RETURNS Float64 -> - RETURN a * b; + RETURN a * b; END PRIVATE FN secret() RETURNS Float64 -> - RETURN 42.0; + RETURN 42.0; END -FN main() RETURNS Void -> RETURN; END +FN main() RETURNS Void -> + RETURN; +END TEST Arithmetic DO WHEN "add" DO diff --git a/examples/testing/freeze_candidate.cht b/examples/testing/freeze_candidate.cht index 156552543..8107edc3b 100644 --- a/examples/testing/freeze_candidate.cht +++ b/examples/testing/freeze_candidate.cht @@ -14,65 +14,69 @@ # ./clear doctor examples/testing/freeze_candidate.profile/ STRUCT Doc { - title: String, - body: String, - tag: String + title: String, + body: String, + tag: String } -FN buildDoc(seed: Int64) RETURNS !Doc @multiowned -> - RETURN Doc{ - title: "doc_title_" + seed.toString(), - body: "body_content_" + (seed * 13).toString() + "_suffix", - tag: "tag_" + (seed MOD 50).toString() - } @multiowned; +FN buildDoc(seed: Int64) RETURNS !Doc@multiowned -> + RETURN Doc{ + title: "doc_title_" + seed.toString(), + body: "body_content_" + (seed * 13).toString() + "_suffix", + tag: "tag_" + (seed MOD 50).toString() + } @multiowned; END FN main() RETURNS Void -> - n_docs = 10000_i64; + 
n_docs = 10_000; - # ── RC version: n_docs scattered Doc allocations ───────────────────────── - # 5 heap allocs per Doc (ctrl, data, title, body, tag) → 50 000 allocs total. - # Working set ≈ n_docs × 80 B ≈ 800 KB, exceeds L1/L2 → LLC misses. - docs: Doc @multiowned []@list = List[]; - MUTABLE k = 0_i64; - WHILE k < n_docs DO - docs.append(buildDoc(k)); - k += 1; - END + # ── RC version: n_docs scattered Doc allocations ───────────────────────── + # 5 heap allocs per Doc (ctrl, data, title, body, tag) → 50 000 allocs total. + # Working set ≈ n_docs × 80 B ≈ 800 KB, exceeds L1/L2 → LLC misses. + docs: Doc@multiowned[]@list = List[]; + MUTABLE k = 0; + WHILE k < n_docs DO + docs.append(buildDoc(k)); + k += 1; + END - t0 = timestampMs(); - MUTABLE s_rc = 0_i64; - iters = 200_i64; - MUTABLE it = 0_i64; - WHILE it < iters DO - MUTABLE idx = 0_i64; - WHILE idx < n_docs DO - s_rc += docs[idx].title.length() + docs[idx].body.length() + docs[idx].tag.length(); - idx += 1; - END - it += 1; + t0 = timestampMs(); + MUTABLE s_rc = 0; + iters = 200; + MUTABLE it = 0; + WHILE it < iters DO + MUTABLE idx = 0; + WHILE idx < n_docs DO + s_rc += docs[idx].title.length() + docs[idx].body.length() + docs[idx].tag.length(); + idx += 1; END - rc_ms = timestampMs() - t0; + it += 1; + END + rc_ms = timestampMs() - t0; - # ── FREEZE version: one Doc, all fields in one contiguous buffer ────────── - # After FREEZE the frozen buffer holds the Doc struct + all 3 string bodies - # at adjacent offsets. Access is one pointer hop into the buffer. - # Working set ≈ 80 B → always in L1. - doc_fr = buildDoc(42); - frozen = FREEZE doc_fr; + # ── FREEZE version: one Doc, all fields in one contiguous buffer ────────── + # After FREEZE the frozen buffer holds the Doc struct + all 3 string bodies + # at adjacent offsets. Access is one pointer hop into the buffer. + # Working set ≈ 80 B → always in L1. 
+ doc_fr = buildDoc(42); + frozen = FREEZE doc_fr; - t1 = timestampMs(); - MUTABLE s_fr = 0_i64; - frozen_iters = n_docs * iters; - MUTABLE fi = 0_i64; - WHILE fi < frozen_iters DO - s_fr += frozen.title.length() + frozen.body.length() + frozen.tag.length(); - fi += 1; - END - fr_ms = timestampMs() - t1; + t1 = timestampMs(); + MUTABLE s_fr = 0; + frozen_iters = n_docs * iters; + MUTABLE fi = 0; + WHILE fi < frozen_iters DO + s_fr += frozen.title.length() + frozen.body.length() + frozen.tag.length(); + fi += 1; + END + fr_ms = timestampMs() - t1; - print("RC (" + n_docs.toString() + " docs x " + iters.toString() + " passes): " + - rc_ms.toString() + "ms sum=" + s_rc.toString()); - print("Frozen (1 doc x " + frozen_iters.toString() + " reads): " + - fr_ms.toString() + "ms sum=" + s_fr.toString()); + print( + "RC (" + n_docs.toString() + " docs x " + iters.toString() + " passes): " + + rc_ms.toString() + "ms sum=" + s_rc.toString() + ); + print( + "Frozen (1 doc x " + frozen_iters.toString() + " reads): " + + fr_ms.toString() + "ms sum=" + s_fr.toString() + ); END diff --git a/examples/testing/stub_ufcs.cht b/examples/testing/stub_ufcs.cht index b0fe3bd23..33b2be7f8 100644 --- a/examples/testing/stub_ufcs.cht +++ b/examples/testing/stub_ufcs.cht @@ -3,17 +3,21 @@ # # Run: clear test examples/testing/stub_ufcs.cht -STRUCT Client { host: String } +STRUCT Client { + host: String +} FN query(client: Client, sql: String) RETURNS String -> - RETURN "real database result"; + RETURN "real database result"; END FN execute(client: Client, sql: String) RETURNS Void -> - RETURN; + RETURN; END -FN main() RETURNS Void -> RETURN; END +FN main() RETURNS Void -> + RETURN; +END TEST ClientStubs DO WHEN "stubbed query" DO diff --git a/examples/web_crawler/src/main.cht b/examples/web_crawler/src/main.cht index b11a2a175..b4a2c1bff 100644 --- a/examples/web_crawler/src/main.cht +++ b/examples/web_crawler/src/main.cht @@ -16,82 +16,85 @@ EXTERN FN freeString(ptr: String, len: Int64) 
RETURNS Void FROM "http"; # Page: a fetched page with its URL, title, and discovered links. STRUCT Page { - url: String, - title: String, - linkCount: Int64 + url: String, + title: String, + linkCount: Int64 } # Extract the <title>...</title> from HTML. FN extractTitle(html: String) RETURNS !String -> - startTag = indexOf(html, "<title>") OR -1; - IF startTag >= 0 THEN - titleStart = startTag + 7; - rest = substr(html, titleStart, html.length() - titleStart); - endTag = indexOf(rest, "</title>") OR -1; - IF endTag >= 0 THEN - RETURN substr(html, titleStart, endTag); - END + startTag = html.indexOf("<title>") OR -1; + IF startTag >= 0 THEN + titleStart = startTag + 7; + rest = html.substr(titleStart, html.length() - titleStart); + endTag = rest.indexOf("</title>") OR -1; + IF endTag >= 0 THEN + RETURN html.substr(titleStart, endTag); END - RETURN "Untitled"; + END + RETURN "Untitled"; END # Count links in HTML. FN countLinks(html: String) RETURNS Int64 -> - RETURN countOccurrences(html, "href="); + RETURN html.countOccurrences("href="); END # Fetch a page and extract metadata. 
-FN fetchPage(baseUrl: String, path: String) RETURNS !Page EFFECTS REENTRANT -> - url = baseUrl + path; - html = httpGet(url); - title = extractTitle(html); - links = countLinks(html); - RETURN Page{ url: COPY path, title: COPY title, linkCount: links }; +FN fetchPage(baseUrl: String, path: String) + RETURNS !Page + EFFECTS REENTRANT +-> + url = baseUrl + path; + html = httpGet(url); + title = extractTitle(html); + links = countLinks(html); + RETURN Page{ url: COPY path, title: COPY title, linkCount: links }; END FN main() RETURNS Void -> - # Start the canned test server on port 19876 - startTestServer(19876); + # Start the canned test server on port 19876 + startTestServer(19_876); - # Give the server thread a moment to bind - # (cooperative scheduler: this is a no-op yield) + # Give the server thread a moment to bind + # (cooperative scheduler: this is a no-op yield) - baseUrl = "localhost:19876"; - baseUrlAbout = COPY baseUrl; - baseUrlBlog = COPY baseUrl; + baseUrl = "localhost:19876"; + baseUrlAbout = COPY baseUrl; + baseUrlBlog = COPY baseUrl; - # Fetch 3 pages concurrently using BG fibers - home: ~Page = BG { @service -> - fetchPage(baseUrl, "/"); - }; - about: ~Page = BG { @service -> - fetchPage(baseUrlAbout, "/about"); - }; - blog: ~Page = BG { @service -> - fetchPage(baseUrlBlog, "/blog"); - }; + # Fetch 3 pages concurrently using BG fibers + home: ~Page = BG {@service -> + fetchPage(baseUrl, "/"); + }; + about: ~Page = BG {@service -> + fetchPage(baseUrlAbout, "/about"); + }; + blog: ~Page = BG {@service -> + fetchPage(baseUrlBlog, "/blog"); + }; - # Await all results - homePage: Page = NEXT home; - aboutPage: Page = NEXT about; - blogPage: Page = NEXT blog; + # Await all results + homePage = NEXT home; + aboutPage = NEXT about; + blogPage = NEXT blog; - # Verify results - print("Home: ${homePage.title} (${homePage.linkCount.toString()} links)"); - print("About: ${aboutPage.title} (${aboutPage.linkCount.toString()} links)"); - print("Blog: 
${blogPage.title} (${blogPage.linkCount.toString()} links)"); + # Verify results + print("Home: ${homePage.title} (${homePage.linkCount.toString()} links)"); + print("About: ${aboutPage.title} (${aboutPage.linkCount.toString()} links)"); + print("Blog: ${blogPage.title} (${blogPage.linkCount.toString()} links)"); - ASSERT homePage.title == "Home", "home title"; - ASSERT aboutPage.title == "About", "about title"; - ASSERT blogPage.title == "Blog", "blog title"; + ASSERT homePage.title == "Home", "home title"; + ASSERT aboutPage.title == "About", "about title"; + ASSERT blogPage.title == "Blog", "blog title"; - # Home has 2 links (about + blog), about has 1, blog has 2 - ASSERT homePage.linkCount == 2, "home links"; - ASSERT aboutPage.linkCount == 1, "about links"; - ASSERT blogPage.linkCount == 2, "blog links"; + # Home has 2 links (about + blog), about has 1, blog has 2 + ASSERT homePage.linkCount == 2, "home links"; + ASSERT aboutPage.linkCount == 1, "about links"; + ASSERT blogPage.linkCount == 2, "blog links"; - # Stop the test server - stopTestServer(); + # Stop the test server + stopTestServer(); - print("All crawler tests passed!"); + print("All crawler tests passed!"); END diff --git a/spec/clear_fmt_spec.rb b/spec/clear_fmt_spec.rb index 22c418498..48811f032 100644 --- a/spec/clear_fmt_spec.rb +++ b/spec/clear_fmt_spec.rb @@ -290,6 +290,8 @@ def write(name, content) end it "auto-inserts digit separators for decimal ints > 4 digits" do + # Type annotations dropped by LintFixRewriter (redundant with the + # literal RHS). Numeric separators still apply. 
src = <<~CLEAR FN main() RETURNS Int64 -> a: Int64 = 1000000; @@ -301,10 +303,10 @@ def write(name, content) CLEAR path = write("n_int.cht", src) out, _, _ = run_fmt("--no-warn", "--stdout", path) - expect(out).to include("a: Int64 = 1_000_000;") - expect(out).to include("b: Int64 = 12_345;") - expect(out).to include("c: Int64 = 1234;") - expect(out).to include("d: Int64 = 42;") + expect(out).to include("a = 1_000_000;") + expect(out).to include("b = 12_345;") + expect(out).to include("c = 1234;") + expect(out).to include("d = 42;") end it "auto-inserts digit separators for floats with >4 digits on either side" do @@ -318,9 +320,9 @@ def write(name, content) CLEAR path = write("n_float.cht", src) out, _, _ = run_fmt("--no-warn", "--stdout", path) - expect(out).to include("a: Float64 = 3.141_592_653_589_793;") - expect(out).to include("b: Float64 = 1_000_000.5;") - expect(out).to include("c: Float64 = 1.5;") + expect(out).to include("a = 3.141_592_653_589_793;") + expect(out).to include("b = 1_000_000.5;") + expect(out).to include("c = 1.5;") end it "preserves type suffixes through separator rewriting" do @@ -331,11 +333,13 @@ def write(name, content) end it "canonicalizes existing separators (1_0_0_0 -> 1000, 1000000 -> 1_000_000)" do + # `: Int64` annotations stripped by LintFixRewriter (redundant + # with the literal RHS). Numeric separator rules still apply. 
src = "FN main() RETURNS Int64 ->\n a: Int64 = 1_0_0_0;\n b: Int64 = 1000000;\n RETURN a;\nEND\n" path = write("n_canon.cht", src) out, _, _ = run_fmt("--no-warn", "--stdout", path) - expect(out).to include("a: Int64 = 1000;") - expect(out).to include("b: Int64 = 1_000_000;") + expect(out).to include("a = 1000;") + expect(out).to include("b = 1_000_000;") end it "leaves hex / oct / bin literals untouched" do @@ -477,10 +481,12 @@ def write(name, content) end it "strips _f64 suffix on decimal literals (default-type elision)" do + # `: Float64` annotation also dropped by LintFixRewriter when + # the literal RHS already determines the type. src = "FN main() RETURNS Void ->\n a: Float64 = 3.14_f64;\n RETURN;\nEND\n" path = write("f64suffix.cht", src) out, _, _ = run_fmt("--no-warn", "--stdout", path) - expect(out).to include("a: Float64 = 3.14;") + expect(out).to include("a = 3.14;") expect(out).not_to include("_f64") end @@ -906,6 +912,607 @@ def tokenize(src) end end + describe "BG body wrapping with nested DO/THEN blocks" do + # Found by FmtVerifier on benchmarks/concurrent/14_nested_lock. + # Three related fmt bugs that combined to crush the body of + # `BG { @parallel -> FOR i IN ... DO ...; ...; END }` onto one + # 600-char line: + # 1. count_statements_in_block didn't track DO/THEN/END + # nesting, so inner `;` was mis-counted as multi-statement + # at the BG level and triggered a wrong wrap. + # 2. expand_bg_do_blocks's wrap was skipped for single-statement + # bodies even when the single statement was a multi-line + # block (FOR/IF/WHILE), leaving the body inline. + # 3. expand_method_chains stripped ALL NLs from chain segments, + # including those nested inside argument-position + # `BG { ... }` blocks. 
+ + it "wraps BG body when single statement is a FOR block" do + src = <<~CLEAR + FN main() RETURNS Void -> + task = BG { @parallel -> + FOR i IN (0 ..< 10) DO + MUTABLE a = i; + MUTABLE b = i + 1; + END + }; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to match(/FOR i IN .+ DO\n/) + expect(out).to match(/MUTABLE a = i;\n/) + expect(out).to match(/MUTABLE b = i \+ 1;\n/) + end + + it "preserves NLs inside BG-arg of a method chain" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE futures: ~Void[]@list = []; + futures.append(BG { + FOR i IN (0 ..< 5) DO + MUTABLE x = i; + MUTABLE y = i; + END + }); + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to match(/FOR i IN .+ DO\n/) + expect(out).to match(/MUTABLE x = i;\n/) + expect(out).to match(/MUTABLE y = i;\n/) + end + + it "leaves inner one-liner IF intact when expanding outer FOR" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE a = 3; + MUTABLE b = 5; + MUTABLE lo = 0; + MUTABLE hi = 0; + FOR i IN (0 ..< 1) DO IF a > b THEN lo = b; hi = a; END END + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("IF a > b THEN") + expect(out).to match(/IF a > b THEN\s+lo = b;\s+hi = a;\s+END/m) + end + end + + describe "MUTABLE never reassigned" do + it "drops MUTABLE when the binding is read but never reassigned" do + # The annotator only emits the MUTABLE-unused finding when the + # binding is actually READ. An entirely unused binding triggers + # the "unused variable" warning instead (different lint). 
+ src = <<~CLEAR + FN main() RETURNS Int64 -> + MUTABLE x = 5; + RETURN x; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("x = 5;") + expect(out).not_to include("MUTABLE") + end + + it "keeps MUTABLE when the binding is later reassigned" do + src = <<~CLEAR + FN main() RETURNS Int64 -> + MUTABLE x = 5; + x = 7; + RETURN x; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("MUTABLE x = 5;") + end + end + + describe "redundant `: Type` annotations" do + it "drops `: Int64` when the literal RHS already determines it" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE total: Int64 = 0; + total = 5; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("MUTABLE total = 0;") + end + + it "drops `: Float64` when assigned a float literal" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE s: Float64 = 0.0; + s = 1.5; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("MUTABLE s = 0.0;") + end + + it "keeps decorated types (HashMap) — not redundant" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE m: HashMap = {}; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include(": HashMap") + end + end + + describe "generic-type bracket spacing" do + it "tightens `HashMap < T, U >` to `HashMap`" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE m: HashMap < Int64, Float64 > = {}; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("HashMap") + end + + it "normalizes `HashMap< T,U >` (mixed spacing) to `HashMap`" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE m: HashMap< Int64,Float64 > = {}; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("HashMap") + end + + it "leaves comparison `a < b` with spaces (not a generic)" do + src = <<~CLEAR + FN main() RETURNS Void -> + a = 1; + b = 2; + IF a < b -> RETURN; + RETURN; + END + CLEAR + out = Formatter.format(src) + 
expect(out).to include("a < b") + end + end + + describe "struct-literal brace attach + padding" do + it "attaches `Type` to `{` and pads inside" do + src = <<~CLEAR + STRUCT N { + val: Int64, + } + FN main() RETURNS Void -> + x = N { val: 5 }; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("N{ val: 5 }") + end + + it "leaves STRUCT body declaration with space before `{`" do + src = <<~CLEAR + STRUCT Foo { + a: Int64, + } + FN main() RETURNS Void -> + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("STRUCT Foo {\n") + end + + it "keeps empty struct literal tight (`Foo{}`)" do + src = <<~CLEAR + STRUCT Empty {} + FN main() RETURNS Void -> + x = Empty{}; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("Empty{};") + expect(out).not_to include("Empty{ }") + end + end + + describe "`END` always opens a new line" do + # Repro from examples/json_parser/jsonToString JBool arm. An inner + # `IF ... END RETURN ...` left `END RETURN "false";` glued together + # because no walker forced a break between END and the trailing + # statement. The post-pass `nl_after_end` makes END a hard line + # boundary except when followed immediately by `)`, `]`, `}`, or `;`. 
+ + it "splits `END RETURN` after an inner IF inside a MATCH arm" do + src = <<~CLEAR + ENUM B { T, F } + FN main() RETURNS String -> + PARTIAL MATCH B.T START + B.T AS b -> + IF b == B.T THEN + RETURN "true"; + END RETURN "false";, + B.F -> RETURN "no"; + END + END + CLEAR + out = Formatter.format(src) + expected = " IF b == B.T THEN\n" \ + " RETURN \"true\";\n" \ + " END\n" \ + " RETURN \"false\";,\n" + expect(out).to include(expected) + expect(out).not_to match(/END RETURN/) + end + + it "leaves `END }` and `END );` alone (close-brackets stay attached)" do + src = <<~CLEAR + FN main() RETURNS Void -> + futures.append(BG { + FOR i IN (0 ..< 5) DO + i; + END + }); + RETURN; + END + CLEAR + out = Formatter.format(src) + # END followed by `}` keeps `}` on the next render line via + # CLOSE_LEADING; we just need to confirm we didn't insert a + # spurious blank line between END and `}` either. + expect(out).not_to match(/END\s+\n\s*\n\s*\}/) + end + end + + describe "BG body with leading `@strategy ->` keeps indent balanced" do + # Repro from benchmarks/concurrent/09_kvstore. `BG { @parallel -> ... }` + # has a `->` whose OPEN_TERMINAL render rule raises body depth, but + # there's no END inside the BG to lower it back; only `}` closes, + # and that's a single -1. Without compensation, every statement + # after the BG ends up one column too deep, cascading off the + # enclosing FN body's indent. + + it "lifts depth back to the BG's `{` level before `}`" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE futures: ~Int64[]@list = []; + futures.append(BG { @parallel -> + MUTABLE total: Int64 = 0; + WHILE total < 10 DO + total += 1; + END + total; + }); + RETURN; + END + CLEAR + out = Formatter.format(src) + # The `}` (closes BG) sits at call-args body depth, `);` at FN + # body depth, `RETURN;` at FN body depth, last `END` at col 0. 
+ expect(out).to match(/^ \}$/) + expect(out).to match(/^ \);$/) + expect(out).to match(/^ RETURN;$/) + expect(out).to match(/^END$/) + end + end + + describe "multi-branch IF/ELSE_IF chain with same-line branch bodies" do + # Repro from examples/json_parser/parseString. Each branch is in + # one-liner form (`IF cond THEN stmt;`) but the IF/ELSE_IF/ELSE/END + # spans multiple source lines. The old layout staggered ELSE_IF + # below IF because IF's `;`-terminated line did not bump indent + # (THEN was mid-line, not OPEN_TERMINAL), then ELSE_IF outdented + # against the (already-low) depth. + + it "expands each branch's inline body onto its own line" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE x = ""; + IF x == "a" THEN x = "1"; + ELSE_IF x == "b" THEN x = "2"; + ELSE_IF x == "c" THEN x = "3"; + ELSE x = "0"; + END + RETURN; + END + CLEAR + out = Formatter.format(src) + expected = " IF x == \"a\" THEN\n" \ + " x = \"1\";\n" \ + " ELSE_IF x == \"b\" THEN\n" \ + " x = \"2\";\n" \ + " ELSE_IF x == \"c\" THEN\n" \ + " x = \"3\";\n" \ + " ELSE\n" \ + " x = \"0\";\n" \ + " END\n" + expect(out).to include(expected) + end + + it "leaves a single-arm one-liner alone" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE x = 0; + IF x == 1 THEN x = 2; END + RETURN; + END + CLEAR + out = Formatter.format(src) + # Existing expand_if_while_for still expands true single-line + # one-liners — that's the established convention. + expect(out).to include("IF x == 1 THEN\n x = 2;\n END") + end + end + + describe "capability chain with parenthesized argument" do + # Repro from benchmarks/concurrent/09_kvstore. `@sharded(8):locked` + # was rendering as `@sharded(8): locked` because the chain-detector + # didn't skip the `(...)` between segments — the walk-back hit `)` + # first and bailed. 
+ + it "keeps `:` flush after a `@cap(N)` segment" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE map: HashMap @sharded(8):locked = {}; + map["k"] = "v"; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("@sharded(8):locked") + expect(out).not_to include("@sharded(8): locked") + end + + it "keeps `:` flush across a 3-segment chain with a paren arg" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE map: HashMap @shared:sharded(128):locked = {}; + map["k"] = "v"; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("@shared:sharded(128):locked") + end + end + + describe "MATCH block layout" do + # Repro from examples/litedb. Multi-line MATCH was producing arms + # at the same column as `START` and END at column 0 because: + # - `START` did not bump indent for the body + # - the body never closed back to MATCH-level + # - DEFAULT in arm position was outdenting like CATCH/DEFAULT in + # a TRY block (it shouldn't — in MATCH it's a pattern at arm- + # depth) + # New layout: arms at +1 from `MATCH ... START`, multi-statement + # arm bodies at +2 with the trailing `,` flush against the last + # `;`, END at MATCH-line indent. DEFAULT renders at arm-depth. 
+ + it "indents arms at +1 and END at the MATCH-line level" do + src = <<~CLEAR + ENUM Op { Get, Put } + FN main() RETURNS Void -> + MUTABLE n = ""; + PARTIAL MATCH Op.Get START + Op.Get -> n = "g";, + Op.Put -> n = "p"; + END + RETURN; + END + CLEAR + out = Formatter.format(src) + expected = " PARTIAL MATCH Op.Get START\n" \ + " Op.Get -> n = \"g\";,\n" \ + " Op.Put -> n = \"p\";\n" \ + " END\n" + expect(out).to include(expected) + end + + it "expands a single-line MATCH into one-arm-per-line layout" do + src = <<~CLEAR + ENUM Op { Get, Put } + FN main() RETURNS Void -> + MUTABLE n = 0; + PARTIAL MATCH Op.Get START Op.Get -> n = 1;, DEFAULT -> n = 0; END + RETURN; + END + CLEAR + out = Formatter.format(src) + expected = " PARTIAL MATCH Op.Get START\n" \ + " Op.Get -> n = 1;,\n" \ + " DEFAULT -> n = 0;\n" \ + " END\n" + expect(out).to include(expected) + end + + it "double-indents multi-statement arm bodies and lifts the comma" do + src = <<~CLEAR + ENUM Op { Get, Put } + FN main() RETURNS Void -> + MUTABLE x = 0; + MUTABLE y = 0; + PARTIAL MATCH Op.Get START + Op.Get -> + x = 1; + y = 2;, + Op.Put -> x = 3; + END + RETURN; + END + CLEAR + out = Formatter.format(src) + expected = " PARTIAL MATCH Op.Get START\n" \ + " Op.Get ->\n" \ + " x = 1;\n" \ + " y = 2;,\n" \ + " Op.Put -> x = 3;\n" \ + " END\n" + expect(out).to include(expected) + expect(out).not_to match(/y = 2;\n\s*,/) + end + + it "renders DEFAULT in MATCH at arm-depth (no CATCH-style outdent)" do + src = <<~CLEAR + ENUM Op { Get, Put } + FN main() RETURNS Void -> + MUTABLE n = 0; + PARTIAL MATCH Op.Get START + Op.Get -> n = 1;, + DEFAULT -> + n = 0; + n = n + 1; + END + RETURN; + END + CLEAR + out = Formatter.format(src) + expected = " PARTIAL MATCH Op.Get START\n" \ + " Op.Get -> n = 1;,\n" \ + " DEFAULT ->\n" \ + " n = 0;\n" \ + " n = n + 1;\n" \ + " END\n" + expect(out).to include(expected) + end + end + + describe "MATCH arm `;,` separator stays attached" do + # Repro from examples/litedb: `Op.Get -> 
opName = "get";,` was + # getting torn into `Op.Get -> opName = "get";` then a lone `,` + # on its own line because `;` at depth-0 forced a newline before + # the trailing comma was emitted. + + it "keeps `;,` together inside a MATCH inside a FN body" do + src = <<~CLEAR + ENUM Op { Get, Put } + FN main() RETURNS Void -> + MUTABLE n = ""; + PARTIAL MATCH Op.Get START + Op.Get -> n = "get";, + Op.Put -> n = "put"; + END + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include(%(n = "get";,)) + expect(out).not_to match(/;\n\s*,/) + end + end + + describe "inline capability chain (`@cap:cap`)" do + # Repro from examples/litedb: `... @shared: locked` was rendering + # with a space after the `:` because the default `:` rule (no space + # before, space after) treated it like a type annotation. The chain + # form keeps every segment flush. + + it "removes the space in `@shared:locked`" do + src = <<~CLEAR + STRUCT S { v: Int64 } + FN main() RETURNS Void -> + x = S{ v: 0 } @shared: locked; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("@shared:locked;") + expect(out).not_to include("@shared: locked") + end + + it "removes spaces across a 3-segment chain `@pool:shared:locked`" do + src = <<~CLEAR + STRUCT Env { v: Int64 } + FN main() RETURNS Void -> + MUTABLE pool: Env[10] @pool: shared: locked = []; + RETURN; + END + CLEAR + out = Formatter.format(src) + expect(out).to include("@pool:shared:locked") + expect(out).not_to include("@pool: shared") + expect(out).not_to include("shared: locked") + end + + it "leaves a normal type annotation `: Type` alone" do + src = <<~CLEAR + STRUCT Box { v: Int64 } + FN take(b: Box) RETURNS Int64 -> RETURN b.v; END + CLEAR + out = Formatter.format(src) + expect(out).to include("FN take(b: Box)") + expect(out).to include("v: Int64") + end + + it "leaves a `REQUIRES x: LOCKED` clause alone" do + src = <<~CLEAR + STRUCT Counter { v: Int64 } + FN incr!(MUTABLE c: Counter) REQUIRES c: LOCKED 
-> RETURN; END + CLEAR + out = Formatter.format(src) + expect(out).to include("REQUIRES c: LOCKED") + end + end + + describe "leading-comment internal whitespace" do + # Found by FmtVerifier sweep on benchmarks/sequential/11_pipeline_overhead. + # The original `canonicalize_comment` rule was "exactly one space after + # `#`," which destroyed deliberate prose indentation in ASCII tables, + # code samples, and indented bullet lists. Now: ensure AT LEAST one + # space, preserve user-typed extra spaces. + + it "preserves multi-space comment indentation" do + src = <<~CLEAR + # Section A + # subitem 1 + # subitem 2 + + FN main() RETURNS Void -> RETURN; END + CLEAR + out = Formatter.format(src) + expect(out).to include("# subitem 1") + expect(out).to include("# subitem 2") + end + + it "still inserts a space when the user wrote `#text` with no separator" do + src = <<~CLEAR + #notice + FN main() RETURNS Void -> RETURN; END + CLEAR + out = Formatter.format(src) + expect(out).to include("# notice") + end + + it "preserves an ASCII table laid out via comment indent" do + src = <<~CLEAR + # COL A | COL B + # row 1 | x + # row 2 | y + + FN main() RETURNS Void -> RETURN; END + CLEAR + out = Formatter.format(src) + expect(out).to include("# COL A | COL B") + expect(out).to include("# row 1 | x") + expect(out).to include("# row 2 | y") + end + + it "leaves an empty `#` comment untouched" do + src = "# header\n#\n# more\nFN main() RETURNS Void -> RETURN; END\n" + out = Formatter.format(src) + expect(out).to include("\n#\n") + end + end + it "is idempotent on the whole transpile-tests corpus" do root = File.expand_path("../transpile-tests", __dir__) files = Dir.glob(File.join(root, "**", "*.cht")) diff --git a/spec/fmt_verifier_spec.rb b/spec/fmt_verifier_spec.rb new file mode 100644 index 000000000..6e2db83a3 --- /dev/null +++ b/spec/fmt_verifier_spec.rb @@ -0,0 +1,194 @@ +require "rspec" +require "tmpdir" +require_relative "../src/tools/fmt_verifier" + +# FmtVerifier compares 
the Zig emitted from a .cht file before and +# after fmt to confirm fmt is semantics-preserving. Heavy parts +# (full transpile pipeline, real Formatter) are stubbed here so the +# tests focus on the equivalence-check logic itself: a normalized +# byte-compare of the two Zig outputs, with `// CLR:N` line markers +# stripped before comparing. + +RSpec.describe FmtVerifier do + around do |ex| + Dir.mktmpdir do |dir| + @tmp = dir + ex.run + end + end + + def write_cht(name, content = "FN main() RETURNS Void -> RETURN; END\n") + path = File.join(@tmp, name) + File.write(path, content) + path + end + + describe ".verify — equivalence logic" do + it "returns ok=true when both transpilations produce the same Zig" do + path = write_cht("a.cht") + allow(FmtVerifier).to receive(:transpile).and_return("pub fn clearMain() void {}\n") + allow(Formatter).to receive(:format).and_return("formatted source") + + result = FmtVerifier.verify(path) + + expect(result.ok).to be true + expect(result.error).to be_nil + expect(result.diff_excerpt).to be_nil + end + + it "returns ok=false with a diff excerpt when Zig outputs differ" do + path = write_cht("a.cht") + call_count = 0 + allow(FmtVerifier).to receive(:transpile) do + call_count += 1 + call_count == 1 ? "before line\n" : "after line\n" + end + allow(Formatter).to receive(:format).and_return("fmt'd source") + + result = FmtVerifier.verify(path) + + expect(result.ok).to be false + expect(result.error).to be_nil + expect(result.diff_excerpt).to include("before line") + expect(result.diff_excerpt).to include("after line") + end + + it "ignores `// CLR:N` line markers when comparing" do + path = write_cht("a.cht") + before_zig = <<~ZIG + // CLR:5 + const x = 1; + // CLR:6 + const y = 2; + ZIG + after_zig = <<~ZIG + // CLR:8 + const x = 1; + // CLR:9 + const y = 2; + ZIG + call_count = 0 + allow(FmtVerifier).to receive(:transpile) do + call_count += 1 + call_count == 1 ? 
before_zig : after_zig + end + allow(Formatter).to receive(:format).and_return("fmt'd") + + result = FmtVerifier.verify(path) + expect(result.ok).to be true + end + + it "still flags genuine code-line differences even alongside CLR markers" do + path = write_cht("a.cht") + before_zig = <<~ZIG + // CLR:5 + const x = 1; + ZIG + after_zig = <<~ZIG + // CLR:8 + const x = 2; + ZIG + call_count = 0 + allow(FmtVerifier).to receive(:transpile) do + call_count += 1 + call_count == 1 ? before_zig : after_zig + end + allow(Formatter).to receive(:format).and_return("fmt'd") + + result = FmtVerifier.verify(path) + expect(result.ok).to be false + expect(result.diff_excerpt).to include("const x = 1;") + expect(result.diff_excerpt).to include("const x = 2;") + end + + it "captures errors with class+message when transpile raises" do + path = write_cht("a.cht") + allow(FmtVerifier).to receive(:transpile) + .and_raise(ArgumentError, "missing source dir") + + result = FmtVerifier.verify(path) + expect(result.ok).to be false + expect(result.error).to include("ArgumentError") + expect(result.error).to include("missing source dir") + expect(result.diff_excerpt).to be_nil + end + + it "captures errors when Formatter.format raises" do + path = write_cht("a.cht") + allow(FmtVerifier).to receive(:transpile).and_return("zig output") + allow(Formatter).to receive(:format).and_raise(Formatter::Error, "parse error") + + result = FmtVerifier.verify(path) + expect(result.ok).to be false + expect(result.error).to include("Formatter::Error") + expect(result.error).to include("parse error") + end + end + + describe ".normalize_for_compare" do + it "strips standalone `// CLR:N` lines" do + input = "// CLR:5\nconst x = 1;\n// CLR:6\nconst y = 2;\n" + expected = "const x = 1;\nconst y = 2;\n" + expect(FmtVerifier.normalize_for_compare(input)).to eq(expected) + end + + it "leaves trailing inline `// CLR:N` markers alone (only matches whole-line)" do + # Inline form `code // CLR:7` is intentionally kept — 
the + # whole-line normalization targets only the standalone form + # the emitter actually produces. + input = "const x = 1; // CLR:5\n" + expect(FmtVerifier.normalize_for_compare(input)).to eq(input) + end + + it "is a no-op on Zig output without CLR markers" do + input = "pub fn main() void {\n return;\n}\n" + expect(FmtVerifier.normalize_for_compare(input)).to eq(input) + end + end + + describe ".verify_dir" do + it "returns one Result per .cht file under the directory" do + sub = File.join(@tmp, "nested") + FileUtils.mkdir_p(sub) + a = File.join(@tmp, "a.cht") + b = File.join(sub, "b.cht") + File.write(a, "FN main() RETURNS Void -> RETURN; END\n") + File.write(b, "FN main() RETURNS Void -> RETURN; END\n") + + allow(FmtVerifier).to receive(:transpile).and_return("zig\n") + allow(Formatter).to receive(:format).and_return("fmt\n") + + results = FmtVerifier.verify_dir(@tmp) + expect(results.length).to eq(2) + expect(results.map(&:path)).to include(a, b) + expect(results).to all(have_attributes(ok: true)) + end + end + + describe ".report" do + it "returns 0 fail count when all results are OK" do + results = [ + FmtVerifier::Result.new("a.cht", true, nil, nil), + FmtVerifier::Result.new("b.cht", true, nil, nil), + ] + io = StringIO.new + expect(FmtVerifier.report(results, io: io)).to eq(0) + expect(io.string).to include("2 passed, 0 failed") + end + + it "returns the count of failures and prints details" do + results = [ + FmtVerifier::Result.new("ok.cht", true, nil, nil), + FmtVerifier::Result.new("bad.cht", false, nil, "--- a\n+++ b\n@@ ...\n"), + FmtVerifier::Result.new("err.cht", false, "RuntimeError: boom", nil), + ] + io = StringIO.new + expect(FmtVerifier.report(results, io: io)).to eq(2) + expect(io.string).to include("ok.cht") + expect(io.string).to include("bad.cht") + expect(io.string).to include("err.cht") + expect(io.string).to include("RuntimeError: boom") + expect(io.string).to include("1 passed, 2 failed") + end + end +end diff --git 
a/spec/lint_fix_rewriter_spec.rb b/spec/lint_fix_rewriter_spec.rb new file mode 100644 index 000000000..992ec590b --- /dev/null +++ b/spec/lint_fix_rewriter_spec.rb @@ -0,0 +1,197 @@ +require "rspec" +require_relative "../src/tools/lint_fix_rewriter" + +# Direct unit tests for LintFixRewriter — the source-level pre-pass +# that drops unused MUTABLE keywords and redundant `: Type` +# annotations during fmt. End-to-end tests through Formatter.format +# live in spec/clear_fmt_spec.rb; this file targets the rewriter +# in isolation so failures point at the right code. + +RSpec.describe LintFixRewriter do + def rw(src) + LintFixRewriter.rewrite(src) + end + + describe "robustness on broken source" do + it "returns source unchanged when annotation fails" do + # `Node{}` with a required `val` field will raise a CompilerError. + # The rewriter must still return the source — fmt has to format + # files with errors. + src = <<~CLEAR + STRUCT Node { + val: Int64, + } + FN main() RETURNS Void -> + x = Node{}; + RETURN; + END + CLEAR + expect(rw(src)).to eq(src) + end + + it "returns source unchanged on a parse error" do + src = "FN main() RETURNS Void ->\n garbled (\n" + expect(rw(src)).to eq(src) + end + end + + describe "MUTABLE-never-reassigned drop" do + it "drops MUTABLE on a read-but-never-reassigned binding" do + src = <<~CLEAR + FN main() RETURNS Int64 -> + MUTABLE n = 5; + RETURN n; + END + CLEAR + expect(rw(src)).to include("n = 5;") + expect(rw(src)).not_to include("MUTABLE") + end + + it "keeps MUTABLE when the binding is reassigned" do + src = <<~CLEAR + FN main() RETURNS Int64 -> + MUTABLE n = 5; + n = 7; + RETURN n; + END + CLEAR + expect(rw(src)).to include("MUTABLE n = 5;") + end + end + + describe "redundant `: Type` annotation drop" do + it "drops `: Int64` when assigned an integer literal" do + src = <<~CLEAR + FN main() RETURNS Int64 -> + MUTABLE total: Int64 = 0; + total = 5; + RETURN total; + END + CLEAR + out = rw(src) + expect(out).to include("MUTABLE 
total = 0;") + end + + it "drops `: Float64` when assigned a float literal" do + src = <<~CLEAR + FN main() RETURNS Float64 -> + MUTABLE s: Float64 = 0.0; + s = 1.5; + RETURN s; + END + CLEAR + out = rw(src) + expect(out).to include("MUTABLE s = 0.0;") + end + + it "keeps `: HashMap` (decorated type)" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE m: HashMap = {}; + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include(": HashMap") + end + + it "keeps `: Float64[]@list` (collection type)" do + src = <<~CLEAR + FN main() RETURNS Void -> + MUTABLE xs: Float64[]@list = []; + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include(": Float64[]@list") + end + + it "keeps `: ?Int64` (optional)" do + src = <<~CLEAR + FN main() RETURNS Void -> + x: ?Int64 = NIL; + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include(": ?Int64") + end + end + + describe "safety against false-positive MUTABLE drops" do + # The annotator's MUTABLE-never-reassigned lint doesn't propagate + # "mutably borrowed via callee" through BG-block captures — + # dropping MUTABLE there breaks the next compile (the param's + # mutability check fires at the call site). Defensively skip + # MUTABLE-drop for any binding whose name appears inside a BG + # block until the annotator is fixed. 
+ + it "keeps MUTABLE on a binding referenced inside a BG block" do + src = <<~CLEAR + FN bar(MUTABLE xs: Int64[]) RETURNS Void -> + xs.append(1); + RETURN; + END + + FN main() RETURNS Void -> + MUTABLE arr: Int64[] = [1_i64, 2_i64]; + MUTABLE tasks: ~Void[]@list = []; + tasks.append(BG { bar(arr); }); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include("MUTABLE arr") + end + + it "drops MUTABLE normally on bindings NOT referenced inside any BG block" do + src = <<~CLEAR + FN main() RETURNS Int64 -> + MUTABLE n = 42; + RETURN n; + END + CLEAR + out = rw(src) + expect(out).to include("n = 42;") + expect(out).not_to include("MUTABLE") + end + end + + describe "redundant `: Type` annotation drop — sync awareness" do + # `String@raw` and `String` resolve to the same base type but use + # different indexing (byte vs UTF-8 codepoint). Dropping + # `: String@raw` silently changes semantics. The rewriter's + # decoration check uses `#sync` directly rather than `any_sync?` + # (the latter excludes `:raw` since it's a data-access mode). 
+ + it "keeps `: String@raw` annotation (sync stamp = decoration)" do + src = <<~CLEAR + FN tcpRead(fd: Int64) RETURNS String@raw -> + RETURN "hi"; + END + FN main() RETURNS Void -> + data: String@raw = tcpRead(0_i64); + print(data); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include(": String@raw") + end + end + + describe "both rules combined" do + it "drops MUTABLE AND `: Type` independently when both apply" do + src = <<~CLEAR + FN main() RETURNS Int64 -> + MUTABLE n: Int64 = 5; + RETURN n; + END + CLEAR + out = rw(src) + expect(out).to include("n = 5;") + expect(out).not_to include("MUTABLE") + expect(out).not_to include(": Int64") + end + end +end diff --git a/spec/method_rewriter_spec.rb b/spec/method_rewriter_spec.rb index 97177b907..806b1ba1a 100644 --- a/spec/method_rewriter_spec.rb +++ b/spec/method_rewriter_spec.rb @@ -267,4 +267,129 @@ def rw(src) expect(rw(src)).to include("xs.push(5, 7);") end end + + describe "paren-wrapping the receiver to preserve precedence" do + # Found by FmtVerifier on benchmarks/sequential/11_pipeline_overhead. + # `toFloat(state MOD 1000)` was being rewritten to + # `state MOD 1000.toFloat()`, which Zig parses as + # `state MOD (1000.toFloat())` — integer mod silently became float + # mod. The first arg must be paren-wrapped when its top-level AST + # shape would bind looser than `.method()`. + + it "wraps a binary-op first arg" do + # toFloat is `is_method: true` in stdlib; rewriter picks it up + # without a METHOD declaration in the source. 
+ src = <<~CLEAR + FN main() RETURNS Void -> + state: Int64 = 42; + val = toFloat(state MOD 1000); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include("(state MOD 1000).toFloat()") + expect(out).not_to include("state MOD 1000.toFloat()") + end + + it "wraps a unary-op first arg" do + src = <<~CLEAR + FN main() RETURNS Void -> + n: Int64 = 5; + val = toFloat(-n); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include("(-n).toFloat()") + end + + it "does not wrap an identifier first arg" do + src = <<~CLEAR + FN main() RETURNS Void -> + n: Int64 = 5; + val = toFloat(n); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include("n.toFloat()") + expect(out).not_to include("(n).toFloat()") + end + + it "does not wrap a method-chain first arg" do + src = <<~CLEAR + FN main() RETURNS Void -> + xs: Int64[] = [1_i64, 2_i64]; + n = toFloat(xs.length()); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include("xs.length().toFloat()") + expect(out).not_to include("(xs.length()).toFloat()") + end + + it "does not wrap a literal first arg" do + src = <<~CLEAR + FN main() RETURNS Void -> + val = toFloat(42_i64); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include("42_i64.toFloat()").or include("42.toFloat()") + end + + it "does not double-wrap an already-parenthesized first arg" do + src = <<~CLEAR + FN main() RETURNS Void -> + state: Int64 = 5; + val = toFloat((state + 1)); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include("(state + 1).toFloat()") + expect(out).not_to include("((state + 1)).toFloat()") + end + end + + describe "skip stdlib functions whose lowering is FSM-based" do + # Found by FmtVerifier on benchmarks/concurrent/02_concurrent_search. + # `readFile` is `is_method: true` so the rewriter would turn + # `readFile(filepath)` into `filepath.readFile()`. 
But its + # lowering reads positional args via FsmOps templates + # (`fsm_setup` in std_lib.rb) and crashes with + # "FsmOps arg index 0 out of range (0 args)" once the first + # arg has moved into the receiver slot. Detection is structural: + # `suspends: true` AND any `fsm_*` template key. + + it "leaves readFile in prefix form (FSM-lowered)" do + src = <<~CLEAR + FN main() RETURNS Void -> + path = "data/foo.txt"; + content = readFile(path); + print(content); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include("readFile(path)") + expect(out).not_to include("path.readFile()") + end + + it "still rewrites non-FSM is_method stdlib like length" do + # Sanity: only FSM-lowered entries are skipped, normal + # is_method ones still get UFCS-rewritten. + src = <<~CLEAR + FN main() RETURNS Void -> + xs: Int64[] = [1_i64, 2_i64]; + n = length(xs); + RETURN; + END + CLEAR + out = rw(src) + expect(out).to include("xs.length()") + end + end end diff --git a/spec/multi_statement_linter_spec.rb b/spec/multi_statement_linter_spec.rb new file mode 100644 index 000000000..00c461e3f --- /dev/null +++ b/spec/multi_statement_linter_spec.rb @@ -0,0 +1,95 @@ +require "rspec" +require_relative "../src/tools/multi_statement_linter" + +# `MultiStatementLinter.lint!` warns when a single source line contains +# more than one `;`-terminated statement. Surfaced via FixCollector +# during `clear fix`. No auto-fix because splitting `a; b; c;` into +# three lines requires layout judgement (indent, blank lines, comment +# placement) that fmt shouldn't make automatically. + +RSpec.describe "MultiStatementLinter.lint!" do + def lint(src) + FixCollector.enable! + MultiStatementLinter.lint!(src) + FixCollector.drain + ensure + FixCollector.disable! 
+ end + + it "warns on three statements on one line" do + src = <<~CLEAR + FN main() RETURNS Void -> + lo_stk[sp] = lo; hi_stk[sp] = pi - 1; sp += 1; + RETURN; + END + CLEAR + findings = lint(src) + expect(findings.size).to eq(1) + expect(findings.first.level).to eq(:warning) + expect(findings.first.category).to eq(:lint) + expect(findings.first.message).to match(/multiple statements/i) + end + + it "warns on two statements on one line" do + src = <<~CLEAR + FN main() RETURNS Void -> + a = 1; b = 2; + RETURN; + END + CLEAR + expect(lint(src).size).to eq(1) + end + + it "does NOT warn on a single statement per line" do + src = <<~CLEAR + FN main() RETURNS Void -> + a = 1; + b = 2; + RETURN; + END + CLEAR + expect(lint(src)).to be_empty + end + + it "does NOT warn on `;` inside a struct decl on one line" do + # Trailing `;` inside `STRUCT { ... }` is field separator, not + # statement terminator — and depth tracking ignores them anyway + # because they're inside `{ }`. + src = <<~CLEAR + STRUCT P { x: Int64, y: Int64 } + FN main() RETURNS Void -> RETURN; END + CLEAR + expect(lint(src)).to be_empty + end + + it "is a no-op when FixCollector is disabled" do + src = <<~CLEAR + FN main() RETURNS Void -> + a = 1; b = 2; + RETURN; + END + CLEAR + expect { MultiStatementLinter.lint!(src) }.not_to raise_error + expect(FixCollector.enabled?).to be false + end + + it "ignores `;` inside string literals" do + src = <<~CLEAR + FN main() RETURNS Void -> + s = "a; b; c;"; + RETURN; + END + CLEAR + expect(lint(src)).to be_empty + end + + it "ignores `;` inside comments" do + src = <<~CLEAR + FN main() RETURNS Void -> + a = 1; # legacy: a; b; c; + RETURN; + END + CLEAR + expect(lint(src)).to be_empty + end +end diff --git a/src/annotator.rb b/src/annotator.rb index aa012a935..e2b60cc55 100644 --- a/src/annotator.rb +++ b/src/annotator.rb @@ -5706,6 +5706,16 @@ def finalize_scope(node, branch: nil) next unless info.mutable next unless info.read || (info.reg&.respond_to?(:var_used) && 
info.reg.var_used) next if info.reg&.respond_to?(:var_mutated) && info.reg.var_mutated + # Also skip when the binding was passed as a MUTABLE arg to a + # callee — the binding's contents get mutated through the + # call, so the receiving function's MUTABLE-param signature + # forces the caller to keep MUTABLE on the local. function_analysis + # marks `info.mutated` (entry-level) for this case but + # intentionally does NOT set `info.reg.var_mutated` (which + # drives the Zig-level var/const choice). Without this skip, + # `clear fmt` strips MUTABLE here and the next build fails + # the param's mutability check at the call site. + next if info.respond_to?(:mutated) && info.mutated emit_mutable_unused_finding!(info.reg, name) end diff --git a/src/tools/fmt_verifier.rb b/src/tools/fmt_verifier.rb new file mode 100644 index 000000000..cd76454ea --- /dev/null +++ b/src/tools/fmt_verifier.rb @@ -0,0 +1,142 @@ +# Fmt-verifier — confirms `Formatter.format` is semantics-preserving for a +# given CLEAR source by transpiling the original AND the formatted form to +# Zig and comparing the outputs byte-for-byte. +# +# Why: the formatter operates on tokens (with three source-level pre-passes +# in LintFixRewriter, PredicateRewriter and MethodRewriter). The tests under spec/ exercise +# specific formatter rules but can't catch every pathological interaction. +# This tool gives us a much wider net: every benchmark / example / test +# .cht file becomes a "did fmt change emitted Zig?" sanity check. +# +# Usage: +# FmtVerifier.verify("benchmarks/sequential/01_call_overhead/bench.cht") +# # => Result(ok: true, ...) +# +# FmtVerifier.verify_dir("benchmarks/sequential") +# # => array of Results +# +# CLI: `clear fmt --verify <path>` wires through to this module. 
+ +require_relative '../backends/transpiler' +require_relative '../backends/importer' +require_relative 'formatter' +require 'tempfile' + +module FmtVerifier + Result = Struct.new(:path, :ok, :error, :diff_excerpt) do + def status_label + return "OK" if ok + return "ERROR" if error + "DIFFERS" + end + end + + module_function + + # Verify a single .cht file. Returns a Result struct. + # + # source_dir: directory used by the importer to resolve REQUIRE paths. + # Defaults to the file's containing directory, which is what `clear` + # itself uses when transpiling that file directly. + def verify(cht_path, source_dir: nil) + abs_path = File.expand_path(cht_path) + source_dir ||= File.dirname(abs_path) + source = File.read(abs_path) + + before = transpile(source, source_dir) + formatted = Formatter.format(source) + after = transpile(formatted, source_dir) + + norm_before = normalize_for_compare(before) + norm_after = normalize_for_compare(after) + return Result.new(cht_path, true, nil, nil) if norm_before == norm_after + Result.new(cht_path, false, nil, diff_excerpt(norm_before, norm_after)) + rescue => e + Result.new(cht_path, false, "#{e.class}: #{e.message}", nil) + end + + # Strip irrelevant differences from the emitted Zig before comparing: + # + # - `// CLR:N` line-number markers — debug metadata mapping a Zig + # statement back to the originating CLEAR source line. fmt + # intentionally rearranges source (one-liner expansion, blank-line + # normalization) so these line numbers shift; the actual code + # doesn't change. + # + # - `// CLEAR_PROFILE_TASK_SITE ...` — task-site profiling metadata + # that embeds line/column coordinates. Same shift, same non- + # semantic delta. + # + # Rule of thumb: any comment line whose only purpose is "remember + # where the emitter was when it wrote this," normalize away. 
+ def normalize_for_compare(zig_source) + zig_source + .gsub(%r{^\s*// CLR:\d+\n}, '') + .gsub(%r{^\s*// CLEAR_PROFILE_TASK_SITE\b[^\n]*\n}, '') + # Lowering-emitted guard / temp identifiers carry a numeric ID + # that depends on AST node positions. Equivalent CLEAR programs + # whose AST shifted under fmt (predicate canonicalization, + # MUTABLE drop, etc) emit the same Zig logic with different IDs. + # Normalize the IDs to a placeholder so the byte-compare tracks + # semantics, not the lowerer's internal counter. + .gsub(/__([A-Za-z]\w*?)_(\d+)(_\d+)?\b/) { "__#{$1}_N#{$3}" } + end + + # Verify every .cht file under `dir` (recursive). Useful for sweeping + # large corpora like benchmarks/ or examples/. Returns an Array of + # Results in path order. + def verify_dir(dir) + paths = Dir.glob(File.join(dir, '**', '*.cht')).sort + paths.map { |p| verify(p) } + end + + # Print a one-line summary per result and a totals footer. + # Returns the count of non-OK results so callers can use it as an + # exit code: zero on clean, positive on any failure. + def report(results, io: $stdout) + fail_count = 0 + results.each do |r| + label = r.ok ? "\e[32mOK\e[0m" : "\e[31m#{r.status_label}\e[0m" + io.puts " #{label} #{r.path}" + next if r.ok + fail_count += 1 + if r.error + io.puts " \e[33m#{r.error}\e[0m" + elsif r.diff_excerpt + r.diff_excerpt.each_line { |ln| io.puts " #{ln.chomp}" } + end + end + io.puts "" + total = results.length + if fail_count.zero? + io.puts " \e[32m#{total} passed, 0 failed.\e[0m" + else + io.puts " \e[31m#{total - fail_count} passed, #{fail_count} failed.\e[0m" + end + fail_count + end + + # ---- internals ---- + + def transpile(cheat_code, source_dir) + importer = ModuleImporter.new(base_dir: source_dir, use_mir: true) + ZigTranspiler.new(importer: importer, source_dir: source_dir).transpile(cheat_code) + end + + # Use shell `diff -u` for a familiar unified diff. 
Truncates to the + # first ~40 lines of context — enough to see what shifted, not so + # much that a sweep of N files spams the terminal. + def diff_excerpt(before, after, max_lines: 40) + Tempfile.create('before') do |bf| + Tempfile.create('after') do |af| + bf.write(before); bf.flush + af.write(after); af.flush + out = `diff -u #{bf.path} #{af.path} 2>&1` + lines = out.lines + excerpt = lines.first(max_lines).join + excerpt += " ... (#{lines.length - max_lines} more lines)\n" if lines.length > max_lines + excerpt + end + end + end +end diff --git a/src/tools/formatter.rb b/src/tools/formatter.rb index 9e30c7c87..1ae2ab7d8 100644 --- a/src/tools/formatter.rb +++ b/src/tools/formatter.rb @@ -41,6 +41,7 @@ require_relative '../ast/parser' require_relative 'method_rewriter' require_relative 'predicate_rewriter' +require_relative 'lint_fix_rewriter' require 'strscan' require 'set' @@ -71,7 +72,7 @@ class Error < StandardError; end .. ..< ..<= ..= %* %+ %- !* !+ !- ].to_set.freeze - OPEN_TERMINAL = %w[-> { THEN DO].freeze + OPEN_TERMINAL = %w[-> { THEN DO START].freeze CLOSE_LEADING = %w[END }].freeze OUTDENT_LEADING = %w[ELSE ELSE_IF CATCH DEFAULT].freeze BLANK_BEFORE = %w[CATCH DEFAULT].freeze @@ -93,11 +94,16 @@ def initialize(source) def format validate_parse! + # Lint-fix pre-pass: drop unused MUTABLE keywords and redundant + # `: Type` annotations. Runs first so subsequent rewriters see + # the cleanest source. Falls back to the original on annotation + # failure (fmt must format files with errors). + rewritten = LintFixRewriter.rewrite(@source) # Predicate canonicalization runs before METHOD-UFCS rewriting: # `x == NIL` -> `x.nil?()` may produce a new prefix call site # that MethodRewriter then converts to UFCS form. Doing them in # the reverse order would miss the second pass. 
- rewritten = PredicateRewriter.rewrite(@source) + rewritten = PredicateRewriter.rewrite(rewritten) rewritten = MethodRewriter.rewrite(rewritten) tokens = FormatLexer.new(rewritten).tokenize Emitter.new(tokens).emit @@ -262,6 +268,7 @@ def emit toks = @tokens.reject { |t| t.type == :WS } toks = collapse_newlines(toks) toks = canonicalize_numerics(toks) + toks = expand_match_blocks(toks) toks = expand_fn_blocks(toks) toks = expand_then_do_blocks(toks) toks = expand_with_blocks(toks) @@ -271,10 +278,40 @@ def emit toks = expand_bg_do_blocks(toks) toks = expand_record_types(toks) toks = expand_call_args(toks) + toks = nl_after_end(toks) toks = collapse_newlines(toks) render(toks) end + # `END` always opens a new line. Whatever follows (a continuation + # statement, an outer END, an `;`-less return-expression) belongs on + # its own line at the corresponding indent. Without this pass, an + # inner-IF inside a MATCH arm body emitted `END RETURN "false";` + # because no walker forced a break after END. (Repro: + # examples/json_parser/jsonToString JBool arm.) Excludes the case + # where END is followed by a close-bracket — `BG { ... END }` keeps + # the `}` on the next render line via CLOSE_LEADING anyway. 
+ def nl_after_end(toks) + out = [] + i = 0 + while i < toks.length + t = toks[i] + out << t + if t.type == :KEYWORD && t.raw == 'END' + k = i + 1 + k += 1 while k < toks.length && [:COMMENT].include?(toks[k].type) + if k < toks.length + nxt = toks[k] + should_nl = !(nxt.type == :NL || + (nxt.type == :SYM && [')', ']', '}', ';'].include?(nxt.raw))) + insert_nl(out) if should_nl + end + end + i += 1 + end + out + end + private # ---- numeric literal canonicalization (§8) ---------------------------- @@ -361,6 +398,284 @@ def collapse_newlines(toks) out end + # ---- MATCH layout (§3.7) --------------------------------------------- + # + # `[PARTIAL] MATCH expr START arm, arm, ..., DEFAULT -> stmt; END` + # — expand to multi-line form with arms one-per-line at MATCH+1 indent. + # Multi-statement arms (multiple `;` after `->`) put the body on its + # own lines at MATCH+2 indent. The body lifts back to arm-depth via a + # phantom INDENT_CLOSE before the trailing `,` (or before END for the + # last arm). DEFAULT in arm position is preceded by a KEEP_INDENT + # phantom so its OUTDENT_LEADING render rule (used for CATCH/DEFAULT + # in TRY/CATCH) is canceled — in MATCH it is just an arm pattern at + # arm-depth. + def expand_match_blocks(toks) + out = [] + i = 0 + while i < toks.length + t = toks[i] + if t.type == :KEYWORD && t.raw == 'START' && match_block_start?(toks, i) + end_idx = find_match_block_end(toks, i) + if end_idx + i = emit_match_block(out, toks, i, end_idx) + next + end + end + out << t + i += 1 + end + out + end + + # True when the `START` keyword at `idx` belongs to a `MATCH ... START` + # construct (not `SYNC POLICY START`). Walks back at depth 0 and looks + # for `MATCH` before any statement boundary; returns false if it sees + # `POLICY` first. 
+ def match_block_start?(toks, idx) + depth = 0 + j = idx - 1 + while j >= 0 + t = toks[j] + if t.type == :KEYWORD + return true if t.raw == 'MATCH' + return false if t.raw == 'POLICY' + return false if %w[FN END THEN DO ELSE ELSE_IF CATCH].include?(t.raw) + end + if t.type == :SYM + case t.raw + when ')', ']', '}' then depth += 1 + when '(', '[', '{' + depth -= 1 + return false if depth < 0 + when ';' + return false if depth.zero? + end + end + j -= 1 + end + false + end + + # Find the END that closes the MATCH block whose START is at `start_idx`. + # Tracks nested keyword blocks (anything that opens a matching END) and + # bracket depth so a stray `END` inside a nested IF/FOR isn't mistaken + # for the closer. + def find_match_block_end(toks, start_idx) + bdepth = 0 + kdepth = 0 + j = start_idx + 1 + while j < toks.length + t = toks[j] + if t.type == :SYM + case t.raw + when '(', '[', '{' then bdepth += 1 + when ')', ']', '}' then bdepth -= 1 + end + elsif bdepth.zero? && t.type == :KEYWORD + case t.raw + when 'IF', 'WHILE', 'FOR', 'TEST', 'WHEN', 'FN', 'START' + kdepth += 1 + when 'END' + return j if kdepth.zero? + kdepth -= 1 + end + end + j += 1 + end + nil + end + + # Lay out a MATCH block from `start_idx` (`START`) to `end_idx` (`END`). + def emit_match_block(out, toks, start_idx, end_idx) + out << toks[start_idx] + insert_nl(out) + + arms = scan_match_arms(toks, start_idx + 1, end_idx) + arms.each do |arm| + emit_match_arm(out, toks, arm) + end + + out << toks[end_idx] + end_idx + 1 + end + + # Returns a list of arm hashes: { start:, end:, arrow:, sep:, multi: }. + # `start..end` covers the arm tokens (excluding the trailing separator). + # `sep` is the index of the separating `,` (or nil for the last arm). + # `arrow` is the index of the arm's top-level `->` (or nil for bare + # arms like `DEFAULT` followed by an unparenthesized expression). 
+ # `multi` is true if the arm body has more than one statement or + # contains a nested keyword block (IF/FOR/WHILE/...). + def scan_match_arms(toks, start, stop) + arms = [] + arm_start = skip_nls(toks, start) + return arms if arm_start >= stop + + bdepth = 0 + kdepth = 0 + arrow_idx = nil + j = arm_start + while j < stop + t = toks[j] + if t.type == :SYM + case t.raw + when '(', '[', '{' then bdepth += 1 + when ')', ']', '}' then bdepth -= 1 + when ',' + if bdepth.zero? && kdepth.zero? + arms << build_match_arm(toks, arm_start, j, arrow_idx, j) + arm_start = skip_nls(toks, j + 1) + j = arm_start + arrow_idx = nil + next + end + end + elsif t.type == :OP && t.raw == '->' && bdepth.zero? && kdepth.zero? + arrow_idx ||= j + elsif t.type == :KEYWORD && bdepth.zero? + case t.raw + when 'IF', 'WHILE', 'FOR', 'TEST', 'WHEN', 'FN', 'START' + kdepth += 1 + when 'END' + kdepth -= 1 if kdepth.positive? + end + end + j += 1 + end + arms << build_match_arm(toks, arm_start, stop, arrow_idx, nil) if arm_start < stop + arms + end + + def build_match_arm(toks, s, e, arrow, sep) + body_end = sep || e + semi_count = 0 + has_block = false + bdepth = 0 + kdepth = 0 + if arrow + ((arrow + 1)...body_end).each do |k| + t = toks[k] + if t.type == :SYM + case t.raw + when '(', '[', '{' then bdepth += 1 + when ')', ']', '}' then bdepth -= 1 + when ';' + semi_count += 1 if bdepth.zero? && kdepth.zero? + end + elsif t.type == :KEYWORD && bdepth.zero? + case t.raw + when 'IF', 'WHILE', 'FOR', 'TEST', 'WHEN', 'FN', 'START' + has_block = true if kdepth.zero? + kdepth += 1 + when 'END' + kdepth -= 1 if kdepth.positive? + end + end + end + end + multi = semi_count > 1 || has_block + { start: s, end: e, body_end: body_end, arrow: arrow, sep: sep, multi: multi } + end + + # Emit one arm: pattern, `->`, body, separator. Wraps DEFAULT in + # INDENT_OPEN/CLOSE phantoms to neutralize its OUTDENT_LEADING render + # rule. 
Multi-line arms emit body on its own lines at +1 indent and + # close the indent before the separator. + def emit_match_arm(out, toks, arm) + s, e, body_end, arrow, sep, multi = + arm[:start], arm[:end], arm[:body_end], arm[:arrow], arm[:sep], arm[:multi] + + leader = first_code_at(toks, s, body_end) + cancel_outdent = leader && leader.type == :KEYWORD && + OUTDENT_LEADING.include?(leader.raw) + + # KEEP_INDENT on the leader line tells render to skip its + # OUTDENT_LEADING outdent — DEFAULT in MATCH is an arm pattern, not + # a CATCH/TRY-style outdent. + out << phantom(:KEEP_INDENT) if cancel_outdent + + if arrow && multi + copy_arm_tokens(out, toks, s, arrow + 1) + insert_nl(out) + emit_match_body(out, toks, arrow + 1, body_end) + # Drop trailing NL so the `,` (or INDENT_CLOSE) sits on the same + # line as the body's last `;`. Without this, the comma orphan- + # lands on its own line. + out.pop while out.last && out.last.type == :NL + out << phantom(:INDENT_CLOSE) + out << toks[sep] if sep + else + copy_arm_tokens(out, toks, s, body_end) + out << toks[sep] if sep + end + + insert_nl(out) + end + + def first_code_at(toks, s, e) + j = s + while j < e + t = toks[j] + return t unless [:NL, :COMMENT, :WS, :INDENT_OPEN, :INDENT_CLOSE].include?(t.type) + j += 1 + end + nil + end + + def copy_arm_tokens(out, toks, s, e) + (s...e).each do |k| + t = toks[k] + next if t.type == :NL + out << t + end + end + + # Emit a multi-statement arm body at +1 indent, splitting at `;` at + # depth 0. The arm-separator `,` (or the closing END for the last + # arm) is emitted by the caller, so we never look past `e` here. + # Preserves source NLs inside nested blocks so expand_then_do_blocks + # still sees the user's multi-line shape; collapses redundant NLs at + # arm-body top-level since `;` already inserts one. + def emit_match_body(out, toks, s, e) + bdepth = 0 + kdepth = 0 + j = skip_nls(toks, s) + while j < e + t = toks[j] + if t.type == :NL + # Collapse adjacent NLs but keep one. 
This preserves user line + # breaks (between END of a nested block and the next statement) + # without producing the orphan blank lines that arise from the + # source-NL after a `;` we just NL-terminated ourselves. + out << t unless out.last && out.last.type == :NL + j += 1 + next + end + if t.type == :SYM + case t.raw + when '(', '[', '{' then bdepth += 1; out << t; j += 1; next + when ')', ']', '}' then bdepth -= 1; out << t; j += 1; next + when ';' + if bdepth.zero? && kdepth.zero? + out << t + j += 1 + insert_nl(out) + next + end + end + elsif t.type == :KEYWORD && bdepth.zero? + case t.raw + when 'IF', 'WHILE', 'FOR', 'TEST', 'WHEN', 'FN', 'START' + kdepth += 1 + when 'END' + kdepth -= 1 if kdepth.positive? + end + end + out << t + j += 1 + end + end + # For each top-level `FN ... -> ... END` (or any nested FN), ensure that # the body is multi-line: a newline follows `->` and precedes `END`, and # statements in between are split on `;` boundaries. @@ -420,7 +735,7 @@ def emit_fn_block(out, toks, start) # actually closes with END. Brace-terminated blocks (WITH/MATCH/ # STRUCT/UNION/ENUM/BG) are handled by the `{`/`}` branches below. # Filter WITH (`CATCH Input WITH(...)`) is not a block opener. - if tj.type == :KEYWORD && %w[FN IF WHILE FOR TEST WHEN].include?(tj.raw) + if tj.type == :KEYWORD && %w[FN IF WHILE FOR TEST WHEN START].include?(tj.raw) if tj.raw == 'FN' j = emit_fn_block(out, toks, j) next @@ -453,10 +768,7 @@ def emit_fn_block(out, toks, start) # skip exactly one following source NL (it's redundant with the one # I just inserted). Additional source NLs pass through as blank lines. 
if tj.type == :SYM && tj.raw == ';' && depth == 0 - out << tj - j += 1 - insert_nl(out) - j += 1 if j < toks.length && toks[j].type == :NL + j = emit_stmt_terminator(out, toks, j) next end # ELSE / ELSE_IF / CATCH / DEFAULT at this depth: ensure they start @@ -712,17 +1024,22 @@ def find_fn_arrow(toks, fn_idx) nil end - # Expand one-liner IF / WHILE / FOR blocks that use THEN or DO...END. - # Detects blocks where no newline appears between the opening keyword and - # the matching END, and expands them so the body is on its own line(s). - # Multi-line forms are left untouched. + # Expand IF / WHILE / FOR blocks where any branch has its body inline + # with the THEN/DO/ELSE keyword. Two triggers: + # 1. The whole construct fits on a single source line (`one_liner_end` + # returns non-nil). + # 2. At least one branch's THEN/DO/ELSE is followed by inline body + # tokens (multi-line IF/ELSE_IF chain where each branch is a + # one-liner — repro from examples/json_parser/parseString). + # Already-multi-line forms (every branch already has its body on its + # own line) are left untouched. def expand_then_do_blocks(toks) out = [] i = 0 while i < toks.length t = toks[i] if t.type == :KEYWORD && %w[IF WHILE FOR].include?(t.raw) - end_idx = one_liner_end(toks, i) + end_idx = one_liner_end(toks, i) || branch_end_for_inline_expansion(toks, i) if end_idx i = expand_if_while_for(out, toks, i, end_idx) else @@ -737,6 +1054,71 @@ def expand_then_do_blocks(toks) out end + # Returns the index of the matching END for the IF/WHILE/FOR at `start` + # iff at least one of its branches (THEN / ELSE / ELSE_IF / DO) is + # followed inline (no NL) by body code rather than NL, END, ELSE, + # or ELSE_IF. Otherwise nil. 
+ def branch_end_for_inline_expansion(toks, start) + end_idx = matching_end(toks, start) + return nil unless end_idx + bdepth = 0 + kdepth = 0 + j = start + 1 + while j < end_idx + t = toks[j] + if t.type == :SYM + case t.raw + when '(', '[', '{' then bdepth += 1 + when ')', ']', '}' then bdepth -= 1 + end + elsif t.type == :KEYWORD && bdepth.zero? + case t.raw + when 'IF', 'WHILE', 'FOR', 'TEST', 'WHEN', 'FN' + kdepth += 1 + when 'END' + kdepth -= 1 if kdepth.positive? + when 'THEN', 'DO', 'ELSE' + if kdepth.zero? + k = j + 1 + k += 1 while k < end_idx && toks[k].type == :COMMENT + return end_idx if k < end_idx && toks[k].type != :NL && + !(toks[k].type == :KEYWORD && + %w[END ELSE ELSE_IF].include?(toks[k].raw)) + end + end + end + j += 1 + end + nil + end + + # Find matching END for the IF/WHILE/FOR at `start` (no NL constraint). + # Returns nil if unmatched. + def matching_end(toks, start) + bdepth = 0 + kdepth = 0 + j = start + 1 + while j < toks.length + t = toks[j] + if t.type == :SYM + case t.raw + when '(', '[', '{' then bdepth += 1 + when ')', ']', '}' then bdepth -= 1 + end + elsif bdepth.zero? && t.type == :KEYWORD + case t.raw + when 'IF', 'WHILE', 'FOR', 'TEST', 'WHEN', 'FN' + kdepth += 1 + when 'END' + return j if kdepth.zero? + kdepth -= 1 + end + end + j += 1 + end + nil + end + # Returns the index of the matching END if and only if no :NL appears # anywhere between `start` and that END (i.e., the whole construct is # a single source line). Otherwise nil. @@ -794,8 +1176,19 @@ def expand_if_while_for(out, toks, start, end_idx) insert_nl(out) # Walk body tokens. Split `;` boundaries; outdent ELSE/ELSE_IF. + # Track BOTH bracket depth (`(`, `[`, `{`) AND keyword-block depth + # so that a `;` belonging to an inner one-liner block + # (`IF cond THEN a; b; END`) doesn't get torn into separate lines. + # block_depth tracks IF/WHILE/FOR/TEST/WHEN — the constructs that + # OPEN a new keyword block. 
THEN/DO/ELSE_IF do NOT bump it: they're + # mid-block markers in the SAME enclosing IF, and treating them as + # block-openers breaks multi-branch IF chains where each ELSE_IF's + # own THEN would falsely nest. (Repro: examples/json_parser + # parseString, where the IF/ELSE_IF chain staggered.) depth = 0 + block_depth = 0 j = term_idx + 1 + j += 1 while j < end_idx && toks[j].type == :NL body_start = out.length while j < end_idx tj = toks[j] @@ -806,16 +1199,57 @@ def expand_if_while_for(out, toks, start, end_idx) if tj.type == :SYM && tj.raw == '{' then depth += 1; out << tj; j += 1; next end if tj.type == :SYM && tj.raw == '}' then depth -= 1; out << tj; j += 1; next end - if tj.type == :SYM && tj.raw == ';' && depth == 0 - out << tj + if tj.type == :KEYWORD + case tj.raw + when 'IF', 'WHILE', 'FOR', 'TEST', 'WHEN', 'FN' then block_depth += 1 + when 'END' then block_depth -= 1 if block_depth > 0 + end + end + + if tj.type == :NL + # Collapse adjacent NLs but preserve user-written line breaks + # inside nested blocks so further passes still see structure. + out << tj unless out.last && out.last.type == :NL j += 1 - insert_nl(out) next end - if tj.type == :KEYWORD && %w[ELSE ELSE_IF].include?(tj.raw) && depth == 0 + + if tj.type == :SYM && tj.raw == ';' && depth.zero? && block_depth.zero? + j = emit_stmt_terminator(out, toks, j) + next + end + if tj.type == :KEYWORD && %w[ELSE ELSE_IF].include?(tj.raw) && depth.zero? && block_depth.zero? insert_nl(out) out << tj j += 1 + # `ELSE` with an inline body (`ELSE x = 0;`) — push body to its + # own line. ELSE_IF's body comes after THEN, handled below. 
+ if tj.raw == 'ELSE' + k = j + k += 1 while k < end_idx && toks[k].type == :COMMENT + if k < end_idx && toks[k].type != :NL && + !(toks[k].type == :KEYWORD && %w[END ELSE ELSE_IF].include?(toks[k].raw)) + insert_nl(out) + end + end + next + end + # THEN/DO with an inline body (`IF c THEN s;` continuation in a + # multi-line chain) — push body onto its own line so the indent + # ladder doesn't stagger. (Repro: examples/json_parser + # parseString, where each `ELSE_IF cond THEN stmt;` was a one- + # liner in a multi-line chain, and the missing NL collapsed the + # whole ladder against the IF column.) + if tj.type == :KEYWORD && %w[THEN DO].include?(tj.raw) && + depth.zero? && block_depth.zero? + out << tj + j += 1 + k = j + k += 1 while k < end_idx && toks[k].type == :COMMENT + if k < end_idx && toks[k].type != :NL && + !(toks[k].type == :KEYWORD && %w[END ELSE ELSE_IF].include?(toks[k].raw)) + insert_nl(out) + end next end out << tj @@ -1159,9 +1593,26 @@ def expand_method_chains(toks) out << phantom(:INDENT_OPEN) segments.each_with_index do |seg, k| insert_nl(out) if k > 0 + # Strip NLs that are between segment tokens at the chain's + # top level — those are line breaks the user put between + # `.foo()` and `.bar()` that the renderer is about to + # supply itself. NLs nested inside the segment's argument + # list (`.foo(BG { @parallel -> ...\n... })`) must be + # preserved: they belong to the argument's own multi-line + # layout, not to the chain. Without this depth guard, the + # whole BG body collapsed onto one line. + # (Repro: benchmarks/concurrent/14_nested_lock pre-fix.) + seg_depth = 0 (seg[:start]...seg[:end]).each do |j| - next if toks[j].type == :NL - out << toks[j] + t = toks[j] + if t.type == :SYM + case t.raw + when '(', '[', '{' then seg_depth += 1 + when ')', ']', '}' then seg_depth -= 1 + end + end + next if t.type == :NL && seg_depth.zero? 
+ out << t end end out << phantom(:INDENT_CLOSE) @@ -1403,7 +1854,7 @@ def expand_bg_do_blocks(toks) brace_idx = find_bg_brace(toks, i) if brace_idx close_idx = find_matching_close_brace(toks, brace_idx) - if close_idx && count_statements_in_block(toks, brace_idx, close_idx) >= 2 + if close_idx && bg_body_needs_wrap?(toks, brace_idx, close_idx) i = emit_bg_do_wrapped(out, toks, i, brace_idx, close_idx) next end @@ -1415,6 +1866,35 @@ def expand_bg_do_blocks(toks) out end + # A BG/DO body needs the multi-line wrap when it has 2+ statements + # at the top level OR when its single statement opens a nested block + # (`FOR ... DO ... END`, `IF ... THEN ... END`, etc). Without the + # second condition, a body like `BG { @parallel -> FOR ... DO ... END }` + # was being skipped by the wrap pass and ended up collapsed onto a + # single 600-char line by `collapse_newlines`. The DO/THEN check + # specifically targets that case while leaving short trivial bodies + # like `BG { @micro -> doWork(); }` inline. + def bg_body_needs_wrap?(toks, brace_idx, close_idx) + return true if count_statements_in_block(toks, brace_idx, close_idx) >= 2 + body_has_top_level_block?(toks, brace_idx, close_idx) + end + + def body_has_top_level_block?(toks, brace_idx, close_idx) + depth = 0 + (brace_idx + 1 ... close_idx).each do |j| + t = toks[j] + if t.type == :SYM + case t.raw + when '(', '[', '{' then depth += 1 + when ')', ']', '}' then depth -= 1 + end + elsif t.type == :KEYWORD && depth.zero? && %w[DO THEN].include?(t.raw) + return true + end + end + false + end + def find_bg_brace(toks, start) j = start + 1 while j < toks.length && [:NL, :COMMENT].include?(toks[j].type) @@ -1425,9 +1905,25 @@ def find_bg_brace(toks, start) end def count_statements_in_block(toks, open_brace, close_brace) + # Counts top-level `;` inside the BG/DO `{ ... }` body to decide + # whether it's a one-liner or needs the multi-line wrap. + # + # Treats `DO`/`THEN ... 
END` as nested (anything inside them is a + # single sub-statement at the BG level). Also treats a single + # top-level statement that opens a DO/THEN block — e.g. + # `FOR i IN ... DO ... END` or `IF cond THEN ... END` — as one + # statement: count it once even though it has no terminating `;`. + # Without this, BGs like + # `BG { @parallel -> FOR i IN ... DO a; b; END }` (single FOR at + # the BG level, multiple `;` inside its DO body) used to mis-count + # as multi-statement and trigger the wrap, which then collapsed + # the FOR's DO body into a 300-char mess. + # (Repro: benchmarks/concurrent/14_nested_lock pre-fix.) depth = 0 + block_depth = 0 count = 0 has_tokens = false + saw_block_at_top = false (open_brace + 1 ... close_brace).each do |j| t = toks[j] if t.type == :SYM @@ -1435,12 +1931,21 @@ def count_statements_in_block(toks, open_brace, close_brace) when '(', '[', '{' then depth += 1 when ')', ']', '}' then depth -= 1 when ';' - if depth == 0 + if depth.zero? && block_depth.zero? count += 1 if has_tokens has_tokens = false + saw_block_at_top = false next end end + elsif t.type == :KEYWORD + case t.raw + when 'DO', 'THEN' + saw_block_at_top = true if depth.zero? && block_depth.zero? + block_depth += 1 + when 'END' + block_depth -= 1 if block_depth > 0 + end end next if [:NL, :COMMENT].include?(t.type) has_tokens = true @@ -1457,7 +1962,16 @@ def emit_bg_do_wrapped(out, toks, kw_idx, brace_idx, close_idx) end insert_nl(out) + # `BG { @parallel -> body }` — the leading `@xxx ->` is a strategy + # tag whose `->` opens body indent (OPEN_TERMINAL on `->`) but has + # no matching END inside the BG body; the `}` only closes the `{`. + # Without compensation the body's depth never unwinds and every + # statement after the BG ends up one column too deep. Detect that + # shape and emit a balancing INDENT_CLOSE before `}`. 
+ needs_arrow_balance = bg_body_has_strategy_arrow?(toks, brace_idx, close_idx) + depth = 0 + block_depth = 0 j = brace_idx + 1 j = skip_nls(toks, j) body_start = out.length @@ -1468,14 +1982,22 @@ def emit_bg_do_wrapped(out, toks, kw_idx, brace_idx, close_idx) when '(', '[', '{' then depth += 1; out << t; j += 1; next when ')', ']', '}' then depth -= 1; out << t; j += 1; next when ';' - if depth == 0 - out << t - j += 1 - insert_nl(out) - j += 1 if j < toks.length && toks[j].type == :NL + # Insert NL on `;` only at the BG-level top — inside a + # `DO ... END` or `THEN ... END` block, leave the `;` to be + # handled by the inner block's own expansion (expand_then_do_blocks + # runs earlier in the pipeline). Without this guard, the FOR + # body in `BG { @parallel -> FOR i IN ... DO a; b; END }` + # would be torn apart. + if depth.zero? && block_depth.zero? + j = emit_stmt_terminator(out, toks, j) next end end + elsif t.type == :KEYWORD + case t.raw + when 'DO', 'THEN' then block_depth += 1 + when 'END' then block_depth -= 1 if block_depth > 0 + end end out << t j += 1 @@ -1483,12 +2005,40 @@ def emit_bg_do_wrapped(out, toks, kw_idx, brace_idx, close_idx) if body_start < out.length out.pop while out.last && out.last.type == :NL && out.length > body_start + out << phantom(:INDENT_CLOSE) if needs_arrow_balance insert_nl(out) end out << toks[close_idx] close_idx + 1 end + # True if the BG/DO body opens with a strategy-tag arrow at top + # level (`@parallel ->`, `@micro ->`, ...). The `->` raises render + # depth via OPEN_TERMINAL but the body has no END to lower it back, + # so we need an explicit INDENT_CLOSE before `}`. 
+ def bg_body_has_strategy_arrow?(toks, brace_idx, close_idx) + bdepth = 0 + j = brace_idx + 1 + j += 1 while j < close_idx && [:NL, :COMMENT].include?(toks[j].type) + while j < close_idx + t = toks[j] + if t.type == :SYM + case t.raw + when '(', '[', '{' then bdepth += 1 + when ')', ']', '}' then bdepth -= 1 + end + elsif t.type == :OP && t.raw == '->' && bdepth.zero? + return true + elsif t.type == :KEYWORD && bdepth.zero? && + %w[FN IF WHILE FOR TEST WHEN START].include?(t.raw) + return false + end + return false if t.type == :SYM && t.raw == ';' && bdepth.zero? + j += 1 + end + false + end + # ---- Pipeline forced wraps (§3.4, §3.7) -------------------------------- # # When a pipeline chain has 2+ `|>` stages (at the same expression depth, @@ -1727,6 +2277,26 @@ def insert_nl(out) out << Formatter::FormatLexer::Token.new(:NL, "\n", 0, 0) end + # Emit `;` at index `j` as a statement terminator, then a newline. + # MATCH arms have the shape `Pat -> stmt;,` where the `,` is the + # arm separator — keep it on the same line as `;`. Without this, + # `;` forced a NL and the orphan `,` landed on its own line. + # Also handles the idempotent case where prior formatting already + # left a NL between `;` and `,`. + def emit_stmt_terminator(out, toks, j) + out << toks[j] # `;` + j += 1 + k = j + k += 1 while k < toks.length && toks[k].type == :NL + if k < toks.length && toks[k].type == :SYM && toks[k].raw == ',' + out << toks[k] + j = k + 1 + end + insert_nl(out) + j += 1 if j < toks.length && toks[j].type == :NL + j + end + # ---- rendering ------------------------------------------------------ # Walks the transformed token stream and produces the final string. @@ -1742,6 +2312,7 @@ def render(toks) out = +"" lines.each do |raw_line| half_indent = raw_line.any? { |t| t.type == :HALF_INDENT } + keep_indent = raw_line.any? 
{ |t| t.type == :KEEP_INDENT } pre_delta, post_delta, line = split_indent_markers(raw_line) depth = [depth + pre_delta, 0].max @@ -1759,7 +2330,7 @@ def render(toks) if first && CLOSE_LEADING.include?(first.raw) depth = [depth - 1, 0].max line_depth = depth - elsif first && OUTDENT_LEADING.include?(first.raw) + elsif !keep_indent && first && OUTDENT_LEADING.include?(first.raw) depth = [depth - 1, 0].max line_depth = depth outdent_leading = true @@ -1801,6 +2372,8 @@ def split_indent_markers(line) seen_code ? post -= 1 : pre -= 1 elsif t.type == :HALF_INDENT # Marker only; consumed by render's half_indent flag. + elsif t.type == :KEEP_INDENT + # Marker only; consumed by render's keep_indent flag. else filtered << t seen_code = true if t.type != :COMMENT @@ -1855,6 +2428,8 @@ def drop_trailing_blanks(lines) # Emit a line's tokens with canonical intra-line spacing. # Comments get 2-space prefix if inline, 1 space after `#`. def format_line_body(line) + @generic_bracket_indices = compute_generic_bracket_indices(line) + @struct_lit_brace_indices = compute_struct_lit_brace_indices(line) buf = +"" prev = nil # previous emitted *code* token line.each_with_index do |t, idx| @@ -1879,14 +2454,141 @@ def format_line_body(line) buf end - # Normalize a `#...` comment: exactly one space after `#`, trailing - # whitespace stripped. `#` alone (empty comment) stays `#`. + # Pre-pass: identify which `<` / `>` tokens on this line are + # generic-type brackets (rather than comparison operators) so + # `needs_space?` can attach them tightly. + # + # A `<` is a generic open when: + # - the previous code token is a TYPE_ID (`Foo<...>`), AND + # - a matching `>` exists later on the line, AND + # - the span between them contains only TYPE_IDs, `,`, sigils, + # and nested `<>` pairs (no `(` `)` `[` `]` `{` `}` `=` etc). + # + # Returns a Set of token indices that are generic brackets (both + # opens and closes). 
+ def compute_generic_bracket_indices(line) + set = Set.new + line.each_with_index do |t, i| + next unless t.type == :SYM && t.raw == '<' + prev = preceding_code_token(line, i) + next unless prev && prev.type == :TYPE_ID + close_idx = find_generic_close_idx(line, i + 1) + next unless close_idx + set << i + set << close_idx + end + set + end + + # Walk forward from `start_idx` looking for the `>` that closes a + # generic span opened just before. Tracks nested `<>` depth. Returns + # the close index, or nil if anything that disqualifies the span as + # a generic appears (call/index/struct-lit brackets, `=`, etc). + def find_generic_close_idx(line, start_idx) + depth = 1 + i = start_idx + while i < line.length + t = line[i] + if t.type == :SYM + case t.raw + when '<' then depth += 1 + when '>' + depth -= 1 + return i if depth.zero? + when '(', ')', '[', ']', '{', '}' + return nil + when '=' + return nil + end + end + i += 1 + end + nil + end + + def preceding_code_token(line, idx) + j = idx - 1 + while j >= 0 + t = line[j] + return t unless [:COMMENT, :INDENT_OPEN, :INDENT_CLOSE].include?(t.type) + j -= 1 + end + nil + end + + # True when `line` contains a STRUCT / UNION / ENUM keyword at any + # depth before token index `idx`. Used to disambiguate struct-body + # `{` (`STRUCT Foo {`) from struct-literal `{` (`Foo{ ... }`). + def line_has_struct_decl_keyword?(line, idx) + line.first(idx).any? do |t| + t.type == :KEYWORD && %w[STRUCT UNION ENUM].include?(t.raw) + end + end + + # Pre-pass: identify which `{` / `}` token indices on this line + # belong to a struct literal (`Foo{ field: v }`), as opposed to a + # block scope, hash literal, or struct-decl body. + # + # A `{` is a struct-literal open when: + # - the previous code token is a TYPE_ID, AND + # - the line does NOT introduce a STRUCT / UNION / ENUM + # declaration (in which case `{` is the body open). + # + # The matching `}` is found by simple `{` / `}` brace counting. 
+ def compute_struct_lit_brace_indices(line) + set = Set.new + line.each_with_index do |t, i| + next unless t.type == :SYM && t.raw == '{' + prev = preceding_code_token(line, i) + next unless prev && prev.type == :TYPE_ID + next if line_has_struct_decl_keyword?(line, i) + close_idx = find_matching_brace(line, i + 1) + next unless close_idx + set << i + set << close_idx + end + set + end + + def find_matching_brace(line, start_idx) + depth = 1 + i = start_idx + while i < line.length + t = line[i] + if t.type == :SYM + case t.raw + when '{' then depth += 1 + when '}' + depth -= 1 + return i if depth.zero? + end + end + i += 1 + end + nil + end + + # Normalize a `#...` comment: ensure AT LEAST one space after `#`, + # preserving additional spaces the user wrote. Trailing whitespace + # stripped. `#` alone stays `#`. + # + # Why preserve extra leading spaces? ASCII tables, indented prose, + # and code samples in comment bodies use leading whitespace as + # layout. Collapsing `# row` to `# row` destroys alignment in + # tables like: + # + # # COL_A | COL_B + # # row1 | x + # # row2 | y + # + # User-typed indent is meaningful; only synthesize the missing + # first space when the user wrote `#text` with no separator. def canonicalize_comment(raw) body = raw[1..].to_s body = body.rstrip return '#' if body.empty? - body = body.sub(/\A\s+/, '') - "# #{body}" + return "# #{body}" unless body.start_with?(' ', "\t") + "##{body}" end # Spacing decision between two adjacent code tokens A (prev) and B (cur). @@ -1899,6 +2601,22 @@ def needs_space?(a, b, line, b_idx) return false if type_like_prev && in_type_context?(line, b_idx - 1) end + # Struct-literal brace padding override (`Foo{ field: v }`): + # add space after the `{` and before the matching `}`, EXCEPT + # when they're the empty pair `Foo{}`. Sits before the generic + # "no space inside brackets" rule below so the override wins. + if @struct_lit_brace_indices && !@struct_lit_brace_indices.empty? 
+ a_idx = b_idx - 1 + a_is_struct_open = a_idx >= 0 && + @struct_lit_brace_indices.include?(a_idx) && + line[a_idx]&.raw == '{' + b_is_struct_close = @struct_lit_brace_indices.include?(b_idx) && + b.type == :SYM && b.raw == '}' + empty_pair = a_is_struct_open && b_is_struct_close + return true if a_is_struct_open && !empty_pair + return true if b_is_struct_close && !empty_pair + end + # No space inside opening/closing brackets. return false if a.type == :SYM && ['(', '[', '{'].include?(a.raw) return false if b.type == :SYM && [')', ']', '}'].include?(b.raw) @@ -1920,6 +2638,36 @@ def needs_space?(a, b, line, b_idx) return false if a.type == :SYM && [')', ']'].include?(a.raw) end + # Struct literal attach: `Foo{ field: v }`. The TYPE_ID-then-`{` + # pattern is a struct literal UNLESS this line opens a STRUCT / + # UNION / ENUM declaration, in which case `{` is the body open + # and needs its leading space (`STRUCT Foo {`). + if b.type == :SYM && b.raw == '{' && a.type == :TYPE_ID && + !line_has_struct_decl_keyword?(line, b_idx) + return false + end + + # Generic-bracket attach: `Foo`. Detected per-line by + # `compute_generic_bracket_indices`; the indices are stamped on + # `@generic_bracket_indices` while `format_line_body` runs. + if @generic_bracket_indices + a_idx = b_idx - 1 + a_is_generic = a_idx >= 0 && @generic_bracket_indices.include?(a_idx) + b_is_generic = @generic_bracket_indices.include?(b_idx) + # `Foo<` — no space between the type and the generic-open `<`. + if b_is_generic && b.raw == '<' + return false + end + # `` — no space before the generic close. + if b_is_generic && b.raw == '>' + return false + end + end + # No space before `,` or `;`. if b.type == :SYM && (b.raw == ',' || b.raw == ';') return false @@ -1930,6 +2678,15 @@ def needs_space?(a, b, line, b_idx) return false end + # Inline capability chain (`@shared:locked`, `@pool:shared:locked`): + # no space after `:` when it chains off an `@cap` binding. 
The chain + # is `@cap (: ident)+` — walk back through alternating colons and + # identifiers; if the leftmost identifier starts with `@`, suppress + # the space between this `:` and the next ident. + if a.type == :SYM && a.raw == ':' && capability_chain_colon?(line, b_idx - 1) + return false + end + # Tense sigils (`!` `?` `%` `~`) attach to following type / sigil. if a.type == :SYM && %w[! ? % ~].include?(a.raw) if b.type == :TYPE_ID @@ -1955,6 +2712,63 @@ def needs_space?(a, b, line, b_idx) true end + # Walk back from a `:` at `colon_idx` through alternating + # `:identifier` segments. Returns true iff the chain bottoms out at + # a `@cap` VAR_ID (i.e., `@cap (: ident)+ :`). A segment may carry a + # parenthesized argument (`@sharded(8)`, `@shared:sharded(128):locked`); + # we skip a trailing `(...)` group before falling back to the ident. + # Used to decide whether a `:` belongs to an inline capability chain + # rather than a normal type annotation. + def capability_chain_colon?(line, colon_idx) + j = colon_idx + while j >= 0 + t = line[j] + return false unless t.type == :SYM && t.raw == ':' + k = j - 1 + while k >= 0 && [:NL, :COMMENT, :INDENT_OPEN, :INDENT_CLOSE].include?(line[k].type) + k -= 1 + end + return false if k < 0 + # Skip a trailing `(...)` (e.g., `@sharded(8)`). + if line[k].type == :SYM && line[k].raw == ')' + k = skip_paren_group_back(line, k) + return false if k < 0 + while k >= 0 && [:NL, :COMMENT, :INDENT_OPEN, :INDENT_CLOSE].include?(line[k].type) + k -= 1 + end + return false if k < 0 + end + prev = line[k] + return true if prev.type == :VAR_ID && prev.raw.start_with?('@') + return false unless prev.type == :VAR_ID || prev.type == :TYPE_ID + j = k - 1 + while j >= 0 && [:NL, :COMMENT, :INDENT_OPEN, :INDENT_CLOSE].include?(line[j].type) + j -= 1 + end + end + false + end + + # Walk back from a `)` at `idx` to the matching `(`. Returns the index + # before the `(` (i.e., one to the left), or -1 if no match. 
+ def skip_paren_group_back(line, idx) + depth = 0 + j = idx + while j >= 0 + t = line[j] + if t.type == :SYM + case t.raw + when ')' then depth += 1 + when '(' + depth -= 1 + return j - 1 if depth.zero? + end + end + j -= 1 + end + -1 + end + # Scan back from `line[idx]` to determine whether the position is a # type-annotation context. Returns true if the nearest non-nested # separator is `:` or `RETURNS`; false for `=` / `,` / `;` / start. diff --git a/src/tools/lint_fix_rewriter.rb b/src/tools/lint_fix_rewriter.rb new file mode 100644 index 000000000..a2225a5c5 --- /dev/null +++ b/src/tools/lint_fix_rewriter.rb @@ -0,0 +1,323 @@ +# LintFixRewriter — source-level pre-pass for `clear fmt` that applies +# stylistic auto-fixes detectable only via real semantic analysis: +# +# 1. Drop `MUTABLE ` from declarations whose binding is never +# reassigned. The annotator already emits a `:auto`-confidence +# lint finding for this; we drain it here and apply the edit. +# +# 2. Drop redundant explicit type annotations like +# `s: Float64 = 0.0` -> `s = 0.0`, where the right-hand side +# already determines the same type the user wrote. Walks the AST +# and compares VarDecl.type against the value's inferred +# full_type. Only triggers when the declared type is "bare" +# (no sigils — `?`, `!`, `~`, `@`, `%` — and no array/optional/ +# error-union/capability decoration), because those carry +# semantic intent that isn't always recoverable from the value +# alone. +# +# Both rules need the annotator's output, so this module runs the +# annotator once and serves both. If annotation raises (the file has +# a compile error), the rewriter returns the source unchanged — fmt +# must still format files with errors. +# +# Idempotent: a second pass finds nothing left to rewrite. 
+ +require 'set' +require_relative '../ast/lexer' +require_relative '../ast/parser' +require_relative '../ast/ast' +require_relative '../ast/fixable_error' +require_relative '../annotator' + +module LintFixRewriter + module_function + + def rewrite(source) + ast, findings = annotate(source) + return source unless ast + bg_names = collect_bg_referenced_names(ast) + edits = [] + edits.concat(mutable_unused_edits(findings, bg_names)) + edits.concat(redundant_type_annotation_edits(ast, source)) + return source if edits.empty? + apply_edits(source, edits) + end + + # Walk the AST collecting every Identifier name referenced inside a + # `BG { ... }` or `BG STREAM { ... }` block. The annotator's + # MUTABLE-never-reassigned check doesn't propagate "mutably borrowed + # via a callee" through BG captures, so a binding can be flagged as + # unused-MUTABLE even when a BG-captured call mutates it via a + # MUTABLE param. Dropping MUTABLE in that case breaks the next + # build (the param's mutability check fires at the call site). + # Skip those names defensively until the annotator is fixed. + def collect_bg_referenced_names(ast) + set = Set.new + walk_for_bg_names(ast, false, set) + set + end + + def walk_for_bg_names(node, in_bg, set) + return if terminal?(node) + if node.is_a?(Array) + node.each { |n| walk_for_bg_names(n, in_bg, set) } + return + end + inside = in_bg || node.is_a?(AST::BgBlock) || node.is_a?(AST::BgStreamBlock) + if inside && node.is_a?(AST::Identifier) && node.respond_to?(:name) + set << node.name + end + return unless node.respond_to?(:each_pair) + node.each_pair { |_, v| walk_for_bg_names(v, inside, set) } + end + + # Run the annotator with FixCollector enabled. Returns + # [annotated_ast, findings] on success; [nil, []] if anything + # raised. Errors are swallowed because fmt must remain robust + # against files with compile errors. + def annotate(source) + FixCollector.enable! 
+ begin + tokens = ::Lexer.new(source).tokenize + ast = ::Parser.new(tokens, source).parse + annotator = SemanticAnnotator.new + annotator.source_code = source if annotator.respond_to?(:source_code=) + annotator.annotate!(ast) + [ast, FixCollector.drain] + rescue StandardError, CompilerError, ParserError + FixCollector.drain # clear collector even on error + [nil, []] + end + ensure + FixCollector.disable! + end + + # ---- Rule 1: MUTABLE never reassigned ---- + + def mutable_unused_edits(findings, bg_names) + findings.flat_map do |finding| + next [] unless mutable_unused_finding?(finding) + next [] if mentions_bg_referenced_name?(finding, bg_names) + finding.fixes + .select { |fx| fx.confidence == :auto } + .flat_map(&:edits) + .map { |e| edit_from_span(e.span, e.replacement) } + end + end + + def mutable_unused_finding?(finding) + finding.respond_to?(:message) && + finding.message&.include?("is never reassigned") + end + + # Pull the binding name out of the finding's message + # ("MUTABLE 'name' is never reassigned ...") and check it against + # the set of names referenced inside any BG block. + def mentions_bg_referenced_name?(finding, bg_names) + return false if bg_names.empty? + msg = finding.respond_to?(:message) ? finding.message.to_s : "" + m = msg.match(/MUTABLE '([^']+)'/) + return false unless m + bg_names.include?(m[1]) + end + + # Translate a Span/Edit (1-based line/col) into a flat byte-offset + # edit so we can apply both rules through the same machinery. + def edit_from_span(span, replacement) + { line: span.line, col: span.col, length: span.length, replacement: replacement.to_s } + end + + # ---- Rule 2: redundant type annotation ---- + + def redundant_type_annotation_edits(ast, source) + edits = [] + walk_for_redundant_type(ast, source, edits) + edits + end + + def walk_for_redundant_type(node, source, edits) + return if node.nil? 
|| terminal?(node) + if node.is_a?(Array) + node.each { |n| walk_for_redundant_type(n, source, edits) } + return + end + if (node.is_a?(AST::VarDecl) || decl_mode_bind_expr?(node)) && node.type + edit = compute_redundant_type_edit(node, source) + edits << edit if edit + end + return unless node.respond_to?(:each_pair) + node.each_pair { |_, v| walk_for_redundant_type(v, source, edits) } + end + + def terminal?(n) + n.nil? || n.is_a?(Symbol) || n.is_a?(String) || n.is_a?(Integer) || + n.is_a?(Float) || n.is_a?(TrueClass) || n.is_a?(FalseClass) + end + + def decl_mode_bind_expr?(node) + node.is_a?(AST::BindExpr) && node.respond_to?(:mode) && node.mode == :decl + end + + # Return an edit that strips the `: Type` annotation, or nil if + # the annotation should be kept. Conservative: keeps the annotation + # whenever the declared and inferred types don't match exactly, OR + # when the declared type carries any decoration (sigil, capability, + # array, optional, error union, generic instance). + def compute_redundant_type_edit(node, source) + declared = node.type + inferred = node.value && node.value.respond_to?(:full_type) ? node.value.full_type : nil + return nil unless inferred + return nil unless types_match_for_drop?(declared, inferred) + + span = locate_type_annotation_span(node, source) + return nil unless span + { line: span[:line], col: span[:col], length: span[:length], replacement: '' } + end + + # True only when dropping `: Type` keeps semantics identical. + def types_match_for_drop?(declared, inferred) + decl_t = to_type(declared) + inf_t = to_type(inferred) + return false unless decl_t && inf_t + # Bail on any decoration — sigils, capabilities, array/optional/ + # error-union/generic — because those convey intent the value + # alone may not fully express (e.g. `?Int64 = NIL`, + # `@Counter = ...`, `String[]@list = []`). 
+ return false if any_decoration?(decl_t) + return false if any_decoration?(inf_t) + decl_t.resolved == inf_t.resolved + end + + def to_type(t) + return nil if t.nil? + return t if t.respond_to?(:resolved) && t.respond_to?(:any_sync?) + Type.new(t) rescue nil + end + + def any_decoration?(t) + return true if t.respond_to?(:optional?) && t.optional? + return true if t.respond_to?(:error_union?) && t.error_union? + return true if t.respond_to?(:array?) && t.array? + return true if t.respond_to?(:map?) && t.map? + return true if t.respond_to?(:future?) && t.future? + # Use `#sync` directly rather than `any_sync?` — the latter + # excludes `:raw` and `:symbol` (data-access modes, not locks), + # but for drop-the-annotation purposes ANY sync stamp changes + # semantics. `String@raw` uses byte indexing; `String` uses + # UTF-8 codepoint indexing — same resolved type, different + # behavior. Keep the annotation either way. + return true if t.respond_to?(:sync) && t.sync + return true if t.respond_to?(:ownership) && t.ownership && t.ownership != :affine + return true if t.respond_to?(:generic_instance?) && t.generic_instance? + false + end + + # Locate the `: Type` span on a VarDecl / BindExpr in source. + # Returns { line:, col:, length: } that, when removed, yields a + # well-formed declaration the formatter can re-space. Span starts + # at the `:` and ends just before the `=` (after stripping trailing + # whitespace), so the surrounding spacing is left to the formatter. + def locate_type_annotation_span(node, source) + return nil unless node.token + name_off = offset_for(source, node.token.line, node.token.column) + return nil unless name_off + # token.column points at the var name (or at MUTABLE for mutable + # decls). We want the `:` immediately after the name. Skip past + # the name + any whitespace. 
+ name_start = source.index(node.name, name_off) + return nil unless name_start + cursor = name_start + node.name.length + while cursor < source.length && (source[cursor] == ' ' || source[cursor] == "\t") + cursor += 1 + end + return nil unless source[cursor] == ':' + colon_off = cursor + # Walk forward to the `=` that ends the type annotation. Respect + # nesting in case the type contains `[` `(` `{` (generics, fixed + # arrays). We stop at the FIRST top-level `=` that isn't part of + # `==` / `=>` etc. + depth = 0 + i = colon_off + 1 + eq_off = nil + while i < source.length + c = source[i] + if '([{'.include?(c) then depth += 1 + elsif ')]}'.include?(c) then depth -= 1 + elsif c == '=' && depth == 0 && source[i + 1] != '=' && source[i - 1] != '!' && + source[i - 1] != '<' && source[i - 1] != '>' + eq_off = i + break + end + i += 1 + end + return nil unless eq_off + + # Strip back from `=` over whitespace to find the last char of the + # type annotation. The span we remove is [colon_off, last_type_char]. + j = eq_off - 1 + j -= 1 while j > colon_off && (source[j] == ' ' || source[j] == "\t") + return nil if j < colon_off + + # Translate to (line, col, length). All edits go through the same + # 1-based-line/col model the FixableFinding edits use. + line, col = line_col_for_offset(source, colon_off) + length = j - colon_off + 1 + { line: line, col: col, length: length } + end + + # ---- Edit application ---- + + # Apply edits to source. Multiple edits per line are sorted right- + # to-left so earlier ones don't shift later positions. 
+ def apply_edits(source, edits) + grouped = edits.group_by { |e| e[:line] } + lines = source.split("\n", -1) + grouped.each do |ln_idx, ln_edits| + idx = ln_idx - 1 + next if idx < 0 || idx >= lines.length + ln = lines[idx] + ln_edits.sort_by { |e| -e[:col] }.each do |e| + start_col = e[:col] - 1 + next if start_col < 0 || start_col > ln.length + end_col = start_col + e[:length] + end_col = ln.length if end_col > ln.length + ln = ln[0...start_col] + e[:replacement].to_s + ln[end_col..] + end + lines[idx] = ln + end + lines.join("\n") + end + + # ---- Source-offset helpers ---- + + def offset_for(source, line, col) + return nil if line < 1 || col < 1 + off = 0 + cur_line = 1 + while cur_line < line + nl = source.index("\n", off) + return nil unless nl + off = nl + 1 + cur_line += 1 + end + target = off + col - 1 + return nil if target > source.length + target + end + + def line_col_for_offset(source, off) + line = 1 + col = 1 + i = 0 + while i < off + if source[i] == "\n" + line += 1 + col = 1 + else + col += 1 + end + i += 1 + end + [line, col] + end +end diff --git a/src/tools/method_rewriter.rb b/src/tools/method_rewriter.rb index e5433db85..5103055e7 100644 --- a/src/tools/method_rewriter.rb +++ b/src/tools/method_rewriter.rb @@ -94,7 +94,18 @@ def stdlib_method_names list = defs.is_a?(Array) ? defs : [defs] list.each do |d| next unless d.is_a?(Hash) - names << name if d[:is_method] + next unless d[:is_method] + # Skip stdlib functions whose Zig lowering is FSM-based + # (suspending I/O calls like readFile / writeFile / accept). + # Their MIR/FSM lowering reads the call's positional args + # at fixed indices via FsmOps; UFCS-rewriting moves the + # first arg into the receiver slot and the FSM lowerer + # crashes with "FsmOps arg index 0 out of range (0 args)." + # Detection is structural (`suspends: true` plus an + # `fsm_*` setup table) so we don't have to enumerate + # specific names. 
+ next if fsm_lowered?(d) + names << name end end end @@ -102,6 +113,16 @@ def stdlib_method_names end end + # True when an stdlib registry entry uses FSM lowering for I/O. Both + # markers must be present to count: `suspends: true` means the call + # yields, and the `fsm_*` keys carry the templates the FSM emitter + # reads. Either alone wouldn't be enough — `suspends: true` is also + # set on plain async helpers that don't go through FSM. + def fsm_lowered?(defn) + return false unless defn[:suspends] + defn.keys.any? { |k| k.to_s.start_with?("fsm_") } + end + # Post-order walk: collect edits for inner calls first so outer # rewrites see the (logically) rewritten inner. Edits are applied # right-to-left on the source so positions don't shift. @@ -165,6 +186,15 @@ def compute_edit(call, source) first = args_text[spans[0][0]...spans[0][1]].strip return nil if first.empty? + # Wrap the first arg in parens if its top-level AST node would + # bind looser than `.method()`. Without this, expressions like + # `toFloat(state MOD 1000)` would be rewritten to + # `state MOD 1000.toFloat()`, which Zig parses as + # `state MOD (1000.toFloat())` — a real semantics change. + # See spec/method_rewriter_spec.rb for the regression case. + first_arg_node = call.args[0] + first = "(#{first})" if needs_parens?(first_arg_node, first) + rest_text = if spans.size > 1 # Preserve whatever the user wrote between the first comma and # the closing `)` (sans leading whitespace). This keeps internal @@ -185,6 +215,30 @@ def compute_edit(call, source) { start: start_off, len: close_off - start_off + 1, replacement: rewritten } end + # True when the source text for `node` would mis-parse if placed + # immediately before `.method(...)`. Drives the paren wrap above. + # + # The check is structural (AST shape) with a textual safety net for + # node types we don't enumerate. Anything whose top is a binary or + # unary expression, a pipeline, a CAST, or similar must be wrapped. 
# AST node class names (last `::` segment, as a Symbol) whose source text
# already binds tighter than a trailing `.method()` call: Identifier,
# Literal, MethodCall, FuncCall, GetField, GetIndex, StructLit, ListLit,
# HashLit and StringLit. Arguments of these shapes need no paren wrap.
TIGHT_AST_TYPES = [
  :Identifier, :Literal, :MethodCall, :FuncCall, :GetField,
  :GetIndex, :StructLit, :ListLit, :HashLit, :StringLit
].freeze

# True when the source text for `node` would mis-parse if placed
# immediately before `.method(...)`, i.e. it must be wrapped in parens.
#
# node - AST node of the argument (may be nil); only its class name is read
# text - the argument's source text
#
# Returns false when `text` is already enclosed by one matching pair of
# parentheses, when `node` is nil, or when the node's class is listed in
# TIGHT_AST_TYPES. Every other node type is wrapped as a safety net.
def needs_parens?(node, text)
  return false if fully_parenthesized?(text.strip)
  return false unless node

  type_name = node.class.name&.split('::')&.last&.to_sym
  !TIGHT_AST_TYPES.include?(type_name)
end

# True only when the `(` at the start of `text` is closed by the `)` at
# its very end. A plain start/end check would accept `(a) + (b)`, whose
# outer expression is still a loose binary operation. Parentheses inside
# double-quoted strings (with backslash escapes) are ignored.
def fully_parenthesized?(text)
  return false unless text.start_with?('(') && text.end_with?(')')

  last = text.length - 1
  depth = 0
  in_str = false
  escaped = false
  text.each_char.with_index do |ch, i|
    if escaped
      escaped = false
    elsif in_str
      if ch == '\\'
        escaped = true
      elsif ch == '"'
        in_str = false
      end
    elsif ch == '"'
      in_str = true
    elsif ch == '('
      depth += 1
    elsif ch == ')'
      depth -= 1
      # The first time depth returns to zero is where the opening paren
      # closes; it only wraps the whole text if that is the last char.
      return i == last if depth.zero?
    end
  end
  false
end

# ---- src/tools/multi_statement_linter.rb ----
#
# MultiStatementLinter — emits a `clear fix` warning when a single
# source line contains more than one statement (multiple `;`-
# terminated statements at depth 0).
#
# Why warn but not auto-fix: splitting `a; b; c;` into three lines
# requires deciding indentation, blank-line treatment, and inline-
# comment placement. Those are judgement calls a fmt pass shouldn't
# make automatically — the user might genuinely have wanted a tight
# one-liner shape (e.g., for a hot-loop body they're debugging).
#
# Surfaced via FixCollector during `clear fix`. No-op when the
# collector is disabled, so normal `clear build` is unaffected.
module MultiStatementLinter
  module_function

  # Position object handed to FixableFinding as its `token`; findings are
  # anchored at column 1 of the offending line.
  LineAnchor = Struct.new(:line, :column)

  # Push one warning per source line that holds two or more top-level
  # `;` terminators. Does nothing unless FixCollector is enabled.
  def lint!(source)
    return unless FixCollector.enabled?

    scan_top_level_semis(source).each do |line_no, count|
      emit_finding(source, line_no) if count >= 2
    end
  end

  # Scan source forward, counting `;` at bracket-depth 0, grouped by
  # line. Inside `(...)`, `[...]`, `{...}`, strings, or comments the
  # `;` doesn't count — those are STRUCT-field separators, hash kv
  # separators, FOR-loop variants, etc. that legitimately share a
  # line.
  #
  # Returns a Hash of 1-based line number => count (default 0).
  def scan_top_level_semis(source)
    counts = Hash.new(0)
    # Work on a character array: indexing the String itself is
    # linear-time per access once the text contains non-ASCII characters.
    chars = source.chars
    total = chars.length
    line_no = 1
    depth = 0
    mode = :code # :code, :string ("...") or :triple ("""...""")
    i = 0
    while i < total
      c = chars[i]
      triple_quote = c == '"' && chars[i + 1] == '"' && chars[i + 2] == '"'
      case mode
      when :triple
        if triple_quote
          mode = :code
          i += 3
          next
        end
      when :string
        if c == '\\' && i + 1 < total
          # The escaped character is skipped, but an escaped newline
          # still starts a new source line.
          line_no += 1 if chars[i + 1] == "\n"
          i += 2
          next
        elsif c == '"'
          mode = :code
        end
      else
        if triple_quote
          mode = :triple
          i += 3
          next
        elsif c == '"'
          mode = :string
        elsif c == '#'
          # Skip to end of line; the newline itself is handled (and
          # counted) by the next iteration.
          i += 1 while i < total && chars[i] != "\n"
          next
        elsif '([{'.include?(c)
          depth += 1
        elsif ')]}'.include?(c)
          depth -= 1 if depth > 0
        elsif c == ';' && depth.zero?
          counts[line_no] += 1
        end
      end
      line_no += 1 if c == "\n"
      i += 1
    end
    counts
  end

  # Push a no-auto-fix lint warning anchored at column 1 of `line_no`.
  # `source` is accepted for interface stability (reserved for a future
  # fix proposal) and is currently unused.
  def emit_finding(source, line_no)
    msg = "multiple statements on one line — split each `;`-terminated " \
          "statement to its own line for readability"
    finding = FixableFinding.new(
      level: :warning,
      message: msg,
      token: LineAnchor.new(line_no, 1),
      category: :lint,
      fixes: [] # no auto-fix; user has to decide layout
    )
    FixCollector.push(finding)
  end
end