diff --git a/README.md b/README.md index 2df13d00..bdf76c2a 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ DLSlime logo

- Docs | + Docs | Roadmap | Slack | WeChat Group | diff --git a/README_zh.md b/README_zh.md index 7d8bd36c..4794a028 100644 --- a/README_zh.md +++ b/README_zh.md @@ -2,7 +2,7 @@ DLSlime logo

- 文档 | + 文档 | 路线图 | Slack | 微信群 | diff --git a/bench/README.md b/bench/README.md index d4d459c2..8cdd2f6f 100644 --- a/bench/README.md +++ b/bench/README.md @@ -12,8 +12,8 @@ so benchmark commands, hardware notes, and result files can evolve together. | `python/endpoint_io_bench.py` | Endpoint-level I/O benchmark | | `python/endpoint_sendrecv_bench.py` | Endpoint send/recv benchmark | | `python/cache_bench.py` | DLSlimeCache benchmark | -| `python/run_rpc_bench.sh` | SlimeRPC vs Ray benchmark wrapper | -| `python/rpc_bench_*.py` | SlimeRPC and Ray benchmark implementations | +| `python/run_rpc_bench.sh` | SlimeRPC vs Ray (optionally + Pulsing) benchmark wrapper | +| `python/rpc_bench_*.py` | SlimeRPC, Ray, and Pulsing benchmark implementations | | `results/` | CSV outputs and captured worker logs | ## Prerequisites @@ -112,12 +112,21 @@ dlslime-cache stop ## SlimeRPC vs Ray Benchmark The RPC benchmark compares SlimeRPC round-trip latency and bandwidth with a Ray -actor baseline. +actor baseline. A Pulsing (`@pul.remote`) actor baseline is available as an +opt-in third comparator. ```bash bash bench/python/run_rpc_bench.sh ``` +Include the Pulsing baseline (requires `pip install pulsing`): + +```bash +bash bench/python/run_rpc_bench.sh --with-pulsing +# or +WITH_PULSING=1 bash bench/python/run_rpc_bench.sh +``` + With explicit parameters: ```bash @@ -132,6 +141,7 @@ The script writes: ```text bench/results/slime_rpc.csv bench/results/ray_rpc.csv +bench/results/pulsing_rpc.csv # only when --with-pulsing is passed ``` See [../docs/benchmark-rpc.md](../docs/benchmark-rpc.md) for the full RPC diff --git a/bench/python/rpc_bench_compare.py b/bench/python/rpc_bench_compare.py index f9ffdc59..2d404caf 100644 --- a/bench/python/rpc_bench_compare.py +++ b/bench/python/rpc_bench_compare.py @@ -1,8 +1,11 @@ #!/usr/bin/env python3 -"""Print a side-by-side comparison of SlimeRPC vs Ray benchmark CSVs. +"""Print a side-by-side comparison of SlimeRPC, Ray (and optionally Pulsing) benchmark CSVs. Usage: - python rpc_bench_compare.py [--slime results/slime_rpc.csv] [--ray results/ray_rpc.csv] + python rpc_bench_compare.py \\ + [--slime results/slime_rpc.csv] \\ + [--ray results/ray_rpc.csv] \\ + [--pulsing results/pulsing_rpc.csv] # optional """ import argparse @@ -22,10 +25,15 @@ def label(size: int) -> str: def main(): default_dir = os.path.join(os.path.dirname(__file__), "..", "results") parser = argparse.ArgumentParser( - description="Compare SlimeRPC vs Ray benchmark results" + description="Compare SlimeRPC vs Ray (and optionally Pulsing) benchmark results" ) parser.add_argument("--slime", default=os.path.join(default_dir, "slime_rpc.csv")) parser.add_argument("--ray", default=os.path.join(default_dir, "ray_rpc.csv")) + parser.add_argument( + "--pulsing", + default=None, + help="Optional Pulsing CSV; if omitted, Pulsing columns are skipped.", + ) args = parser.parse_args() for path in (args.slime, args.ray): @@ -37,25 +45,43 @@ def main(): slime = load(args.slime) ray_r = load(args.ray) - common = sorted(set(slime) & set(ray_r)) + pulsing = None + if args.pulsing: + if not os.path.exists(args.pulsing): + print(f"Missing Pulsing results file: {args.pulsing}") + print("Run rpc_bench_pulsing.py first, or omit --pulsing.") + return + pulsing = load(args.pulsing) + + common = set(slime) & set(ray_r) + if pulsing is not None: + common &= set(pulsing) + common = sorted(common) if not common: - print("No overlapping sizes between the two result files.") + print("No overlapping sizes between the result files.") return col = 12 - sep = "-" * ( - 10 + 1 + col + 1 + col + 1 + col + 1 + col + 1 + col + 1 + col + 1 + 12 - ) - print( "\n┌─ Avg Latency (µs) ─────────────────────────────────────────────────────────┐" ) - header = ( - f"{'Size':<10} | " - f"{'Slime avg':>{col}} | {'Slime p99':>{col}} | {'Slime BW':>{col}} | " - f"{'Ray avg':>{col}} | {'Ray p99':>{col}} | {'Ray BW':>{col}} | " - f"{'Speedup':>10}" - ) + + if pulsing is not None: + header = ( + f"{'Size':<10} | " + f"{'Slime avg':>{col}} | {'Slime p99':>{col}} | {'Slime BW':>{col}} | " + f"{'Puls avg':>{col}} | {'Puls p99':>{col}} | {'Puls BW':>{col}} | " + f"{'Ray avg':>{col}} | {'Ray p99':>{col}} | {'Ray BW':>{col}} | " + f"{'S/Pul':>10} | {'S/Ray':>10}" + ) + else: + header = ( + f"{'Size':<10} | " + f"{'Slime avg':>{col}} | {'Slime p99':>{col}} | {'Slime BW':>{col}} | " + f"{'Ray avg':>{col}} | {'Ray p99':>{col}} | {'Ray BW':>{col}} | " + f"{'S/Ray':>10}" + ) + sep = "-" * len(header) print(header) print(sep) @@ -66,17 +92,36 @@ def main(): ray_avg = float(ray_r[size]["avg_us"]) ray_p99 = float(ray_r[size]["p99_us"]) ray_bw = float(ray_r[size]["bw_gbps"]) - speedup = ray_avg / sl_avg # >1 means SlimeRPC is faster + sp_ray = ray_avg / sl_avg # >1 means SlimeRPC is faster than Ray + + if pulsing is not None: + pu_avg = float(pulsing[size]["avg_us"]) + pu_p99 = float(pulsing[size]["p99_us"]) + pu_bw = float(pulsing[size]["bw_gbps"]) + sp_pul = pu_avg / sl_avg # >1 means SlimeRPC is faster than Pulsing + print( + f"{label(size):<10} | " + f"{sl_avg:>{col}.1f} | {sl_p99:>{col}.1f} | {sl_bw:>{col}.3f} | " + f"{pu_avg:>{col}.1f} | {pu_p99:>{col}.1f} | {pu_bw:>{col}.3f} | " + f"{ray_avg:>{col}.1f} | {ray_p99:>{col}.1f} | {ray_bw:>{col}.3f} | " + f"{'%.2fx' % sp_pul:>10} | {'%.2fx' % sp_ray:>10}" + ) + else: + print( + f"{label(size):<10} | " + f"{sl_avg:>{col}.1f} | {sl_p99:>{col}.1f} | {sl_bw:>{col}.3f} | " + f"{ray_avg:>{col}.1f} | {ray_p99:>{col}.1f} | {ray_bw:>{col}.3f} | " + f"{'%.2fx' % sp_ray:>10}" + ) + print(sep) + if pulsing is not None: print( - f"{label(size):<10} | " - f"{sl_avg:>{col}.1f} | {sl_p99:>{col}.1f} | {sl_bw:>{col}.3f} | " - f"{ray_avg:>{col}.1f} | {ray_p99:>{col}.1f} | {ray_bw:>{col}.3f} | " - f"{'%.2fx' % speedup:>10}" + "S/Pul = Pulsing avg latency / SlimeRPC avg latency (>1 means SlimeRPC wins)" ) - - print(sep) - print("Speedup = Ray avg latency / SlimeRPC avg latency (>1 means SlimeRPC wins)") + print( + "S/Ray = Ray avg latency / SlimeRPC avg latency (>1 means SlimeRPC wins)" + ) print() diff --git a/bench/python/rpc_bench_pulsing.py b/bench/python/rpc_bench_pulsing.py new file mode 100644 index 00000000..69a66b0d --- /dev/null +++ b/bench/python/rpc_bench_pulsing.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +"""Pulsing RPC benchmark — same-machine CPU bytes echo. + +Measures round-trip latency for bytes echo via a Pulsing actor across a range +of payload sizes, using identical metrics as rpc_bench_slime_driver.py and +rpc_bench_ray.py so the three CSVs can be compared directly. + +Usage: + python rpc_bench_pulsing.py [--out results/pulsing_rpc.csv] +""" + +import argparse +import asyncio +import csv +import os +import time + +import pulsing as pul + + +@pul.remote +class EchoActor: + def echo(self, data: bytes) -> bytes: + return data + + +SIZES = [ + 1 * 1024, + 4 * 1024, + 16 * 1024, + 64 * 1024, + 256 * 1024, + 1 * 1024 * 1024, + 4 * 1024 * 1024, + 16 * 1024 * 1024, + 64 * 1024 * 1024, +] + + +async def _benchmark(actor, data: bytes, warmup: int, iterations: int) -> dict: + for _ in range(warmup): + await actor.echo(data) + + latencies_us = [] + for _ in range(iterations): + t0 = time.perf_counter() + await actor.echo(data) + latencies_us.append((time.perf_counter() - t0) * 1e6) + + latencies_us.sort() + n = len(latencies_us) + avg = sum(latencies_us) / n + return { + "avg_us": avg, + "p50_us": latencies_us[n // 2], + "p99_us": latencies_us[int(n * 0.99)], + "bw_gbps": (2 * len(data)) / 1e9 / (avg / 1e6), + } + + +def _label(size: int) -> str: + return f"{size // 1024}KB" if size < 1024 * 1024 else f"{size // (1024 * 1024)}MB" + + +async def _run(out_path: str): + await pul.init() + try: + actor = await EchoActor.spawn() + + header = ( + f"{'Size':<10} | {'Avg (µs)':<12} | {'P50 (µs)':<12} " + f"| {'P99 (µs)':<12} | {'BW (GB/s)':<10}" + ) + print(header) + print("-" * len(header)) + + records = [] + for size in SIZES: + data = bytes(size) + iters = max(50, min(500, 50_000_000 // size)) + r = await _benchmark(actor, data, warmup=20, iterations=iters) + records.append({"size": size, **r}) + print( + f"{_label(size):<10} | {r['avg_us']:<12.1f} | {r['p50_us']:<12.1f} " + f"| {r['p99_us']:<12.1f} | {r['bw_gbps']:<10.3f}" + ) + + os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True) + with open(out_path, "w", newline="") as f: + writer = csv.DictWriter( + f, fieldnames=["size", "avg_us", "p50_us", "p99_us", "bw_gbps"] + ) + writer.writeheader() + writer.writerows(records) + print(f"\nResults saved → {out_path}") + finally: + await pul.shutdown() + + +def main(): + parser = argparse.ArgumentParser(description="Pulsing RPC benchmark") + parser.add_argument( + "--out", + default=os.path.join( + os.path.dirname(__file__), "..", "results", "pulsing_rpc.csv" + ), + ) + args = parser.parse_args() + asyncio.run(_run(args.out)) + + +if __name__ == "__main__": + main() diff --git a/bench/python/rpc_bench_slime_driver.py b/bench/python/rpc_bench_slime_driver.py index 572b917f..240357f1 100644 --- a/bench/python/rpc_bench_slime_driver.py +++ b/bench/python/rpc_bench_slime_driver.py @@ -118,9 +118,8 @@ def main(): driver._rpc_buffer_size = buf_bytes driver._rpc_max_inflight = max(1, int(getattr(args, "max_inflight", 4))) - driver.set_desired_topology(["bench-worker"]) print("Waiting for worker…") - driver.wait_for_peers(["bench-worker"], timeout_sec=120) + driver.connect_to("bench-worker", ib_port=1, qp_num=1).wait(timeout=120) print("Connected. Starting benchmark.\n") w = make_proxy(driver, "bench-worker", EchoService) diff --git a/bench/python/rpc_bench_slime_worker.py b/bench/python/rpc_bench_slime_worker.py index 7b2efb4c..1221cb05 100644 --- a/bench/python/rpc_bench_slime_worker.py +++ b/bench/python/rpc_bench_slime_worker.py @@ -67,9 +67,8 @@ def main(): # buf_mb * SLIME_RPC_MAX_INFLIGHT bytes of CPU memory needlessly. worker._rpc_max_inflight = max(1, int(getattr(args, "max_inflight", 4))) - worker.set_desired_topology(["bench-driver"]) print("Worker ready, waiting for driver to connect…") - worker.wait_for_peers(["bench-driver"], timeout_sec=120) + worker.connect_to("bench-driver", ib_port=1, qp_num=1).wait(timeout=120) print("Connected. Serving (Ctrl-C to stop).") try: diff --git a/bench/python/run_rpc_bench.sh b/bench/python/run_rpc_bench.sh index 38edb753..c4bef7d3 100644 --- a/bench/python/run_rpc_bench.sh +++ b/bench/python/run_rpc_bench.sh @@ -1,11 +1,12 @@ #!/usr/bin/env bash -# run_rpc_bench.sh — run SlimeRPC + Ray benchmarks and print comparison. +# run_rpc_bench.sh — run SlimeRPC + Ray benchmarks (+ optional Pulsing) and print comparison. # # Usage: -# bash run_rpc_bench.sh [--ctrl http://127.0.0.1:3000] [--buf-mb 256] [--max-size-mb 16] [--scope rpc-bench-...] +# bash run_rpc_bench.sh [--ctrl http://127.0.0.1:3000] [--buf-mb 256] [--max-size-mb 16] \ +# [--scope rpc-bench-...] [--with-pulsing] # # Environment overrides: -# CTRL=http://host:3000 MAX_SIZE_MB=16 bash run_rpc_bench.sh +# CTRL=http://host:3000 MAX_SIZE_MB=16 WITH_PULSING=1 bash run_rpc_bench.sh set -euo pipefail @@ -15,6 +16,7 @@ CTRL="${CTRL:-http://127.0.0.1:3000}" BUF_MB="${BUF_MB:-256}" MAX_SIZE_MB="${MAX_SIZE_MB:-16}" SCOPE="${SCOPE:-rpc-bench-$(date +%s)-$$}" +WITH_PULSING="${WITH_PULSING:-0}" # Parse args while [[ $# -gt 0 ]]; do @@ -23,26 +25,38 @@ while [[ $# -gt 0 ]]; do --buf-mb) BUF_MB="$2"; shift 2 ;; --max-size-mb) MAX_SIZE_MB="$2"; shift 2 ;; --scope) SCOPE="$2"; shift 2 ;; + --with-pulsing) WITH_PULSING=1; shift ;; *) echo "Unknown argument: $1"; exit 1 ;; esac done +if [[ "$WITH_PULSING" == "1" ]]; then + TOTAL=4 +else + TOTAL=3 +fi + mkdir -p "$RESULTS_DIR" WORKER_LOG="$RESULTS_DIR/slime_worker_${SCOPE}.log" echo "╔══════════════════════════════════════════════════╗" -echo "║ SlimeRPC vs Ray — RPC Benchmark ║" +if [[ "$WITH_PULSING" == "1" ]]; then + echo "║ SlimeRPC vs Pulsing vs Ray — RPC Benchmark ║" +else + echo "║ SlimeRPC vs Ray — RPC Benchmark ║" +fi echo "╚══════════════════════════════════════════════════╝" echo " NanoCtrl : $CTRL" echo " Scope : $SCOPE" echo " Buffer : ${BUF_MB} MB" echo " Max Size : ${MAX_SIZE_MB} MB" +echo " Pulsing : $([[ "$WITH_PULSING" == "1" ]] && echo on || echo off)" echo " Results : $RESULTS_DIR" echo " Worker Log: $WORKER_LOG" echo "" -# ── [1/3] SlimeRPC ────────────────────────────────────────────────────────── -echo "▶ [1/3] SlimeRPC — starting worker..." +# ── [1/N] SlimeRPC ────────────────────────────────────────────────────────── +echo "▶ [1/$TOTAL] SlimeRPC — starting worker..." PYTHONUNBUFFERED=1 python "$SCRIPT_DIR/rpc_bench_slime_worker.py" \ --ctrl "$CTRL" --scope "$SCOPE" --buf-mb "$BUF_MB" \ >"$WORKER_LOG" 2>&1 & @@ -51,7 +65,7 @@ WORKER_PID=$! # Give the worker time to register with NanoCtrl sleep 2 -echo "▶ [1/3] SlimeRPC — running driver..." +echo "▶ [1/$TOTAL] SlimeRPC — running driver..." if ! python "$SCRIPT_DIR/rpc_bench_slime_driver.py" \ --ctrl "$CTRL" --scope "$SCOPE" --buf-mb "$BUF_MB" \ --max-size-mb "$MAX_SIZE_MB" \ @@ -70,13 +84,26 @@ kill "$WORKER_PID" 2>/dev/null || true wait "$WORKER_PID" 2>/dev/null || true echo "" -# ── [2/3] Ray ─────────────────────────────────────────────────────────────── -echo "▶ [2/3] Ray — running benchmark..." +STAGE=2 + +# ── [STAGE/N] Pulsing (optional) ──────────────────────────────────────────── +if [[ "$WITH_PULSING" == "1" ]]; then + echo "▶ [$STAGE/$TOTAL] Pulsing — running benchmark..." + python "$SCRIPT_DIR/rpc_bench_pulsing.py" --out "$RESULTS_DIR/pulsing_rpc.csv" + echo "" + STAGE=$((STAGE + 1)) +fi + +# ── [STAGE/N] Ray ─────────────────────────────────────────────────────────── +echo "▶ [$STAGE/$TOTAL] Ray — running benchmark..." python "$SCRIPT_DIR/rpc_bench_ray.py" --out "$RESULTS_DIR/ray_rpc.csv" echo "" +STAGE=$((STAGE + 1)) -# ── [3/3] Compare ─────────────────────────────────────────────────────────── -echo "▶ [3/3] Comparison" -python "$SCRIPT_DIR/rpc_bench_compare.py" \ - --slime "$RESULTS_DIR/slime_rpc.csv" \ - --ray "$RESULTS_DIR/ray_rpc.csv" +# ── [STAGE/N] Compare ─────────────────────────────────────────────────────── +echo "▶ [$STAGE/$TOTAL] Comparison" +COMPARE_ARGS=(--slime "$RESULTS_DIR/slime_rpc.csv" --ray "$RESULTS_DIR/ray_rpc.csv") +if [[ "$WITH_PULSING" == "1" ]]; then + COMPARE_ARGS+=(--pulsing "$RESULTS_DIR/pulsing_rpc.csv") +fi +python "$SCRIPT_DIR/rpc_bench_compare.py" "${COMPARE_ARGS[@]}" diff --git a/bench/results/AA.csv b/bench/results/AA.csv deleted file mode 100755 index cd0ed335..00000000 --- a/bench/results/AA.csv +++ /dev/null @@ -1,18 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"2,048","32,768",0,4,0.02,196.02 -"4,096","65,536",0,4,0.02,371.41 -"8,192","131,072",0,4,0.02,742.45 -"16,384","262,144",0,4,0.02,1481.07 -"32,768","524,288",0,4,0.02,2959.75 -"65,536","1,048,576",0,4,0.02,5888.44 -"131,072","2,097,152",0,4,0.02,11611.46 -"262,144","4,194,304",0,4,0.03,22872.46 -"524,288","8,388,608",0,4,0.08,39340.64 -"1,048,576","16,777,216",0,4,0.18,43292.38 -"2,097,152","33,554,432",0,4,0.40,45970.34 -"4,194,304","67,108,864",0,4,0.83,47447.48 -"8,388,608","134,217,728",0,4,1.69,48214.60 -"16,777,216","268,435,456",0,4,3.39,48580.47 -"33,554,432","536,870,912",0,4,6.82,48800.45 -"67,108,864","1,073,741,824",0,4,13.66,48911.09 -"134,217,728","2,147,483,648",0,4,27.36,48967.45 diff --git a/bench/results/AB_affi.csv b/bench/results/AB_affi.csv deleted file mode 100755 index c8194cb1..00000000 --- a/bench/results/AB_affi.csv +++ /dev/null @@ -1,34 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,0,62.82,21321.65 -"134,217,728","2,147,483,648",0,1,58.65,22827.92 -"134,217,728","2,147,483,648",0,2,58.65,22822.99 -"134,217,728","2,147,483,648",0,3,62.84,21314.43 -"134,217,728","2,147,483,648",0,4,46.71,28683.17 -"134,217,728","2,147,483,648",0,5,46.72,28684.64 -"134,217,728","2,147,483,648",0,6,46.71,28681.58 -"134,217,728","2,147,483,648",0,7,46.71,28682.90 -"134,217,728","2,147,483,648",0,8,62.86,21319.35 -"134,217,728","2,147,483,648",0,9,58.63,22824.66 -"134,217,728","2,147,483,648",0,10,58.69,22827.80 -"134,217,728","2,147,483,648",0,11,62.86,21319.49 -"134,217,728","2,147,483,648",0,12,58.72,22824.56 -"134,217,728","2,147,483,648",0,13,62.85,21319.14 -"134,217,728","2,147,483,648",0,14,62.86,21318.71 -"134,217,728","2,147,483,648",0,15,58.68,22825.61 -"2,048","32,768",0,6,0.02,216.70 -"4,096","65,536",0,6,0.02,431.07 -"8,192","131,072",0,6,0.02,851.31 -"16,384","262,144",0,6,0.02,1711.90 -"32,768","524,288",0,6,0.02,3445.05 -"65,536","1,048,576",0,6,0.02,6821.34 -"131,072","2,097,152",0,6,0.03,13543.92 -"262,144","4,194,304",0,6,0.05,22257.61 -"524,288","8,388,608",0,6,0.15,24878.99 -"1,048,576","16,777,216",0,6,0.33,26412.48 -"2,097,152","33,554,432",0,6,0.70,27746.73 -"4,194,304","67,108,864",0,6,1.43,28163.56 -"8,388,608","134,217,728",0,6,2.88,28398.95 -"16,777,216","268,435,456",0,6,5.79,28540.74 -"33,554,432","536,870,912",0,6,11.65,28630.73 -"67,108,864","1,073,741,824",0,6,23.33,28667.31 -"134,217,728","2,147,483,648",0,6,46.71,28686.29 diff --git a/bench/results/AC_affi.csv b/bench/results/AC_affi.csv deleted file mode 100755 index 46111e5a..00000000 --- a/bench/results/AC_affi.csv +++ /dev/null @@ -1,26 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,0,54.69,24489.72 -"134,217,728","2,147,483,648",1,0,54.73,24484.69 -"134,217,728","2,147,483,648",2,0,54.68,24506.96 -"134,217,728","2,147,483,648",3,0,54.69,24535.05 -"134,217,728","2,147,483,648",4,0,54.73,24501.22 -"134,217,728","2,147,483,648",5,0,54.69,24516.08 -"134,217,728","2,147,483,648",6,0,54.69,24503.68 -"134,217,728","2,147,483,648",7,0,54.69,24486.84 -"2,048","32,768",0,0,0.03,129.08 -"4,096","65,536",0,0,0.03,252.46 -"8,192","131,072",0,0,0.03,773.15 -"16,384","262,144",0,0,0.03,965.87 -"32,768","524,288",0,0,0.03,1470.05 -"65,536","1,048,576",0,0,0.03,4720.31 -"131,072","2,097,152",0,0,0.04,5701.97 -"262,144","4,194,304",0,0,0.04,15874.61 -"524,288","8,388,608",0,0,0.08,19473.47 -"1,048,576","16,777,216",0,0,0.33,21516.93 -"2,097,152","33,554,432",0,0,0.75,26086.32 -"4,194,304","67,108,864",0,0,1.60,24111.70 -"8,388,608","134,217,728",0,0,3.35,24330.94 -"16,777,216","268,435,456",0,0,6.75,24500.82 -"33,554,432","536,870,912",0,0,13.61,24450.78 -"67,108,864","1,073,741,824",0,0,27.29,24451.09 -"134,217,728","2,147,483,648",0,0,54.69,24506.82 diff --git a/bench/results/A_D_affi.csv b/bench/results/A_D_affi.csv deleted file mode 100755 index 12f20b2e..00000000 --- a/bench/results/A_D_affi.csv +++ /dev/null @@ -1,17 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,7,48.68,27521.27 -"134,217,728","2,147,483,648",1,7,48.68,27526.97 -"134,217,728","2,147,483,648",2,7,48.69,27524.65 -"134,217,728","2,147,483,648",3,7,48.69,27528.49 -"134,217,728","2,147,483,648",4,7,48.69,27526.21 -"134,217,728","2,147,483,648",5,7,48.69,27528.31 -"134,217,728","2,147,483,648",6,7,48.69,27528.41 -"134,217,728","2,147,483,648",7,7,48.69,27525.33 -"134,217,728","2,147,483,648",0,0,48.42,27670.15 -"134,217,728","2,147,483,648",0,1,48.46,27672.29 -"134,217,728","2,147,483,648",0,2,48.44,27672.69 -"134,217,728","2,147,483,648",0,3,48.43,27671.53 -"134,217,728","2,147,483,648",0,4,48.42,27674.43 -"134,217,728","2,147,483,648",0,5,48.42,27676.57 -"134,217,728","2,147,483,648",0,6,48.69,27526.57 -"134,217,728","2,147,483,648",0,7,48.69,27526.66 diff --git a/bench/results/BA_affi.csv b/bench/results/BA_affi.csv deleted file mode 100755 index 3ebb4a82..00000000 --- a/bench/results/BA_affi.csv +++ /dev/null @@ -1,34 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,0,62.90,21323.17 -"134,217,728","2,147,483,648",1,0,57.21,23433.06 -"134,217,728","2,147,483,648",2,0,57.22,23431.92 -"134,217,728","2,147,483,648",3,0,62.89,21321.35 -"134,217,728","2,147,483,648",4,0,57.22,23430.36 -"134,217,728","2,147,483,648",5,0,62.90,21321.60 -"134,217,728","2,147,483,648",6,0,62.88,21320.40 -"134,217,728","2,147,483,648",7,0,57.22,23432.35 -"134,217,728","2,147,483,648",8,0,62.89,21321.22 -"134,217,728","2,147,483,648",9,0,57.22,23433.89 -"134,217,728","2,147,483,648",10,0,57.22,23428.90 -"134,217,728","2,147,483,648",11,0,62.89,21318.73 -"134,217,728","2,147,483,648",12,0,46.36,28914.65 -"134,217,728","2,147,483,648",13,0,46.36,28911.38 -"134,217,728","2,147,483,648",14,0,46.36,28912.41 -"134,217,728","2,147,483,648",15,0,46.37,28912.13 -"2,048","32,768",15,0,0.02,259.02 -"4,096","65,536",15,0,0.02,500.66 -"8,192","131,072",15,0,0.02,1022.53 -"16,384","262,144",15,0,0.02,2034.45 -"32,768","524,288",15,0,0.02,4021.68 -"65,536","1,048,576",15,0,0.03,7992.82 -"131,072","2,097,152",15,0,0.03,15932.78 -"262,144","4,194,304",15,0,0.06,22640.78 -"524,288","8,388,608",15,0,0.16,25413.76 -"1,048,576","16,777,216",15,0,0.35,26884.69 -"2,097,152","33,554,432",15,0,0.70,27900.60 -"4,194,304","67,108,864",15,0,1.43,28394.32 -"8,388,608","134,217,728",15,0,2.88,28661.44 -"16,777,216","268,435,456",15,0,5.78,28802.17 -"33,554,432","536,870,912",15,0,11.58,28861.69 -"67,108,864","1,073,741,824",15,0,23.17,28898.01 -"134,217,728","2,147,483,648",15,0,46.36,28915.72 diff --git a/bench/results/BB.csv b/bench/results/BB.csv deleted file mode 100755 index f41f3cd2..00000000 --- a/bench/results/BB.csv +++ /dev/null @@ -1,18 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"2,048","32,768",15,7,0.02,186.22 -"4,096","65,536",15,7,0.02,380.64 -"8,192","131,072",15,7,0.02,759.87 -"16,384","262,144",15,7,0.02,1505.36 -"32,768","524,288",15,7,0.02,3009.92 -"65,536","1,048,576",15,7,0.03,5991.92 -"131,072","2,097,152",15,7,0.03,11780.19 -"262,144","4,194,304",15,7,0.05,20974.75 -"524,288","8,388,608",15,7,0.15,24501.33 -"1,048,576","16,777,216",15,7,0.34,26502.06 -"2,097,152","33,554,432",15,7,0.70,27566.30 -"4,194,304","67,108,864",15,7,1.42,28142.58 -"8,388,608","134,217,728",15,7,2.90,28393.64 -"16,777,216","268,435,456",15,7,5.81,28544.30 -"33,554,432","536,870,912",15,7,11.66,28617.29 -"67,108,864","1,073,741,824",15,7,23.44,28410.72 -"134,217,728","2,147,483,648",15,7,47.05,28275.85 diff --git a/bench/results/BC_affi.csv b/bench/results/BC_affi.csv deleted file mode 100755 index a5d953fa..00000000 --- a/bench/results/BC_affi.csv +++ /dev/null @@ -1,34 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,0,62.81,21335.93 -"134,217,728","2,147,483,648",1,0,57.14,23453.94 -"134,217,728","2,147,483,648",2,0,57.13,23458.82 -"134,217,728","2,147,483,648",3,0,62.81,21330.01 -"134,217,728","2,147,483,648",4,0,57.11,23445.78 -"134,217,728","2,147,483,648",5,0,62.82,21336.52 -"134,217,728","2,147,483,648",6,0,62.81,21357.24 -"134,217,728","2,147,483,648",7,0,57.14,23446.67 -"134,217,728","2,147,483,648",8,0,62.81,21342.34 -"134,217,728","2,147,483,648",9,0,57.13,23437.67 -"134,217,728","2,147,483,648",10,0,57.14,23456.65 -"134,217,728","2,147,483,648",11,0,62.81,21339.10 -"134,217,728","2,147,483,648",12,0,66.42,19877.77 -"134,217,728","2,147,483,648",13,0,65.93,19834.39 -"134,217,728","2,147,483,648",14,0,67.10,19882.69 -"134,217,728","2,147,483,648",15,0,72.07,19805.54 -"2,048","32,768",2,0,0.03,191.65 -"4,096","65,536",2,0,0.03,287.50 -"8,192","131,072",2,0,0.03,767.89 -"16,384","262,144",2,0,0.03,799.03 -"32,768","524,288",2,0,0.04,2549.87 -"65,536","1,048,576",2,0,0.04,3530.37 -"131,072","2,097,152",2,0,0.04,6276.05 -"262,144","4,194,304",2,0,0.05,20870.75 -"524,288","8,388,608",2,0,0.07,20225.39 -"1,048,576","16,777,216",2,0,0.35,21390.89 -"2,097,152","33,554,432",2,0,0.80,23297.66 -"4,194,304","67,108,864",2,0,1.69,24324.57 -"8,388,608","134,217,728",2,0,3.48,23378.32 -"16,777,216","268,435,456",2,0,7.05,23628.05 -"33,554,432","536,870,912",2,0,14.21,23419.31 -"67,108,864","1,073,741,824",2,0,28.52,23474.30 -"134,217,728","2,147,483,648",2,0,57.14,23472.29 diff --git a/bench/results/B_D_affi.csv b/bench/results/B_D_affi.csv deleted file mode 100755 index 0f7854a8..00000000 --- a/bench/results/B_D_affi.csv +++ /dev/null @@ -1,25 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,7,62.86,19963.24 -"134,217,728","2,147,483,648",1,7,57.20,23426.24 -"134,217,728","2,147,483,648",2,7,57.20,23426.84 -"134,217,728","2,147,483,648",3,7,62.87,21318.32 -"134,217,728","2,147,483,648",4,7,57.19,23428.55 -"134,217,728","2,147,483,648",5,7,62.87,19989.97 -"134,217,728","2,147,483,648",6,7,62.87,21318.29 -"134,217,728","2,147,483,648",7,7,57.21,23428.74 -"134,217,728","2,147,483,648",8,7,62.88,21317.80 -"134,217,728","2,147,483,648",9,7,57.19,23429.03 -"134,217,728","2,147,483,648",10,7,57.19,23430.83 -"134,217,728","2,147,483,648",11,7,62.88,21317.28 -"134,217,728","2,147,483,648",12,7,49.53,26927.22 -"134,217,728","2,147,483,648",13,7,49.57,26933.33 -"134,217,728","2,147,483,648",14,7,49.55,26923.98 -"134,217,728","2,147,483,648",15,7,49.59,26948.25 -"134,217,728","2,147,483,648",15,0,48.87,27234.60 -"134,217,728","2,147,483,648",15,1,49.17,27216.30 -"134,217,728","2,147,483,648",15,2,49.19,27212.34 -"134,217,728","2,147,483,648",15,3,49.84,27207.04 -"134,217,728","2,147,483,648",15,4,49.23,27179.73 -"134,217,728","2,147,483,648",15,5,48.67,27219.37 -"134,217,728","2,147,483,648",15,6,49.62,26942.26 -"134,217,728","2,147,483,648",15,7,49.54,26926.76 diff --git a/bench/results/CA_affi.csv b/bench/results/CA_affi.csv deleted file mode 100755 index 98b918a0..00000000 --- a/bench/results/CA_affi.csv +++ /dev/null @@ -1,26 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,0,54.74,24498.34 -"134,217,728","2,147,483,648",0,1,54.74,24495.31 -"134,217,728","2,147,483,648",0,2,54.75,24497.10 -"134,217,728","2,147,483,648",0,3,54.74,24481.20 -"134,217,728","2,147,483,648",0,4,54.73,24495.34 -"134,217,728","2,147,483,648",0,5,54.74,24495.64 -"134,217,728","2,147,483,648",0,6,54.74,24495.90 -"134,217,728","2,147,483,648",0,7,54.74,24485.14 -"2,048","32,768",0,0,0.02,275.63 -"4,096","65,536",0,0,0.02,528.19 -"8,192","131,072",0,0,0.02,1051.23 -"16,384","262,144",0,0,0.02,2023.28 -"32,768","524,288",0,0,0.02,3890.43 -"65,536","1,048,576",0,0,0.03,8124.08 -"131,072","2,097,152",0,0,0.03,16216.00 -"262,144","4,194,304",0,0,0.07,20419.25 -"524,288","8,388,608",0,0,0.19,22044.91 -"1,048,576","16,777,216",0,0,0.40,23237.08 -"2,097,152","33,554,432",0,0,0.82,23900.71 -"4,194,304","67,108,864",0,0,1.69,24163.36 -"8,388,608","134,217,728",0,0,3.40,24344.22 -"16,777,216","268,435,456",0,0,6.82,24408.30 -"33,554,432","536,870,912",0,0,13.67,24456.58 -"67,108,864","1,073,741,824",0,0,27.36,24485.03 -"134,217,728","2,147,483,648",0,0,54.73,24496.14 diff --git a/bench/results/CB_affi.csv b/bench/results/CB_affi.csv deleted file mode 100755 index 30eb3c02..00000000 --- a/bench/results/CB_affi.csv +++ /dev/null @@ -1,34 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,0,63.32,21115.32 -"134,217,728","2,147,483,648",0,1,58.73,22616.25 -"134,217,728","2,147,483,648",0,2,58.75,22549.50 -"134,217,728","2,147,483,648",0,3,63.30,21199.11 -"134,217,728","2,147,483,648",0,4,54.75,24497.23 -"134,217,728","2,147,483,648",0,5,54.74,24495.94 -"134,217,728","2,147,483,648",0,6,54.75,24496.62 -"134,217,728","2,147,483,648",0,7,54.74,24496.53 -"134,217,728","2,147,483,648",0,8,63.32,21189.85 -"134,217,728","2,147,483,648",0,9,58.74,22655.58 -"134,217,728","2,147,483,648",0,10,58.74,22716.60 -"134,217,728","2,147,483,648",0,11,63.31,21197.24 -"134,217,728","2,147,483,648",0,12,58.75,22597.88 -"134,217,728","2,147,483,648",0,13,63.28,21195.90 -"134,217,728","2,147,483,648",0,14,63.26,21188.03 -"134,217,728","2,147,483,648",0,15,58.86,22711.31 -"2,048","32,768",0,4,0.02,238.61 -"4,096","65,536",0,4,0.02,485.34 -"8,192","131,072",0,4,0.02,941.53 -"16,384","262,144",0,4,0.02,1922.75 -"32,768","524,288",0,4,0.02,3756.02 -"65,536","1,048,576",0,4,0.03,7506.86 -"131,072","2,097,152",0,4,0.03,14645.73 -"262,144","4,194,304",0,4,0.08,19733.07 -"524,288","8,388,608",0,4,0.19,22014.06 -"1,048,576","16,777,216",0,4,0.40,23040.64 -"2,097,152","33,554,432",0,4,0.83,23844.16 -"4,194,304","67,108,864",0,4,1.69,24155.24 -"8,388,608","134,217,728",0,4,3.40,24193.92 -"16,777,216","268,435,456",0,4,6.83,24418.50 -"33,554,432","536,870,912",0,4,13.67,24463.27 -"67,108,864","1,073,741,824",0,4,27.36,24484.79 -"134,217,728","2,147,483,648",0,4,54.75,24496.51 diff --git a/bench/results/CC.csv b/bench/results/CC.csv deleted file mode 100755 index 5b405781..00000000 --- a/bench/results/CC.csv +++ /dev/null @@ -1,19 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,2,54.66,24498.47 -"2,048","32,768",0,2,0.03,144.72 -"4,096","65,536",0,2,0.03,196.35 -"8,192","131,072",0,2,0.03,1028.93 -"16,384","262,144",0,2,0.03,765.88 -"32,768","524,288",0,2,0.03,2497.77 -"65,536","1,048,576",0,2,0.03,3782.66 -"131,072","2,097,152",0,2,0.04,11154.85 -"262,144","4,194,304",0,2,0.04,20311.35 -"524,288","8,388,608",0,2,0.10,31235.86 -"1,048,576","16,777,216",0,2,0.32,24258.45 -"2,097,152","33,554,432",0,2,0.75,25957.08 -"4,194,304","67,108,864",0,2,1.60,24383.17 -"8,388,608","134,217,728",0,2,3.31,24805.42 -"16,777,216","268,435,456",0,2,6.74,24379.71 -"33,554,432","536,870,912",0,2,13.59,24558.78 -"67,108,864","1,073,741,824",0,2,27.25,24558.24 -"134,217,728","2,147,483,648",0,2,54.65,24513.57 diff --git a/bench/results/C_D_affi.csv b/bench/results/C_D_affi.csv deleted file mode 100755 index d1f4278f..00000000 --- a/bench/results/C_D_affi.csv +++ /dev/null @@ -1,17 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",0,7,54.72,24491.40 -"134,217,728","2,147,483,648",1,7,54.72,24490.74 -"134,217,728","2,147,483,648",2,7,54.72,24491.14 -"134,217,728","2,147,483,648",3,7,54.72,24493.89 -"134,217,728","2,147,483,648",4,7,54.71,24490.40 -"134,217,728","2,147,483,648",5,7,54.73,24493.04 -"134,217,728","2,147,483,648",6,7,54.72,24492.51 -"134,217,728","2,147,483,648",7,7,54.71,24494.84 -"134,217,728","2,147,483,648",8,7,56.93,23539.90 -"134,217,728","2,147,483,648",9,7,56.93,23538.21 -"134,217,728","2,147,483,648",10,7,56.93,23541.97 -"134,217,728","2,147,483,648",11,7,56.94,21836.20 -"134,217,728","2,147,483,648",12,7,56.94,23538.32 -"134,217,728","2,147,483,648",13,7,56.92,23547.92 -"134,217,728","2,147,483,648",14,7,56.91,21913.73 -"134,217,728","2,147,483,648",15,7,56.91,23549.99 diff --git a/bench/results/D_A_affi.csv b/bench/results/D_A_affi.csv deleted file mode 100755 index ff5439d2..00000000 --- a/bench/results/D_A_affi.csv +++ /dev/null @@ -1,9 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",4,0,45.74,29314.02 -"134,217,728","2,147,483,648",4,1,45.69,29315.77 -"134,217,728","2,147,483,648",4,2,45.76,29309.79 -"134,217,728","2,147,483,648",4,3,45.73,29315.84 -"134,217,728","2,147,483,648",4,4,45.74,29317.55 -"134,217,728","2,147,483,648",4,5,45.75,29309.67 -"134,217,728","2,147,483,648",4,6,45.74,29307.28 -"134,217,728","2,147,483,648",4,7,45.73,29317.66 diff --git a/bench/results/D_B_affi.csv b/bench/results/D_B_affi.csv deleted file mode 100755 index 7227137f..00000000 --- a/bench/results/D_B_affi.csv +++ /dev/null @@ -1,17 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",4,0,62.9,21301.88 -"134,217,728","2,147,483,648",4,1,58.74,22806.47 -"134,217,728","2,147,483,648",4,2,58.74,22771.85 -"134,217,728","2,147,483,648",4,3,62.93,21304.81 -"134,217,728","2,147,483,648",4,4,46.74,28680.66 -"134,217,728","2,147,483,648",4,5,46.73,28683.25 -"134,217,728","2,147,483,648",4,6,46.73,28681.84 -"134,217,728","2,147,483,648",4,7,46.74,28681.1 -"134,217,728","2,147,483,648",4,8,62.9,21298.85 -"134,217,728","2,147,483,648",4,9,58.75,22612.52 -"134,217,728","2,147,483,648",4,10,58.74,22765.7 -"134,217,728","2,147,483,648",4,11,62.9,21304.81 -"134,217,728","2,147,483,648",4,12,58.74,22806.75 -"134,217,728","2,147,483,648",4,13,62.9,21284.54 -"134,217,728","2,147,483,648",4,14,62.9,21294.32 -"134,217,728","2,147,483,648",4,15,58.74,22770.65 diff --git a/bench/results/D_C_affi.csv b/bench/results/D_C_affi.csv deleted file mode 100755 index c1868456..00000000 --- a/bench/results/D_C_affi.csv +++ /dev/null @@ -1,17 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",4,0,54.68,24490.73 -"134,217,728","2,147,483,648",4,1,54.69,24506.81 -"134,217,728","2,147,483,648",4,2,54.68,24468.7 -"134,217,728","2,147,483,648",4,3,54.68,24514.4 -"134,217,728","2,147,483,648",4,4,54.68,24515.3 -"134,217,728","2,147,483,648",4,5,54.69,24467.47 -"134,217,728","2,147,483,648",4,6,54.69,24482.49 -"134,217,728","2,147,483,648",4,7,54.68,24494.96 -"134,217,728","2,147,483,648",4,8,105.99,12649.06 -"134,217,728","2,147,483,648",4,9,105.85,12664.4 -"134,217,728","2,147,483,648",4,10,106,12645.67 -"134,217,728","2,147,483,648",4,11,106.21,12609.33 -"134,217,728","2,147,483,648",4,12,105.84,12666.78 -"134,217,728","2,147,483,648",4,13,105.85,12667.23 -"134,217,728","2,147,483,648",4,14,106.01,12646.66 -"134,217,728","2,147,483,648",4,15,0.51,12245.73 diff --git a/bench/results/D_D_affi.csv b/bench/results/D_D_affi.csv deleted file mode 100755 index 6b8f3fff..00000000 --- a/bench/results/D_D_affi.csv +++ /dev/null @@ -1,18 +0,0 @@ -Message Size (bytes),Total Transport (bytes),Target Affinity,Initiator Affinity,Avg Latency(ms),Bandwidth(MB/s) -"134,217,728","2,147,483,648",7,7,58.78,22790.95 -"134,217,728","2,147,483,648",0,7,49.9,26835.56 -"134,217,728","2,147,483,648",1,7,50.04,26783.6 -"134,217,728","2,147,483,648",2,7,51.19,26164.79 -"134,217,728","2,147,483,648",3,7,51.29,26112.61 -"134,217,728","2,147,483,648",4,7,48.77,27412.99 -"134,217,728","2,147,483,648",5,7,48.69,27491.33 -"134,217,728","2,147,483,648",6,7,57.2,23437.03 -"134,217,728","2,147,483,648",7,7,58.25,23011.38 -"134,217,728","2,147,483,648",0,7,49.93,26821.45 -"134,217,728","2,147,483,648",1,7,50.05,26779.6 -"134,217,728","2,147,483,648",2,7,51.16,26193.63 -"134,217,728","2,147,483,648",3,7,51.36,26111.74 -"134,217,728","2,147,483,648",4,7,48.75,27416 -"134,217,728","2,147,483,648",5,7,48.69,27491.31 -"134,217,728","2,147,483,648",6,7,57.22,21740.47 -"134,217,728","2,147,483,648",7,7,58.26,23000.13 diff --git a/bench/results/ray_rpc.csv b/bench/results/ray_rpc.csv deleted file mode 100644 index c1e5f31a..00000000 --- a/bench/results/ray_rpc.csv +++ /dev/null @@ -1,10 +0,0 @@ -size,avg_us,p50_us,p99_us,bw_gbps -1024,321.0414529312402,313.80902510136366,488.6779934167862,0.006379238510481808 -4096,348.55153784155846,339.17196560651064,511.0709462314844,0.023502980508219272 -16384,381.16832822561264,374.3050619959831,549.4569195434451,0.08596726845732235 -65536,577.4983225855976,552.2669525817037,1670.3959554433823,0.22696516141061573 -262144,1027.6467196251217,958.6790110915899,1614.8729482665658,0.510183110584206 -1048576,1731.2549171037972,1366.1759439855814,2533.0260396003723,1.2113478952644994 -4194304,3272.672554012388,2896.21006231755,6399.256060831249,2.563228633954024 -16777216,8063.031900674105,6645.97493596375,13586.94804366678,4.161515471270144 -67108864,73681.16008583456,72670.11108342558,99878.37704829872,1.8216017207606887 diff --git a/bench/results/slime_rpc.csv b/bench/results/slime_rpc.csv deleted file mode 100644 index 32b71a9c..00000000 --- a/bench/results/slime_rpc.csv +++ /dev/null @@ -1,9 +0,0 @@ -size,avg_us,p50_us,p99_us,bw_gbps -1024,52.21534846350551,51.33496597409248,71.66399154812098,0.0392221839030988 -4096,55.94716197811067,54.75303623825312,66.44800305366516,0.1464238704941838 -16384,67.23385164514184,66.52099546045065,77.82597094774246,0.48737353577403947 -65536,98.09479210525751,98.93800597637892,112.82600462436676,1.3361769487146407 -262144,141.2927381400215,141.50398783385754,154.55996617674828,3.710650716390174 -1048576,399.80457397177815,418.485957197845,435.4199627414346,5.245442740102408 -4194304,1104.8975097946823,1083.2069674506783,1260.1850321516395,7.592204639468157 -16777216,4280.842423904687,4272.751975804567,4447.276005521417,7.838277768092659 diff --git a/dlslime/peer_agent/_mailbox.py b/dlslime/peer_agent/_mailbox.py index 4797c876..63caa9d1 100644 --- a/dlslime/peer_agent/_mailbox.py +++ b/dlslime/peer_agent/_mailbox.py @@ -56,13 +56,17 @@ def __init__(self, agent: "PeerAgent", stream_block_ms: int = 100): def start(self) -> None: """Start the stream listener thread.""" - # Delete any stale stream messages from a previous run before listening. - prefix = ( - f"{self._agent._redis_key_prefix}:" if self._agent._redis_key_prefix else "" - ) - stream_key = f"{prefix}stream:{self._agent.alias}" - self._agent._redis_client.delete(stream_key) - + # Do NOT delete the stream here. A peer whose connect_to runs before + # this agent registers will XADD qp_ready onto stream: + # before our listener exists; deleting here wipes that message. The + # peer marks _notified_peers after its first XADD and will not re-send, + # so losing the message stalls the handshake forever. + # + # Instead, _listen_loop reads from "0-0" and processes every message + # on the stream. Truly stale messages from a prior crashed session + # with the same alias fail endpoint.connect at handshake time — the + # listen loop catches that and continues, so later in-flight qp_ready + # from the current peer session still completes the handshake. self._thread = threading.Thread(target=self._listen_loop, daemon=True) self._thread.start() logger.info( @@ -85,7 +89,11 @@ def _listen_loop(self) -> None: ) stream_key = f"{prefix}stream:{self._agent.alias}" - # Start from the beginning of the (now-freshly-cleared) stream. + # Start from the beginning of the stream. We intentionally do not + # delete the stream on start — see the note in start() — so there may + # be messages from before our listener thread was running (including + # in-flight qp_ready from a peer whose connect_to fired before our + # registration). Processing from "0-0" picks those up. last_id = "0-0" logger.info( diff --git a/docs/benchmark-rpc.md b/docs/benchmark-rpc.md index a5b4253e..9e62aa94 100644 --- a/docs/benchmark-rpc.md +++ b/docs/benchmark-rpc.md @@ -1,18 +1,20 @@ # SlimeRPC Benchmark This document describes the Python RPC microbenchmark added for comparing -SlimeRPC against Ray on a single machine. +SlimeRPC against Ray (and optionally Pulsing) on a single machine. ## What It Measures The benchmark measures round-trip latency and effective bandwidth for a raw bytes echo RPC across payload sizes from `1KB` up to `16MB` by default. -It runs two implementations: +It runs two implementations by default, with a third opt-in baseline: - `SlimeRPC`: RDMA-backed `PeerAgent` RPC echo between `bench-driver` and `bench-worker` - `Ray`: a local `EchoActor` baseline using the same payload sizes and metrics +- `Pulsing` (optional, off by default): a `@pul.remote` actor echo using the + same payload sizes and metrics. Enable with `--with-pulsing`. The comparison script prints: @@ -20,7 +22,9 @@ The comparison script prints: - p50 latency - p99 latency - effective round-trip bandwidth -- Ray/Slime speedup +- `S/Ray` = Ray avg latency / SlimeRPC avg latency (> 1 means SlimeRPC wins) +- `S/Pul` = Pulsing avg latency / SlimeRPC avg latency (only shown when + Pulsing was enabled) ## Files @@ -28,6 +32,7 @@ The comparison script prints: - `bench/python/rpc_bench_slime_worker.py` - `bench/python/rpc_bench_slime_driver.py` - `bench/python/rpc_bench_ray.py` +- `bench/python/rpc_bench_pulsing.py` - `bench/python/rpc_bench_compare.py` ## Prerequisites @@ -38,14 +43,25 @@ Before running the SlimeRPC side: 2. Make sure Redis is reachable through NanoCtrl. 3. Build and install DLSlime with Python bindings and RDMA support. +For the optional Pulsing baseline, also install `pulsing` +(`pip install pulsing`) in the same environment. + ## Run -Default run: +Default run (SlimeRPC + Ray, Pulsing disabled): ```bash bash bench/python/run_rpc_bench.sh ``` +Include the Pulsing baseline: + +```bash +bash bench/python/run_rpc_bench.sh --with-pulsing +# or +WITH_PULSING=1 bash bench/python/run_rpc_bench.sh +``` + Specify control-plane address or buffer size: ```bash @@ -64,12 +80,17 @@ CTRL=http://127.0.0.1:3000 BUF_MB=256 MAX_SIZE_MB=16 \ ## Output -The script writes: +The script always writes: - `bench/results/slime_rpc.csv` - `bench/results/ray_rpc.csv` -and then prints a merged comparison table. +and, when `--with-pulsing` is passed, additionally writes: + +- `bench/results/pulsing_rpc.csv` + +It then prints a merged comparison table. The `S/Pul` column only appears in +the table when the Pulsing CSV is present. ## Stability Notes diff --git a/docs/imgs/assets/docs.png b/docs/imgs/assets/docs.png new file mode 100644 index 00000000..245d3446 Binary files /dev/null and b/docs/imgs/assets/docs.png differ diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 5b29070f..a3f4a441 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -84,6 +84,7 @@ plugins: Overview: 总览 Endpoint API: Endpoint API PeerAgent API: PeerAgent API + SlimeRPC: SlimeRPC Deployment: 部署 DLSlimeCache Service: DLSlimeCache 服务 SlimeRPC Benchmark: SlimeRPC 基准测试 @@ -118,6 +119,7 @@ nav: - Overview: guide/index.md - Endpoint API: guide/endpoint-api.md - PeerAgent API: guide/peeragent-api.md + - SlimeRPC: guide/slimerpc.md - Deployment: guide/deployment.md - DLSlimeCache Service: guide/dlslime-cache.md - SlimeRPC Benchmark: guide/benchmark-rpc.md diff --git a/docs/src/api_reference.md b/docs/src/api_reference.md index 5483803b..6866f5bc 100644 --- a/docs/src/api_reference.md +++ b/docs/src/api_reference.md @@ -32,6 +32,7 @@ from dlslime.logging import get_logger, set_log_level - [Endpoint API](guide/endpoint-api.md) - [PeerAgent API](guide/peeragent-api.md) +- [SlimeRPC](guide/slimerpc.md) - [DLSlimeCache Service](guide/dlslime-cache.md) - [Versions](versions.md) - `dlslime.cache.CacheClient` diff --git a/docs/src/api_reference.zh.md b/docs/src/api_reference.zh.md index f0bd88f6..ab5528eb 100644 --- a/docs/src/api_reference.zh.md +++ b/docs/src/api_reference.zh.md @@ -30,5 +30,6 @@ from dlslime.logging import get_logger, set_log_level - [Endpoint API](guide/endpoint-api.md) - [PeerAgent API](guide/peeragent-api.md) +- [SlimeRPC](guide/slimerpc.md) - [DLSlimeCache 服务](guide/dlslime-cache.md) - [版本](versions.md) diff --git a/docs/src/guide/benchmark-rpc.md b/docs/src/guide/benchmark-rpc.md index a5b4253e..9e62aa94 100644 --- a/docs/src/guide/benchmark-rpc.md +++ b/docs/src/guide/benchmark-rpc.md @@ -1,18 +1,20 @@ # SlimeRPC Benchmark This document describes the Python RPC microbenchmark added for comparing -SlimeRPC against Ray on a single machine. +SlimeRPC against Ray (and optionally Pulsing) on a single machine. ## What It Measures The benchmark measures round-trip latency and effective bandwidth for a raw bytes echo RPC across payload sizes from `1KB` up to `16MB` by default. -It runs two implementations: +It runs two implementations by default, with a third opt-in baseline: - `SlimeRPC`: RDMA-backed `PeerAgent` RPC echo between `bench-driver` and `bench-worker` - `Ray`: a local `EchoActor` baseline using the same payload sizes and metrics +- `Pulsing` (optional, off by default): a `@pul.remote` actor echo using the + same payload sizes and metrics. Enable with `--with-pulsing`. The comparison script prints: @@ -20,7 +22,9 @@ The comparison script prints: - p50 latency - p99 latency - effective round-trip bandwidth -- Ray/Slime speedup +- `S/Ray` = Ray avg latency / SlimeRPC avg latency (> 1 means SlimeRPC wins) +- `S/Pul` = Pulsing avg latency / SlimeRPC avg latency (only shown when + Pulsing was enabled) ## Files @@ -28,6 +32,7 @@ The comparison script prints: - `bench/python/rpc_bench_slime_worker.py` - `bench/python/rpc_bench_slime_driver.py` - `bench/python/rpc_bench_ray.py` +- `bench/python/rpc_bench_pulsing.py` - `bench/python/rpc_bench_compare.py` ## Prerequisites @@ -38,14 +43,25 @@ Before running the SlimeRPC side: 2. Make sure Redis is reachable through NanoCtrl. 3. Build and install DLSlime with Python bindings and RDMA support. +For the optional Pulsing baseline, also install `pulsing` +(`pip install pulsing`) in the same environment. + ## Run -Default run: +Default run (SlimeRPC + Ray, Pulsing disabled): ```bash bash bench/python/run_rpc_bench.sh ``` +Include the Pulsing baseline: + +```bash +bash bench/python/run_rpc_bench.sh --with-pulsing +# or +WITH_PULSING=1 bash bench/python/run_rpc_bench.sh +``` + Specify control-plane address or buffer size: ```bash @@ -64,12 +80,17 @@ CTRL=http://127.0.0.1:3000 BUF_MB=256 MAX_SIZE_MB=16 \ ## Output -The script writes: +The script always writes: - `bench/results/slime_rpc.csv` - `bench/results/ray_rpc.csv` -and then prints a merged comparison table. +and, when `--with-pulsing` is passed, additionally writes: + +- `bench/results/pulsing_rpc.csv` + +It then prints a merged comparison table. The `S/Pul` column only appears in +the table when the Pulsing CSV is present. ## Stability Notes diff --git a/docs/src/guide/index.md b/docs/src/guide/index.md index 1891832f..900dcc44 100644 --- a/docs/src/guide/index.md +++ b/docs/src/guide/index.md @@ -8,5 +8,6 @@ than a one-off transfer library. - [Deployment](deployment.md): run NanoCtrl, Redis, DLSlimeCache, and examples in a predictable layout. - [Endpoint API](endpoint-api.md): use the low-level RDMA endpoint surface directly. - [PeerAgent API](peeragent-api.md): use control-plane discovery, named memory regions, and service-friendly I/O. +- [SlimeRPC](slimerpc.md): define Python services and call them through PeerAgent-backed RDMA RPC. - [DLSlimeCache Service](dlslime-cache.md): service lifecycle and client flow. - [SlimeRPC Benchmark](benchmark-rpc.md): benchmark SlimeRPC against Ray. diff --git a/docs/src/guide/index.zh.md b/docs/src/guide/index.zh.md index 82617c27..61fb03ce 100644 --- a/docs/src/guide/index.zh.md +++ b/docs/src/guide/index.zh.md @@ -7,5 +7,6 @@ - [部署](deployment.md):以可复现方式运行 NanoCtrl、Redis、DLSlimeCache 和示例。 - [Endpoint API](endpoint-api.md):直接使用底层 RDMA endpoint 接口。 - [PeerAgent API](peeragent-api.md):使用控制面发现、命名 memory region 和服务化 I/O。 +- [SlimeRPC](slimerpc.md):定义 Python 服务,并通过 PeerAgent-backed RDMA RPC 调用。 - [DLSlimeCache 服务](dlslime-cache.md):服务生命周期和客户端流程。 - [SlimeRPC 基准测试](benchmark-rpc.md):对比 SlimeRPC 与 Ray。 diff --git a/docs/src/guide/slimerpc.md b/docs/src/guide/slimerpc.md new file mode 100644 index 00000000..ecb203cc --- /dev/null +++ b/docs/src/guide/slimerpc.md @@ -0,0 +1,275 @@ +# SlimeRPC + +SlimeRPC is a typed RPC layer built on PeerAgent and the DLSlime RDMA data path. +Use it when application logic should look like a Python service call, while +connection setup, memory registration, and request/reply transport stay inside +DLSlime. + +The public API is: + +```python +from dlslime.rpc import ( + method, + proxy, + serve, + serve_once, + wait_all, + RpcError, + RemoteRpcError, + RpcTimeoutError, + SoftRnrMonitor, +) +``` + +## Requirements + +SlimeRPC requires: + +- NanoCtrl running and reachable by both peers. +- Redis reachable through NanoCtrl. +- Connected PeerAgents on both client and worker. +- A DLSlime build with the C++ RPC session enabled. If the extension was built + without RPC support, `proxy()` raises an error asking you to rebuild with + `BUILD_RPC=ON`. + +```bash +nanoctrl start +python examples/python/rpc_example.py --ctrl http://127.0.0.1:3000 +``` + +## Basic Service + +Define a service class and mark RPC-callable methods with `@method`: + +```python +from dlslime.rpc import method + + +class CalcService: + @method + def add(self, a: int, b: int) -> int: + return a + b + + @method + def echo(self, msg: str) -> str: + return f"echo: {msg}" +``` + +By default, SlimeRPC serializes arguments and return values with `pickle`. +Both sides must import the same service class definition, because method tags +are assigned deterministically by sorted method name. + +## Connect PeerAgents + +SlimeRPC does not replace PeerAgent discovery. First create and connect the two +agents: + +```python +from dlslime import PeerAgent + +worker = PeerAgent(nanoctrl_url="http://127.0.0.1:3000", alias="worker:0") +driver = PeerAgent(nanoctrl_url="http://127.0.0.1:3000", alias="driver:0") + +driver_conn = driver.connect_to("worker:0", ib_port=1, qp_num=1) +worker_conn = worker.connect_to("driver:0", ib_port=1, qp_num=1) + +driver_conn.wait() +worker_conn.wait() +``` + +## Worker Side + +`serve()` blocks and dispatches requests for one peer: + +```python +from dlslime.rpc import serve + +serve(worker, CalcService(), peer="driver:0") +``` + +For examples or tests, run it in a daemon thread: + +```python +import threading + +threading.Thread( + target=serve, + args=(worker, CalcService(), "driver:0"), + daemon=True, +).start() +``` + +`serve()` can infer `peer` only when exactly one peer is connected. Pass +`peer=...` explicitly in multi-peer services. + +Server dispatch settings: + +| Setting | Purpose | +| -------------------------- | ------------------------------------------------------------------ | +| `max_workers=` | Overrides the handler dispatch pool size. | +| `SLIME_RPC_SERVER_WORKERS` | Default handler pool size when `max_workers` is unset. | +| `@method(parallel=True)` | Allows a handler to run concurrently on the same service instance. | + +Handlers are serialized by default under a per-service lock. This is safer for +stateful services such as model runners, NCCL workers, and shared PyTorch +objects. + +## Client Side + +Create a proxy from the client PeerAgent to the worker alias: + +```python +from dlslime.rpc import proxy + +calc = proxy(driver, "worker:0", CalcService) + +future = calc.add(1, 2) +assert future.wait(timeout=30.0) == 3 + +echo_future = calc.echo("hello") +assert echo_future.wait() == "echo: hello" +``` + +Every proxy call returns a future. Use `wait()` to receive the result: + +```python +result = calc.add(1, 2).wait() +``` + +Wait for several calls in order: + +```python +from dlslime.rpc import wait_all + +futures = [calc.add(i, i * 10) for i in range(5)] +results = wait_all(futures, timeout=30.0) +``` + +## Channel Buffers + +SlimeRPC creates an RDMA mailbox channel per `(local_agent, peer_agent)` pair. +The channel registers receive memory under names like: + +```text +rpc:mailbox:: +``` + +Buffer controls: + +| Setting | Default | Purpose | +| ------------------------- | -------------------------------: | ------------------------------------------- | +| `agent._rpc_buffer_size` | `32_000_000` bytes | Per-inflight request/reply slot size. | +| `agent._rpc_max_inflight` | `SLIME_RPC_MAX_INFLIGHT` or `16` | Number of receive slots. | +| `SLIME_RPC_MAX_INFLIGHT` | `16` | Environment fallback for max inflight RPCs. | + +Set these before creating the proxy or starting `serve()`: + +```python +driver._rpc_buffer_size = 64 * 1024 * 1024 +driver._rpc_max_inflight = 8 +worker._rpc_buffer_size = 64 * 1024 * 1024 +worker._rpc_max_inflight = 8 +``` + +Client and worker should use compatible slot sizes for large payloads. + +## Raw Mode + +Use `@method(raw=True)` to bypass pickle. Raw handlers receive the channel, a +request pointer, and request byte length: + +```python +import ctypes +from dlslime.rpc import method + + +class RawEcho: + @method(raw=True) + def echo(self, channel, ptr, nbytes): + request = bytes((ctypes.c_char * nbytes).from_address(ptr)) + return request.upper() +``` + +The client passes one `bytes` payload and receives `bytes`: + +```python +raw = proxy(driver, "worker:0", RawEcho) +response = raw.echo(b"hello").wait() +assert response == b"HELLO" +``` + +This is the right mode for FlatBuffers, Cap'n Proto, protobuf bytes, or custom +binary layouts. See `examples/python/rpc_flatbuf_example.py` for a complete +FlatBuffers loopback. + +## In-Place Raw Replies + +For very small hot-path handlers, `@method(raw=True, inplace=True)` lets the +handler write the reply directly into the registered send buffer: + +```python +class InplaceService: + @method(raw=True, inplace=True) + def echo(self, req_ptr, req_nbytes, resp_ptr, resp_cap) -> int: + n = min(req_nbytes, resp_cap) + ctypes.memmove(resp_ptr, req_ptr, n) + return n +``` + +The handler must return the number of bytes written. While the handler runs, +the session send lock is held, so use this only for very short handlers. + +## Error Handling + +Common client-visible errors: + +| Error | Meaning | +| ----------------- | ---------------------------------------------- | +| `RpcTimeoutError` | `future.wait(timeout=...)` timed out. | +| `RemoteRpcError` | The remote handler raised an exception. | +| `RpcError` | Base class for SlimeRPC client-visible errors. | + +```python +from dlslime.rpc import RemoteRpcError, RpcTimeoutError + +try: + result = calc.add(1, 2).wait(timeout=1.0) +except RpcTimeoutError: + ... +except RemoteRpcError as exc: + print(exc.type_name, exc.message) +``` + +## One-Shot Serving + +`serve_once()` dispatches exactly one incoming call and returns. It is useful for +tests and controlled loops: + +```python +from dlslime.rpc import serve_once + +serve_once(worker, CalcService(), peer="driver:0") +``` + +## RNR Monitoring + +For RDMA receiver-not-ready debugging, use `SoftRnrMonitor`: + +```python +from dlslime.rpc import SoftRnrMonitor + +monitor = SoftRnrMonitor() +monitor.snapshot() + +# run workload + +print(monitor.delta()) +print(monitor.total_delta()) +``` + +## Full Examples + +- `examples/python/rpc_example.py`: typed pickle RPC loopback. +- `examples/python/rpc_flatbuf_example.py`: raw FlatBuffers RPC loopback. +- `bench/python/rpc_bench_slime_worker.py`: benchmark worker. +- `bench/python/rpc_bench_slime_driver.py`: benchmark driver. diff --git a/docs/src/guide/slimerpc.zh.md b/docs/src/guide/slimerpc.zh.md new file mode 100644 index 00000000..cf92e210 --- /dev/null +++ b/docs/src/guide/slimerpc.zh.md @@ -0,0 +1,201 @@ +# SlimeRPC + +SlimeRPC 是构建在 PeerAgent 和 DLSlime RDMA 数据面上的 typed RPC 层。它适合把应用逻辑写成 +Python 服务调用,同时把连接建立、内存注册和 request/reply 传输交给 DLSlime。 + +公开 API: + +```python +from dlslime.rpc import ( + method, + proxy, + serve, + serve_once, + wait_all, + RpcError, + RemoteRpcError, + RpcTimeoutError, + SoftRnrMonitor, +) +``` + +## 前置条件 + +- NanoCtrl 正在运行,client 和 worker 都能访问。 +- Redis 可通过 NanoCtrl 发现。 +- 两侧 PeerAgent 已连接。 +- DLSlime 构建时启用了 C++ RPC session。若未启用,`proxy()` 会提示需要用 `BUILD_RPC=ON` 重新构建。 + +```bash +nanoctrl start +python examples/python/rpc_example.py --ctrl http://127.0.0.1:3000 +``` + +## 定义服务 + +用 `@method` 标记可远程调用的方法: + +```python +from dlslime.rpc import method + + +class CalcService: + @method + def add(self, a: int, b: int) -> int: + return a + b + + @method + def echo(self, msg: str) -> str: + return f"echo: {msg}" +``` + +默认使用 `pickle` 序列化参数和返回值。两端需要导入同一个 service class 定义,因为 method tag 会按方法名排序确定。 + +## 连接 PeerAgent + +SlimeRPC 依赖 PeerAgent 连接,不替代 discovery 流程: + +```python +from dlslime import PeerAgent + +worker = PeerAgent(nanoctrl_url="http://127.0.0.1:3000", alias="worker:0") +driver = PeerAgent(nanoctrl_url="http://127.0.0.1:3000", alias="driver:0") + +driver_conn = driver.connect_to("worker:0", ib_port=1, qp_num=1) +worker_conn = worker.connect_to("driver:0", ib_port=1, qp_num=1) + +driver_conn.wait() +worker_conn.wait() +``` + +## Worker 侧 + +`serve()` 会阻塞并处理某个 peer 的请求: + +```python +from dlslime.rpc import serve + +serve(worker, CalcService(), peer="driver:0") +``` + +在示例或测试中可放到后台线程: + +```python +import threading + +threading.Thread( + target=serve, + args=(worker, CalcService(), "driver:0"), + daemon=True, +).start() +``` + +`serve()` 只有在恰好连接一个 peer 时才能省略 `peer`。多 peer 服务应显式传 `peer=...`。 + +## Client 侧 + +```python +from dlslime.rpc import proxy, wait_all + +calc = proxy(driver, "worker:0", CalcService) + +future = calc.add(1, 2) +assert future.wait(timeout=30.0) == 3 + +futures = [calc.add(i, i * 10) for i in range(5)] +results = wait_all(futures, timeout=30.0) +``` + +每次 proxy 调用都会返回 future,通过 `wait()` 获取结果。 + +## Channel Buffer + +SlimeRPC 会为每个 `(local_agent, peer_agent)` 创建一个 RDMA mailbox channel,并注册类似下面的 MR: + +```text +rpc:mailbox:: +``` + +常用参数: + +| 设置 | 默认值 | 作用 | +| ------------------------- | -------------------------------: | --------------------------------------- | +| `agent._rpc_buffer_size` | `32_000_000` bytes | 每个 inflight request/reply slot 大小。 | +| `agent._rpc_max_inflight` | `SLIME_RPC_MAX_INFLIGHT` 或 `16` | receive slot 数量。 | +| `SLIME_RPC_MAX_INFLIGHT` | `16` | max inflight 的环境变量默认值。 | + +创建 proxy 或启动 `serve()` 前设置: + +```python +driver._rpc_buffer_size = 64 * 1024 * 1024 +driver._rpc_max_inflight = 8 +worker._rpc_buffer_size = 64 * 1024 * 1024 +worker._rpc_max_inflight = 8 +``` + +## Raw 模式 + +`@method(raw=True)` 会跳过 pickle。handler 接收 channel、请求指针和请求字节数: + +```python +import ctypes +from dlslime.rpc import method + + +class RawEcho: + @method(raw=True) + def echo(self, channel, ptr, nbytes): + request = bytes((ctypes.c_char * nbytes).from_address(ptr)) + return request.upper() +``` + +客户端传入一个 `bytes`,返回值也是 `bytes`: + +```python +raw = proxy(driver, "worker:0", RawEcho) +response = raw.echo(b"hello").wait() +``` + +Raw 模式适合 FlatBuffers、protobuf bytes 或自定义二进制布局。完整示例见 +`examples/python/rpc_flatbuf_example.py`。 + +## In-Place Raw 回复 + +`@method(raw=True, inplace=True)` 允许 handler 直接把回复写入已注册的 send buffer: + +```python +class InplaceService: + @method(raw=True, inplace=True) + def echo(self, req_ptr, req_nbytes, resp_ptr, resp_cap) -> int: + n = min(req_nbytes, resp_cap) + ctypes.memmove(resp_ptr, req_ptr, n) + return n +``` + +handler 返回写入的字节数。执行期间 session send lock 会被持有,因此只适合非常短的 hot-path handler。 + +## 错误处理 + +| 错误 | 含义 | +| ----------------- | --------------------------------- | +| `RpcTimeoutError` | `future.wait(timeout=...)` 超时。 | +| `RemoteRpcError` | 远端 handler 抛出异常。 | +| `RpcError` | SlimeRPC 客户端可见错误基类。 | + +```python +from dlslime.rpc import RemoteRpcError, RpcTimeoutError + +try: + result = calc.add(1, 2).wait(timeout=1.0) +except RpcTimeoutError: + ... +except RemoteRpcError as exc: + print(exc.type_name, exc.message) +``` + +## 示例 + +- `examples/python/rpc_example.py`:typed pickle RPC loopback。 +- `examples/python/rpc_flatbuf_example.py`:raw FlatBuffers RPC loopback。 +- `bench/python/rpc_bench_slime_worker.py`:benchmark worker。 +- `bench/python/rpc_bench_slime_driver.py`:benchmark driver。