Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 33 additions & 9 deletions src/tether/exporters/fp16_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,35 @@ def parity_gate(
}


def _remove_stale_external_data(dst: Path) -> list[Path]:
"""Delete external-data leftovers from a PRIOR conversion of *this* model.

``onnx.save`` writes the FP16 weights to ``{dst.stem}.bin`` next to ``dst``.
A leftover ``.bin``/``.data`` from an earlier run at the same path would sit
alongside the new one and inflate our size accounting, so we clear it first.

Scope is strictly this model's stem. The previous implementation globbed
every ``*.bin`` / ``*.data`` in ``dst.parent`` and unlinked them — which
destroyed *other* models' weight files whenever a user converted into a
shared export directory (real data-loss bug). ``dst`` itself (the ``.onnx``)
is never touched.

Returns the list of paths actually removed (for logging / tests).
"""
removed: list[Path] = []
for old in dst.parent.glob(f"{dst.stem}*"):
if old == dst:
continue
if old.suffix not in (".bin", ".data"):
continue
try:
old.unlink()
removed.append(old)
except OSError:
pass
return removed


def convert_fp32_to_fp16(
fp32_onnx_path: str | Path,
fp16_onnx_path: str | Path,
Expand Down Expand Up @@ -205,15 +234,10 @@ def convert_fp32_to_fp16(
)

logger.info("[fp16] Saving %s...", dst)
# Remove any leftover external data from a prior run at this path —
# onnx.save would otherwise leave both the old and new .bin on disk
# and our size accounting would double-count.
for pat in ("*.bin", "*.data"):
for old in dst.parent.glob(pat):
try:
old.unlink()
except Exception:
pass
# Clear leftover external data from a prior conversion of THIS model only.
# (Scoped to dst.stem — see _remove_stale_external_data; a blanket
# *.bin/*.data sweep would delete other models' weights in a shared dir.)
_remove_stale_external_data(dst)

onnx.save(
model_fp16,
Expand Down
61 changes: 61 additions & 0 deletions tests/test_fp16_convert_external_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Regression tests for fp16_convert external-data cleanup scoping.

Guards the data-loss bug where the pre-save cleanup globbed every *.bin/*.data
in the destination's parent directory and unlinked them — destroying OTHER
models' weight files when a user converted into a shared export dir. The fix
scopes deletion strictly to the model being converted (dst.stem).
"""
from __future__ import annotations

from pathlib import Path

from tether.exporters.fp16_convert import _remove_stale_external_data


def _touch(p: Path) -> Path:
p.write_bytes(b"x")
return p


def test_removes_this_models_stale_bin(tmp_path: Path) -> None:
dst = tmp_path / "model_fp16.onnx"
stale = _touch(tmp_path / "model_fp16.bin")
removed = _remove_stale_external_data(dst)
assert stale in removed
assert not stale.exists()


def test_removes_legacy_data_variants_for_this_model(tmp_path: Path) -> None:
dst = tmp_path / "model_fp16.onnx"
legacy1 = _touch(tmp_path / "model_fp16.data")
legacy2 = _touch(tmp_path / "model_fp16.onnx.data")
removed = _remove_stale_external_data(dst)
assert legacy1 in removed and legacy2 in removed
assert not legacy1.exists() and not legacy2.exists()


def test_preserves_other_models_weights(tmp_path: Path) -> None:
"""The core data-loss guard: a sibling model's external data must survive."""
dst = tmp_path / "model_fp16.onnx"
_touch(tmp_path / "model_fp16.bin") # ours — will be removed
other_bin = _touch(tmp_path / "other_model.bin")
other_data = _touch(tmp_path / "unrelated.onnx.data")
other_onnx = _touch(tmp_path / "other_model.onnx")

removed = _remove_stale_external_data(dst)

assert other_bin.exists(), "sibling model .bin was destroyed"
assert other_data.exists(), "unrelated .onnx.data was destroyed"
assert other_onnx.exists()
assert other_bin not in removed and other_data not in removed


def test_never_removes_dst_itself(tmp_path: Path) -> None:
dst = _touch(tmp_path / "model_fp16.onnx")
_remove_stale_external_data(dst)
assert dst.exists(), "dst .onnx must never be unlinked"


def test_no_leftovers_is_noop(tmp_path: Path) -> None:
dst = tmp_path / "model_fp16.onnx"
assert _remove_stale_external_data(dst) == []
Loading