Skip to content
Merged
8 changes: 8 additions & 0 deletions .buildkite/pipeline.json.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,14 @@ def main():
".buildkite/pipelines/check_build_regression.yml.sh",
soft_fail=True))

# Validate the PyTorch allowlist against HuggingFace models when
# triggered from the PyTorch edge pipeline. Runs in a python:3
# container since the build/test images don't include Python.
if config.run_pytorch_tests:
pipeline_steps.append(pipeline_steps.generate_step("Upload PyTorch allowlist validation",
".buildkite/pipelines/validate_pytorch_allowlist.yml.sh",
soft_fail=True))

pipeline["env"] = env
pipeline["steps"] = pipeline_steps
print(json.dumps(pipeline, indent=2))
Expand Down
41 changes: 41 additions & 0 deletions .buildkite/pipelines/validate_pytorch_allowlist.yml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.
#
# Emits (on stdout) a Buildkite pipeline step that validates the PyTorch op
# allowlist against live HuggingFace models. Runs in a python:3.12 container
# since the build/test images don't include Python.

# Static head of the step. The first command makes the step a no-op on
# branches where the validation script doesn't exist yet.
cat <<'EOL'
steps:
  - label: "Validate PyTorch allowlist :torch:"
    key: "validate_pytorch_allowlist"
    timeout_in_minutes: 60
    command:
      - "if [ ! -f dev-tools/extract_model_ops/validate_allowlist.py ]; then echo 'validate_allowlist.py not found, skipping'; exit 0; fi"
      - "pip install -r dev-tools/extract_model_ops/requirements.txt"
      - "python3 dev-tools/extract_model_ops/validate_allowlist.py --config dev-tools/extract_model_ops/validation_models.json --pt-dir dev-tools/extract_model_ops/es_it_models --verbose"
EOL

# Depend on the build steps so validation doesn't start before the
# pipeline is fully generated. ML_BUILD_STEP_KEYS is a comma-separated
# list of step keys; when unset/empty no depends_on block is emitted.
if [ -n "${ML_BUILD_STEP_KEYS:-}" ]; then
  echo '    depends_on:'
  IFS=',' read -ra STEP_KEYS <<< "$ML_BUILD_STEP_KEYS"
  for key in "${STEP_KEYS[@]}"; do
    echo "      - \"${key}\""
  done
fi

# Static tail: run even if a dependency failed, pin the agent container,
# and report a dedicated GitHub commit status for this step.
cat <<'EOL'
    allow_dependency_failure: true
    agents:
      image: "python:3.12"
      memory: "32G"
      ephemeralStorage: "30G"
    notify:
      - github_commit_status:
          context: "Validate PyTorch allowlist"
EOL
26 changes: 1 addition & 25 deletions .buildkite/scripts/steps/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -105,28 +105,6 @@ else
-P cmake/run-all-tests-parallel.cmake || TEST_OUTCOME=$?
fi

# --- PyTorch allowlist validation ---
# When triggered from the PyTorch edge pipeline, run the Python-based
# allowlist validation which traces live HuggingFace models with the
# new PyTorch version and verifies every op is in ALLOWED_OPERATIONS.
VALIDATION_OUTCOME=0
if [[ "${GITHUB_PR_COMMENT_VAR_ACTION:-}" == "run_pytorch_tests" ]] && [ -f cmake/run-validation.cmake ]; then
echo "--- Validating PyTorch allowlist against HuggingFace models"
cmake \
-DSOURCE_DIR="$(pwd)" \
-DVALIDATE_CONFIG="$(pwd)/dev-tools/extract_model_ops/validation_models.json" \
-DVALIDATE_PT_DIR="$(pwd)/dev-tools/extract_model_ops/es_it_models" \
-DVALIDATE_VERBOSE=TRUE \
-DOPTIONAL=TRUE \
-P cmake/run-validation.cmake || VALIDATION_OUTCOME=$?

if [[ $VALIDATION_OUTCOME -ne 0 ]]; then
echo "^^^ +++"
echo "Allowlist validation failed — the new PyTorch version may introduce ops not in ALLOWED_OPERATIONS."
echo "See dev-tools/extract_model_ops/README.md for how to update the allowlist."
fi
fi

# Upload test results
echo "--- Uploading test results"
TEST_RESULTS_ARCHIVE=${OS}-${HARDWARE_ARCH}-unit_test_results.tgz
Expand All @@ -139,6 +117,4 @@ else
echo "No test results archive created"
fi

if [[ $TEST_OUTCOME -ne 0 || $VALIDATION_OUTCOME -ne 0 ]]; then
exit 1
fi
exit $TEST_OUTCOME
9 changes: 7 additions & 2 deletions dev-tools/extract_model_ops/torchscript_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,18 @@ def load_and_trace_hf_model(model_name: str, quantize: bool = False,
attention_mask = inputs["attention_mask"]

try:
return torch.jit.trace(
traced = torch.jit.trace(
model, (input_ids, attention_mask), strict=False)
except Exception as exc:
print(f" TRACE WARNING: {exc}", file=sys.stderr)
print(" Falling back to torch.jit.script...", file=sys.stderr)
try:
return torch.jit.script(model)
traced = torch.jit.script(model)
except Exception as exc2:
print(f" SCRIPT ERROR: {exc2}", file=sys.stderr)
return None

# Free the original HF model to reduce peak memory when validating
# many models sequentially.
del model, tokenizer, inputs
return traced
71 changes: 47 additions & 24 deletions dev-tools/extract_model_ops/validate_allowlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"""

import argparse
import gc
import re
import sys
from pathlib import Path
Expand Down Expand Up @@ -104,30 +105,44 @@ def validate_model(model_name: str,
allowed: set[str],
forbidden: set[str],
verbose: bool,
quantize: bool = False) -> bool:
"""Validate one HuggingFace model. Returns True if all ops pass."""
quantize: bool = False,
auto_class: str | None = None,
config_overrides: dict | None = None) -> str:
"""Validate one HuggingFace model.

Returns "pass", "fail" (op validation failed), or "skip" (could not
load/trace — e.g. private model without HF_TOKEN).
"""
label = f"{model_name} (quantized)" if quantize else model_name
print(f" {label}...", file=sys.stderr)
traced = load_and_trace_hf_model(model_name, quantize=quantize)
traced = load_and_trace_hf_model(model_name, quantize=quantize,
auto_class=auto_class,
config_overrides=config_overrides)
if traced is None:
print(f" FAILED (could not load/trace)", file=sys.stderr)
return False
print(f" SKIPPED (could not load/trace)", file=sys.stderr)
return "skip"
ops = collect_inlined_ops(traced)
return check_ops(ops, allowed, forbidden, verbose)
result = "pass" if check_ops(ops, allowed, forbidden, verbose) else "fail"
del traced
gc.collect()
return result


def validate_pt_file(name: str,
                     pt_path: str,
                     allowed: set[str],
                     forbidden: set[str],
                     verbose: bool) -> str:
    """Validate a local TorchScript .pt file against the op allowlist.

    Args:
        name: Display name used in progress output.
        pt_path: Path to the TorchScript file to load.
        allowed: Op names permitted by the allowlist.
        forbidden: Op names explicitly disallowed.
        verbose: Forwarded to ``check_ops`` for detailed reporting.

    Returns:
        "pass", "fail" (op validation failed), or "skip" (file could
        not be loaded).
    """
    print(f"  {name} ({pt_path})...", file=sys.stderr)
    ops = load_pt_and_collect_ops(pt_path)
    if ops is None:
        # Loading problems are reported as skips, mirroring
        # validate_model, so only real op violations fail the run.
        print("    SKIPPED (could not load)", file=sys.stderr)
        return "skip"
    return "pass" if check_ops(ops, allowed, forbidden, verbose) else "fail"


def main():
Expand All @@ -151,7 +166,7 @@ def main():
print(f"Parsed {len(allowed)} allowed ops and {len(forbidden)} "
f"forbidden ops from {SUPPORTED_OPS_CC.name}", file=sys.stderr)

results: dict[str, bool] = {}
results: dict[str, str] = {}

models = load_model_config(args.config)

Expand All @@ -161,7 +176,9 @@ def main():
for arch, spec in models.items():
results[arch] = validate_model(
spec["model_id"], allowed, forbidden, args.verbose,
quantize=spec["quantized"])
quantize=spec["quantized"],
auto_class=spec.get("auto_class"),
config_overrides=spec.get("config_overrides"))

if args.pt_dir and args.pt_dir.is_dir():
pt_files = sorted(args.pt_dir.glob("*.pt"))
Expand All @@ -175,26 +192,32 @@ def main():

print(file=sys.stderr)
print("=" * 60, file=sys.stderr)
all_pass = all(results.values())
for key, passed in results.items():
status = "PASS" if passed else "FAIL"
for key, status in results.items():
display = status.upper()
if key.startswith("pt:"):
print(f" {key}: {status}", file=sys.stderr)
print(f" {key}: {display}", file=sys.stderr)
else:
spec = models[key]
label = spec["model_id"]
if spec["quantized"]:
label += " (quantized)"
print(f" {key} ({label}): {status}", file=sys.stderr)
print(f" {key} ({label}): {display}", file=sys.stderr)

failed = [a for a, s in results.items() if s == "fail"]
skipped = [a for a, s in results.items() if s == "skip"]
passed = [a for a, s in results.items() if s == "pass"]

print("=" * 60, file=sys.stderr)
if all_pass:
print("All models PASS - no false positives.", file=sys.stderr)
else:
failed = [a for a, p in results.items() if not p]
print(f"FAILED models: {', '.join(failed)}", file=sys.stderr)
print(f"{len(passed)} passed, {len(failed)} failed, "
f"{len(skipped)} skipped", file=sys.stderr)

if skipped:
print(f"Skipped (could not load/trace — may need HF_TOKEN "
f"for private models): {', '.join(skipped)}", file=sys.stderr)
if failed:
print(f"FAILED (op validation): {', '.join(failed)}", file=sys.stderr)

sys.exit(0 if all_pass else 1)
sys.exit(0 if not failed else 1)


if __name__ == "__main__":
Expand Down
Loading