diff --git a/.buildkite/pipeline.json.py b/.buildkite/pipeline.json.py
index 9b6d6616e..0ae577685 100755
--- a/.buildkite/pipeline.json.py
+++ b/.buildkite/pipeline.json.py
@@ -84,6 +84,14 @@ def main():
                                                           ".buildkite/pipelines/check_build_regression.yml.sh",
                                                           soft_fail=True))
 
+    # Validate the PyTorch allowlist against HuggingFace models when
+    # triggered from the PyTorch edge pipeline. Runs in a python:3.12
+    # container since the build/test images don't include Python.
+    if config.run_pytorch_tests:
+        pipeline_steps.append(pipeline_steps.generate_step("Upload PyTorch allowlist validation",
+                                                           ".buildkite/pipelines/validate_pytorch_allowlist.yml.sh",
+                                                           soft_fail=True))
+
     pipeline["env"] = env
     pipeline["steps"] = pipeline_steps
     print(json.dumps(pipeline, indent=2))
diff --git a/.buildkite/pipelines/validate_pytorch_allowlist.yml.sh b/.buildkite/pipelines/validate_pytorch_allowlist.yml.sh
new file mode 100755
index 000000000..ef2829976
--- /dev/null
+++ b/.buildkite/pipelines/validate_pytorch_allowlist.yml.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License
+# 2.0 and the following additional limitation. Functionality enabled by the
+# files subject to the Elastic License 2.0 may only be used in production when
+# invoked by an Elasticsearch process with a license key installed that permits
+# use of machine learning features. You may not use this file except in
+# compliance with the Elastic License 2.0 and the foregoing additional
+# limitation.
+
+cat <<'EOL'
+steps:
+  - label: "Validate PyTorch allowlist :torch:"
+    key: "validate_pytorch_allowlist"
+    timeout_in_minutes: 60
+    command:
+      - "if [ ! -f dev-tools/extract_model_ops/validate_allowlist.py ]; then echo 'validate_allowlist.py not found, skipping'; exit 0; fi"
+      - "pip install -r dev-tools/extract_model_ops/requirements.txt"
+      - "python3 dev-tools/extract_model_ops/validate_allowlist.py --config dev-tools/extract_model_ops/validation_models.json --pt-dir dev-tools/extract_model_ops/es_it_models --verbose"
+EOL
+
+# Make the validation step depend on the build steps so it only starts
+# once those builds have finished.
+if [ -n "${ML_BUILD_STEP_KEYS:-}" ]; then
+  echo '    depends_on:'
+  IFS=',' read -ra STEP_KEYS <<< "$ML_BUILD_STEP_KEYS"
+  for key in "${STEP_KEYS[@]}"; do
+    echo "      - \"${key}\""
+  done
+fi
+
+cat <<'EOL'
+    allow_dependency_failure: true
+    agents:
+      image: "python:3.12"
+      memory: "32G"
+      ephemeralStorage: "30G"
+    notify:
+      - github_commit_status:
+          context: "Validate PyTorch allowlist"
+EOL
diff --git a/.buildkite/scripts/steps/run_tests.sh b/.buildkite/scripts/steps/run_tests.sh
index 0c5c08125..12b88c1bb 100755
--- a/.buildkite/scripts/steps/run_tests.sh
+++ b/.buildkite/scripts/steps/run_tests.sh
@@ -105,28 +105,6 @@ else
     -P cmake/run-all-tests-parallel.cmake || TEST_OUTCOME=$?
 fi
 
-# --- PyTorch allowlist validation ---
-# When triggered from the PyTorch edge pipeline, run the Python-based
-# allowlist validation which traces live HuggingFace models with the
-# new PyTorch version and verifies every op is in ALLOWED_OPERATIONS.
-VALIDATION_OUTCOME=0
-if [[ "${GITHUB_PR_COMMENT_VAR_ACTION:-}" == "run_pytorch_tests" ]] && [ -f cmake/run-validation.cmake ]; then
-  echo "--- Validating PyTorch allowlist against HuggingFace models"
-  cmake \
-    -DSOURCE_DIR="$(pwd)" \
-    -DVALIDATE_CONFIG="$(pwd)/dev-tools/extract_model_ops/validation_models.json" \
-    -DVALIDATE_PT_DIR="$(pwd)/dev-tools/extract_model_ops/es_it_models" \
-    -DVALIDATE_VERBOSE=TRUE \
-    -DOPTIONAL=TRUE \
-    -P cmake/run-validation.cmake || VALIDATION_OUTCOME=$?
-
-  if [[ $VALIDATION_OUTCOME -ne 0 ]]; then
-    echo "^^^ +++"
-    echo "Allowlist validation failed — the new PyTorch version may introduce ops not in ALLOWED_OPERATIONS."
-    echo "See dev-tools/extract_model_ops/README.md for how to update the allowlist."
-  fi
-fi
-
 # Upload test results
 echo "--- Uploading test results"
 TEST_RESULTS_ARCHIVE=${OS}-${HARDWARE_ARCH}-unit_test_results.tgz
@@ -139,6 +117,4 @@ else
   echo "No test results archive created"
 fi
 
-if [[ $TEST_OUTCOME -ne 0 || $VALIDATION_OUTCOME -ne 0 ]]; then
-  exit 1
-fi
+exit $TEST_OUTCOME
diff --git a/dev-tools/extract_model_ops/torchscript_utils.py b/dev-tools/extract_model_ops/torchscript_utils.py
index af2b30f68..da8fb481b 100644
--- a/dev-tools/extract_model_ops/torchscript_utils.py
+++ b/dev-tools/extract_model_ops/torchscript_utils.py
@@ -145,13 +145,18 @@ def load_and_trace_hf_model(model_name: str, quantize: bool = False,
     attention_mask = inputs["attention_mask"]
 
     try:
-        return torch.jit.trace(
+        traced = torch.jit.trace(
             model, (input_ids, attention_mask), strict=False)
     except Exception as exc:
         print(f"  TRACE WARNING: {exc}", file=sys.stderr)
         print("  Falling back to torch.jit.script...", file=sys.stderr)
         try:
-            return torch.jit.script(model)
+            traced = torch.jit.script(model)
         except Exception as exc2:
             print(f"  SCRIPT ERROR: {exc2}", file=sys.stderr)
             return None
+
+    # Free the original HF model to reduce peak memory when validating
+    # many models sequentially.
+    del model, tokenizer, inputs
+    return traced
diff --git a/dev-tools/extract_model_ops/validate_allowlist.py b/dev-tools/extract_model_ops/validate_allowlist.py
index dfb39021a..d7a1ba99c 100644
--- a/dev-tools/extract_model_ops/validate_allowlist.py
+++ b/dev-tools/extract_model_ops/validate_allowlist.py
@@ -29,6 +29,7 @@
 """
 
 import argparse
+import gc
 import re
 import sys
 from pathlib import Path
@@ -104,30 +105,44 @@
 def validate_model(model_name: str,
                    allowed: set[str],
                    forbidden: set[str],
                    verbose: bool,
-                   quantize: bool = False) -> bool:
-    """Validate one HuggingFace model. Returns True if all ops pass."""
+                   quantize: bool = False,
+                   auto_class: str | None = None,
+                   config_overrides: dict | None = None) -> str:
+    """Validate one HuggingFace model.
+
+    Returns "pass", "fail" (op validation failed), or "skip" (could not
+    load/trace — e.g. private model without HF_TOKEN).
+    """
     label = f"{model_name} (quantized)" if quantize else model_name
     print(f"  {label}...", file=sys.stderr)
-    traced = load_and_trace_hf_model(model_name, quantize=quantize)
+    traced = load_and_trace_hf_model(model_name, quantize=quantize,
+                                     auto_class=auto_class,
+                                     config_overrides=config_overrides)
     if traced is None:
-        print(f"    FAILED (could not load/trace)", file=sys.stderr)
-        return False
+        print(f"    SKIPPED (could not load/trace)", file=sys.stderr)
+        return "skip"
     ops = collect_inlined_ops(traced)
-    return check_ops(ops, allowed, forbidden, verbose)
+    result = "pass" if check_ops(ops, allowed, forbidden, verbose) else "fail"
+    del traced
+    gc.collect()
+    return result
 
 
 def validate_pt_file(name: str,
                      pt_path: str,
                      allowed: set[str],
                      forbidden: set[str],
-                     verbose: bool) -> bool:
-    """Validate a local TorchScript .pt file. Returns True if all ops pass."""
+                     verbose: bool) -> str:
+    """Validate a local TorchScript .pt file.
+
+    Returns "pass", "fail", or "skip".
+    """
     print(f"  {name} ({pt_path})...", file=sys.stderr)
     ops = load_pt_and_collect_ops(pt_path)
     if ops is None:
-        print(f"    FAILED (could not load)", file=sys.stderr)
-        return False
-    return check_ops(ops, allowed, forbidden, verbose)
+        print(f"    SKIPPED (could not load)", file=sys.stderr)
+        return "skip"
+    return "pass" if check_ops(ops, allowed, forbidden, verbose) else "fail"
 
 
@@ -151,7 +166,7 @@ def main():
     print(f"Parsed {len(allowed)} allowed ops and {len(forbidden)} "
           f"forbidden ops from {SUPPORTED_OPS_CC.name}", file=sys.stderr)
 
-    results: dict[str, bool] = {}
+    results: dict[str, str] = {}
 
     models = load_model_config(args.config)
 
@@ -161,7 +176,9 @@
     for arch, spec in models.items():
         results[arch] = validate_model(
             spec["model_id"], allowed, forbidden, args.verbose,
-            quantize=spec["quantized"])
+            quantize=spec["quantized"],
+            auto_class=spec.get("auto_class"),
+            config_overrides=spec.get("config_overrides"))
 
     if args.pt_dir and args.pt_dir.is_dir():
         pt_files = sorted(args.pt_dir.glob("*.pt"))
@@ -175,26 +192,32 @@
     print(file=sys.stderr)
     print("=" * 60, file=sys.stderr)
-    all_pass = all(results.values())
-    for key, passed in results.items():
-        status = "PASS" if passed else "FAIL"
+    for key, status in results.items():
+        display = status.upper()
         if key.startswith("pt:"):
-            print(f"  {key}: {status}", file=sys.stderr)
+            print(f"  {key}: {display}", file=sys.stderr)
         else:
             spec = models[key]
             label = spec["model_id"]
             if spec["quantized"]:
                 label += " (quantized)"
-            print(f"  {key} ({label}): {status}", file=sys.stderr)
+            print(f"  {key} ({label}): {display}", file=sys.stderr)
+
+    failed = [a for a, s in results.items() if s == "fail"]
+    skipped = [a for a, s in results.items() if s == "skip"]
+    passed = [a for a, s in results.items() if s == "pass"]
 
     print("=" * 60, file=sys.stderr)
-    if all_pass:
-        print("All models PASS - no false positives.", file=sys.stderr)
-    else:
-        failed = [a for a, p in results.items() if not p]
-        print(f"FAILED models: {', '.join(failed)}", file=sys.stderr)
+    print(f"{len(passed)} passed, {len(failed)} failed, "
+          f"{len(skipped)} skipped", file=sys.stderr)
+
+    if skipped:
+        print(f"Skipped (could not load/trace — may need HF_TOKEN "
+              f"for private models): {', '.join(skipped)}", file=sys.stderr)
+    if failed:
+        print(f"FAILED (op validation): {', '.join(failed)}", file=sys.stderr)
 
-    sys.exit(0 if all_pass else 1)
+    sys.exit(0 if not failed else 1)
 
 
 if __name__ == "__main__":