diff --git a/Cargo.lock b/Cargo.lock index 5ff7ef2b6d..ce6e26b34f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1164,9 +1164,7 @@ dependencies = [ "axum", "axum-extra", "base64", - "bmc-explorer", "bmc-vendor", - "bms-dsx-exchange", "byteorder", "bytes", "carbide-api-db", @@ -1175,7 +1173,6 @@ dependencies = [ "carbide-dpa-interface-controller", "carbide-dpf", "carbide-firmware", - "carbide-health-metrics", "carbide-health-report", "carbide-host-support", "carbide-ib-fabric", @@ -1183,14 +1180,19 @@ dependencies = [ "carbide-ipmi", "carbide-ipxe-renderer", "carbide-libmlx", + "carbide-machine-controller", "carbide-macros", "carbide-measured-boot", "carbide-metrics-utils", + "carbide-mqtt-common", "carbide-network", "carbide-network-segment-controller", "carbide-nvlink-manager", + "carbide-power-shelf-controller", "carbide-preingestion-manager", "carbide-prost-builder", + "carbide-rack", + "carbide-rack-controller", "carbide-redfish", "carbide-rpc", "carbide-rpc-utils", @@ -1198,9 +1200,8 @@ dependencies = [ "carbide-site-explorer", "carbide-spdm-controller", "carbide-sqlx-testing", - "carbide-ssh", + "carbide-state-controller-common", "carbide-switch-controller", - "carbide-tls", "carbide-utils", "carbide-uuid", "carbide-version", @@ -1210,7 +1211,6 @@ dependencies = [ "component-manager", "config-version", "const_format", - "crypto-bigint 0.7.0-rc.28", "ctor", "dashmap", "data-encoding", @@ -1226,29 +1226,21 @@ dependencies = [ "http", "http-body-util", "hyper", - "hyper-rustls", - "hyper-timeout", "hyper-util", "ipnetwork", "itertools 0.14.0", "jsonwebtoken", - "k8s-openapi", - "kube", "lazy_static", "libnmxc", - "libnmxm", "libredfish", "librms", "logfmt", "mac_address", - "mockall", "mockito", "mqttea", "nras", "num_cpus", "oauth2", - "oid-registry", - "once_cell", "opentelemetry", "opentelemetry-otlp", "opentelemetry-prometheus", @@ -1259,7 +1251,6 @@ dependencies = [ "prometheus", "prometheus-text-parser", "prost", - "prost-types", "rand 0.10.1", "rcgen", 
"regex", @@ -1280,7 +1271,6 @@ dependencies = [ "strum 0.28.0", "temp-dir", "tempfile", - "tera", "thiserror 2.0.18", "time", "tokio", @@ -1292,7 +1282,6 @@ dependencies = [ "tonic-reflection", "tower", "tower-http", - "tower-test", "tracing", "tracing-log", "tracing-opentelemetry", @@ -1301,7 +1290,6 @@ dependencies = [ "url", "urlencoding", "uuid", - "version-compare 0.2.1", "x509-parser", ] @@ -1331,7 +1319,7 @@ dependencies = [ "eyre", "futures", "futures-util", - "hickory-proto 0.26.1", + "hickory-proto", "ipnetwork", "itertools 0.14.0", "lazy_static", @@ -2185,6 +2173,52 @@ dependencies = [ "uuid", ] +[[package]] +name = "carbide-machine-controller" +version = "0.0.0" +dependencies = [ + "async-trait", + "bmc-vendor", + "carbide-api-db", + "carbide-api-model", + "carbide-dpf", + "carbide-firmware", + "carbide-health-metrics", + "carbide-health-report", + "carbide-ipmi", + "carbide-measured-boot", + "carbide-redfish", + "carbide-secrets", + "carbide-state-controller-common", + "carbide-utils", + "carbide-uuid", + "chrono", + "config-version", + "duration-str", + "eyre", + "figment", + "futures", + "futures-util", + "itertools 0.14.0", + "lazy_static", + "libredfish", + "mac_address", + "mockall", + "opentelemetry", + "prost", + "prost-types", + "regex", + "serde", + "serde_json", + "sqlx", + "state-controller", + "tokio", + "tonic-prost-build", + "tracing", + "uuid", + "version-compare 0.2.1", +] + [[package]] name = "carbide-machine-validation" version = "0.0.0" @@ -2259,6 +2293,18 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "carbide-mqtt-common" +version = "0.1.0" +dependencies = [ + "async-trait", + "mqttea", + "opentelemetry", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "carbide-network" version = "0.0.0" @@ -2321,6 +2367,28 @@ dependencies = [ "uuid", ] +[[package]] +name = "carbide-power-shelf-controller" +version = "0.0.0" +dependencies = [ + "async-trait", + "carbide-api-db", + "carbide-api-model", + 
"carbide-health-metrics", + "carbide-rack", + "carbide-secrets", + "carbide-utils", + "carbide-uuid", + "config-version", + "eyre", + "librms", + "mac_address", + "opentelemetry", + "sqlx", + "state-controller", + "tracing", +] + [[package]] name = "carbide-preingestion-manager" version = "0.0.1" @@ -2389,6 +2457,60 @@ dependencies = [ "uuid", ] +[[package]] +name = "carbide-rack" +version = "0.0.0" +dependencies = [ + "async-trait", + "bms-dsx-exchange", + "carbide-api-db", + "carbide-api-model", + "carbide-health-report", + "carbide-mqtt-common", + "carbide-secrets", + "carbide-uuid", + "chrono", + "eyre", + "librms", + "mac_address", + "mqttea", + "opentelemetry", + "serde_json", + "sqlx", + "state-controller", + "tokio", + "tokio-util", + "tonic", + "tracing", +] + +[[package]] +name = "carbide-rack-controller" +version = "0.0.0" +dependencies = [ + "async-trait", + "carbide-api-db", + "carbide-api-model", + "carbide-health-metrics", + "carbide-rack", + "carbide-secrets", + "carbide-utils", + "carbide-uuid", + "chrono", + "config-version", + "duration-str", + "eyre", + "librms", + "mac_address", + "opentelemetry", + "serde", + "serde_json", + "sqlx", + "state-controller", + "tonic", + "tracing", +] + [[package]] name = "carbide-redfish" version = "0.0.1" @@ -2744,6 +2866,16 @@ dependencies = [ "uuid", ] +[[package]] +name = "carbide-state-controller-common" +version = "0.0.0" +dependencies = [ + "carbide-utils", + "duration-str", + "serde", + "state-controller", +] + [[package]] name = "carbide-switch-controller" version = "0.0.0" @@ -3945,14 +4077,14 @@ checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" [[package]] name = "dhcproto" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "425ab19f6a915beac79cac8ec2810c1311b502ae14d7f294682081cf5ae4c5bb" +checksum = "c278d2f17dbcb7332f3b31788be67f76017096c5eedc293e1259f2d48b0f891f" dependencies = [ "dhcproto-macros", - 
"hickory-proto 0.25.2", + "hickory-proto", "ipnet", - "rand 0.9.4", + "rand 0.10.1", "thiserror 2.0.18", ] @@ -5127,7 +5259,7 @@ dependencies = [ "futures-channel", "futures-io", "futures-util", - "hickory-proto 0.26.1", + "hickory-proto", "idna 1.1.0", "ipnet", "jni", @@ -5139,28 +5271,6 @@ dependencies = [ "url", ] -[[package]] -name = "hickory-proto" -version = "0.25.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" -dependencies = [ - "async-trait", - "cfg-if", - "data-encoding", - "enum-as-inner", - "futures-channel", - "futures-util", - "idna 1.1.0", - "ipnet", - "once_cell", - "rand 0.9.4", - "thiserror 2.0.18", - "tinyvec", - "tracing", - "url", -] - [[package]] name = "hickory-proto" version = "0.26.1" @@ -5190,7 +5300,7 @@ dependencies = [ "cfg-if", "futures-util", "hickory-net", - "hickory-proto 0.26.1", + "hickory-proto", "ipconfig", "ipnet", "jni", @@ -9111,12 +9221,8 @@ dependencies = [ "flume", "futures-util", "log", - "rustls-native-certs", - "rustls-pemfile", - "rustls-webpki 0.102.8", "thiserror 2.0.18", "tokio", - "tokio-rustls", "tokio-stream", "tokio-util", ] @@ -9353,7 +9459,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.13", + "rustls-webpki", "subtle", "zeroize", ] @@ -9403,7 +9509,7 @@ dependencies = [ "rustls", "rustls-native-certs", "rustls-platform-verifier-android", - "rustls-webpki 0.103.13", + "rustls-webpki", "security-framework", "security-framework-sys", "webpki-root-certs", @@ -9416,17 +9522,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" -[[package]] -name = "rustls-webpki" -version = "0.102.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" -dependencies = [ - "ring", - 
"rustls-pki-types", - "untrusted 0.9.0", -] - [[package]] name = "rustls-webpki" version = "0.103.13" diff --git a/crates/api/Cargo.toml b/crates/api/Cargo.toml index 98cb44550e..60ca99b174 100644 --- a/crates/api/Cargo.toml +++ b/crates/api/Cargo.toml @@ -30,65 +30,58 @@ name = "carbide-api" path = "src/main.rs" [dependencies] -# [local-dependencies] -# DO NOT PUT DEPENDENCIES OTHER THAN LOCAL DEPS HERE, THEY SHOULD ALL HAVE 'path =' IN THEM. +# Local (path) dependencies. PLEASE KEEP ALPHABETIZED ORDER. bmc-vendor = { path = "../bmc-vendor" } -bmc-explorer = { path = "../bmc-explorer" } -bms-dsx-exchange = { path = "../bms-dsx-exchange" } -config-version = { path = "../config-version", features = ["sqlx"] } +carbide-api-model = { path = "../api-model", default-features = false } +carbide-api-db = { path = "../api-db", default-features = false } +carbide-authn = { path = "../authn" } carbide-dpa-interface-controller = { path = "../dpa-interface-controller" } -carbide-host-support = { path = "../host-support", default-features = false } -carbide-mqtt-common = { path = "../mqtt-common" } -carbide-network = { path = "../network", features = ["sqlx"] } -carbide-rack = { path = "../rack" } -carbide-rack-controller = { path = "../rack-controller" } -carbide-power-shelf-controller = { path = "../power-shelf-controller" } -carbide-secrets = { path = "../secrets" } -carbide-version = { path = "../version" } +carbide-dpf = { path = "../dpf" } carbide-firmware = { path = "../firmware" } carbide-health-report = { path = "../health-report" } -carbide-health-metrics = { path = "../health-metrics" } +carbide-host-support = { path = "../host-support", default-features = false } carbide-ib-fabric = { path = "../ib-fabric" } carbide-ib-partition-controller = { path = "../ib-partition-controller" } -carbide-ipxe-renderer = { path = "../ipxe-renderer" } carbide-ipmi = { path = "../ipmi" } -carbide-redfish = { path = "../redfish" } +carbide-ipxe-renderer = { path = "../ipxe-renderer" } 
+carbide-libmlx = { path = "../libmlx" } +carbide-machine-controller = { path = "../machine-controller" } +carbide-measured-boot = { path = "../measured-boot", features = ["sqlx"] } +carbide-metrics-utils = { path = "../metrics-utils" } +carbide-mqtt-common = { path = "../mqtt-common" } +carbide-network = { path = "../network", features = ["sqlx"] } carbide-network-segment-controller = { path = "../network-segment-controller" } +carbide-nvlink-manager = { path = "../nvlink-manager" } +carbide-power-shelf-controller = { path = "../power-shelf-controller" } +carbide-preingestion-manager = { path = "../preingestion-manager" } +carbide-rack = { path = "../rack" } +carbide-rack-controller = { path = "../rack-controller" } +carbide-redfish = { path = "../redfish" } +carbide-rpc = { path = "../rpc", features = ["sqlx", "model"] } +carbide-rpc-utils = { path = "../rpc-utils" } +carbide-secrets = { path = "../secrets" } carbide-site-explorer = { path = "../site-explorer" } carbide-spdm-controller = { path = "../spdm-controller" } carbide-state-controller-common = { path = "../state-controller-common" } carbide-switch-controller = { path = "../switch-controller" } -carbide-preingestion-manager = { path = "../preingestion-manager" } -carbide-nvlink-manager = { path = "../nvlink-manager" } +carbide-utils = { path = "../utils", features = ["sqlx"] } +carbide-uuid = { path = "../uuid", features = ["sqlx"] } +carbide-version = { path = "../version" } +component-manager = { path = "../component-manager" } +config-version = { path = "../config-version", features = ["sqlx"] } dns-record = { path = "../dns-record" } -libnmxm = { path = "../libnmxm" } libnmxc = { path = "../libnmxc" } logfmt = { path = "../logfmt" } mqttea = { path = "../mqttea" } -carbide-rpc = { path = "../rpc", features = ["sqlx", "model"] } -carbide-rpc-utils = { path = "../rpc-utils" } -carbide-utils = { path = "../utils", features = ["sqlx"] } -carbide-ssh = { path = "../ssh" } -carbide-tls = { path = "../tls" } 
-carbide-uuid = { path = "../uuid", features = ["sqlx"] } -carbide-measured-boot = { path = "../measured-boot", features = ["sqlx"] } -carbide-metrics-utils = { path = "../metrics-utils" } -carbide-libmlx = { path = "../libmlx" } -carbide-api-model = { path = "../api-model", default-features = false } -carbide-api-db = { path = "../api-db", default-features = false } -carbide-authn = { path = "../authn" } -spancounter = { path = "../spancounter" } nras = { path = "../nras" } -carbide-dpf = { path = "../dpf" } -component-manager = { path = "../component-manager" } -sqlx-query-tracing = { path = "../sqlx-query-tracing" } +spancounter = { path = "../spancounter" } state-controller = { path = "../state-controller" } -# DO NOT PUT DEPENDENCIES OTHER THAN LOCAL DEPS HERE, THEY SHOULD ALL HAVE 'path =' IN THEM. +sqlx-query-tracing = { path = "../sqlx-query-tracing" } -#these are alphabetized +# External dependencies. PLEASE KEEP ALPHABETIZED ORDER. ansi-to-html = { workspace = true } arc-swap = { workspace = true } -askama = { features = ["serde_json"], workspace = true } +askama = { workspace = true, features = ["serde_json"] } askama_escape = { workspace = true } asn1-rs = { workspace = true } async-trait = { workspace = true } @@ -97,15 +90,14 @@ axum-extra = { workspace = true, features = ["cookie", "cookie-private", "typed- base64 = { workspace = true } byteorder = { workspace = true } bytes = { workspace = true } -casbin = { features = ["glob"], workspace = true } +casbin = { workspace = true, features = ["glob"] } chrono = { workspace = true } clap = { workspace = true } -crypto-bigint = { workspace = true } dashmap = { workspace = true } data-encoding = { workspace = true } duration-str = { workspace = true } eyre = { workspace = true } -figment = { features = ["env", "toml"], workspace = true } +figment = { workspace = true, features = ["env", "toml"] } futures = { workspace = true } futures-util = { workspace = true } hex = { workspace = true } @@ -114,32 
+106,20 @@ hostname = { workspace = true } http = { workspace = true } http-body-util = { workspace = true } hyper = { workspace = true, features = ["full"] } -hyper-rustls = { workspace = true } -hyper-timeout = { workspace = true } hyper-util = { workspace = true } ipnetwork = { workspace = true, features = ["serde"] } itertools = { workspace = true } -jsonwebtoken = { features = ["rust_crypto"], workspace = true } -k8s-openapi = { features = ["latest"], workspace = true } -kube = { default-features = false, features = [ - "runtime", - "derive", - "client", - "rustls-tls", -], workspace = true } +jsonwebtoken = { workspace = true, features = ["rust_crypto"] } lazy_static = { workspace = true } libredfish = { workspace = true } librms = { workspace = true } mac_address = { workspace = true } num_cpus = { workspace = true } -oauth2 = { default-features = false, workspace = true } -oid-registry = { workspace = true } +oauth2 = { workspace = true, default-features = false } opentelemetry = { workspace = true, features = ["logs"] } opentelemetry-otlp = { workspace = true, features = ["grpc-tonic"] } -opentelemetry-prometheus.workspace = true -opentelemetry-semantic-conventions = { features = [ - "semconv_experimental", -], workspace = true } +opentelemetry-prometheus = { workspace = true } +opentelemetry-semantic-conventions = { workspace = true, features = ["semconv_experimental"] } opentelemetry_sdk = { workspace = true, features = [ "logs", "rt-tokio", @@ -149,19 +129,15 @@ opentelemetry_sdk = { workspace = true, features = [ pkcs1 = { workspace = true } p256 = { workspace = true } prometheus = { workspace = true } -prost-types = { workspace = true } rand = { workspace = true } regex = { workspace = true } -reqwest = { default-features = false, features = [ - "rustls", - "stream", -], workspace = true } +reqwest = { workspace = true, default-features = false, features = ["rustls", "stream"] } rsa = { workspace = true } rumqttc = { workspace = true } rustls = { 
workspace = true } rustls-pemfile = { workspace = true } rustls-pki-types = { workspace = true } -serde = { features = ["derive"], workspace = true } +serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } serde_yaml = { workspace = true } sha2 = { workspace = true } @@ -178,7 +154,6 @@ sqlx = { workspace = true, features = [ ] } strum = { workspace = true } temp-dir = { workspace = true } -tera = { workspace = true } thiserror = { workspace = true } time = { workspace = true } tokio = { workspace = true } @@ -189,25 +164,16 @@ toml = { workspace = true } tonic = { workspace = true } tonic-reflection = { workspace = true } tower = { workspace = true } -tower-http = { features = [ - "add-extension", - "auth", - "normalize-path", -], workspace = true } +tower-http = { workspace = true, features = ["add-extension", "auth", "normalize-path"] } tracing = { workspace = true } tracing-log = { workspace = true } tracing-opentelemetry = { workspace = true } -tracing-subscriber = { features = [ - "env-filter", - "local-time", -], workspace = true } -tss-esapi = { optional = true, workspace = true } -url = { features = ["serde"], workspace = true } +tracing-subscriber = { workspace = true, features = ["env-filter", "local-time"] } +tss-esapi = { workspace = true, optional = true } +url = { workspace = true, features = ["serde"] } urlencoding = { workspace = true } -uuid = { features = ["v4", "serde"], workspace = true } -version-compare = { workspace = true } -x509-parser = { features = ["verify"], workspace = true } -#these are alphabetized +uuid = { workspace = true, features = ["v4", "serde"] } +x509-parser = { workspace = true, features = ["verify"] } [features] default = ["linux-build"] @@ -217,28 +183,26 @@ linux-build = ["tss-esapi"] carbide-version = { path = "../version" } [dev-dependencies] -figment = { features = ["env", "test", "toml"], workspace = true } -ctor = { workspace = true } -lazy_static = { workspace = true } -const_format 
= { workspace = true } -mockall = { workspace = true } -rcgen = { workspace = true } +# Local (path) dependencies. PLEASE KEEP ALPHABETIZED ORDER. +carbide-ib-fabric = { path = "../ib-fabric", features = ["test-support"] } +carbide-machine-controller = { path = "../machine-controller", features = ["test-support"] } carbide-macros = { path = "../macros" } -carbide-sqlx-testing = { path = "../sqlx-testing", default-features = false } -carbide-prost-builder = { path = "../prost-builder" } carbide-nvlink-manager = { path = "../nvlink-manager", features = ["test-support"] } +carbide-prost-builder = { path = "../prost-builder" } carbide-rack = { path = "../rack", features = ["test-support"] } carbide-redfish = { path = "../redfish", features = ["test-support"] } +carbide-sqlx-testing = { path = "../sqlx-testing", default-features = false } carbide-utils = { path = "../utils", features = ["test-support"] } -state-controller = { path = "../state-controller", features = ["test-support"] } -carbide-ib-fabric = { path = "../ib-fabric", features = ["test-support"] } prometheus-text-parser = { path = "../prometheus-text-parser" } -prost = { workspace = true } -tower-test = { workspace = true } -hyper = { features = ["client", "http1"], workspace = true } -http = { workspace = true } +state-controller = { path = "../state-controller", features = ["test-support"] } + +# External dependencies. PLEASE KEEP ALPHABETIZED ORDER. 
+const_format = { workspace = true } +ctor = { workspace = true } +figment = { workspace = true, features = ["env", "test", "toml"] } mockito = { workspace = true } -once_cell = { workspace = true } +prost = { workspace = true } +rcgen = { workspace = true } tempfile = { workspace = true } [lints] diff --git a/crates/api/src/api.rs b/crates/api/src/api.rs index 3615362609..4b3b7de1df 100644 --- a/crates/api/src/api.rs +++ b/crates/api/src/api.rs @@ -31,6 +31,8 @@ use ::rpc::protos::dns::{ }; use ::rpc::protos::{measured_boot as measured_boot_pb, mlx_device as mlx_device_pb}; use carbide_ib_fabric::ib::IBFabricManager; +use carbide_machine_controller::dpf::DpfOperations; +use carbide_machine_controller::io::MachineStateControllerIO; use carbide_rack::bms_client::BmsDsxExchangeHandle; use carbide_redfish::libredfish::RedfishClientPool; use carbide_site_explorer::EndpointExplorer; @@ -57,8 +59,6 @@ use crate::dynamic_settings::DynamicSettings; use crate::ethernet_virtualization::EthVirtData; use crate::logging::log_limiter::LogLimiter; use crate::scout_stream::ConnectionRegistry; -use crate::state_controller::machine::dpf::DpfOperations; -use crate::state_controller::machine::io::MachineStateControllerIO; use crate::{CarbideError, CarbideResult}; pub struct Api { diff --git a/crates/api/src/attestation/measured_boot.rs b/crates/api/src/attestation/measured_boot.rs index 39e1fae2fe..0b8cf617fb 100644 --- a/crates/api/src/attestation/measured_boot.rs +++ b/crates/api/src/attestation/measured_boot.rs @@ -21,6 +21,7 @@ use std::io::Write; use std::process::Command; use byteorder::{BigEndian, ByteOrder}; +use carbide_machine_controller::{MeasuringOutcome, handle_measuring_state}; use carbide_uuid::machine::MachineId; use carbide_uuid::measured_boot::MeasurementReportId; use db::db_read::DbReader; @@ -31,7 +32,6 @@ use sqlx::PgConnection; use temp_dir::TempDir; use crate::attestation::get_ek_cert_by_machine_id; -use crate::state_controller::machine::{MeasuringOutcome, 
handle_measuring_state}; use crate::{CarbideError, CarbideResult}; /// VerifyQuoteState is a simple enum used to track diff --git a/crates/api/src/cfg/file.rs b/crates/api/src/cfg/file.rs index 97a774b56f..c08be945c5 100644 --- a/crates/api/src/cfg/file.rs +++ b/crates/api/src/cfg/file.rs @@ -24,6 +24,11 @@ use bmc_vendor::BMCVendor; use carbide_authn::config::{AllowedCertCriteria, TrustConfig}; use carbide_firmware::FirmwareConfig; use carbide_ib_fabric::config::{IBFabricConfig, IbFabricDefinition}; +use carbide_machine_controller::config::power_manager::default_power_options; +use carbide_machine_controller::config::{ + BomValidationConfig, FirmwareGlobal, MachineStateControllerConfig, + MachineStateHandlerSiteConfig, MachineValidationConfig, PowerManagerOptions, TimePeriod, +}; use carbide_nvlink_manager::config::NvLinkConfig; use carbide_preingestion_manager::PreingestionManagerConfig; use carbide_rack_controller::config::{RackValidationConfig, RmsConfig}; @@ -51,12 +56,6 @@ use model::tenant::identity_config::SigningAlgorithm; use regex::Regex; use serde::{Deserialize, Deserializer, Serialize}; -use crate::state_controller::machine::config::power_manager::default_power_options; -use crate::state_controller::machine::config::{ - BomValidationConfig, FirmwareGlobal, MachineStateControllerConfig, - MachineStateHandlerSiteConfig, PowerManagerOptions, -}; - static BF2_NIC: &str = "24.47.2682"; static BF2_BMC: &str = "BF-25.10-20"; static BF2_CEC: &str = "4-15"; @@ -1751,15 +1750,6 @@ pub struct MachineUpdater { pub max_concurrent_machine_updates_percent: Option, } -/// A UTC time window defined by a start and end timestamp. -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] -pub struct TimePeriod { - /// Start of the time window (UTC). - pub start: chrono::DateTime, - /// End of the time window (UTC). 
- pub end: chrono::DateTime, -} - pub fn default_max_find_by_ids() -> u32 { 100 } @@ -1831,70 +1821,6 @@ impl MeasuredBootMetricsCollectorConfig { } } -/// Controls which machine validation tests are active. -#[derive(Default, Clone, Copy, Debug, Deserialize, Serialize)] -pub enum MachineValidationTestSelectionMode { - /// Only update tests in DB that are specified in the - /// `tests` config list. - #[default] - Default, - /// Enable all tests in DB, but allow per-test overrides - /// from the `tests` config list. - EnableAll, - /// Disable all tests in DB, but allow per-test overrides - /// from the `tests` config list. - DisableAll, -} - -/// Configuration for machine validation tests (memory -/// latency, SSD I/O, etc.) run after ingestion to verify -/// hardware health. -#[derive(Default, Clone, Debug, Deserialize, Serialize)] -pub struct MachineValidationConfig { - /// Enables machine validation testing. - #[serde(default)] - pub enabled: bool, - - /// Controls whether to run all tests, no tests, or use - /// per-test configuration. - #[serde(default)] - pub test_selection_mode: MachineValidationTestSelectionMode, - - #[serde( - default = "MachineValidationConfig::default_run_interval", - deserialize_with = "deserialize_duration", - serialize_with = "as_std_duration" - )] - pub run_interval: std::time::Duration, - - /// Per-test enable/disable overrides. - #[serde(default)] - pub tests: Vec, -} - -/// Per-test override for machine validation. -/// -/// Example: -/// ```toml -/// tests = [ -/// { id = "MmMemLatency", enable = true }, -/// { id = "FioSSD", enable = true } -/// ] -/// ``` -#[derive(Default, Clone, Debug, Deserialize, Serialize)] -pub struct MachineValidationTestConfig { - /// Unique test identifier (e.g., "MmMemLatency"). - pub id: String, - /// Whether this test is enabled. 
- pub enable: bool, -} - -impl MachineValidationConfig { - const fn default_run_interval() -> std::time::Duration { - std::time::Duration::from_secs(60) - } -} - /// The VPC isolation behavior enforced within a site. #[derive(Clone, Copy, Debug, Default, Deserialize, Serialize)] #[serde(rename_all = "snake_case")] diff --git a/crates/api/src/compat.rs b/crates/api/src/compat.rs index e3d249b610..bc27cdcec5 100644 --- a/crates/api/src/compat.rs +++ b/crates/api/src/compat.rs @@ -130,8 +130,8 @@ impl BuildAndFillLegacyFields for ForgeAgentControlResponse { #[cfg(test)] mod tests { - use ::rpc::common; use ::rpc::protos::mlx_device; + use ::rpc::{common, scout_firmware_upgrade as sfu}; use carbide_uuid::machine_validation::MachineValidationId; use super::*; @@ -339,17 +339,17 @@ mod tests { fn firmware_upgrade_converts_to_legacy_task_json() { let upgrade_task_id = uuid::Uuid::new_v4().to_string(); let action = fac::Action::FirmwareUpgrade(fac::FirmwareUpgrade { - task: Some(fac::ScoutFirmwareUpgradeTask { + task: Some(sfu::ScoutFirmwareUpgradeTask { upgrade_task_id: upgrade_task_id.clone(), component_type: "cpld".to_string(), target_version: "1.2.3".to_string(), - script: Some(fac::FileArtifact { + script: Some(sfu::FileArtifact { url: "http://pxe/script.sh".to_string(), sha256: "abc".to_string(), }), execution_timeout_seconds: 30, artifact_download_timeout_seconds: 10, - file_artifacts: vec![fac::FileArtifact { + file_artifacts: vec![sfu::FileArtifact { url: "http://pxe/fw.bin".to_string(), sha256: "def".to_string(), }], @@ -385,17 +385,17 @@ mod tests { fn response_from_firmware_upgrade_sets_typed_payload_and_legacy_pairs() { let response = ForgeAgentControlResponse::build_and_fill_legacy_fields( fac::Action::FirmwareUpgrade(fac::FirmwareUpgrade { - task: Some(fac::ScoutFirmwareUpgradeTask { + task: Some(sfu::ScoutFirmwareUpgradeTask { upgrade_task_id: uuid::Uuid::new_v4().to_string(), component_type: "cpld".to_string(), target_version: "1.2.3".to_string(), - 
script: Some(fac::FileArtifact { + script: Some(sfu::FileArtifact { url: "http://pxe/script.sh".to_string(), sha256: "abc".to_string(), }), execution_timeout_seconds: 30, artifact_download_timeout_seconds: 10, - file_artifacts: vec![fac::FileArtifact { + file_artifacts: vec![sfu::FileArtifact { url: "http://pxe/fw.bin".to_string(), sha256: "def".to_string(), }], diff --git a/crates/api/src/handlers/attestation.rs b/crates/api/src/handlers/attestation.rs index 5cb5b0e8d8..feed451e80 100644 --- a/crates/api/src/handlers/attestation.rs +++ b/crates/api/src/handlers/attestation.rs @@ -16,6 +16,7 @@ */ use ::rpc::common::MachineIdList; use ::rpc::forge::{self as rpc}; +use carbide_machine_controller::handler::attestation::trigger_attestation; use carbide_uuid::machine::MachineId; use db::ObjectFilter; use model::machine::machine_search_config::MachineSearchConfig; @@ -24,7 +25,6 @@ use tonic::{Request, Response, Status}; use crate::CarbideError; use crate::api::{Api, log_machine_id, log_request_data}; -use crate::state_controller::machine::handler::attestation::trigger_attestation; pub(crate) async fn trigger_machine_attestation( api: &Api, diff --git a/crates/api/src/handlers/instance.rs b/crates/api/src/handlers/instance.rs index 292dd91671..60913c3ab0 100644 --- a/crates/api/src/handlers/instance.rs +++ b/crates/api/src/handlers/instance.rs @@ -1637,7 +1637,7 @@ pub async fn force_delete_instance( id: instance.machine_id.to_string(), })?; - crate::state_controller::machine::handler::release_vpc_dpu_loopback( + carbide_machine_controller::handler::release_vpc_dpu_loopback( &snapshot, Some(api.common_pools.as_ref()), &mut txn, diff --git a/crates/api/src/handlers/machine_scout.rs b/crates/api/src/handlers/machine_scout.rs index dde0dd3147..4a297d0cc2 100644 --- a/crates/api/src/handlers/machine_scout.rs +++ b/crates/api/src/handlers/machine_scout.rs @@ -16,7 +16,7 @@ */ use ::rpc::forge::ForgeAgentControlResponse; use ::rpc::model::machine::get_action_for_dpu_state; 
-use ::rpc::{forge as rpc, forge_agent_control_response as fac}; +use ::rpc::{forge as rpc, forge_agent_control_response as fac, scout_firmware_upgrade as sfu}; use model::machine::machine_search_config::MachineSearchConfig; use model::machine::{ BomValidating, CleanupContext, CleanupState, FailureCause, FailureDetails, FailureSource, @@ -334,7 +334,7 @@ pub(crate) async fn forge_agent_control( machine_id = %machine.id, "Sending firmware upgrade task to scout", ); - let action = match serde_json::from_str::<fac::ScoutFirmwareUpgradeTask>(task_json) + let action = match serde_json::from_str::<sfu::ScoutFirmwareUpgradeTask>(task_json) { Ok(task) => Action::FirmwareUpgrade(fac::FirmwareUpgrade { task: Some(task) }), Err(e) => { diff --git a/crates/api/src/handlers/machine_validation.rs b/crates/api/src/handlers/machine_validation.rs index a946b7d610..783bea2f74 100644 --- a/crates/api/src/handlers/machine_validation.rs +++ b/crates/api/src/handlers/machine_validation.rs @@ -15,6 +15,9 @@ * limitations under the License. */ use ::rpc::forge::{self as rpc, GetMachineValidationExternalConfigResponse}; +use carbide_machine_controller::config::machine_validation::{ + MachineValidationConfig, MachineValidationTestSelectionMode, +}; use config_version::ConfigVersion; use db::{self, machine_validation_suites}; use model::machine::machine_search_config::MachineSearchConfig; @@ -32,7 +35,6 @@ use tonic::{Request, Response, Status}; use crate::CarbideError; use crate::api::{Api, log_request_data}; -use crate::cfg::file::{MachineValidationConfig, MachineValidationTestSelectionMode}; use crate::handlers::utils::convert_and_log_machine_id; /// Temporary: when `true`, MV mutation handlers return `FailedPrecondition` and do not write to the DB. 
diff --git a/crates/api/src/machine_update_manager/dpu_nic_firmware.rs b/crates/api/src/machine_update_manager/dpu_nic_firmware.rs index 5060bf9da8..80a79619b1 100644 --- a/crates/api/src/machine_update_manager/dpu_nic_firmware.rs +++ b/crates/api/src/machine_update_manager/dpu_nic_firmware.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use std::sync::atomic::Ordering; use async_trait::async_trait; +use carbide_machine_controller::dpf::DpfOperations; use carbide_uuid::machine::MachineId; use db::dpu_machine_update; use model::dpu_machine_update::{DpuMachineUpdate, OutdatedDpfDpu}; @@ -30,7 +31,6 @@ use super::dpu_nic_firmware_metrics::DpuNicFirmwareUpdateMetrics; use super::machine_update_module::MachineUpdateModule; use crate::cfg::file::CarbideConfig; use crate::machine_update_manager::MachineUpdateManager; -use crate::state_controller::machine::dpf::DpfOperations; use crate::{CarbideResult, DatabaseError}; /// DpuNicFirmwareUpdate is a module used [MachineUpdateManager](crate::machine_update_manager::MachineUpdateManager) diff --git a/crates/api/src/machine_update_manager/mod.rs b/crates/api/src/machine_update_manager/mod.rs index 1920f68dc6..d0406cd788 100644 --- a/crates/api/src/machine_update_manager/mod.rs +++ b/crates/api/src/machine_update_manager/mod.rs @@ -26,6 +26,7 @@ use std::sync::Arc; use std::sync::atomic::Ordering; use std::time::Duration; +use carbide_machine_controller::dpf::DpfOperations; use carbide_utils::periodic_timer::PeriodicTimer; use carbide_uuid::machine::MachineId; use db::work_lock_manager::WorkLockManagerHandle; @@ -44,7 +45,6 @@ use self::dpu_nic_firmware::DpuNicFirmwareUpdate; use self::metrics::MachineUpdateManagerMetrics; use crate::CarbideResult; use crate::cfg::file::{CarbideConfig, MaxConcurrentUpdates}; -use crate::state_controller::machine::dpf::DpfOperations; /// The MachineUpdateManager periodically runs [modules](machine_update_module::MachineUpdateModule) to initiate upgrades of machine components. 
/// On each iteration the MachineUpdateManager will: diff --git a/crates/api/src/machine_validation/mod.rs b/crates/api/src/machine_validation/mod.rs index 6e4c8290a7..de47adfd3a 100644 --- a/crates/api/src/machine_validation/mod.rs +++ b/crates/api/src/machine_validation/mod.rs @@ -21,6 +21,7 @@ use std::default::Default; use std::io; use std::sync::Arc; +use carbide_machine_controller::config::machine_validation::MachineValidationConfig; use carbide_utils::periodic_timer::PeriodicTimer; use db::ObjectFilter; use tokio::task::JoinSet; @@ -28,7 +29,6 @@ use tokio_util::sync::CancellationToken; use self::metrics::MachineValidationMetrics; use crate::CarbideResult; -use crate::cfg::file::MachineValidationConfig; pub struct MachineValidationManager { database_connection: sqlx::PgPool, diff --git a/crates/api/src/run.rs b/crates/api/src/run.rs index be384b5905..078d0e71fe 100644 --- a/crates/api/src/run.rs +++ b/crates/api/src/run.rs @@ -75,7 +75,7 @@ pub async fn run( } else { setup_logging( debug, - crate::state_controller::machine::extra_logfmt_logging_fields(), + carbide_machine_controller::extra_logfmt_logging_fields(), None::, ) .wrap_err("setup_telemetry")? 
diff --git a/crates/api/src/setup.rs b/crates/api/src/setup.rs index e7cb404a53..2f4100aa34 100644 --- a/crates/api/src/setup.rs +++ b/crates/api/src/setup.rs @@ -33,6 +33,12 @@ use carbide_ib_partition_controller::context::IBPartitionStateHandlerServices; use carbide_ib_partition_controller::handler::IBPartitionStateHandler; use carbide_ib_partition_controller::io::IBPartitionStateControllerIO; use carbide_ipmi::IPMITool; +use carbide_machine_controller::context::MachineStateHandlerServices; +use carbide_machine_controller::dpf::{ + CarbideBmcPasswordProvider, CarbideDPFLabeler, DpfOperations, DpfSdkOps, +}; +use carbide_machine_controller::handler::MachineStateHandlerBuilder; +use carbide_machine_controller::io::MachineStateControllerIO; use carbide_network_segment_controller::context::NetworkSegmentStateHandlerServices; use carbide_network_segment_controller::handler::NetworkSegmentStateHandler; use carbide_network_segment_controller::io::NetworkSegmentStateControllerIO; @@ -102,9 +108,6 @@ use crate::measured_boot::metrics_collector::MeasuredBootMetricsCollector; use crate::mqtt_state_change_hook::hook::MqttStateChangeHook; use crate::scout_stream::ConnectionRegistry; use crate::state_controller::common_services::CommonStateHandlerServices; -use crate::state_controller::machine::context::MachineStateHandlerServices; -use crate::state_controller::machine::handler::MachineStateHandlerBuilder; -use crate::state_controller::machine::io::MachineStateControllerIO; use crate::{attestation, db_init, ethernet_virtualization, listener}; /// The resolved set of network declarations passed from `start_api` into @@ -644,58 +647,51 @@ pub async fn start_api( // Create DPF SDK and initialize CRs if enabled // If we end up having static DPUDeployments, we could move the static CRs outside of the API. 
- let dpf_sdk: Option> = - if carbide_config.dpf.enabled { - tracing::info!("Initializing DPF SDK"); - let repo = carbide_dpf::KubeRepository::new() - .await - .map_err(|e| eyre::eyre!("Failed to create DPF repository: {e}"))?; - - let provider = crate::state_controller::machine::dpf::CarbideBmcPasswordProvider::new( - credential_manager.clone(), - ); - - let mandatory_services = carbide_config.dpf.services.clone(); - let dpf_mandatory_services = vec![ - crate::dpf_services::dts_service(&mandatory_services.dts), - crate::dpf_services::doca_hbn_service(&mandatory_services.doca_hbn), - crate::dpf_services::dhcp_server_service(&mandatory_services.dhcp_server), - crate::dpf_services::dpu_agent_service(&mandatory_services.dpu_agent), - crate::dpf_services::fmds_service(&mandatory_services.fmds), - crate::dpf_services::otelcol_service(&mandatory_services.otel), - ]; - - // This is just temparary code until we make v2 only option. (just 2 weeks) - // Soon v2 flag will be removed and will become only mode for dpf handling. 
- let init_config = carbide_dpf::InitDpfResourcesConfig { - bfb_url: carbide_config.dpf.bfb_url.clone(), - flavor_name: carbide_config.dpf.flavor_name.clone(), - deployment_name: carbide_config.dpf.deployment_name.clone(), - services: dpf_mandatory_services, - }; + let dpf_sdk: Option> = if carbide_config.dpf.enabled { + tracing::info!("Initializing DPF SDK"); + let repo = carbide_dpf::KubeRepository::new() + .await + .map_err(|e| eyre::eyre!("Failed to create DPF repository: {e}"))?; + + let provider = CarbideBmcPasswordProvider::new(credential_manager.clone()); + + let mandatory_services = carbide_config.dpf.services.clone(); + let dpf_mandatory_services = vec![ + crate::dpf_services::dts_service(&mandatory_services.dts), + crate::dpf_services::doca_hbn_service(&mandatory_services.doca_hbn), + crate::dpf_services::dhcp_server_service(&mandatory_services.dhcp_server), + crate::dpf_services::dpu_agent_service(&mandatory_services.dpu_agent), + crate::dpf_services::fmds_service(&mandatory_services.fmds), + crate::dpf_services::otelcol_service(&mandatory_services.otel), + ]; + + // This is just temparary code until we make v2 only option. (just 2 weeks) + // Soon v2 flag will be removed and will become only mode for dpf handling. 
+ let init_config = carbide_dpf::InitDpfResourcesConfig { + bfb_url: carbide_config.dpf.bfb_url.clone(), + flavor_name: carbide_config.dpf.flavor_name.clone(), + deployment_name: carbide_config.dpf.deployment_name.clone(), + services: dpf_mandatory_services, + }; - let sdk = carbide_dpf::DpfSdkBuilder::new(repo, carbide_dpf::NAMESPACE, provider) - .with_labeler( - crate::state_controller::machine::dpf::CarbideDPFLabeler::new( - carbide_config.dpf.node_label_key.clone(), - ), - ) - .with_bmc_password_refresh_interval(std::time::Duration::from_secs(60)) - .with_join_set(join_set) - .initialize(&init_config) - .await - .map_err(|err| eyre::eyre!("Failed to initialize DPF SDK: {err}"))?; - - Some(Arc::new( - crate::state_controller::machine::dpf::DpfSdkOps::new( - Arc::new(sdk), - db_pool.clone(), - join_set, - )?, + let sdk = carbide_dpf::DpfSdkBuilder::new(repo, carbide_dpf::NAMESPACE, provider) + .with_labeler(CarbideDPFLabeler::new( + carbide_config.dpf.node_label_key.clone(), )) - } else { - None - }; + .with_bmc_password_refresh_interval(std::time::Duration::from_secs(60)) + .with_join_set(join_set) + .initialize(&init_config) + .await + .map_err(|err| eyre::eyre!("Failed to initialize DPF SDK: {err}"))?; + + Some(Arc::new(DpfSdkOps::new( + Arc::new(sdk), + db_pool.clone(), + join_set, + )?)) + } else { + None + }; let component_manager = if let Some(cd_config) = &carbide_config.component_manager { match component_manager::component_manager::build_component_manager( diff --git a/crates/api/src/state_controller/mod.rs b/crates/api/src/state_controller/mod.rs index 2d086487fa..cb3ca789f1 100644 --- a/crates/api/src/state_controller/mod.rs +++ b/crates/api/src/state_controller/mod.rs @@ -16,4 +16,3 @@ */ pub mod common_services; -pub mod machine; diff --git a/crates/api/src/tests/common/api_fixtures/mod.rs b/crates/api/src/tests/common/api_fixtures/mod.rs index 6484de7311..8f99a96e94 100644 --- a/crates/api/src/tests/common/api_fixtures/mod.rs +++ 
b/crates/api/src/tests/common/api_fixtures/mod.rs @@ -33,6 +33,16 @@ use carbide_ib_partition_controller::context::IBPartitionStateHandlerServices; use carbide_ib_partition_controller::handler::IBPartitionStateHandler; use carbide_ib_partition_controller::io::IBPartitionStateControllerIO; use carbide_ipmi::IPMITool; +use carbide_machine_controller::config::{ + BomValidationConfig, FirmwareGlobal, MachineStateControllerConfig, MachineValidationConfig, + PowerManagerOptions, +}; +use carbide_machine_controller::context::MachineStateHandlerServices; +use carbide_machine_controller::dpf::DpfOperations; +use carbide_machine_controller::handler::{ + MachineStateHandler, MachineStateHandlerBuilder, PowerOptionConfig, ReachabilityParams, +}; +use carbide_machine_controller::io::MachineStateControllerIO; use carbide_network_segment_controller::context::NetworkSegmentStateHandlerServices; use carbide_network_segment_controller::handler::NetworkSegmentStateHandler; use carbide_network_segment_controller::io::NetworkSegmentStateControllerIO; @@ -124,11 +134,10 @@ use crate::api::metrics::ApiMetricsEmitter; use crate::cfg::file::{ CarbideConfig, ComputeAllocationEnforcement, DpaConfig, DpaInterfaceStateControllerConfig, DpuConfig as InitialDpuConfig, FnnConfig, IbPartitionStateControllerConfig, ListenMode, - MachineUpdater, MachineValidationConfig, MeasuredBootMetricsCollectorConfig, MqttAuthConfig, - NetworkSecurityGroupConfig, NetworkSegmentStateControllerConfig, - PowerShelfStateControllerConfig, RackStateControllerConfig, SpdmConfig, - SpdmStateControllerConfig, SwitchStateControllerConfig, VmaasConfig, VpcPeeringPolicy, - default_max_find_by_ids, + MachineUpdater, MeasuredBootMetricsCollectorConfig, MqttAuthConfig, NetworkSecurityGroupConfig, + NetworkSegmentStateControllerConfig, PowerShelfStateControllerConfig, + RackStateControllerConfig, SpdmConfig, SpdmStateControllerConfig, SwitchStateControllerConfig, + VmaasConfig, VpcPeeringPolicy, default_max_find_by_ids, }; use 
crate::ethernet_virtualization::{EthVirtData, SiteFabricPrefixList}; use crate::logging::level_filter::ActiveLevel; @@ -136,15 +145,6 @@ use crate::logging::log_limiter::LogLimiter; use crate::measured_boot::convert_vec; use crate::scout_stream; use crate::state_controller::common_services::CommonStateHandlerServices; -use crate::state_controller::machine::config::{ - BomValidationConfig, FirmwareGlobal, MachineStateControllerConfig, PowerManagerOptions, -}; -use crate::state_controller::machine::context::MachineStateHandlerServices; -use crate::state_controller::machine::dpf::DpfOperations; -use crate::state_controller::machine::handler::{ - MachineStateHandler, MachineStateHandlerBuilder, PowerOptionConfig, ReachabilityParams, -}; -use crate::state_controller::machine::io::MachineStateControllerIO; use crate::tests::common::api_fixtures::endpoint_explorer::MockEndpointExplorer; use crate::tests::common::api_fixtures::managed_host::ManagedHostConfig; use crate::tests::common::api_fixtures::network_segment::{ diff --git a/crates/api/src/tests/dpf/duplicate_events.rs b/crates/api/src/tests/dpf/duplicate_events.rs index c9713a0d52..8add7fc6e0 100644 --- a/crates/api/src/tests/dpf/duplicate_events.rs +++ b/crates/api/src/tests/dpf/duplicate_events.rs @@ -29,13 +29,13 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Duration; use carbide_dpf::DpuPhase; +use carbide_machine_controller::dpf::{DpfOperations, MockDpfOperations}; use carbide_redfish::libredfish::test_support::RedfishSimAction; use carbide_uuid::machine::MachineId; use libredfish::SystemPowerControl; use model::machine::{DpfState, DpuInitState, ManagedHostState}; use tokio::time::timeout; -use crate::state_controller::machine::dpf::{DpfOperations, MockDpfOperations}; use crate::tests::common::api_fixtures::{ TestEnvOverrides, TestManagedHost, create_managed_host_with_dpf, create_test_env_with_overrides, get_config, reboot_completed, diff --git a/crates/api/src/tests/dpf/happy_path.rs 
b/crates/api/src/tests/dpf/happy_path.rs index 76f3bd4c74..b8f59de18e 100644 --- a/crates/api/src/tests/dpf/happy_path.rs +++ b/crates/api/src/tests/dpf/happy_path.rs @@ -21,14 +21,14 @@ use std::sync::Arc; use std::time::Duration; use carbide_dpf::DpuPhase; +use carbide_machine_controller::dpf::DpfOperations; use model::machine::ManagedHostState; use tokio::time::timeout; -use crate::state_controller::machine::dpf::DpfOperations; - const TEST_TIMEOUT: Duration = Duration::from_secs(30); -use crate::state_controller::machine::dpf::MockDpfOperations; +use carbide_machine_controller::dpf::MockDpfOperations; + use crate::tests::common::api_fixtures::{ TestEnvOverrides, create_managed_host_with_dpf, create_test_env_with_overrides, get_config, }; diff --git a/crates/api/src/tests/dpf/reprovisioning.rs b/crates/api/src/tests/dpf/reprovisioning.rs index 1db6388f5b..4bfdcf63e4 100644 --- a/crates/api/src/tests/dpf/reprovisioning.rs +++ b/crates/api/src/tests/dpf/reprovisioning.rs @@ -26,13 +26,13 @@ use std::sync::{Arc, Mutex}; use std::time::Duration; use carbide_dpf::DpuPhase; +use carbide_machine_controller::dpf::{DpfOperations, MockDpfOperations}; use carbide_uuid::machine::MachineId; use model::machine::{ DpfState, DpuReprovisionStates, InstanceState, ManagedHostState, ReprovisionState, }; use tokio::time::timeout; -use crate::state_controller::machine::dpf::{DpfOperations, MockDpfOperations}; use crate::tests::common::api_fixtures::{ TestEnvOverrides, TestManagedHost, create_managed_host_with_dpf, create_managed_host_with_dpf_multi, create_test_env_with_overrides, get_config, diff --git a/crates/api/src/tests/dpf/stale_labels.rs b/crates/api/src/tests/dpf/stale_labels.rs index e38442c4f3..fa0c166366 100644 --- a/crates/api/src/tests/dpf/stale_labels.rs +++ b/crates/api/src/tests/dpf/stale_labels.rs @@ -28,11 +28,11 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Duration; use carbide_dpf::DpuPhase; +use 
carbide_machine_controller::dpf::{DpfOperations, MockDpfOperations}; use carbide_uuid::machine::MachineId; use model::machine::{DpfState, DpuInitState, FailureCause, FailureDetails, ManagedHostState}; use tokio::time::timeout; -use crate::state_controller::machine::dpf::{DpfOperations, MockDpfOperations}; use crate::tests::common::api_fixtures::{ TestEnvOverrides, TestManagedHost, create_managed_host_with_dpf, create_test_env_with_overrides, get_config, diff --git a/crates/api/src/tests/dpf/waiting_for_ready.rs b/crates/api/src/tests/dpf/waiting_for_ready.rs index 3916e0b2b6..6fd1d9b994 100644 --- a/crates/api/src/tests/dpf/waiting_for_ready.rs +++ b/crates/api/src/tests/dpf/waiting_for_ready.rs @@ -23,6 +23,7 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Duration; use carbide_dpf::DpuPhase; +use carbide_machine_controller::dpf::{DpfOperations, MockDpfOperations}; use carbide_redfish::libredfish::RedfishClientPool; use carbide_redfish::libredfish::test_support::RedfishSimAction; use carbide_uuid::machine::MachineId; @@ -30,7 +31,6 @@ use libredfish::SystemPowerControl; use model::machine::{DpfState, DpuInitState, ManagedHostState}; use tokio::time::timeout; -use crate::state_controller::machine::dpf::{DpfOperations, MockDpfOperations}; use crate::tests::common::api_fixtures::{ TestEnvOverrides, TestManagedHost, create_managed_host_with_dpf, create_test_env_with_overrides, get_config, reboot_completed, diff --git a/crates/api/src/tests/dpu_nic_firmware.rs b/crates/api/src/tests/dpu_nic_firmware.rs index 1502d14249..43eb90493d 100644 --- a/crates/api/src/tests/dpu_nic_firmware.rs +++ b/crates/api/src/tests/dpu_nic_firmware.rs @@ -17,6 +17,7 @@ use std::collections::HashSet; use std::string::ToString; +use carbide_machine_controller::health_report::create_host_update_health_report_dpufw; use common::api_fixtures::{create_managed_host, create_managed_host_multi_dpu, create_test_env}; use model::machine::LoadSnapshotOptions; use 
model::machine_update_module::{ @@ -26,7 +27,6 @@ use model::machine_update_module::{ use crate::CarbideResult; use crate::machine_update_manager::dpu_nic_firmware::DpuNicFirmwareUpdate; use crate::machine_update_manager::machine_update_module::MachineUpdateModule; -use crate::state_controller::machine::health_report::create_host_update_health_report_dpufw; use crate::tests::common; use crate::tests::common::api_fixtures::TestManagedHost; use crate::tests::common::api_fixtures::test_managed_host::TestManagedHostSnapshots; diff --git a/crates/api/src/tests/dpu_reprovisioning.rs b/crates/api/src/tests/dpu_reprovisioning.rs index ff74bb57d1..af890c20de 100644 --- a/crates/api/src/tests/dpu_reprovisioning.rs +++ b/crates/api/src/tests/dpu_reprovisioning.rs @@ -17,6 +17,7 @@ use std::collections::HashMap; +use carbide_machine_controller::handler::MachineStateHandlerBuilder; use carbide_redfish::libredfish::test_support::RedfishSimAction; use chrono::Utc; use common::api_fixtures::{create_managed_host_multi_dpu, create_test_env, reboot_completed}; @@ -32,7 +33,6 @@ use rpc::forge::forge_server::Forge; use rpc::forge_agent_control_response::Action; use rpc::model::instance::snapshot::instance_snapshot_derive_status; -use crate::state_controller::machine::handler::MachineStateHandlerBuilder; use crate::tests::common; use crate::tests::common::api_fixtures::dpu::create_dpu_machine_in_waiting_for_network_install; use crate::tests::common::api_fixtures::instance::TestInstance; diff --git a/crates/api/src/tests/host_bmc_firmware_test.rs b/crates/api/src/tests/host_bmc_firmware_test.rs index 2204264477..084a8e59c3 100644 --- a/crates/api/src/tests/host_bmc_firmware_test.rs +++ b/crates/api/src/tests/host_bmc_firmware_test.rs @@ -22,6 +22,8 @@ use std::os::unix::fs::PermissionsExt; use std::str::FromStr; use std::time::Duration; +use carbide_machine_controller::config::{FirmwareGlobal, TimePeriod}; +use carbide_machine_controller::handler::MAX_FIRMWARE_UPGRADE_RETRIES; use 
carbide_preingestion_manager::PreingestionManager; use carbide_redfish::libredfish::test_support::RedfishSimAction; use carbide_uuid::machine::MachineId; @@ -49,10 +51,8 @@ use tokio::time::sleep; use tonic::Request; use crate::CarbideResult; -use crate::cfg::file::{CarbideConfig, TimePeriod}; +use crate::cfg::file::CarbideConfig; use crate::machine_update_manager::MachineUpdateManager; -use crate::state_controller::machine::config::FirmwareGlobal; -use crate::state_controller::machine::handler::MAX_FIRMWARE_UPGRADE_RETRIES; use crate::tests::common; use crate::tests::common::api_fixtures::managed_host::HardwareInfoTemplate; use crate::tests::common::api_fixtures::{ diff --git a/crates/api/src/tests/machine_admin_force_delete.rs b/crates/api/src/tests/machine_admin_force_delete.rs index 22f2799465..e354ab89fb 100644 --- a/crates/api/src/tests/machine_admin_force_delete.rs +++ b/crates/api/src/tests/machine_admin_force_delete.rs @@ -26,6 +26,7 @@ use ::rpc::forge::{ }; use carbide_ib_fabric::config::IBFabricConfig; use carbide_ib_fabric::ib::{self, IBFabricManager}; +use carbide_machine_controller::dpf::{DpfOperations, MockDpfOperations}; use carbide_uuid::infiniband::IBPartitionId; use carbide_uuid::machine::{MachineId, MachineType}; use common::api_fixtures::dpu::create_dpu_machine; @@ -47,7 +48,6 @@ use tonic::Request; use crate::api::Api; use crate::attestation as attest; -use crate::state_controller::machine::dpf::{DpfOperations, MockDpfOperations}; use crate::tests::common; async fn get_partition_status(api: &Api, ib_partition_id: IBPartitionId) -> IbPartitionStatus { diff --git a/crates/api/src/tests/machine_creator.rs b/crates/api/src/tests/machine_creator.rs index a385eb4f8c..a2cef5e5c5 100644 --- a/crates/api/src/tests/machine_creator.rs +++ b/crates/api/src/tests/machine_creator.rs @@ -20,6 +20,7 @@ use std::net::IpAddr; use std::str::FromStr; use std::sync::Arc; +use carbide_machine_controller::handler::MachineStateHandlerBuilder; use 
carbide_site_explorer::MachineCreator; use carbide_site_explorer::config::SiteExplorerConfig; use carbide_site_explorer::errors::SiteExplorerError; @@ -39,7 +40,6 @@ use rpc::{BlockDevice, DiscoveryData, DiscoveryInfo, MachineDiscoveryInfo}; use tonic::Request; use crate::cfg::file::DpuConfig as InitialDpuConfig; -use crate::state_controller::machine::handler::MachineStateHandlerBuilder; use crate::tests::common; use crate::tests::common::api_fixtures::TestEnvOverrides; use crate::tests::common::api_fixtures::dpu::DpuConfig; diff --git a/crates/api/src/tests/machine_setup.rs b/crates/api/src/tests/machine_setup.rs new file mode 100644 index 0000000000..a96b32c9ed --- /dev/null +++ b/crates/api/src/tests/machine_setup.rs @@ -0,0 +1,82 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use std::collections::HashMap; + +/// Verify that `oem_manager_profiles` from the site config is forwarded to `machine_setup`. +/// +/// This test catches regressions where the argument gets dropped or replaced with an empty map. 
+#[tokio::test] +async fn test_oem_manager_profiles_passed_to_machine_setup() { + use carbide_redfish::libredfish::RedfishClientPool; + use carbide_redfish::libredfish::test_support::{RedfishSim, RedfishSimAction}; + use libredfish::BiosProfileType; + use libredfish::model::service_root::RedfishVendor; + + let mut config = crate::tests::common::api_fixtures::get_config(); + // Build an oem_manager_profiles map with a Dell R760 PSU Hot Spare setting. + // This mirrors the fix for the Dell R760 PSU fan issue (nvbugs-5834644). + config.oem_manager_profiles = HashMap::from([( + RedfishVendor::Dell, + HashMap::from([( + "r760".to_string(), + HashMap::from([( + BiosProfileType::Performance, + HashMap::from([( + "ServerPwr.1.PSRapidOn".to_string(), + serde_json::Value::String("Disabled".to_string()), + )]), + )]), + )]), + )]); + + use carbide_redfish::libredfish::RedfishAuth; + use forge_secrets::credentials::{CredentialKey, CredentialType}; + + let sim = RedfishSim::default(); + let timepoint = sim.timepoint(); + let client = sim + .create_client( + "test-host", + None, + RedfishAuth::Key(CredentialKey::HostRedfish { + credential_type: CredentialType::SiteDefault, + }), + None, + ) + .await + .unwrap(); + + let result = carbide_machine_controller::handler::call_machine_setup_and_handle_no_dpu_error( + client.as_ref(), + None, + 1, + &config.machine_state_handler_site_config(), + ) + .await; + + assert!(result.is_ok()); + + let actions = sim.actions_since(&timepoint).all_hosts(); + assert_eq!(actions.len(), 1); + assert_eq!( + actions[0], + RedfishSimAction::MachineSetup { + oem_manager_profiles: config.oem_manager_profiles, + } + ); +} diff --git a/crates/api/src/tests/machine_states.rs b/crates/api/src/tests/machine_states.rs index 27b1a70206..096c07ec9c 100644 --- a/crates/api/src/tests/machine_states.rs +++ b/crates/api/src/tests/machine_states.rs @@ -20,6 +20,9 @@ use std::sync::atomic::AtomicBool; use ::rpc::measured_boot::FromGrpc; use base64::prelude::*; +use 
carbide_machine_controller::context::MachineStateHandlerContextObjects; +use carbide_machine_controller::handler::{MachineStateHandlerBuilder, handler_host_power_control}; +use carbide_machine_controller::metrics::MachineMetrics; use carbide_redfish::libredfish::test_support::RedfishSimAction; use carbide_uuid::machine::MachineId; use carbide_uuid::machine_validation::MachineValidationId; @@ -65,11 +68,6 @@ use tonic::{Code, Request}; use crate::handlers::measured_boot::rpc_forge::MachineDiscoveryInfo; use crate::measured_boot::convert_vec; -use crate::state_controller::machine::context::MachineStateHandlerContextObjects; -use crate::state_controller::machine::handler::{ - MachineStateHandlerBuilder, handler_host_power_control, -}; -use crate::state_controller::machine::metrics::MachineMetrics; use crate::tests::common; use crate::tests::common::api_fixtures::dpu::{ TEST_DOCA_HBN_VERSION, TEST_DOCA_TELEMETRY_VERSION, TEST_DPU_AGENT_VERSION, diff --git a/crates/api/src/tests/machine_update_manager.rs b/crates/api/src/tests/machine_update_manager.rs index 1f44d3927b..afff7b2f62 100644 --- a/crates/api/src/tests/machine_update_manager.rs +++ b/crates/api/src/tests/machine_update_manager.rs @@ -20,6 +20,7 @@ use std::sync::{Arc, Mutex}; use std::time::Duration; use async_trait::async_trait; +use carbide_machine_controller::health_report::create_host_update_health_report; use carbide_uuid::machine::MachineId; use common::api_fixtures::create_test_env; use figment::Figment; @@ -37,7 +38,6 @@ use crate::CarbideResult; use crate::cfg::file::CarbideConfig; use crate::machine_update_manager::MachineUpdateManager; use crate::machine_update_manager::machine_update_module::MachineUpdateModule; -use crate::state_controller::machine::health_report::create_host_update_health_report; use crate::tests::common; use crate::tests::common::api_fixtures::create_managed_host; diff --git a/crates/api/src/tests/machine_validation.rs b/crates/api/src/tests/machine_validation.rs index 
ead690b6a9..434f366034 100644 --- a/crates/api/src/tests/machine_validation.rs +++ b/crates/api/src/tests/machine_validation.rs @@ -18,6 +18,9 @@ use std::str::FromStr; use std::time::SystemTime; +use carbide_machine_controller::config::machine_validation::{ + MachineValidationConfig, MachineValidationTestConfig, MachineValidationTestSelectionMode, +}; use carbide_uuid::machine_validation::MachineValidationId; use common::api_fixtures::{ TestEnvOverrides, create_host_with_machine_validation, create_test_env, @@ -33,9 +36,6 @@ use rpc::Timestamp; use rpc::forge::forge_server::Forge; use rpc::forge::{MachineValidationTestNextVersionRequest, MachineValidationTestVerfiedRequest}; -use crate::cfg::file::{ - MachineValidationConfig, MachineValidationTestConfig, MachineValidationTestSelectionMode, -}; use crate::handlers::machine_validation::apply_config_on_startup; use crate::tests::common; diff --git a/crates/api/src/tests/mod.rs b/crates/api/src/tests/mod.rs index af8893c89d..6fa4eb0dd4 100644 --- a/crates/api/src/tests/mod.rs +++ b/crates/api/src/tests/mod.rs @@ -75,6 +75,7 @@ mod machine_interfaces; mod machine_metadata; mod machine_network; mod machine_power; +mod machine_setup; mod machine_states; mod machine_topology; pub mod machine_update_manager; diff --git a/crates/machine-controller/Cargo.toml b/crates/machine-controller/Cargo.toml new file mode 100644 index 0000000000..cdbdcd7620 --- /dev/null +++ b/crates/machine-controller/Cargo.toml @@ -0,0 +1,77 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +[package] +name = "carbide-machine-controller" +version = "0.0.0" +edition.workspace = true +license.workspace = true +authors.workspace = true + +[features] +default = [] +test-support = ["dep:mockall"] + +[dependencies] +bmc-vendor = { path = "../bmc-vendor" } +carbide-api-db = { path = "../api-db", default-features = false } +carbide-api-model = { path = "../api-model", default-features = false } +carbide-dpf = { path = "../dpf", default-features = false } +carbide-health-report = { path = "../health-report", default-features = false } +carbide-health-metrics = { path = "../health-metrics" } +carbide-utils = { path = "../utils", default-features = false } +carbide-firmware = { path = "../firmware", default-features = false } +carbide-ipmi = { path = "../ipmi", default-features = false } +carbide-measured-boot = { path = "../measured-boot", default-features = false } +carbide-redfish = { path = "../redfish", default-features = false } +carbide-secrets = { path = "../secrets" } +carbide-state-controller-common = { path = "../state-controller-common", default-features = false } +carbide-uuid = { path = "../uuid", default-features = false } +config-version = { path = "../config-version", default-features = false } +state-controller = { path = "../state-controller" } + +async-trait = { workspace = true } +chrono = { workspace = true } +duration-str = { workspace = true } +eyre = { workspace = true } +futures = { workspace = true } +futures-util = { workspace = true } +itertools = { workspace = true } +lazy_static = { workspace = true } +libredfish = { 
workspace = true } +mac_address = { workspace = true } +mockall = { workspace = true, optional = true } +opentelemetry = { workspace = true } +prost = { workspace = true } +prost-types = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +sqlx = { workspace = true } +tracing = { workspace = true } +tokio = { workspace = true } +uuid = { workspace = true, features = ["v4", "serde"] } +version-compare = { workspace = true } + +[dev-dependencies] +figment = { workspace = true, features = ["env", "test", "toml"] } +regex = { workspace = true } +lazy_static = { workspace = true } + +[build-dependencies] +tonic-prost-build = "0.14" + +[lints] +workspace = true diff --git a/crates/machine-controller/build.rs b/crates/machine-controller/build.rs new file mode 100644 index 0000000000..d52f264928 --- /dev/null +++ b/crates/machine-controller/build.rs @@ -0,0 +1,34 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +use std::path::PathBuf; + +fn main() -> Result<(), Box> { + let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap()); + tonic_prost_build::configure() + .out_dir(out_dir) + .type_attribute( + "scout_firmware_upgrade.ScoutFirmwareUpgradeTask", + "#[derive(serde::Serialize, serde::Deserialize)]", + ) + .type_attribute( + "scout_firmware_upgrade.FileArtifact", + "#[derive(serde::Serialize, serde::Deserialize)]", + ) + .compile_protos(&["scout_firmware_upgrade.proto"], &["../rpc/proto"])?; + + Ok(()) +} diff --git a/crates/api/src/state_controller/machine/config/bom_validation.rs b/crates/machine-controller/src/config/bom_validation.rs similarity index 100% rename from crates/api/src/state_controller/machine/config/bom_validation.rs rename to crates/machine-controller/src/config/bom_validation.rs diff --git a/crates/api/src/state_controller/machine/config/controller.rs b/crates/machine-controller/src/config/controller.rs similarity index 100% rename from crates/api/src/state_controller/machine/config/controller.rs rename to crates/machine-controller/src/config/controller.rs diff --git a/crates/api/src/state_controller/machine/config/firmware_global.rs b/crates/machine-controller/src/config/firmware_global.rs similarity index 98% rename from crates/api/src/state_controller/machine/config/firmware_global.rs rename to crates/machine-controller/src/config/firmware_global.rs index 78f59faa54..7929af7757 100644 --- a/crates/api/src/state_controller/machine/config/firmware_global.rs +++ b/crates/machine-controller/src/config/firmware_global.rs @@ -95,7 +95,7 @@ pub struct FirmwareGlobal { } impl FirmwareGlobal { - #[cfg(test)] + #[cfg(feature = "test-support")] pub fn test_default() -> Self { FirmwareGlobal { autoupdate: true, @@ -114,7 +114,7 @@ impl FirmwareGlobal { } } - #[cfg(test)] + #[cfg(feature = "test-support")] pub fn get_retry_interval() -> Duration { Duration::seconds(1) } diff --git a/crates/machine-controller/src/config/machine_validation.rs 
b/crates/machine-controller/src/config/machine_validation.rs new file mode 100644 index 0000000000..3bc8b4ecbb --- /dev/null +++ b/crates/machine-controller/src/config/machine_validation.rs @@ -0,0 +1,84 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use carbide_utils::config::as_std_duration; +use duration_str::deserialize_duration; +use serde::{Deserialize, Serialize}; + +/// Controls which machine validation tests are active. +#[derive(Default, Clone, Copy, Debug, Deserialize, Serialize)] +pub enum MachineValidationTestSelectionMode { + /// Only update tests in DB that are specified in the + /// `tests` config list. + #[default] + Default, + /// Enable all tests in DB, but allow per-test overrides + /// from the `tests` config list. + EnableAll, + /// Disable all tests in DB, but allow per-test overrides + /// from the `tests` config list. + DisableAll, +} + +/// Configuration for machine validation tests (memory +/// latency, SSD I/O, etc.) run after ingestion to verify +/// hardware health. +#[derive(Default, Clone, Debug, Deserialize, Serialize)] +pub struct MachineValidationConfig { + /// Enables machine validation testing. + #[serde(default)] + pub enabled: bool, + + /// Controls whether to run all tests, no tests, or use + /// per-test configuration. 
+ #[serde(default)] + pub test_selection_mode: MachineValidationTestSelectionMode, + + #[serde( + default = "MachineValidationConfig::default_run_interval", + deserialize_with = "deserialize_duration", + serialize_with = "as_std_duration" + )] + pub run_interval: std::time::Duration, + + /// Per-test enable/disable overrides. + #[serde(default)] + pub tests: Vec, +} + +/// Per-test override for machine validation. +/// +/// Example: +/// ```toml +/// tests = [ +/// { id = "MmMemLatency", enable = true }, +/// { id = "FioSSD", enable = true } +/// ] +/// ``` +#[derive(Default, Clone, Debug, Deserialize, Serialize)] +pub struct MachineValidationTestConfig { + /// Unique test identifier (e.g., "MmMemLatency"). + pub id: String, + /// Whether this test is enabled. + pub enable: bool, +} + +impl MachineValidationConfig { + const fn default_run_interval() -> std::time::Duration { + std::time::Duration::from_secs(60) + } +} diff --git a/crates/api/src/state_controller/machine/config/mod.rs b/crates/machine-controller/src/config/mod.rs similarity index 78% rename from crates/api/src/state_controller/machine/config/mod.rs rename to crates/machine-controller/src/config/mod.rs index 99947e1d0a..504e7da0f8 100644 --- a/crates/api/src/state_controller/machine/config/mod.rs +++ b/crates/machine-controller/src/config/mod.rs @@ -16,15 +16,18 @@ */ use model::machine::HostHealthConfig; +use serde::{Deserialize, Serialize}; pub mod bom_validation; pub mod controller; pub mod firmware_global; +pub mod machine_validation; pub mod power_manager; pub use bom_validation::BomValidationConfig; pub use controller::MachineStateControllerConfig; pub use firmware_global::FirmwareGlobal; +pub use machine_validation::MachineValidationConfig; pub use power_manager::PowerManagerOptions; pub struct MachineStateHandlerSiteConfig { @@ -43,3 +46,12 @@ pub struct MachineStateHandlerSiteConfig { pub dpu_enable_secure_boot: bool, pub allow_zero_dpu_hosts: bool, } + +/// A UTC time window defined by a 
start and end timestamp. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +pub struct TimePeriod { + /// Start of the time window (UTC). + pub start: chrono::DateTime, + /// End of the time window (UTC). + pub end: chrono::DateTime, +} diff --git a/crates/api/src/state_controller/machine/config/power_manager.rs b/crates/machine-controller/src/config/power_manager.rs similarity index 100% rename from crates/api/src/state_controller/machine/config/power_manager.rs rename to crates/machine-controller/src/config/power_manager.rs diff --git a/crates/api/src/state_controller/machine/context.rs b/crates/machine-controller/src/context.rs similarity index 93% rename from crates/api/src/state_controller/machine/context.rs rename to crates/machine-controller/src/context.rs index df6bc3ae26..b39636be78 100644 --- a/crates/api/src/state_controller/machine/context.rs +++ b/crates/machine-controller/src/context.rs @@ -25,8 +25,8 @@ use model::machine::Machine; use sqlx::PgPool; use state_controller::state_handler::{StateHandlerContextObjects, StateHandlerError}; -use crate::state_controller::machine::config::MachineStateHandlerSiteConfig; -use crate::state_controller::machine::metrics::MachineMetrics; +use crate::config::MachineStateHandlerSiteConfig; +use crate::metrics::MachineMetrics; pub struct MachineStateHandlerContextObjects {} diff --git a/crates/api/src/state_controller/machine/dpf.rs b/crates/machine-controller/src/dpf.rs similarity index 99% rename from crates/api/src/state_controller/machine/dpf.rs rename to crates/machine-controller/src/dpf.rs index 8713a9df34..93b955c2be 100644 --- a/crates/api/src/state_controller/machine/dpf.rs +++ b/crates/machine-controller/src/dpf.rs @@ -32,7 +32,7 @@ use sqlx::PgPool; use state_controller::controller::Enqueuer; use tokio::task::JoinSet; -use crate::state_controller::machine::io::MachineStateControllerIO; +use crate::io::MachineStateControllerIO; /// Label key used by [`CarbideDPFLabeler`] to stamp the carbide 
`MachineId` of /// the DPU onto its DPUDevice. Propagates to the DPU CR via DPF. @@ -48,7 +48,7 @@ const CONTROLLED_DEVICE_LABEL: &str = "carbide.nvidia.com/controlled.device"; /// reacts to watcher callbacks, and performs reprovision/force-delete. /// /// Reboot handling is managed via the watcher's `on_reboot_required` callback. -#[cfg_attr(test, mockall::automock)] +#[cfg_attr(feature = "test-support", mockall::automock)] #[async_trait] pub trait DpfOperations: Send + Sync + std::fmt::Debug { /// Register a DPU device. diff --git a/crates/api/src/state_controller/machine/handler.rs b/crates/machine-controller/src/handler.rs similarity index 99% rename from crates/api/src/state_controller/machine/handler.rs rename to crates/machine-controller/src/handler.rs index f6bde21687..c50c840892 100644 --- a/crates/api/src/state_controller/machine/handler.rs +++ b/crates/machine-controller/src/handler.rs @@ -94,21 +94,18 @@ use tokio::sync::Semaphore; use tracing::instrument; use version_compare::Cmp; -use crate::cfg::file::{MachineValidationConfig, TimePeriod}; -use crate::state_controller::machine::config::{FirmwareGlobal, MachineStateHandlerSiteConfig}; -use crate::state_controller::machine::context::{ - MachineStateHandlerContextObjects, MachineStateHandlerServices, +use crate::config::{ + FirmwareGlobal, MachineStateHandlerSiteConfig, MachineValidationConfig, TimePeriod, }; -use crate::state_controller::machine::dpf::DpfOperations; -use crate::state_controller::machine::health_report::{ +use crate::context::{MachineStateHandlerContextObjects, MachineStateHandlerServices}; +use crate::dpf::DpfOperations; +use crate::health_report::{ create_host_update_health_report_dpufw, create_host_update_health_report_hostfw, }; -use crate::state_controller::machine::redfish::{ +use crate::redfish::{ did_dpu_finish_booting, host_power_control, host_power_control_with_location, }; -use crate::state_controller::machine::{ - MeasuringOutcome, get_measuring_prerequisites, 
handle_measuring_state, -}; +use crate::{MeasuringOutcome, get_measuring_prerequisites, handle_measuring_state}; pub mod attestation; mod bios_config; @@ -126,11 +123,11 @@ use helpers::{ DpuDiscoveringStateHelper, DpuInitStateHelper, ManagedHostStateHelper, NextState, ReprovisionStateHelper, all_equal, }; -use rpc::forge_agent_control_response::FileArtifact; use state_controller::db_write_batch::DbWriteBatch; -use crate::state_controller::machine::config::{BomValidationConfig, PowerManagerOptions}; -use crate::state_controller::machine::write_ops::MachineWriteOp; +use crate::config::{BomValidationConfig, PowerManagerOptions}; +use crate::rpc::scout_firmware_upgrade::{FileArtifact, ScoutFirmwareUpgradeTask}; +use crate::write_ops::MachineWriteOp; // We can't use http::StatusCode because libredfish has a newer version const NOT_FOUND: u16 = 404; @@ -295,7 +292,7 @@ impl MachineStateHandlerBuilder { self } - #[cfg(test)] // currently only used in tests + #[cfg(feature = "test-support")] pub fn dpu_nic_firmware_initial_update_enabled( mut self, dpu_nic_firmware_initial_update_enabled: bool, @@ -313,7 +310,7 @@ impl MachineStateHandlerBuilder { self } - #[cfg(test)] // currently only used in tests + #[cfg(feature = "test-support")] pub fn reachability_params(mut self, reachability_params: ReachabilityParams) -> Self { self.reachability_params = reachability_params; self @@ -7430,7 +7427,7 @@ impl HostUpgradeState { let upgrade_task_id = uuid::Uuid::new_v4().to_string(); let file_artifact_count = to_install.files.len(); - let task = rpc::forge_agent_control_response::ScoutFirmwareUpgradeTask { + let task = ScoutFirmwareUpgradeTask { upgrade_task_id: upgrade_task_id.clone(), component_type: firmware_type.to_string(), target_version: to_install.version.clone(), @@ -9531,7 +9528,7 @@ fn can_restart_reprovision(dpu_snapshots: &[Machine], version: ConfigVersion) -> /// TODO(ken): This is a temporary workaround for work-in-progress on zero-DPU support (August 2024) /// The 
way we should do this going forward is to plumb the actual non-DPU MAC address we want to /// boot from, instead of special-casing NoDpu errors. -pub(super) async fn call_machine_setup_and_handle_no_dpu_error( +pub async fn call_machine_setup_and_handle_no_dpu_error( redfish_client: &dyn Redfish, boot_interface_mac: Option<&str>, expected_dpu_count: usize, @@ -10656,70 +10653,6 @@ mod tests { assert_eq!(to_install.version, target_version); } - /// Verify that `oem_manager_profiles` from the site config is forwarded to `machine_setup`. - /// - /// This test catches regressions where the argument gets dropped or replaced with an empty map. - #[tokio::test] - async fn test_oem_manager_profiles_passed_to_machine_setup() { - use carbide_redfish::libredfish::RedfishClientPool; - use carbide_redfish::libredfish::test_support::{RedfishSim, RedfishSimAction}; - use libredfish::BiosProfileType; - use libredfish::model::service_root::RedfishVendor; - - let mut config = crate::tests::common::api_fixtures::get_config(); - // Build an oem_manager_profiles map with a Dell R760 PSU Hot Spare setting. - // This mirrors the fix for the Dell R760 PSU fan issue (nvbugs-5834644). 
- config.oem_manager_profiles = HashMap::from([( - RedfishVendor::Dell, - HashMap::from([( - "r760".to_string(), - HashMap::from([( - BiosProfileType::Performance, - HashMap::from([( - "ServerPwr.1.PSRapidOn".to_string(), - serde_json::Value::String("Disabled".to_string()), - )]), - )]), - )]), - )]); - - use carbide_redfish::libredfish::RedfishAuth; - use forge_secrets::credentials::{CredentialKey, CredentialType}; - - let sim = RedfishSim::default(); - let timepoint = sim.timepoint(); - let client = sim - .create_client( - "test-host", - None, - RedfishAuth::Key(CredentialKey::HostRedfish { - credential_type: CredentialType::SiteDefault, - }), - None, - ) - .await - .unwrap(); - - let result = call_machine_setup_and_handle_no_dpu_error( - client.as_ref(), - None, - 1, - &config.machine_state_handler_site_config(), - ) - .await; - - assert!(result.is_ok()); - - let actions = sim.actions_since(&timepoint).all_hosts(); - assert_eq!(actions.len(), 1); - assert_eq!( - actions[0], - RedfishSimAction::MachineSetup { - oem_manager_profiles: config.oem_manager_profiles, - } - ); - } - #[test] fn test_cycle_1() { let state_change_time = diff --git a/crates/api/src/state_controller/machine/handler/attestation.rs b/crates/machine-controller/src/handler/attestation.rs similarity index 99% rename from crates/api/src/state_controller/machine/handler/attestation.rs rename to crates/machine-controller/src/handler/attestation.rs index d1db5d597b..7400292be5 100644 --- a/crates/api/src/state_controller/machine/handler/attestation.rs +++ b/crates/machine-controller/src/handler/attestation.rs @@ -38,7 +38,7 @@ use state_controller::state_handler::{ StateHandlerContext, StateHandlerError, StateHandlerOutcome, }; -use crate::state_controller::machine::context::MachineStateHandlerContextObjects; +use crate::context::MachineStateHandlerContextObjects; pub async fn trigger_attestation( db_pool: &PgPool, diff --git a/crates/api/src/state_controller/machine/handler/bios_config.rs 
b/crates/machine-controller/src/handler/bios_config.rs similarity index 99% rename from crates/api/src/state_controller/machine/handler/bios_config.rs rename to crates/machine-controller/src/handler/bios_config.rs index 143fbf7fc9..d4640824f8 100644 --- a/crates/api/src/state_controller/machine/handler/bios_config.rs +++ b/crates/machine-controller/src/handler/bios_config.rs @@ -32,8 +32,8 @@ use super::{ ReachabilityParams, RebootStatus, call_machine_setup_and_handle_no_dpu_error, handler_host_power_control, trigger_reboot_if_needed, }; -use crate::state_controller::machine::config::MachineStateControllerConfig; -use crate::state_controller::machine::context::MachineStateHandlerContextObjects; +use crate::config::MachineStateControllerConfig; +use crate::context::MachineStateHandlerContextObjects; /// Outcome of configure_host_bios function. pub(super) enum BiosConfigOutcome { diff --git a/crates/api/src/state_controller/machine/handler/dpf.rs b/crates/machine-controller/src/handler/dpf.rs similarity index 99% rename from crates/api/src/state_controller/machine/handler/dpf.rs rename to crates/machine-controller/src/handler/dpf.rs index 21d24e0cb0..c16afbd4a4 100644 --- a/crates/api/src/state_controller/machine/handler/dpf.rs +++ b/crates/machine-controller/src/handler/dpf.rs @@ -33,8 +33,8 @@ use state_controller::state_handler::{ use super::helpers::{DpuInitStateHelper, ManagedHostStateHelper, ReprovisionStateHelper}; use super::{handler_host_power_control, host_power_state}; -use crate::state_controller::machine::context::MachineStateHandlerContextObjects; -use crate::state_controller::machine::dpf::DpfOperations; +use crate::context::MachineStateHandlerContextObjects; +use crate::dpf::DpfOperations; fn dpf_error(error: DpfError) -> StateHandlerError { ExternalServiceError::with_source("dpf", "", error.to_string(), "dpf_error", error).into() diff --git a/crates/api/src/state_controller/machine/handler/helpers.rs b/crates/machine-controller/src/handler/helpers.rs 
similarity index 100% rename from crates/api/src/state_controller/machine/handler/helpers.rs rename to crates/machine-controller/src/handler/helpers.rs diff --git a/crates/api/src/state_controller/machine/handler/machine_validation.rs b/crates/machine-controller/src/handler/machine_validation.rs similarity index 97% rename from crates/api/src/state_controller/machine/handler/machine_validation.rs rename to crates/machine-controller/src/handler/machine_validation.rs index 3a113553b5..bfdd0026ac 100644 --- a/crates/api/src/state_controller/machine/handler/machine_validation.rs +++ b/crates/machine-controller/src/handler/machine_validation.rs @@ -25,12 +25,8 @@ use state_controller::state_handler::{ }; use super::{HostHandlerParams, is_machine_validation_requested, machine_validation_completed}; -use crate::state_controller::machine::context::{ - MachineStateHandlerContextObjects, MachineStateHandlerServices, -}; -use crate::state_controller::machine::handler::{ - handler_host_power_control, rebooted, trigger_reboot_if_needed, -}; +use crate::context::{MachineStateHandlerContextObjects, MachineStateHandlerServices}; +use crate::handler::{handler_host_power_control, rebooted, trigger_reboot_if_needed}; pub(crate) async fn handle_machine_validation_state( ctx: &mut StateHandlerContext<'_, MachineStateHandlerContextObjects>, diff --git a/crates/api/src/state_controller/machine/handler/power.rs b/crates/machine-controller/src/handler/power.rs similarity index 97% rename from crates/api/src/state_controller/machine/handler/power.rs rename to crates/machine-controller/src/handler/power.rs index 3990b870bb..d66493f240 100644 --- a/crates/api/src/state_controller/machine/handler/power.rs +++ b/crates/machine-controller/src/handler/power.rs @@ -25,10 +25,8 @@ use model::power_manager::{ }; use state_controller::state_handler::{StateHandlerContext, StateHandlerError}; -use crate::state_controller::machine::context::MachineStateHandlerContextObjects; -use 
crate::state_controller::machine::handler::{ - PowerOptionConfig, handler_host_power_control, host_power_state, -}; +use crate::context::MachineStateHandlerContextObjects; +use crate::handler::{PowerOptionConfig, handler_host_power_control, host_power_state}; // If power state is Paused and Reset, state machine can't take any decision on it. // Ignore power manager with a log and moved to state machine. diff --git a/crates/api/src/state_controller/machine/handler/sku.rs b/crates/machine-controller/src/handler/sku.rs similarity index 99% rename from crates/api/src/state_controller/machine/handler/sku.rs rename to crates/machine-controller/src/handler/sku.rs index 11c422abd2..c5f6f28cb9 100644 --- a/crates/api/src/state_controller/machine/handler/sku.rs +++ b/crates/machine-controller/src/handler/sku.rs @@ -27,10 +27,8 @@ use state_controller::state_handler::{ StateHandlerContext, StateHandlerError, StateHandlerOutcome, }; -use crate::state_controller::machine::context::{ - MachineStateHandlerContextObjects, MachineStateHandlerServices, -}; -use crate::state_controller::machine::handler::{ +use crate::context::{MachineStateHandlerContextObjects, MachineStateHandlerServices}; +use crate::handler::{ HostHandlerParams, discovered_after_state_transition, trigger_reboot_if_needed, }; diff --git a/crates/api/src/state_controller/machine/health_report.rs b/crates/machine-controller/src/health_report.rs similarity index 100% rename from crates/api/src/state_controller/machine/health_report.rs rename to crates/machine-controller/src/health_report.rs diff --git a/crates/api/src/state_controller/machine/io.rs b/crates/machine-controller/src/io.rs similarity index 99% rename from crates/api/src/state_controller/machine/io.rs rename to crates/machine-controller/src/io.rs index 046df2dab4..693457511a 100644 --- a/crates/api/src/state_controller/machine/io.rs +++ b/crates/machine-controller/src/io.rs @@ -32,8 +32,8 @@ use model::machine::{ use sqlx::PgConnection; use 
state_controller::io::StateControllerIO; -use crate::state_controller::machine::context::MachineStateHandlerContextObjects; -use crate::state_controller::machine::metrics::MachineMetricsEmitter; +use crate::context::MachineStateHandlerContextObjects; +use crate::metrics::MachineMetricsEmitter; /// State Controller IO implementation for Machines #[derive(Default, Debug)] diff --git a/crates/api/src/state_controller/machine/mod.rs b/crates/machine-controller/src/lib.rs similarity index 99% rename from crates/api/src/state_controller/machine/mod.rs rename to crates/machine-controller/src/lib.rs index 993be6bcff..d8b598b16d 100644 --- a/crates/api/src/state_controller/machine/mod.rs +++ b/crates/machine-controller/src/lib.rs @@ -37,6 +37,7 @@ pub mod health_report; pub mod io; pub mod metrics; pub mod redfish; +pub(crate) mod rpc; pub mod write_ops; /// Fields of span that should be logged for each message. @@ -139,7 +140,7 @@ where Ok((machine_state, ek_cert_verification_status)) } -pub(crate) async fn handle_measuring_state( +pub async fn handle_measuring_state( measuring_state: &MeasuringState, machine_id: &MachineId, db: &mut DB, diff --git a/crates/api/src/state_controller/machine/metrics.rs b/crates/machine-controller/src/metrics.rs similarity index 99% rename from crates/api/src/state_controller/machine/metrics.rs rename to crates/machine-controller/src/metrics.rs index c9908ee538..79e9e486ec 100644 --- a/crates/api/src/state_controller/machine/metrics.rs +++ b/crates/machine-controller/src/metrics.rs @@ -19,11 +19,11 @@ use std::collections::{HashMap, HashSet}; -use ::carbide_utils::metrics::SharedMetricsHolder; use carbide_health_metrics::{ HealthIterationMetrics, HealthMetricDimension, HealthObjectMetrics, register_alerts_suppressed_gauge, register_health_gauges, }; +use carbide_utils::metrics::SharedMetricsHolder; use model::hardware_info::MachineInventorySoftwareComponent; use model::tenant::TenantOrganizationId; use opentelemetry::KeyValue; diff --git 
a/crates/api/src/state_controller/machine/redfish.rs b/crates/machine-controller/src/redfish.rs similarity index 97% rename from crates/api/src/state_controller/machine/redfish.rs rename to crates/machine-controller/src/redfish.rs index 0d766e5dbd..4f17185e04 100644 --- a/crates/api/src/state_controller/machine/redfish.rs +++ b/crates/machine-controller/src/redfish.rs @@ -22,8 +22,8 @@ use libredfish::{PowerState, Redfish, RedfishError, SystemPowerControl}; use model::machine::Machine; use state_controller::state_handler::StateHandlerContext; -use crate::state_controller::machine::context::MachineStateHandlerContextObjects; -use crate::state_controller::machine::write_ops::MachineWriteOp; +use crate::context::MachineStateHandlerContextObjects; +use crate::write_ops::MachineWriteOp; #[track_caller] pub fn host_power_control( diff --git a/crates/machine-controller/src/rpc.rs b/crates/machine-controller/src/rpc.rs new file mode 100644 index 0000000000..3fa0e6dbe1 --- /dev/null +++ b/crates/machine-controller/src/rpc.rs @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#[allow(dead_code)] +pub(crate) mod scout_firmware_upgrade { + include!(concat!(env!("OUT_DIR"), "/scout_firmware_upgrade.rs")); +} diff --git a/crates/api/src/state_controller/machine/write_ops.rs b/crates/machine-controller/src/write_ops.rs similarity index 100% rename from crates/api/src/state_controller/machine/write_ops.rs rename to crates/machine-controller/src/write_ops.rs diff --git a/crates/rpc/build.rs b/crates/rpc/build.rs index 5e57bad3b3..82b6dfaa62 100644 --- a/crates/rpc/build.rs +++ b/crates/rpc/build.rs @@ -859,8 +859,14 @@ fn main() -> Result<(), Box> { "forge.SpdmAttestationDetails", "#[derive(serde::Serialize)]", ) - .type_attribute("forge.ForgeAgentControlResponse.ScoutFirmwareUpgradeTask", "#[derive(serde::Serialize, serde::Deserialize)]") - .type_attribute("forge.ForgeAgentControlResponse.FileArtifact", "#[derive(serde::Serialize, serde::Deserialize)]") + .type_attribute( + "scout_firmware_upgrade.ScoutFirmwareUpgradeTask", + "#[derive(serde::Serialize, serde::Deserialize)]", + ) + .type_attribute( + "scout_firmware_upgrade.FileArtifact", + "#[derive(serde::Serialize, serde::Deserialize)]", + ) .build_server(true) .build_client(true) .protoc_arg("--experimental_allow_proto3_optional") @@ -868,6 +874,7 @@ fn main() -> Result<(), Box> { .compile_protos( &[ "proto/common.proto", + "proto/scout_firmware_upgrade.proto", "proto/forge.proto", "proto/machine_discovery.proto", "proto/mlx_device.proto", diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index 28098e8414..7fc8d360e8 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -15,6 +15,7 @@ import "health.proto"; import "machine_discovery.proto"; import "measured_boot.proto"; import "mlx_device.proto"; +import "scout_firmware_upgrade.proto"; import "site_explorer.proto"; service Forge { @@ -4565,23 +4566,9 @@ message ForgeAgentControlResponse { } message FirmwareUpgrade { - ScoutFirmwareUpgradeTask task = 1; + 
scout_firmware_upgrade.ScoutFirmwareUpgradeTask task = 1; } - message ScoutFirmwareUpgradeTask { - string upgrade_task_id = 1; - string component_type = 2; - string target_version = 3; - FileArtifact script = 4; - uint32 execution_timeout_seconds = 5; - uint32 artifact_download_timeout_seconds = 6; - repeated FileArtifact file_artifacts = 7; - } - - message FileArtifact { - string url = 1; - string sha256 = 2; - } oneof action { Noop noop = 3; diff --git a/crates/rpc/proto/scout_firmware_upgrade.proto b/crates/rpc/proto/scout_firmware_upgrade.proto new file mode 100644 index 0000000000..b3f1a60867 --- /dev/null +++ b/crates/rpc/proto/scout_firmware_upgrade.proto @@ -0,0 +1,18 @@ +syntax = "proto3"; + +package scout_firmware_upgrade; + +message ScoutFirmwareUpgradeTask { + string upgrade_task_id = 1; + string component_type = 2; + string target_version = 3; + FileArtifact script = 4; + uint32 execution_timeout_seconds = 5; + uint32 artifact_download_timeout_seconds = 6; + repeated FileArtifact file_artifacts = 7; +} + +message FileArtifact { + string url = 1; + string sha256 = 2; +} diff --git a/crates/rpc/src/lib.rs b/crates/rpc/src/lib.rs index 0d311abd38..6a4ed0f954 100644 --- a/crates/rpc/src/lib.rs +++ b/crates/rpc/src/lib.rs @@ -58,7 +58,7 @@ pub use crate::protos::machine_discovery::{ self, BlockDevice, Cpu, DiscoveryInfo, DmiData, NetworkInterface, NvmeDevice, PciDeviceProperties, }; -pub use crate::protos::{fmds, health, site_explorer}; +pub use crate::protos::{fmds, health, scout_firmware_upgrade, site_explorer}; pub mod errors; pub mod forge_tls_client; diff --git a/crates/rpc/src/protos/mod.rs b/crates/rpc/src/protos/mod.rs index 5c8b26297c..01f0490d54 100644 --- a/crates/rpc/src/protos/mod.rs +++ b/crates/rpc/src/protos/mod.rs @@ -25,6 +25,12 @@ pub mod common { include!(concat!(env!("OUT_DIR"), "/common.rs")); } +#[allow(non_snake_case, unknown_lints, clippy::all)] +#[rustfmt::skip] +pub mod scout_firmware_upgrade { + include!(concat!(env!("OUT_DIR"), 
"/scout_firmware_upgrade.rs")); +} + #[allow(non_snake_case, unknown_lints, clippy::all)] #[rustfmt::skip] pub mod forge { diff --git a/crates/scout/src/firmware_upgrade.rs b/crates/scout/src/firmware_upgrade.rs index 89b352e871..828fcc6d88 100644 --- a/crates/scout/src/firmware_upgrade.rs +++ b/crates/scout/src/firmware_upgrade.rs @@ -19,7 +19,7 @@ use std::path::{Path, PathBuf}; use std::time::Duration; use futures_util::TryStreamExt; -use rpc::forge_agent_control_response::ScoutFirmwareUpgradeTask as FirmwareUpgradeTask; +use rpc::scout_firmware_upgrade::ScoutFirmwareUpgradeTask as FirmwareUpgradeTask; use sha2::{Digest, Sha256}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; @@ -264,7 +264,7 @@ async fn sha256_file(path: &Path) -> Result> mod tests { use axum::Router; use axum::routing::get; - use rpc::forge_agent_control_response::FileArtifact; + use rpc::scout_firmware_upgrade::FileArtifact; use tokio::net::TcpListener; use super::*; diff --git a/crates/scout/src/main.rs b/crates/scout/src/main.rs index 90f0959faa..d38a12f0f4 100644 --- a/crates/scout/src/main.rs +++ b/crates/scout/src/main.rs @@ -37,7 +37,10 @@ use rpc::protos::mlx_device::{ FirmwareFlashReport as FirmwareFlashReportPb, LockStatus, MlxObservation, MlxObservationReport, PublishMlxObservationReportRequest, }; -use rpc::{ForgeScoutErrorReport, forge as rpc_forge, forge_agent_control_response as fac}; +use rpc::{ + ForgeScoutErrorReport, forge as rpc_forge, forge_agent_control_response as fac, + scout_firmware_upgrade as sfu, +}; pub use scout::{CarbideClientError, CarbideClientResult}; use tokio::sync::RwLock; use tryhard::{RetryFutureConfig, RetryPolicy}; @@ -407,7 +410,7 @@ async fn handle_action( async fn handle_firmware_upgrade_action( config: &Options, machine_id: &MachineId, - task: Option, + task: Option, ) -> Result<(), CarbideClientError> { let task = task.ok_or_else(|| { CarbideClientError::GenericError("firmware upgrade action missing task".to_string()) diff --git 
a/crates/ssh-console-mock-api-server/build.rs b/crates/ssh-console-mock-api-server/build.rs index 66fadb9ea1..36d2f36e05 100644 --- a/crates/ssh-console-mock-api-server/build.rs +++ b/crates/ssh-console-mock-api-server/build.rs @@ -51,6 +51,7 @@ fn main() -> Result<(), Box> { .compile_protos( &[ "proto/common.proto", + "proto/scout_firmware_upgrade.proto", "proto/dns.proto", "proto/forge.proto", "proto/machine_discovery.proto", diff --git a/crates/ssh-console-mock-api-server/src/generated/mod.rs b/crates/ssh-console-mock-api-server/src/generated/mod.rs index 91def1d871..d96d3149e2 100644 --- a/crates/ssh-console-mock-api-server/src/generated/mod.rs +++ b/crates/ssh-console-mock-api-server/src/generated/mod.rs @@ -37,4 +37,7 @@ pub mod measured_boot; pub mod mlx_device; #[allow(non_snake_case, unknown_lints, clippy::all)] #[rustfmt::skip] +pub mod scout_firmware_upgrade; +#[allow(non_snake_case, unknown_lints, clippy::all)] +#[rustfmt::skip] pub mod site_explorer;