diff --git a/Cargo.lock b/Cargo.lock index 5ff7ef2b6d..8fad44e1b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1186,11 +1186,15 @@ dependencies = [ "carbide-macros", "carbide-measured-boot", "carbide-metrics-utils", + "carbide-mqtt-common", "carbide-network", "carbide-network-segment-controller", "carbide-nvlink-manager", + "carbide-power-shelf-controller", "carbide-preingestion-manager", "carbide-prost-builder", + "carbide-rack", + "carbide-rack-controller", "carbide-redfish", "carbide-rpc", "carbide-rpc-utils", @@ -1199,6 +1203,7 @@ dependencies = [ "carbide-spdm-controller", "carbide-sqlx-testing", "carbide-ssh", + "carbide-state-controller-common", "carbide-switch-controller", "carbide-tls", "carbide-utils", @@ -1331,7 +1336,7 @@ dependencies = [ "eyre", "futures", "futures-util", - "hickory-proto 0.26.1", + "hickory-proto", "ipnetwork", "itertools 0.14.0", "lazy_static", @@ -2259,6 +2264,18 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "carbide-mqtt-common" +version = "0.1.0" +dependencies = [ + "async-trait", + "mqttea", + "opentelemetry", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "carbide-network" version = "0.0.0" @@ -2321,6 +2338,28 @@ dependencies = [ "uuid", ] +[[package]] +name = "carbide-power-shelf-controller" +version = "0.0.0" +dependencies = [ + "async-trait", + "carbide-api-db", + "carbide-api-model", + "carbide-health-metrics", + "carbide-rack", + "carbide-secrets", + "carbide-utils", + "carbide-uuid", + "config-version", + "eyre", + "librms", + "mac_address", + "opentelemetry", + "sqlx", + "state-controller", + "tracing", +] + [[package]] name = "carbide-preingestion-manager" version = "0.0.1" @@ -2389,6 +2428,60 @@ dependencies = [ "uuid", ] +[[package]] +name = "carbide-rack" +version = "0.0.0" +dependencies = [ + "async-trait", + "bms-dsx-exchange", + "carbide-api-db", + "carbide-api-model", + "carbide-health-report", + "carbide-mqtt-common", + "carbide-secrets", + "carbide-uuid", + "chrono", + "eyre", + "librms", + "mac_address", + "mqttea", + "opentelemetry", + "serde_json", + "sqlx", + "state-controller", + "tokio", + "tokio-util", + "tonic", + "tracing", +] + +[[package]] +name = "carbide-rack-controller" +version = "0.0.0" +dependencies = [ + "async-trait", + "carbide-api-db", + "carbide-api-model", + "carbide-health-metrics", + "carbide-rack", + "carbide-secrets", + "carbide-utils", + "carbide-uuid", + "chrono", + "config-version", + "duration-str", + "eyre", + "librms", + "mac_address", + "opentelemetry", + "serde", + "serde_json", + "sqlx", + "state-controller", + "tonic", + "tracing", +] + [[package]] name = "carbide-redfish" version = "0.0.1" @@ -2744,6 +2837,16 @@ dependencies = [ "uuid", ] +[[package]] +name = "carbide-state-controller-common" +version = "0.0.0" +dependencies = [ + "carbide-utils", + "duration-str", + "serde", + "state-controller", +] + [[package]] name = "carbide-switch-controller" version = "0.0.0" @@ -3945,14 +4048,14 @@ checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" [[package]] name = "dhcproto" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "425ab19f6a915beac79cac8ec2810c1311b502ae14d7f294682081cf5ae4c5bb" +checksum = "c278d2f17dbcb7332f3b31788be67f76017096c5eedc293e1259f2d48b0f891f" dependencies = [ "dhcproto-macros", - "hickory-proto 0.25.2", + "hickory-proto", "ipnet", - "rand 0.9.4", + "rand 0.10.1", "thiserror 2.0.18", ] @@ -5127,7 +5230,7 @@ dependencies = [ "futures-channel", "futures-io", "futures-util", - "hickory-proto 0.26.1", + "hickory-proto", "idna 1.1.0", "ipnet", "jni", @@ -5139,28 +5242,6 @@ dependencies = [ "url", ] -[[package]] -name = "hickory-proto" -version = "0.25.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" -dependencies = [ - "async-trait", - "cfg-if", - "data-encoding", - "enum-as-inner", - "futures-channel", - "futures-util", - "idna 1.1.0", - "ipnet", - "once_cell", - "rand 0.9.4", - "thiserror 2.0.18", - "tinyvec", - "tracing", - "url", -] - [[package]] name = "hickory-proto" version = "0.26.1" @@ -5190,7 +5271,7 @@ dependencies = [ "cfg-if", "futures-util", "hickory-net", - "hickory-proto 0.26.1", + "hickory-proto", "ipconfig", "ipnet", "jni", @@ -9111,12 +9192,8 @@ dependencies = [ "flume", "futures-util", "log", - "rustls-native-certs", - "rustls-pemfile", - "rustls-webpki 0.102.8", "thiserror 2.0.18", "tokio", - "tokio-rustls", "tokio-stream", "tokio-util", ] @@ -9353,7 +9430,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.13", + "rustls-webpki", "subtle", "zeroize", ] @@ -9403,7 +9480,7 @@ dependencies = [ "rustls", "rustls-native-certs", "rustls-platform-verifier-android", - "rustls-webpki 0.103.13", + "rustls-webpki", "security-framework", "security-framework-sys", "webpki-root-certs", @@ -9416,17 +9493,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" -[[package]] -name = "rustls-webpki" -version = "0.102.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" -dependencies = [ - "ring", - "rustls-pki-types", - "untrusted 0.9.0", -] - [[package]] name = "rustls-webpki" version = "0.103.13" diff --git a/crates/api-test-helper/src/api_server.rs b/crates/api-test-helper/src/api_server.rs index 8229112fe2..dddfcf67be 100644 --- a/crates/api-test-helper/src/api_server.rs +++ b/crates/api-test-helper/src/api_server.rs @@ -199,7 +199,6 @@ pub async fn start( explorations_per_run = 90 create_machines = true machines_created_per_run = 30 - allow_zero_dpu_hosts = true allow_proxy_to_unknown_host = false {bmc_proxy_cfg} reset_rate_limit = "3600s" diff --git a/crates/api/src/cfg/README.md b/crates/api/src/cfg/README.md index defc651db8..3e243ff117 100644 --- a/crates/api/src/cfg/README.md +++ b/crates/api/src/cfg/README.md @@ -151,7 +151,6 @@ applicable. | `rotate_switch_nvos_credentials` | `bool` | `false` | Auto-rotate switch NVOS admin credentials. | | `override_target_ip` | `Option` | — | **Deprecated.** Use `bmc_proxy`. Debug BMC IP override. | | `override_target_port` | `Option` | — | **Deprecated.** Use `bmc_proxy`. Debug BMC port override. | -| `allow_zero_dpu_hosts` | `bool` | `false` | Allow hosts with zero DPUs (set `false` in prod). | | `bmc_proxy` | `HostPortPair` | — | BMC proxy host:port for integration testing/dev. | | `allow_changing_bmc_proxy` | `Option` | *(auto)* | Allow runtime changes to `bmc_proxy`. Auto-detected from initial config. | | `reset_rate_limit` | `Duration` | `1h` | Minimum time between SiteExplorer-initiated BMC resets. | diff --git a/crates/api/src/cfg/file.rs b/crates/api/src/cfg/file.rs index c41626dd1e..5e81dbadc8 100644 --- a/crates/api/src/cfg/file.rs +++ b/crates/api/src/cfg/file.rs @@ -662,7 +662,6 @@ impl CarbideConfig { spdm_enabled: self.spdm.enabled, dpu_enable_secure_boot: self.dpu_config.dpu_enable_secure_boot, - allow_zero_dpu_hosts: self.site_explorer.allow_zero_dpu_hosts, } } } @@ -2673,7 +2672,6 @@ mod tests { machines_created_per_run: 1, override_target_ip: None, override_target_port: None, - allow_zero_dpu_hosts: false, bmc_proxy: carbide_site_explorer::config::bmc_proxy(None), allow_changing_bmc_proxy: None, reset_rate_limit: Duration::hours(1), @@ -2847,7 +2845,6 @@ mod tests { machines_created_per_run: 2, override_target_ip: Some("1.2.3.4".to_owned()), override_target_port: Some(10443), - allow_zero_dpu_hosts: false, bmc_proxy: carbide_site_explorer::config::bmc_proxy(None), allow_changing_bmc_proxy: None, reset_rate_limit: Duration::hours(2), @@ -3156,7 +3153,6 @@ mod tests { machines_created_per_run: 2, override_target_ip: Some("1.2.3.4".to_owned()), override_target_port: Some(10443), - allow_zero_dpu_hosts: false, bmc_proxy: carbide_site_explorer::config::bmc_proxy(None), allow_changing_bmc_proxy: None, reset_rate_limit: Duration::hours(2), diff --git a/crates/api/src/state_controller/machine/config/mod.rs b/crates/api/src/state_controller/machine/config/mod.rs index 99947e1d0a..d8caabb701 100644 --- a/crates/api/src/state_controller/machine/config/mod.rs +++ b/crates/api/src/state_controller/machine/config/mod.rs @@ -41,5 +41,4 @@ pub struct MachineStateHandlerSiteConfig { pub spdm_enabled: bool, pub dpu_enable_secure_boot: bool, - pub allow_zero_dpu_hosts: bool, } diff --git a/crates/api/src/state_controller/machine/handler.rs b/crates/api/src/state_controller/machine/handler.rs index d850443fd8..d921d2bde1 100644 --- a/crates/api/src/state_controller/machine/handler.rs +++ b/crates/api/src/state_controller/machine/handler.rs @@ -9482,13 +9482,9 @@ fn can_restart_reprovision(dpu_snapshots: &[Machine], version: ConfigVersion) -> dpu_reprovision_restart_requested_after_state_transition(version, *latest_requested_at) } -/// Call [`Redfish::machine_setup`], but ignore any [`RedfishError::NoDpu`] if we expect there to be no DPUs. -/// Returns `Ok(Some(job_id))` when the vendor (e.g. Dell) creates a BIOS config job that must complete -/// before configuring boot order; `Ok(None)` when no job to wait for. -/// -/// TODO(ken): This is a temporary workaround for work-in-progress on zero-DPU support (August 2024) -/// The way we should do this going forward is to plumb the actual non-DPU MAC address we want to -/// boot from, instead of special-casing NoDpu errors. +/// Call [`Redfish::machine_setup`], but ignore any [`RedfishError::NoDpu`] if we expect there to +/// be no DPUs on the host. Returns `Ok(Some(job_id))` when the vendor (e.g. Dell) creates a BIOS +/// config job that must complete before configuring boot order; `Ok(None)` when no job to wait for. async fn call_machine_setup_and_handle_no_dpu_error( redfish_client: &dyn Redfish, boot_interface_mac: Option<&str>, @@ -9503,40 +9499,38 @@ async fn call_machine_setup_and_handle_no_dpu_error( &site_config.oem_manager_profiles, ) .await; - match ( - setup_result, - expected_dpu_count, - site_config.allow_zero_dpu_hosts, - ) { - (Err(RedfishError::NoDpu), 0, true) => { - tracing::info!( - "redfish machine_setup failed due to there being no DPUs on the host. This is expected as the host has no DPUs, and we are configured to allow this." - ); - Ok(None) - } - (Ok(maybe_jid), _, _) => Ok(maybe_jid), - (Err(e), _, _) => Err(e), - } + handle_no_dpu_error(setup_result, expected_dpu_count, "machine_setup") } async fn set_boot_order_dpu_first_and_handle_no_dpu_error( redfish_client: &dyn Redfish, boot_interface_mac: &str, expected_dpu_count: usize, - allow_zero_dpu_hosts: bool, ) -> Result, RedfishError> { let setup_result = redfish_client .set_boot_order_dpu_first(boot_interface_mac) .await; - match (setup_result, expected_dpu_count, allow_zero_dpu_hosts) { - (Err(RedfishError::NoDpu), 0, true) => { + handle_no_dpu_error(setup_result, expected_dpu_count, "set_boot_order_dpu_first") +} + +/// Treat `Err(RedfishError::NoDpu)` as `Ok(None)` *only* when the host is +/// declared zero-DPU (`expected_dpu_count == 0`). Other error variants and +/// successful results pass through untouched. The `dpu_mode` gate in +/// site-explorer is what guarantees `expected_dpu_count == 0` actually +/// means the host was configured as `NoDpu`. +fn handle_no_dpu_error( + result: Result, RedfishError>, + expected_dpu_count: usize, + operation: &'static str, +) -> Result, RedfishError> { + match (result, expected_dpu_count) { + (Err(RedfishError::NoDpu), 0) => { tracing::info!( - "redfish set_boot_order_dpu_first failed due to there being no DPUs on the host. This is expected as the host has no DPUs, and we are configured to allow this." + "redfish {operation} failed with NoDpu on a zero-DPU host; treating as Ok" ); Ok(None) } - (Ok(job_id), _, _) => Ok(job_id), - (Err(e), _, _) => Err(e), + (other, _) => other, } } @@ -10420,9 +10414,9 @@ async fn set_host_boot_order( // all zero-DPU hosts because libredfish doesn't implement // `boot_first` for every vendor yet (Dell currently returns // `NotSupported`); zero-DPU hosts fall through to the - // `set_boot_order_dpu_first` path below, which downgrades - // the resulting `NoDpu` error via `allow_zero_dpu_hosts` - // and still hits `CheckBootOrder` for verification. + // `set_boot_order_dpu_first` path below, which treats the + // resulting `NoDpu` error as `Ok` and still hits `CheckBootOrder` + // for verification. // // Resolve the boot NIC MAC the same way `CheckHostConfig` does, // supporting hosts with DPU(s) and zero DPUs alike. @@ -10437,7 +10431,6 @@ async fn set_host_boot_order( redfish_client, &boot_interface_mac.to_string(), mh_snapshot.host_snapshot.associated_dpu_machine_ids().len(), - ctx.services.site_config.allow_zero_dpu_hosts, ) .await { @@ -10947,6 +10940,37 @@ mod tests { ); } + #[test] + fn handle_no_dpu_error_treats_as_ok_on_zero_dpu_host() { + let result = handle_no_dpu_error(Err(RedfishError::NoDpu), 0, "machine_setup"); + assert!(matches!(result, Ok(None))); + } + + #[test] + fn handle_no_dpu_error_surfaces_when_dpus_were_expected() { + let result = handle_no_dpu_error(Err(RedfishError::NoDpu), 2, "machine_setup"); + assert!(matches!(result, Err(RedfishError::NoDpu))); + } + + #[test] + fn handle_no_dpu_error_passes_through_success() { + let job_id = "bios-job-1".to_string(); + let result = handle_no_dpu_error(Ok(Some(job_id.clone())), 0, "machine_setup"); + assert!(matches!(result, Ok(Some(ref s)) if s == &job_id)); + + let result = handle_no_dpu_error(Ok(None), 2, "machine_setup"); + assert!(matches!(result, Ok(None))); + } + + #[test] + fn handle_no_dpu_error_does_not_touch_other_errors() { + // Other error variants must surface, even on zero-DPU hosts -- we + // only ignore the *specific* NoDpu signal. + let result = + handle_no_dpu_error(Err(RedfishError::Lockdown), 0, "set_boot_order_dpu_first"); + assert!(matches!(result, Err(RedfishError::Lockdown))); + } + #[test] fn test_cycle_1() { let state_change_time = diff --git a/crates/api/src/tests/common/api_fixtures/mod.rs b/crates/api/src/tests/common/api_fixtures/mod.rs index c751327de2..fb3657132d 100644 --- a/crates/api/src/tests/common/api_fixtures/mod.rs +++ b/crates/api/src/tests/common/api_fixtures/mod.rs @@ -256,7 +256,6 @@ lazy_static! { #[derive(Clone, Debug, Default)] pub struct TestEnvOverrides { - pub allow_zero_dpu_hosts: Option, pub site_prefixes: Option>, pub config: Option, pub create_network_segments: Option, @@ -1898,7 +1897,6 @@ pub async fn create_test_env_with_overrides( machines_created_per_run: 1, override_target_ip: None, override_target_port: None, - allow_zero_dpu_hosts: overrides.allow_zero_dpu_hosts.unwrap_or(false), bmc_proxy: Arc::new(Default::default()), allow_changing_bmc_proxy: None, reset_rate_limit: Duration::hours(1), diff --git a/crates/api/src/tests/common/api_fixtures/site_explorer.rs b/crates/api/src/tests/common/api_fixtures/site_explorer.rs index 7476a64039..64ee57528e 100644 --- a/crates/api/src/tests/common/api_fixtures/site_explorer.rs +++ b/crates/api/src/tests/common/api_fixtures/site_explorer.rs @@ -1326,6 +1326,12 @@ pub async fn register_expected_machine( if data.dpf_enabled.is_none() { data.dpf_enabled = default_dpf_enabled; } + // For fixtures that intentionally create zero-DPU hosts (no DpuConfigs), + // declare them as `NoDpu` so site-explorer accepts them. Tests that + // explicitly set `dpu_mode` via `expected_machine_data` are left alone. + if config.dpus.is_empty() && data.dpu_mode == model::expected_machine::DpuMode::DpuMode { + data.dpu_mode = model::expected_machine::DpuMode::NoDpu; + } let em = ExpectedMachine { id: Some(uuid::Uuid::new_v4()), diff --git a/crates/api/src/tests/fixtures/create_expected_machine_no_default_poweron.sql b/crates/api/src/tests/fixtures/create_expected_machine_no_default_poweron.sql index 81035a7c25..965e444326 100644 --- a/crates/api/src/tests/fixtures/create_expected_machine_no_default_poweron.sql +++ b/crates/api/src/tests/fixtures/create_expected_machine_no_default_poweron.sql @@ -1 +1 @@ -INSERT INTO expected_machines (bmc_mac_address, serial_number, bmc_username, bmc_password, metadata_name, metadata_description, metadata_labels, default_pause_ingestion_and_poweron) VALUES ('6a:6b:6c:6d:6e:6f', 'VVG121GL', 'ADMIN', 'Pwd2023x0x0x0x0x7', '', '', '{}', true); +INSERT INTO expected_machines (bmc_mac_address, serial_number, bmc_username, bmc_password, metadata_name, metadata_description, metadata_labels, default_pause_ingestion_and_poweron, dpu_mode) VALUES ('6a:6b:6c:6d:6e:6f', 'VVG121GL', 'ADMIN', 'Pwd2023x0x0x0x0x7', '', '', '{}', true, 'no_dpu'); diff --git a/crates/api/src/tests/instance_allocate.rs b/crates/api/src/tests/instance_allocate.rs index 13615def3c..eb9227a4e2 100644 --- a/crates/api/src/tests/instance_allocate.rs +++ b/crates/api/src/tests/instance_allocate.rs @@ -90,7 +90,6 @@ async fn create_test_env_for_instance_allocation( let env = common::api_fixtures::create_test_env_with_overrides( pool.clone(), TestEnvOverrides { - allow_zero_dpu_hosts: Some(true), site_prefixes: Some(site_prefixes), create_network_segments: Some(false), ..Default::default() diff --git a/crates/api/src/tests/machine_creator.rs b/crates/api/src/tests/machine_creator.rs index a385eb4f8c..344b3006f2 100644 --- a/crates/api/src/tests/machine_creator.rs +++ b/crates/api/src/tests/machine_creator.rs @@ -22,7 +22,6 @@ use std::sync::Arc; use carbide_site_explorer::MachineCreator; use carbide_site_explorer::config::SiteExplorerConfig; -use carbide_site_explorer::errors::SiteExplorerError; use carbide_utils::arch::CpuArchitecture; use carbide_uuid::machine::MachineId; use itertools::Itertools; @@ -63,93 +62,6 @@ async fn discover_dpu_bmc_ip( Ok(response.address.parse()?) } -#[crate::sqlx_test] -async fn test_site_explorer_reject_zero_dpu_hosts( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let config = common::api_fixtures::get_config(); - let env = common::api_fixtures::create_test_env_with_overrides( - pool, - TestEnvOverrides::with_config(config), - ) - .await; - - let explorer_config = SiteExplorerConfig { - enabled: Arc::new(true.into()), - explorations_per_run: 2, - concurrent_explorations: 1, - run_interval: std::time::Duration::from_secs(1), - create_machines: Arc::new(true.into()), - create_power_shelves: Arc::new(true.into()), - explore_power_shelves_from_static_ip: Arc::new(true.into()), - power_shelves_created_per_run: 1, - create_switches: Arc::new(true.into()), - switches_created_per_run: 1, - allow_zero_dpu_hosts: false, - ..Default::default() - }; - let machine_creator = MachineCreator::new( - env.pool.clone(), - explorer_config, - env.common_pools.clone(), - None, - env.test_credential_manager.clone(), - ); - - let host_bmc_mac = MacAddress::from_str("a0:88:c2:08:81:98")?; - let response = env - .api - .discover_dhcp( - DhcpDiscovery::builder(host_bmc_mac, "192.0.1.1") - .vendor_string("SomeVendor") - .tonic_request(), - ) - .await - .unwrap() - .into_inner(); - assert!(!response.address.is_empty()); - - let interface_id = response.machine_interface_id; - let mut ifaces = env - .api - .find_interfaces(tonic::Request::new(rpc::forge::InterfaceSearchQuery { - id: Some(interface_id.unwrap()), - ip: None, - })) - .await - .unwrap() - .into_inner(); - - assert_eq!(ifaces.interfaces.len(), 1); - let iface = ifaces.interfaces.remove(0); - let mut addresses = iface.address; - let host_bmc_ip = addresses.remove(0); - - let exploration_report = ExploredManagedHost { - host_bmc_ip: IpAddr::from_str(&host_bmc_ip)?, - dpus: vec![], - }; - - let expected_machine = ExpectedMachine { - id: Some(uuid::Uuid::new_v4()), - bmc_mac_address: host_bmc_mac, - data: ExpectedMachineData::default(), - }; - - let Err(SiteExplorerError::NoDpusInMachine(_)) = machine_creator - .create_managed_host( - &exploration_report, - &mut EndpointExplorationReport::default(), - Some(&expected_machine), - &env.pool, - ) - .await - else { - panic!("explorer.create_managed_host should have failed with a NoDpusInMachine error") - }; - Ok(()) -} - #[crate::sqlx_test] async fn test_site_explorer_creates_managed_host( pool: sqlx::PgPool, diff --git a/crates/api/src/tests/machine_dhcp.rs b/crates/api/src/tests/machine_dhcp.rs index 55c61fd2d2..1544a0ba3e 100644 --- a/crates/api/src/tests/machine_dhcp.rs +++ b/crates/api/src/tests/machine_dhcp.rs @@ -745,7 +745,6 @@ async fn test_dhcp_allows_zero_dpu_host_with_instance( let env = create_test_env_with_overrides( pool, TestEnvOverrides { - allow_zero_dpu_hosts: Some(true), site_prefixes: Some(vec![ IpNetwork::new( FIXTURE_ADMIN_NETWORK_SEGMENT_GATEWAY.network(), diff --git a/crates/api/src/tests/machine_states.rs b/crates/api/src/tests/machine_states.rs index 0a17eed59f..441cf58b02 100644 --- a/crates/api/src/tests/machine_states.rs +++ b/crates/api/src/tests/machine_states.rs @@ -2450,7 +2450,6 @@ async fn zero_dpu_host_with_instance(pool: sqlx::PgPool) -> (TestEnv, TestManage let env = create_test_env_with_overrides( pool, TestEnvOverrides { - allow_zero_dpu_hosts: Some(true), site_prefixes: Some(vec![ IpNetwork::new( FIXTURE_ADMIN_NETWORK_SEGMENT_GATEWAY.network(), diff --git a/crates/api/src/tests/set_primary_dpu.rs b/crates/api/src/tests/set_primary_dpu.rs index 49ac48541d..3cdd8d5d7f 100644 --- a/crates/api/src/tests/set_primary_dpu.rs +++ b/crates/api/src/tests/set_primary_dpu.rs @@ -42,7 +42,6 @@ async fn test_set_primary_dpu_rejects_zero_dpu_host( let env = api_fixtures::create_test_env_with_overrides( pool, api_fixtures::TestEnvOverrides { - allow_zero_dpu_hosts: Some(true), site_prefixes: Some(vec![ IpNetwork::new( FIXTURE_ADMIN_NETWORK_SEGMENT_GATEWAY.network(), diff --git a/crates/api/src/tests/site_explorer.rs b/crates/api/src/tests/site_explorer.rs index 5e78a7c122..dada3f2530 100644 --- a/crates/api/src/tests/site_explorer.rs +++ b/crates/api/src/tests/site_explorer.rs @@ -226,7 +226,6 @@ async fn test_site_explorer_default_pause_ingestion_and_poweron( concurrent_explorations: 1, run_interval: std::time::Duration::from_secs(1), create_machines: Arc::new(true.into()), - allow_zero_dpu_hosts: true, ..Default::default() }; let test_meter = TestMeter::default(); @@ -394,7 +393,6 @@ async fn test_handle_redfish_error_powers_on_machine( concurrent_explorations: 1, run_interval: std::time::Duration::from_secs(1), create_machines: Arc::new(true.into()), - allow_zero_dpu_hosts: true, ..Default::default() }; let test_meter = TestMeter::default(); @@ -484,6 +482,37 @@ async fn test_site_explorer_main(pool: sqlx::PgPool) -> Result<(), Box Result<(), Box Result<(), Box Result<(), Box> { + let env = common::api_fixtures::create_test_env(pool.clone()).await; + + let mut machine = FakeMachine::new( + "AA:AB:AC:AD:AA:11", + "Vendor1", + env.underlay_segment.unwrap(), + ); + machine.discover_dhcp(&env).await?; + + // expected_machine WITHOUT a NoDpu declaration -- the host is + // "expected to have DPUs" by default. + let mut txn = env.pool.begin().await?; + db::expected_machine::create( + &mut txn, + ExpectedMachine { + id: None, + bmc_mac_address: machine.mac, + data: ExpectedMachineData { + serial_number: "host-expected-dpus-but-has-none".to_string(), + ..Default::default() + }, + }, + ) + .await?; + txn.commit().await?; + + // BMC report with no PCIe devices / no chassis -- the gate sees + // zero DPUs. + let endpoint_explorer = Arc::new(MockEndpointExplorer::default()); + endpoint_explorer.insert_endpoint_results(vec![( + machine.ip.parse().unwrap(), + Ok(EndpointExplorationReport { + endpoint_type: EndpointType::Bmc, + vendor: Some(bmc_vendor::BMCVendor::Lenovo), + systems: vec![ComputerSystem { + serial_number: Some("0123456789".to_string()), + ..Default::default() + }], + ..Default::default() + }), + )]); + + let explorer_config = SiteExplorerConfig { + enabled: Arc::new(true.into()), + explorations_per_run: 1, + concurrent_explorations: 1, + run_interval: std::time::Duration::from_secs(1), + create_machines: Arc::new(true.into()), + ..Default::default() + }; + let test_meter = TestMeter::default(); + let explorer = SiteExplorer::new( + env.pool.clone(), + explorer_config, + test_meter.meter(), + endpoint_explorer, + Arc::new(env.config.get_firmware_config()), + env.common_pools.clone(), + env.api.work_lock_manager_handle.clone(), + env.rms_sim.as_rms_client(), + env.test_credential_manager.clone(), + ); + + // First iteration populates `explored_endpoints`; second runs + // `identify_managed_hosts` after preingestion is complete. + explorer.run_single_iteration().await.unwrap(); + let mut txn = env.pool.begin().await?; + db::explored_endpoints::set_preingestion_complete(machine.ip.parse().unwrap(), &mut txn) + .await?; + txn.commit().await?; + explorer.run_single_iteration().await.unwrap(); + + // No managed host should have been identified. + let explored_managed_hosts = db::explored_managed_host::find_all(&env.pool).await?; + assert!( + explored_managed_hosts.is_empty(), + "strict gate should refuse to ingest a zero-DPU host without a `NoDpu` declaration, got {:?}", + explored_managed_hosts, + ); + assert_eq!( + test_meter + .formatted_metric("carbide_site_exploration_identified_managed_hosts_count") + .unwrap(), + "0" + ); + + // The pairing-blocker metric should have ticked for `NoDpuReportedByHost`. + let blocker_metric = test_meter + .formatted_metric("carbide_host_dpu_pairing_blockers_count") + .expect("expected `carbide_host_dpu_pairing_blockers_count` to be emitted"); + assert!( + blocker_metric.contains("no_dpu_reported_by_host"), + "expected pairing-blocker metric to mention `no_dpu_reported_by_host`, got {blocker_metric}", + ); + + Ok(()) +} + +/// Companion to `test_site_explorer_skips_unexpected_zero_dpu_host`: when +/// the operator explicitly declares `dpu_mode = "nic_mode"`, a host whose +/// BMC reports zero usable DPU PCIe devices (because anything that is a +/// BlueField has been stripped as "DPU in NIC mode") should be ingested as +/// a zero-DPU managed host -- the operator has already opted into "treat +/// as zero-DPU" semantics by declaring NicMode. +#[crate::sqlx_test] +async fn test_site_explorer_ingests_nic_mode_host_with_no_observed_dpus( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = common::api_fixtures::create_test_env(pool.clone()).await; + + let mut machine = FakeMachine::new( + "AA:AB:AC:AD:AA:22", + "Vendor1", + env.underlay_segment.unwrap(), + ); + machine.discover_dhcp(&env).await?; + + let mut txn = env.pool.begin().await?; + db::expected_machine::create( + &mut txn, + ExpectedMachine { + id: None, + bmc_mac_address: machine.mac, + data: ExpectedMachineData { + serial_number: "host-nic-mode-no-observed-dpus".to_string(), + dpu_mode: model::expected_machine::DpuMode::NicMode, + ..Default::default() + }, + }, + ) + .await?; + txn.commit().await?; + + let endpoint_explorer = Arc::new(MockEndpointExplorer::default()); + endpoint_explorer.insert_endpoint_results(vec![( + machine.ip.parse().unwrap(), + Ok(EndpointExplorationReport { + endpoint_type: EndpointType::Bmc, + vendor: Some(bmc_vendor::BMCVendor::Lenovo), + systems: vec![ComputerSystem { + serial_number: Some("0123456789".to_string()), + ..Default::default() + }], + ..Default::default() + }), + )]); + + let explorer_config = SiteExplorerConfig { + enabled: Arc::new(true.into()), + explorations_per_run: 1, + concurrent_explorations: 1, + run_interval: std::time::Duration::from_secs(1), + create_machines: Arc::new(true.into()), + ..Default::default() + }; + let test_meter = TestMeter::default(); + let explorer = SiteExplorer::new( + env.pool.clone(), + explorer_config, + test_meter.meter(), + endpoint_explorer, + Arc::new(env.config.get_firmware_config()), + env.common_pools.clone(), + env.api.work_lock_manager_handle.clone(), + env.rms_sim.as_rms_client(), + env.test_credential_manager.clone(), + ); + + explorer.run_single_iteration().await.unwrap(); + let mut txn = env.pool.begin().await?; + db::explored_endpoints::set_preingestion_complete(machine.ip.parse().unwrap(), &mut txn) + .await?; + txn.commit().await?; + explorer.run_single_iteration().await.unwrap(); + + let explored_managed_hosts = db::explored_managed_host::find_all(&env.pool).await?; + assert_eq!( + explored_managed_hosts.len(), + 1, + "NicMode declaration should let the host through the strict gate even with zero observed DPUs", + ); + assert!( + explored_managed_hosts[0].dpus.is_empty(), + "NicMode hosts ingest with an empty `dpus` vector", + ); + + Ok(()) +} + +/// Third member of the zero-DPU triad (alongside the `DpuMode::DpuMode` +/// skip test and the `DpuMode::NicMode` ingest test): a host explicitly +/// declared `dpu_mode = "no_dpu"` ingests as a zero-DPU managed host. The +/// `NoDpu` fast-path in `identify_managed_hosts` short-circuits before any +/// DPU PCIe enumeration, so this holds regardless of what the BMC reports. +#[crate::sqlx_test] +async fn test_site_explorer_ingests_no_dpu_host( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = common::api_fixtures::create_test_env(pool.clone()).await; + + let mut machine = FakeMachine::new( + "AA:AB:AC:AD:AA:33", + "Vendor1", + env.underlay_segment.unwrap(), + ); + machine.discover_dhcp(&env).await?; + + let mut txn = env.pool.begin().await?; + db::expected_machine::create( + &mut txn, + ExpectedMachine { + id: None, + bmc_mac_address: machine.mac, + data: ExpectedMachineData { + serial_number: "host-no-dpu-declared".to_string(), + dpu_mode: model::expected_machine::DpuMode::NoDpu, + ..Default::default() + }, + }, + ) + .await?; + txn.commit().await?; + + let endpoint_explorer = Arc::new(MockEndpointExplorer::default()); + endpoint_explorer.insert_endpoint_results(vec![( + machine.ip.parse().unwrap(), + Ok(EndpointExplorationReport { + endpoint_type: EndpointType::Bmc, + vendor: Some(bmc_vendor::BMCVendor::Lenovo), + systems: vec![ComputerSystem { + serial_number: Some("0123456789".to_string()), + ..Default::default() + }], + ..Default::default() + }), + )]); + + let explorer_config = SiteExplorerConfig { + enabled: Arc::new(true.into()), + explorations_per_run: 1, + concurrent_explorations: 1, + run_interval: std::time::Duration::from_secs(1), + create_machines: Arc::new(true.into()), + ..Default::default() + }; + let test_meter = TestMeter::default(); + let explorer = SiteExplorer::new( + env.pool.clone(), + explorer_config, + test_meter.meter(), + endpoint_explorer, + Arc::new(env.config.get_firmware_config()), + env.common_pools.clone(), + env.api.work_lock_manager_handle.clone(), + env.rms_sim.as_rms_client(), + env.test_credential_manager.clone(), + ); + + explorer.run_single_iteration().await.unwrap(); + let mut txn = env.pool.begin().await?; + db::explored_endpoints::set_preingestion_complete(machine.ip.parse().unwrap(), &mut txn) + .await?; + txn.commit().await?; + explorer.run_single_iteration().await.unwrap(); + + let explored_managed_hosts = db::explored_managed_host::find_all(&env.pool).await?; + assert_eq!( + explored_managed_hosts.len(), + 1, + "NoDpu declaration should ingest the host as zero-DPU", + ); + assert!( + explored_managed_hosts[0].dpus.is_empty(), + "NoDpu hosts ingest with an empty `dpus` vector", + ); + + Ok(()) +} + #[crate::sqlx_test(fixtures("create_expected_machine"))] async fn test_site_explorer_audit_exploration_results( pool: sqlx::PgPool, @@ -1022,7 +1337,6 @@ async fn test_site_explorer_audit_exploration_results( machines_created_per_run: 1, override_target_ip: None, override_target_port: None, - allow_zero_dpu_hosts: false, allow_changing_bmc_proxy: None, bmc_proxy: Arc::default(), reset_rate_limit: chrono::Duration::hours(1), @@ -1902,7 +2216,6 @@ async fn test_site_explorer_new_host_fixture( let env = common::api_fixtures::create_test_env_with_overrides( pool.clone(), TestEnvOverrides { - allow_zero_dpu_hosts: Some(true), site_prefixes: Some(vec![ IpNetwork::new( FIXTURE_ADMIN_NETWORK_SEGMENT_GATEWAY.network(), @@ -2091,7 +2404,6 @@ async fn test_site_explorer_fixtures_zerodpu_site_explorer_before_host_dhcp( let env = common::api_fixtures::create_test_env_with_overrides( pool.clone(), TestEnvOverrides { - allow_zero_dpu_hosts: Some(true), site_prefixes: Some(vec![ IpNetwork::new( FIXTURE_ADMIN_NETWORK_SEGMENT_GATEWAY.network(), @@ -2181,7 +2493,6 @@ async fn test_site_explorer_fixtures_zerodpu_dhcp_before_site_explorer( let env = common::api_fixtures::create_test_env_with_overrides( pool.clone(), TestEnvOverrides { - allow_zero_dpu_hosts: Some(true), site_prefixes: Some(vec![ IpNetwork::new( FIXTURE_ADMIN_NETWORK_SEGMENT_GATEWAY.network(), @@ -2335,7 +2646,6 @@ async fn test_site_explorer_unknown_vendor( concurrent_explorations: 1, run_interval: std::time::Duration::from_secs(1), create_machines: Arc::new(true.into()), - allow_zero_dpu_hosts: true, allocate_secondary_vtep_ip: true, create_power_shelves: Arc::new(true.into()), explore_power_shelves_from_static_ip: Arc::new(true.into()), @@ -3079,7 +3389,6 @@ async fn test_site_explorer_power_shelf_discovery( create_power_shelves: Arc::new(true.into()), explore_power_shelves_from_static_ip: Arc::new(false.into()), power_shelves_created_per_run: 1, - allow_zero_dpu_hosts: true, ..Default::default() }; let test_meter = TestMeter::default(); @@ -3235,7 +3544,6 @@ async fn test_site_explorer_switch_discovery( create_machines: Arc::new(true.into()), create_switches: Arc::new(true.into()), switches_created_per_run: 1, - allow_zero_dpu_hosts: true, ..Default::default() }; let test_meter = TestMeter::default(); @@ -3385,7 +3693,6 @@ async fn test_site_explorer_power_shelf_with_expected_config( create_power_shelves: Arc::new(true.into()), explore_power_shelves_from_static_ip: Arc::new(false.into()), power_shelves_created_per_run: 1, - allow_zero_dpu_hosts: true, ..Default::default() }; let test_meter = TestMeter::default(); @@ -3540,7 +3847,6 @@ async fn test_site_explorer_power_shelf_creation_limit( create_power_shelves: Arc::new(true.into()), explore_power_shelves_from_static_ip: Arc::new(false.into()), power_shelves_created_per_run: 2, // Limit to 2 per run - allow_zero_dpu_hosts: true, ..Default::default() }; let test_meter = TestMeter::default(); @@ -3675,7 +3981,6 @@ async fn test_site_explorer_power_shelf_disabled( create_power_shelves: Arc::new(false.into()), // Disabled explore_power_shelves_from_static_ip: Arc::new(false.into()), power_shelves_created_per_run: 1, - allow_zero_dpu_hosts: true, ..Default::default() }; let test_meter = TestMeter::default(); @@ -3779,7 +4084,6 @@ async fn test_site_explorer_power_shelf_error_handling( create_power_shelves: Arc::new(true.into()), explore_power_shelves_from_static_ip: Arc::new(false.into()), power_shelves_created_per_run: 1, - allow_zero_dpu_hosts: true, ..Default::default() }; let test_meter = TestMeter::default(); @@ -4849,7 +5153,6 @@ async fn test_site_explorer_power_shelf_discovery_with_static_ip( create_power_shelves: Arc::new(true.into()), explore_power_shelves_from_static_ip: Arc::new(true.into()), power_shelves_created_per_run: 1, - allow_zero_dpu_hosts: true, ..Default::default() }; let test_meter = TestMeter::default(); diff --git a/crates/machine-a-tron/src/api_client.rs b/crates/machine-a-tron/src/api_client.rs index ed9cdded05..bd240bded3 100644 --- a/crates/machine-a-tron/src/api_client.rs +++ b/crates/machine-a-tron/src/api_client.rs @@ -501,10 +501,13 @@ impl ApiClient { /// Registers a mock expected machine. Static BMC (`bmc_ip_address`) is left unset here; /// real environments set it through the admin CLI / API when DHCP discovery is not used. + /// `dpu_mode` is the per-host operating mode -- pass `Some(NoDpu)` for zero-DPU mock hosts + /// or `Some(NicMode)` for DPU-in-NIC-mode mock hosts; `None` for normal DPU hosts. pub async fn add_expected_machine( &self, bmc_mac_address: String, chassis_serial_number: String, + dpu_mode: Option, ) -> ClientApiResult<()> { self.0 .add_expected_machine(ExpectedMachine { @@ -524,7 +527,7 @@ impl ApiClient { is_dpf_enabled: Some(true), bmc_ip_address: None, bmc_retain_credentials: None, - dpu_mode: None, + dpu_mode: dpu_mode.map(|m| m as i32), host_lifecycle_profile: None, }) .await diff --git a/crates/machine-a-tron/src/host_machine.rs b/crates/machine-a-tron/src/host_machine.rs index 5e2aded1a6..834d181685 100644 --- a/crates/machine-a-tron/src/host_machine.rs +++ b/crates/machine-a-tron/src/host_machine.rs @@ -573,6 +573,10 @@ impl HostMachineHandle { &self.0.host_info } + pub fn machine_config_section(&self) -> &str { + &self.0.machine_config_section + } + pub fn persisted(&self) -> PersistedHostMachine { let live_state = self.0.live_state.read().unwrap(); PersistedHostMachine { diff --git a/crates/machine-a-tron/src/machine_a_tron.rs b/crates/machine-a-tron/src/machine_a_tron.rs index 340cfa7783..db596616a7 100644 --- a/crates/machine-a-tron/src/machine_a_tron.rs +++ b/crates/machine-a-tron/src/machine_a_tron.rs @@ -99,13 +99,33 @@ impl MachineATron { if self.app_context.app_config.register_expected_machines { for machine in &machines { + // Derive the expected `dpu_mode` from the machine's + // MachineConfig: zero-DPU hosts declare `NoDpu`, hosts + // running their DPUs as NICs declare `NicMode`, everything + // else defers to the absolute default (DpuMode). + // Site-explorer's ingestion gate requires this explicit + // declaration for any host without DPU PCIe devices. + let dpu_mode = self + .app_context + .app_config + .machines + .get(machine.machine_config_section()) + .and_then(|config| { + if config.dpu_per_host_count == 0 { + Some(rpc::forge::DpuMode::NoDpu) + } else if config.dpus_in_nic_mode { + Some(rpc::forge::DpuMode::NicMode) + } else { + None + } + }); // Inform the API that we have finished our reboot (ie. scout is now running) self.app_context .api_client() .add_expected_machine( machine.host_info().bmc_mac_address.to_string(), machine.host_info().serial.clone(), - + dpu_mode, ) .await .inspect_err(|e| { diff --git a/crates/site-explorer/src/config.rs b/crates/site-explorer/src/config.rs index ab295a50c0..5d2f101e7a 100644 --- a/crates/site-explorer/src/config.rs +++ b/crates/site-explorer/src/config.rs @@ -96,13 +96,6 @@ pub struct SiteExplorerConfig { /// This is a debug override and should not be used in production. pub override_target_port: Option, - /// Whether to allow hosts with zero DPUs in site-explorer. This should typically be set to - /// false in production environments where we expect all hosts to have DPUs. When false, if we - /// encounter a host with no DPUs, site-explorer will throw an error for that host (because it - /// should be assumed that there's a bug in detecting the DPUs). - #[serde(default)] - pub allow_zero_dpu_hosts: bool, - /// The host:port to use as a proxy when making BMC calls to all hosts in NICo. This is used /// for integration testing, and for local development with machine-a-tron/bmc-mock. Should not /// be used in production. @@ -206,7 +199,6 @@ impl Default for SiteExplorerConfig { machines_created_per_run: Self::default_machines_created_per_run(), override_target_ip: None, override_target_port: None, - allow_zero_dpu_hosts: false, bmc_proxy: bmc_proxy(None), allow_changing_bmc_proxy: None, reset_rate_limit: Self::default_reset_rate_limit(), diff --git a/crates/site-explorer/src/errors.rs b/crates/site-explorer/src/errors.rs index 72cb0ca5f8..510128977a 100644 --- a/crates/site-explorer/src/errors.rs +++ b/crates/site-explorer/src/errors.rs @@ -15,8 +15,6 @@ * limitations under the License. */ -use std::net::IpAddr; - use db::DatabaseError; use model::errors::ModelError; use model::site_explorer::EndpointExplorationError; @@ -27,8 +25,6 @@ pub enum SiteExplorerError { DatabaseError(#[from] DatabaseError), #[error("Model error: {0}")] ModelError(#[from] ModelError), - #[error("Explored machine at {0} has no DPUs")] - NoDpusInMachine(IpAddr), #[error("{kind} already exists: {id}")] AlreadyFoundError { /// The type of the resource that already exists (e.g. Machine) diff --git a/crates/site-explorer/src/lib.rs b/crates/site-explorer/src/lib.rs index a3c93e65bf..79fa3eae39 100644 --- a/crates/site-explorer/src/lib.rs +++ b/crates/site-explorer/src/lib.rs @@ -1238,10 +1238,10 @@ impl SiteExplorer { ep.address, ); - self.redfish_powercycle( - ep.address, - ) - .await.inspect_err(|err| tracing::warn!("site explorer failed to power cycle host {} to apply DPU mode changes: {err}", ep.address)).ok(); + self.redfish_powercycle(ep.address) + .await + .inspect_err(|err| tracing::warn!("site explorer failed to power cycle host {} to apply DPU mode changes: {err}", ep.address)) + .ok(); } } else { tracing::warn!( @@ -1256,11 +1256,29 @@ impl SiteExplorer { } continue; - } else if !self.config.allow_zero_dpu_hosts { + } else if matches!(host_dpu_mode, DpuMode::DpuMode) { + // Host has no DPU PCIe devices reported by its + // BMC, and the effective `dpu_mode` is the + // default (`DpuMode`) -- i.e. neither per-host + // on `ExpectedMachine.dpu_mode` nor site-wide on + // `[site_explorer] dpu_mode` declared this host + // as zero-DPU. We expect DPUs but found none -- + // probably a misconfiguration or a DPU-discovery + // bug. Skip ingestion this cycle; site-explorer + // will retry on the next iteration, giving the + // operator a chance to either fix the host or + // declare it as `NoDpu`. + // + // (`NoDpu` hosts are handled by the fast-path + // earlier in the loop; `NicMode` hosts fall + // through to the push below with an empty `dpus` + // vector -- the operator already declared + // "treat as zero-DPU.") tracing::warn!( address = %ep.address, exploration_report = ?ep, - "cannot identify managed host because the site explorer does not see any DPUs on this host, and zero-DPU hosts are not allowed by configuration; expected_num_dpus_attached_to_host: {expected_num_dpus_attached_to_host}; dpus_explored_for_host: {dpus_explored_for_host:#?}", + ?host_dpu_mode, + "cannot identify managed host: site explorer sees no DPUs on this host and it isn't declared as `NoDpu`; declare `dpu_mode = \"no_dpu\"` to ingest as zero-DPU", ); metrics.increment_host_dpu_pairing_blocker( PairingBlockerReason::NoDpuReportedByHost, diff --git a/crates/site-explorer/src/machine_creator.rs b/crates/site-explorer/src/machine_creator.rs index 575c368953..e1eb3709e2 100644 --- a/crates/site-explorer/src/machine_creator.rs +++ b/crates/site-explorer/src/machine_creator.rs @@ -148,12 +148,6 @@ impl MachineCreator { // Zero-dpu case: If the explored host had no DPUs, we can create the machine now if managed_host.explored_host.dpus.is_empty() { - if !self.config.allow_zero_dpu_hosts { - let error = - SiteExplorerError::NoDpusInMachine(managed_host.explored_host.host_bmc_ip); - tracing::error!(%error, "Cannot create managed host for explored endpoint with no DPUs: Zero-dpu hosts are disallowed by config"); - return Err(error); - } if let Some(machine_id) = self .create_zero_dpu_machine(&mut txn, &managed_host, report, machine_data) .await? diff --git a/crates/site-explorer/src/metrics.rs b/crates/site-explorer/src/metrics.rs index a0545425bd..c7ea895d99 100644 --- a/crates/site-explorer/src/metrics.rs +++ b/crates/site-explorer/src/metrics.rs @@ -43,7 +43,9 @@ pub enum PairingBlockerReason { HostSystemReportMissing, /// Host's boot MAC not found in any discovered DPU BootInterfaceMacMismatch, - /// Host BMC did not report any DPUs in its PCIE device list + /// Host BMC reports no Bluefield PCIe devices but the host isn't + /// declared as `dpu_mode = "no_dpu"`. We expect DPUs but didn't + /// find any -- likely a misconfiguration or DPU-discovery bug. NoDpuReportedByHost, } diff --git a/dev/deployment/devspace/values.base.yaml b/dev/deployment/devspace/values.base.yaml index 3266bbfb37..f81884cd48 100644 --- a/dev/deployment/devspace/values.base.yaml +++ b/dev/deployment/devspace/values.base.yaml @@ -88,7 +88,6 @@ nico-api: enabled = true create_machines = true allow_changing_bmc_proxy = true - allow_zero_dpu_hosts = true bmc_proxy = "machine-a-tron-bmc-mock.nico-system.svc.cluster.local:1266" run_interval = "10s" machines_created_per_run = 1000 diff --git a/dev/mac-local-dev/carbide-api-config.toml b/dev/mac-local-dev/carbide-api-config.toml index 036de1047a..6bea389f11 100644 --- a/dev/mac-local-dev/carbide-api-config.toml +++ b/dev/mac-local-dev/carbide-api-config.toml @@ -154,7 +154,6 @@ create_machines = true allow_changing_bmc_proxy = true bmc_proxy = ":2000" run_interval = "10s" -allow_zero_dpu_hosts = true [component_manager] nv_switch_backend = "nsm" diff --git a/dev/webdev-env/carbide-api-config.toml b/dev/webdev-env/carbide-api-config.toml index 164a7db17e..238d314c6a 100644 --- a/dev/webdev-env/carbide-api-config.toml +++ b/dev/webdev-env/carbide-api-config.toml @@ -83,4 +83,3 @@ create_machines = false allow_changing_bmc_proxy = false bmc_proxy = ":2000" run_interval = "600s" -allow_zero_dpu_hosts = true