Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a2a54cc
fix(bootstrap): replace em dash with ASCII in SG description
chaodu-agent Jun 20, 2026
010635f
ci(oabctl): add Rust build cache
chaodu-agent Jun 20, 2026
0d0f35b
fix(create): use GHCR image URIs and add all backends
chaodu-agent Jun 20, 2026
9021bdf
feat(create): add CPU/memory sizing selection with Fargate validation
chaodu-agent Jun 20, 2026
46eda53
refactor(create): always create dedicated SG, remove selection
chaodu-agent Jun 20, 2026
017e36a
fix(apply): download S3 config at container start via command override
chaodu-agent Jun 20, 2026
e9595c3
fix(apply): inject config as base64 env var instead of S3 download
chaodu-agent Jun 20, 2026
130746a
fix(apply): add awslogs log configuration to container
chaodu-agent Jun 20, 2026
01daf63
fix(apply): fix log config build errors
chaodu-agent Jun 20, 2026
bfb0f7f
fix(apply): add execution/task role ARNs and runtime platform
chaodu-agent Jun 20, 2026
92cdc64
fix(apply): mkdir -p /etc/openab before writing config
chaodu-agent Jun 20, 2026
23dce45
fix(apply): use $HOME/.config/openab for config (non-root containers)
chaodu-agent Jun 20, 2026
20e8285
feat(apply): enable ECS Exec + use ecsctl wait_for_stable
chaodu-agent Jun 20, 2026
653a12c
feat(apply): register ecsctl alias for oabctl exec <name>
chaodu-agent Jun 20, 2026
56f65a1
feat(exec): resolve agent name directly from ECS, no ecsctl aliases
chaodu-agent Jun 20, 2026
6b1ff06
fix: default cluster to 'oab', restore Exec command struct
chaodu-agent Jun 20, 2026
406287d
fix(create): reuse existing SG if duplicate
chaodu-agent Jun 20, 2026
bc7d99c
fix: allow clippy too_many_arguments on generate_manifest
chaodu-agent Jun 20, 2026
b325bed
fix(review): address PR review findings
chaodu-agent Jun 20, 2026
7b2d4aa
fix(review): add ProvideErrorMetadata import, document arch and role …
Jun 20, 2026
97d1875
fix(review): address PR review findings
Jun 22, 2026
7892b32
fix(operator): add [workspace] to isolate from root workspace
Jun 23, 2026
ce49f80
fix(operator): use config-based cluster name and consistent service p…
Jun 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/build-oabctl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ jobs:
with:
targets: ${{ inputs.target == 'linux-aarch64' && 'aarch64-unknown-linux-gnu' || '' }}

- uses: Swatinem/rust-cache@v2
with:
workspaces: operator

- name: Install cross-compilation tools
if: inputs.target == 'linux-aarch64'
run: |
Expand Down
3 changes: 3 additions & 0 deletions operator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,8 @@ serde_yaml = "0.9"
tokio = { version = "1.40", features = ["full"] }
toml = "0.8"
anyhow = "1.0"
base64 = "0.22"
dirs = "6"
rpassword = "7"

[workspace]
117 changes: 89 additions & 28 deletions operator/src/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::manifest::{OABFleetManifest, OABServiceManifest, RawManifest, Runtime
use anyhow::{Context, Result};
use aws_sdk_ecs::types::{
AssignPublicIp, AwsVpcConfiguration, CapacityProviderStrategyItem, ContainerDefinition,
KeyValuePair, NetworkConfiguration, Secret,
KeyValuePair, LogConfiguration, LogDriver, NetworkConfiguration, Secret,
};
use aws_sdk_s3::primitives::ByteStream;
use std::path::Path;
Expand Down Expand Up @@ -124,6 +124,9 @@ async fn apply_ecs(
};

let service_name = m.ecs_service_name();
let cluster = crate::config::OabConfig::load()
.map(|c| c.defaults.cluster)
.unwrap_or_else(|_| "oab".to_string());
let bucket = if let Some(b) = crate::config::OabConfig::load().ok().and_then(|c| c.bucket()) {
b
} else {
Expand Down Expand Up @@ -164,13 +167,30 @@ async fn apply_ecs(
KeyValuePair::builder().name("NAMESPACE").value(&m.metadata.namespace).build(),
KeyValuePair::builder().name("NAME").value(&m.metadata.name).build(),
];
if !m.spec.config_from.is_empty() {
env_vars.push(KeyValuePair::builder().name("CONFIG_S3_PATH").value(&m.spec.config_from).build());
}
if let Some(ref bootstrap) = m.spec.bootstrap_from {
env_vars.push(KeyValuePair::builder().name("BOOTSTRAP_FROM").value(bootstrap).build());
}

// Read and embed config.toml as base64 env var
let has_config = !m.spec.config_from.is_empty();
if has_config {
// Resolve config content: if configFrom is an s3:// URI, download the object; otherwise read it as a local file path
let config_content = if let Some(s3_path) = m.spec.config_from.strip_prefix("s3://") {
let (bucket, key) = s3_path.split_once('/').context("invalid configFrom S3 URI")?;
let resp = s3.get_object().bucket(bucket).key(key).send().await
.context("failed to download config from S3")?;
resp.body.collect().await?.into_bytes().to_vec()
} else {
std::fs::read(&m.spec.config_from).context("failed to read local config file")?
};
use base64::Engine;
let b64 = base64::engine::general_purpose::STANDARD.encode(&config_content);
if b64.len() > 8192 {
anyhow::bail!("config.toml too large for env injection ({} bytes encoded, max 8192). Use S3 sidecar pattern instead.", b64.len());
}
env_vars.push(KeyValuePair::builder().name("CONFIG_B64").value(&b64).build());
}

// 3. Build secrets from map
let secrets: Vec<Secret> = m
.spec
Expand All @@ -182,22 +202,60 @@ async fn apply_ecs(
.collect();

// 4. Register task definition
let container = ContainerDefinition::builder()
let log_config = LogConfiguration::builder()
.log_driver(LogDriver::Awslogs)
.options("awslogs-group", "/oab/agents")
.options("awslogs-region", config.region().map(|r| r.as_ref()).unwrap_or("us-east-1"))
.options("awslogs-stream-prefix", &service_name)
.build()
.context("failed to build log configuration")?;

let mut container_builder = ContainerDefinition::builder()
.name("openab")
.image(&m.spec.image)
.essential(true)
.set_environment(Some(env_vars))
.set_secrets(if secrets.is_empty() { None } else { Some(secrets) })
.build();
.log_configuration(log_config);

if has_config {
container_builder = container_builder
.entry_point("sh")
.entry_point("-c")
.command("mkdir -p $HOME/.config/openab && echo $CONFIG_B64 | base64 -d > $HOME/.config/openab/config.toml && exec openab run -c $HOME/.config/openab/config.toml");
}

let container = container_builder.build();

// Resolve account ID for role ARNs
// NOTE: Role names must match those created by `oabctl bootstrap`
let sts = aws_sdk_sts::Client::new(config);
let account_id = sts.get_caller_identity().send().await?
.account().unwrap_or_default().to_string();
let execution_role = format!("arn:aws:iam::{account_id}:role/oab-task-execution");
let task_role = format!("arn:aws:iam::{account_id}:role/oab-task-role");

let iam = aws_sdk_iam::Client::new(config);
check_iam_role(&iam, "oab-task-execution").await?;
check_iam_role(&iam, "oab-task-role").await?;

let task_def = ecs
.register_task_definition()
.family(&service_name)
.execution_role_arn(&execution_role)
.task_role_arn(&task_role)
.requires_compatibilities(aws_sdk_ecs::types::Compatibility::Fargate)
.network_mode(aws_sdk_ecs::types::NetworkMode::Awsvpc)
.cpu(&m.spec.resources.cpu)
.memory(&m.spec.resources.memory)
.container_definitions(container)
.runtime_platform(
aws_sdk_ecs::types::RuntimePlatform::builder()
.operating_system_family(aws_sdk_ecs::types::OsFamily::Linux)
// TODO: make configurable via manifest spec.resources.arch for ARM64/Graviton
.cpu_architecture(aws_sdk_ecs::types::CpuArchitecture::X8664)
.build()
)
.send()
.await
.context("failed to register task definition")?;
Expand Down Expand Up @@ -228,7 +286,7 @@ async fn apply_ecs(
// Check if service exists
let existing = ecs
.describe_services()
.cluster("oab")
.cluster(&cluster)
.services(&service_name)
.send()
.await;
Expand All @@ -241,10 +299,11 @@ async fn apply_ecs(

if service_active {
ecs.update_service()
.cluster("oab")
.cluster(&cluster)
.service(&service_name)
.task_definition(&task_def_arn)
.network_configuration(network_config)
.enable_execute_command(true)
.send()
.await
.context("failed to update ECS service")?;
Expand All @@ -256,10 +315,11 @@ async fn apply_ecs(
.build()?;

ecs.create_service()
.cluster("oab")
.cluster(&cluster)
.service_name(&service_name)
.task_definition(&task_def_arn)
.desired_count(1)
.enable_execute_command(true)
.capacity_provider_strategy(cap_strategy)
.network_configuration(network_config)
.send()
Expand All @@ -273,30 +333,31 @@ async fn apply_ecs(

if wait {
eprintln!(" ⏳ Waiting for {} to stabilize...", m.metadata.name);
wait_for_stable(ecs, "oab", &service_name).await?;
eprintln!(" ✓ {} is stable", m.metadata.name);
ecsctl::apply::wait_for_stable(ecs, &cluster, &service_name).await?;
}

Ok(())
}

async fn wait_for_stable(ecs: &aws_sdk_ecs::Client, cluster: &str, service: &str) -> Result<()> {
for _ in 0..60 {
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
let resp = ecs.describe_services()
.cluster(cluster)
.services(service)
.send().await?;
if let Some(svc) = resp.services().first() {
let deployments = svc.deployments();
if deployments.len() == 1 {
if let Some(d) = deployments.first() {
if d.running_count() == d.desired_count() && d.rollout_state() == Some(&aws_sdk_ecs::types::DeploymentRolloutState::Completed) {
return Ok(());
}
}
/// Check IAM role existence. If AccessDenied, warn and proceed (caller may only have iam:PassRole).
/// Any other error (NoSuchEntity included) is a hard failure, reported as "role not found" with the error code appended.
async fn check_iam_role(iam: &aws_sdk_iam::Client, role_name: &str) -> Result<()> {
use aws_sdk_iam::error::ProvideErrorMetadata;
match iam.get_role().role_name(role_name).send().await {
Ok(_) => Ok(()),
Err(e) => {
let code = e.as_service_error()
.and_then(|se| se.code())
.unwrap_or_default();
if code == "AccessDenied" || code == "AccessDeniedException" {
eprintln!(" ⚠ Cannot verify role '{}' (AccessDenied) — proceeding anyway", role_name);
Ok(())
} else {
Err(anyhow::anyhow!(
"IAM role '{}' not found — run `oabctl bootstrap` first ({})",
role_name, code
))
}
}
}
anyhow::bail!("timed out waiting for service to stabilize (5 min)")
}
}
2 changes: 1 addition & 1 deletion operator/src/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ async fn create(config: &aws_config::SdkConfig, imports: ImportOptions) -> Resul
_ => {
let resp = ec2.create_security_group()
.group_name(SG_NAME)
.description("OAB agent containers managed by oabctl bootstrap")
.description("OAB agent containers - managed by oabctl bootstrap")
.vpc_id(&vid)
.send().await
.context("failed to create security group")?;
Expand Down
Loading
Loading