diff --git a/crcr/aws/sweeper.tf b/crcr/aws/sweeper.tf
new file mode 100644
index 00000000..bbd83e3a
--- /dev/null
+++ b/crcr/aws/sweeper.tf
@@ -0,0 +1,35 @@
+# Active sweeper: EventBridge fires the callback lambda on a fixed schedule so it
+# can scan the Redis ZSET of in-progress jobs and time out any "zombie" jobs whose
+# expected-timeout score has elapsed. The callback handler routes on the constant
+# payload below to branch into the cleanup logic.
+
+# Schedule rule. The "minutes" unit in the rate expression is plural, which
+# EventBridge only accepts for values greater than 1; the validation on
+# var.sweeper_interval_minutes (whole number >= 2) keeps the expression valid.
+resource "aws_cloudwatch_event_rule" "sweeper" {
+  name                = "crcr-sweeper-${var.environment}"
+  description         = "Periodic trigger for the cross-repo-ci callback lambda to reap timed-out jobs"
+  schedule_expression = "rate(${var.sweeper_interval_minutes} minutes)"
+  tags                = local.tags
+}
+
+# Points the rule at the callback lambda with a constant JSON payload.
+resource "aws_cloudwatch_event_target" "sweeper" {
+  rule      = aws_cloudwatch_event_rule.sweeper.name
+  target_id = "crcr-callback-sweeper"
+  arn       = aws_lambda_function.callback.arn
+
+  # Constant event body passed to the lambda as its input.
+  input = jsonencode({
+    source = "crcr.sweeper"
+  })
+}
+
+# Resource-based permission letting EventBridge invoke the lambda, scoped to this rule's ARN.
+resource "aws_lambda_permission" "sweeper_invoke" {
+  statement_id  = "AllowEventBridgeSweeperInvoke"
+  function_name = aws_lambda_function.callback.function_name
+  action        = "lambda:InvokeFunction"
+  principal     = "events.amazonaws.com"
+  source_arn    = aws_cloudwatch_event_rule.sweeper.arn
+}
diff --git a/crcr/aws/variables.tf b/crcr/aws/variables.tf
index 4e5405e7..63a8ebda 100644
--- a/crcr/aws/variables.tf
+++ b/crcr/aws/variables.tf
@@ -66,3 +66,20 @@ variable "oot_status_ttl" {
   type        = number
   default     = 259200
 }
+
+# Sweep interval, interpolated into the EventBridge expression "rate(N minutes)".
+# Rate values must be positive whole numbers, and the plural "minutes" unit is
+# only valid for N > 1, so fractional values and values below 2 are rejected.
+variable "sweeper_interval_minutes" {
+  description = "How often EventBridge triggers the callback lambda to reap timed-out jobs (minutes)"
+  type        = number
+  default     = 10
+
+  validation {
+    condition = (
+      var.sweeper_interval_minutes >= 2 &&
+      floor(var.sweeper_interval_minutes) == var.sweeper_interval_minutes
+    )
+    error_message = "The sweeper_interval_minutes value must be a whole number >= 2; more frequent sweeps reap too few zombies to be worthwhile."
+  }
+}