diff --git a/app/api/v1/base.rb b/app/api/v1/base.rb
index a7146caa..b8821fc4 100644
--- a/app/api/v1/base.rb
+++ b/app/api/v1/base.rb
@@ -17,6 +17,7 @@
 require 'v1/indexed_resources'
 require 'v1/indexer'
 require 'v1/envelope_communities'
+require 'v1/workflows'
 
 module API
   module V1
@@ -64,6 +65,8 @@ class Base < Grape::API
         mount API::V1::Organizations
         mount API::V1::Publishers
       end
+
+      mount API::V1::Workflows
     end
   end
 end
diff --git a/app/api/v1/workflows.rb b/app/api/v1/workflows.rb
new file mode 100644
index 00000000..cbafc9f0
--- /dev/null
+++ b/app/api/v1/workflows.rb
@@ -0,0 +1,66 @@
+require 'mountable_api'
+require 'helpers/shared_helpers'
+
+module API
+  module V1
+    # Endpoints for operational workflows (called by Argo Workflows)
+    class Workflows < MountableAPI
+      mounted do
+        helpers SharedHelpers
+
+        before do
+          authenticate!
+        end
+
+        resource :workflows do
+          desc 'Indexes all S3 JSON-LD graphs to Elasticsearch. ' \
+               'S3 is treated as the source of truth. ' \
+               'Called by Argo Workflows for orchestration.'
+          post 'index-all-s3-to-es' do
+            authorize :workflow, :trigger?
+
+            bucket_name = ENV['ENVELOPE_GRAPHS_BUCKET']
+            error!({ error: 'ENVELOPE_GRAPHS_BUCKET not configured' }, 500) unless bucket_name
+
+            es_address = ENV['ELASTICSEARCH_ADDRESS']
+            error!({ error: 'ELASTICSEARCH_ADDRESS not configured' }, 500) unless es_address
+
+            s3 = Aws::S3::Resource.new(region: ENV['AWS_REGION'].presence)
+            bucket = s3.bucket(bucket_name)
+
+            errors = {}
+            processed = 0
+            skipped = 0
+
+            bucket.objects.each do |object|
+              # Track non-JSON keys instead of silently ignoring them
+              unless object.key.end_with?('.json')
+                skipped += 1
+                next
+              end
+
+              processed += 1
+
+              begin
+                IndexS3GraphToEs.call(object.key)
+              rescue StandardError => e
+                errors[object.key] = "#{e.class}: #{e.message}"
+              end
+            end
+
+            status_code = errors.empty? ? 200 : 207
+
+            status status_code
+            {
+              message: errors.empty? ? 'Indexing completed successfully' : 'Indexing completed with errors',
+              processed: processed,
+              skipped: skipped,
+              errors_count: errors.size,
+              errors: errors.first(100).to_h
+            }
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/app/policies/workflow_policy.rb b/app/policies/workflow_policy.rb
new file mode 100644
index 00000000..380810d6
--- /dev/null
+++ b/app/policies/workflow_policy.rb
@@ -0,0 +1,12 @@
+require_relative 'application_policy'
+
+# Specifies policies for workflow operations
+class WorkflowPolicy < ApplicationPolicy
+  def trigger?
+    user.admin?
+  end
+
+  def show?
+    user.admin?
+  end
+end
diff --git a/app/services/index_s3_graph_to_es.rb b/app/services/index_s3_graph_to_es.rb
new file mode 100644
index 00000000..0dd0f912
--- /dev/null
+++ b/app/services/index_s3_graph_to_es.rb
@@ -0,0 +1,91 @@
+# Indexes a JSON-LD graph from S3 directly to Elasticsearch
+# Does not require database access - S3 is the source of truth
+class IndexS3GraphToEs
+  # Max number of times the ES total-fields limit is doubled before giving up,
+  # so a persistent mapping error cannot retry forever.
+  MAX_FIELD_LIMIT_BUMPS = 5
+
+  attr_reader :s3_key, :community_name, :ctid
+
+  def initialize(s3_key)
+    @s3_key = s3_key
+    parse_s3_key
+  end
+
+  class << self
+    def call(s3_key)
+      new(s3_key).call
+    end
+  end
+
+  def call
+    return unless elasticsearch_address
+
+    attempts ||= 0
+    client.index(
+      body: graph_json,
+      id: ctid,
+      index: community_name
+    )
+  rescue Elastic::Transport::Transport::Errors::BadRequest => e
+    raise e unless e.message.include?('Limit of total fields')
+    raise e if (attempts += 1) > MAX_FIELD_LIMIT_BUMPS
+
+    increase_total_fields_limit
+    retry
+  end
+
+  private
+
+  def parse_s3_key
+    # S3 key format: {community_name}/{ctid}.json
+    parts = s3_key.split('/')
+    @community_name = parts[0..-2].join('/')
+    @ctid = parts.last.sub(/\.json\z/i, '')
+  end
+
+  def graph_content
+    @graph_content ||= s3_object.get.body.read
+  end
+
+  def graph_json
+    @graph_json ||= JSON.parse(graph_content).to_json
+  end
+
+  def client
+    @client ||= Elasticsearch::Client.new(host: elasticsearch_address)
+  end
+
+  def elasticsearch_address
+    ENV['ELASTICSEARCH_ADDRESS'].presence
+  end
+
+  def s3_bucket
+    @s3_bucket ||= 
s3_resource.bucket(s3_bucket_name) + end + + def s3_bucket_name + ENV['ENVELOPE_GRAPHS_BUCKET'].presence + end + + def s3_object + @s3_object ||= s3_bucket.object(s3_key) + end + + def s3_resource + @s3_resource ||= Aws::S3::Resource.new(region: ENV['AWS_REGION'].presence) + end + + def increase_total_fields_limit + settings = client.indices.get_settings(index: community_name) + + current_limit = settings + .dig(community_name, 'settings', 'index', 'mapping', 'total_fields', 'limit') + .to_i + + client.indices.put_settings( + body: { 'index.mapping.total_fields.limit' => current_limit * 2 }, + index: community_name + ) + end +end diff --git a/lib/tasks/s3.rake b/lib/tasks/s3.rake new file mode 100644 index 00000000..6ae3f5c5 --- /dev/null +++ b/lib/tasks/s3.rake @@ -0,0 +1,77 @@ +namespace :s3 do + desc 'Index all S3 JSON-LD graphs to Elasticsearch (S3 as source of truth)' + task index_all_to_es: :environment do + require 'benchmark' + require 'json' + + bucket_name = ENV['ENVELOPE_GRAPHS_BUCKET'] + abort 'ENVELOPE_GRAPHS_BUCKET environment variable is not set' unless bucket_name + + es_address = ENV['ELASTICSEARCH_ADDRESS'] + abort 'ELASTICSEARCH_ADDRESS environment variable is not set' unless es_address + + $stdout.sync = true + + s3 = Aws::S3::Resource.new(region: ENV['AWS_REGION'].presence) + bucket = s3.bucket(bucket_name) + + errors = {} + processed = 0 + skipped = 0 + + puts "Starting S3 to ES indexing from bucket: #{bucket_name}" + puts "Elasticsearch address: #{es_address}" + puts "Counting objects..." 
+
+    # Count total objects for progress reporting
+    total = bucket.objects.count { |obj| obj.key.end_with?('.json') }
+    puts "Found #{total} JSON files to index"
+    puts "Started at #{Time.now.utc}"
+
+    time = Benchmark.measure do
+      bucket.objects.each do |object|
+        next skipped += 1 unless object.key.end_with?('.json')
+
+        processed += 1
+
+        begin
+          IndexS3GraphToEs.call(object.key)
+        rescue StandardError => e
+          errors[object.key] = "#{e.class}: #{e.message}"
+        end
+
+        # Progress every 100 records
+        if (processed % 100).zero?
+          puts "Progress: processed=#{processed}/#{total} errors=#{errors.size} skipped=#{skipped}"
+        end
+      end
+    end
+
+    puts time
+    puts "Finished at #{Time.now.utc} - processed=#{processed}, errors=#{errors.size}"
+
+    # Write errors to file
+    if errors.any?
+      File.write('/tmp/s3_index_errors.json', JSON.pretty_generate(errors))
+      puts "Wrote /tmp/s3_index_errors.json (#{errors.size} entries)"
+
+      # Upload errors to S3
+      begin
+        error_bucket = ENV['S3_ERRORS_BUCKET'] || bucket_name
+        error_key = "errors/s3-index-errors-#{Time.now.utc.strftime('%Y%m%dT%H%M%SZ')}.json"
+        s3_client = Aws::S3::Client.new(region: ENV['AWS_REGION'].presence)
+        s3_client.put_object(
+          bucket: error_bucket,
+          key: error_key,
+          body: File.binread('/tmp/s3_index_errors.json')
+        )
+        puts "Uploaded errors to s3://#{error_bucket}/#{error_key}"
+      rescue StandardError => e
+        warn "Failed to upload errors to S3: #{e.class}: #{e.message}"
+      end
+
+      warn "Encountered #{errors.size} errors. 
Sample: #{errors.to_a.first(5).to_h.inspect}"
+      exit 1
+    end
+  end
+end
diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/README.md b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/README.md
new file mode 100644
index 00000000..a90e0549
--- /dev/null
+++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/README.md
@@ -0,0 +1,141 @@
+# Argo Workflows
+
+These manifests install a minimal Argo Workflows control plane into the shared `credreg-staging` namespace. The controller and server components rely on a shared PostgreSQL database (for example, the RDS modules under `terraform/environments/eks`) for workflow persistence.
+
+## Components
+- `externalsecret.yaml` – syncs the AWS Secrets Manager entry `credreg-argo-workflows-staging` into a Kubernetes Secret named `argo-postgres`.
+- `configmap.yaml` – controller configuration that enables Postgres-based persistence; set the host/database here, while credentials come from the synced secret.
+- `rbac.yaml` – service accounts plus the RBAC needed by the workflow controller and Argo server.
+- `workflow-controller-deployment.yaml` – runs `workflow-controller` with the standard `argoexec` image.
+- `argo-server.yaml` – exposes the Argo UI/API inside the cluster on port `2746`.
+- `argo-basic-auth-externalsecret.yaml` – syncs the AWS Secrets Manager entry `credreg-argo-basic-auth` (or similar) to supply the base64-encoded `user:password` string for ingress auth.
+- `argo-server-ingress.yaml` – optional HTTPS ingress + certificate (via cert-manager + Let's Encrypt) and basic auth for external access to the Argo UI.
+
+## Before applying
+1. **Provision or reference a PostgreSQL instance.** Ensure the desired environment has a reachable database endpoint.
+2. **Create the Secrets Manager entry.** Create `credreg-argo-workflows-staging` (or adjust the `remoteRef.key` value) with JSON keys `host`, `port`, `database`, `username`, `password`, `sslmode`. 
The External Secrets Operator will sync it into the cluster and the controller/server pick them up via env vars. +3. **Update `configmap.yaml`.** Set `persistence.postgresql.host` (and database/table names if they differ) for the target environment. Even though credentials are secret-backed, Argo still requires the host in this config. +4. **Install Argo CRDs.** Apply the upstream CRDs from https://github.com/argoproj/argo-workflows/releases (required only once per cluster) before rolling out these manifests. +5. **Configure DNS if using the ingress.** Update `argo-server-ingress.yaml` with the desired hostname(s) and point the DNS record at the ingress controller's load balancer. + +## Apply order +```bash +kubectl apply -f terraform/environments/eks/k8s-manifests-staging/argo-workflow/externalsecret.yaml +kubectl apply -f terraform/environments/eks/k8s-manifests-staging/argo-workflow/rbac.yaml +kubectl apply -f terraform/environments/eks/k8s-manifests-staging/argo-workflow/configmap.yaml +kubectl apply -f terraform/environments/eks/k8s-manifests-staging/argo-workflow/workflow-controller-deployment.yaml +kubectl apply -f terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-server.yaml +# Optional ingress / certificate +kubectl apply -f terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-basic-auth-externalsecret.yaml +kubectl apply -f terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-server-ingress.yaml +``` + +Once the `argo-postgres` secret is synced and the controller connects to Postgres successfully, `kubectl get wf -n credreg-staging` should show persisted workflows even after pod restarts. + +## Workflow Templates + +### index-s3-to-es + +Indexes all JSON-LD graphs from S3 directly to Elasticsearch. S3 is treated as the source of truth. + +**Architecture:** +``` +Argo Workflow (curl container) + │ + ├──1. POST to Keycloak /token (client credentials grant) + │ → Obtain fresh JWT + │ + └──2. 
POST /workflows/index-all-s3-to-es + │ + ▼ + Registry API + │ + ├──▶ List S3 bucket objects + │ + └──▶ For each .json file: + └──▶ Index to Elasticsearch +``` + +**Prerequisites - Keycloak Service Account:** + +1. Create a Keycloak client in the `CE-Test` realm: + - **Client ID**: e.g., `argo-workflows` + - **Client authentication**: ON (confidential client) + - **Service accounts roles**: ON + - **Authentication flow**: Only "Service accounts roles" enabled + +2. Assign the admin role to the service account: + - Go to the client → Service Account Roles + - Assign `ROLE_ADMINISTRATOR` from the `RegistryAPI` client + +3. Get the client secret: + - Go to the client → Credentials + - Update the Client Secret + + +**Required configuration:** + +1. **Keycloak Credentials Secret** (`argo-keycloak-credentials`): + - `client_id` – Keycloak client ID + - `client_secret` – Keycloak client secret + +2. **Registry API environment variables** (already in app-configmap): + - `ENVELOPE_GRAPHS_BUCKET` – S3 bucket containing JSON-LD graphs + - `ELASTICSEARCH_ADDRESS` – Elasticsearch endpoint + - `AWS_REGION` – AWS region for S3 access + +**Trigger the workflow:** + +Via Argo CLI: +```bash +argo submit --from workflowtemplate/index-s3-to-es -n credreg-staging +``` + +Via Argo REST API: +```bash +kubectl port-forward -n credreg-staging svc/argo-server 2746:2746 +BEARER=$(kubectl create token argo-server -n credreg-staging) + +curl -sk https://localhost:2746/api/v1/workflows/credreg-staging \ + -H "Authorization: Bearer $BEARER" \ + -H 'Content-Type: application/json' \ + -d '{ + "workflow": { + "metadata": { "generateName": "index-s3-to-es-" }, + "spec": { "workflowTemplateRef": { "name": "index-s3-to-es" } } + } + }' +``` + +Via Argo UI: +1. Navigate to the Argo UI +2. Go to Workflow Templates +3. Select `index-s3-to-es` +4. 
Click "Submit" + +**Monitor workflow:** +```bash +# List workflows +kubectl get wf -n credreg-staging + +# Watch workflow status +argo watch -n credreg-staging + +# View logs +argo logs -n credreg-staging +``` + +**Workflow parameters:** + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `api-base-url` | `http://main-app.credreg-staging.svc.cluster.local:9292` | Registry API base URL | +| `keycloak-url` | `https://test-ce-kc-002.credentialengine.org/realms/CE-Test/protocol/openid-connect/token` | Keycloak token endpoint | + +Override parameters when submitting: +```bash +argo submit --from workflowtemplate/index-s3-to-es \ + -p api-base-url=http://custom-api:9292 \ + -p keycloak-url=https://other-keycloak/realms/X/protocol/openid-connect/token \ + -n credreg-staging +``` diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/TRIGGER-DUMMY-WORKFLOW.md b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/TRIGGER-DUMMY-WORKFLOW.md new file mode 100644 index 00000000..c2f32a6f --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/TRIGGER-DUMMY-WORKFLOW.md @@ -0,0 +1,79 @@ +# Triggering Argo Workflows via port-forward + curl + +Use this guide when you need to submit a workflow from your workstation without going through the ingress (no basic auth). The flow is: + +1. **Port-forward the Argo server service** + ```bash + kubectl port-forward -n credreg-staging svc/argo-server 2746:2746 + ``` + Leave this running in a separate terminal; it exposes `https://localhost:2746`. + +2. **Mint a service-account token** + ```bash + BEARER=$(kubectl create token argo-server -n credreg-staging) + ``` + Any SA with workflow submit/list permissions works (`argo-server` or `argo-workflow-controller`). + +3. 
**Create the workflow payload** + ```bash + cat > wf.json <<'EOF' + { + "workflow": { + "apiVersion": "argoproj.io/v1alpha1", + "kind": "Workflow", + "metadata": { "generateName": "rest-test-" }, + "spec": { + "serviceAccountName": "argo-workflow-controller", + "entrypoint": "hello", + "templates": [ + { + "name": "hello", + "container": { + "image": "public.ecr.aws/docker/library/debian:stable-slim", + "command": ["bash", "-c"], + "args": [ + "apt-get update >/dev/null && DEBIAN_FRONTEND=noninteractive apt-get install -y cowsay >/dev/null && /usr/games/cowsay \"hello from REST\"" + ] + } + } + ] + } + } + } + EOF + ``` + +4. **Submit the workflow (cURL)** + ```bash + curl -sk https://localhost:2746/api/v1/workflows/credreg-staging \ + -H "Authorization: Bearer $BEARER" \ + -H 'Content-Type: application/json' \ + -d @wf.json + ``` + A successful response echoes the workflow metadata (UID, status, etc.). + +## Trigger via Postman + +1. Keep the port-forward running: `kubectl port-forward -n credreg-staging svc/argo-server 2746:2746`. +2. Generate a Bearer token: `kubectl create token argo-server -n credreg-staging` (copy the value). +3. In Postman: + - **Method:** `POST` + - **URL:** `https://localhost:2746/api/v1/workflows/credreg-staging` + - **Headers:** + - `Authorization: Bearer ` + - `Content-Type: application/json` + - **Body:** raw JSON from `wf.json` (same payload as above). +4. Disable SSL verification in Postman (Settings → General → “SSL certificate verification” off) or import the Argo server cert so the self-signed TLS passes. +5. Send the request; you should see the workflow metadata returned. Use the same token for subsequent requests until it expires. + +5. **Verify status** + ```bash + kubectl get wf -n credreg-staging + kubectl logs -n credreg-staging wf/ + ``` + +6. **Clean up** + - `kubectl delete wf -n credreg-staging` (optional) + - Stop the `kubectl port-forward` process. + +> Tip: For ad-hoc tests, this approach avoids ingress auth entirely. 
When you’re ready to call the public endpoint, add the ingress basic-auth header and keep using the Bearer token in parallel. diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-basic-auth-externalsecret.yaml b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-basic-auth-externalsecret.yaml new file mode 100644 index 00000000..bd415a38 --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-basic-auth-externalsecret.yaml @@ -0,0 +1,18 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: argo-basic-auth + namespace: credreg-staging +spec: + refreshInterval: 1h + secretStoreRef: + name: aws-secret-manager + kind: ClusterSecretStore + target: + name: argo-basic-auth + creationPolicy: Owner + data: + - secretKey: auth + remoteRef: + key: credreg-argo-basic-auth + property: auth diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-keycloak-credentials-secret.yaml b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-keycloak-credentials-secret.yaml new file mode 100644 index 00000000..3933afb5 --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-keycloak-credentials-secret.yaml @@ -0,0 +1,25 @@ +# Keycloak service account credentials for Argo workflows +# Used to obtain JWT tokens via Client Credentials Grant +# +# Prerequisites: +# 1. Create a Keycloak client with: +# - Client authentication: ON (confidential) +# - Service accounts roles: ON +# - Assign ROLE_ADMINISTRATOR to the service account +# +# 2. Update the values below with your client credentials +# 3. 
Apply: kubectl apply -f argo-keycloak-credentials-secret.yaml +# +# Alternatively, use External Secrets Operator to sync from AWS Secrets Manager +apiVersion: v1 +kind: Secret +metadata: + name: argo-keycloak-credentials + namespace: credreg-staging + labels: + app: credential-registry + component: argo-workflow +type: Opaque +stringData: + client_id: "[KEYCLOAK_CLIENT_ID]" + client_secret: "[KEYCLOAK_CLIENT_SECRET]" diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-server-ingress.yaml b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-server-ingress.yaml new file mode 100644 index 00000000..caa32520 --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-server-ingress.yaml @@ -0,0 +1,46 @@ +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: argo-server-cert + namespace: credreg-staging +spec: + secretName: argo-server-tls + issuerRef: + kind: ClusterIssuer + name: letsencrypt-prod + dnsNames: + - argo-staging.credentialengineregistry.org +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: argo-server + namespace: credreg-staging + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/backend-protocol: "HTTPS" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/auth-type: "basic" + nginx.ingress.kubernetes.io/auth-secret: "argo-basic-auth" + nginx.ingress.kubernetes.io/auth-realm: "Authentication Required" + nginx.ingress.kubernetes.io/proxy-body-size: "10m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "300" + nginx.ingress.kubernetes.io/proxy-send-timeout: "300" + nginx.ingress.kubernetes.io/whitelist-source-range: "71.212.64.155/32,129.224.215.205/32" +spec: + ingressClassName: nginx + tls: + - hosts: + - argo-staging.credentialengineregistry.org + secretName: argo-server-tls + rules: + - host: argo-staging.credentialengineregistry.org + http: + paths: + - path: / + 
pathType: Prefix + backend: + service: + name: argo-server + port: + number: 2746 diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-server.yaml b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-server.yaml new file mode 100644 index 00000000..e1ff3150 --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/argo-server.yaml @@ -0,0 +1,83 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: argo-server + namespace: credreg-staging + labels: + app.kubernetes.io/name: argo-server + app.kubernetes.io/part-of: argo-workflows +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: argo-server + template: + metadata: + labels: + app.kubernetes.io/name: argo-server + app.kubernetes.io/part-of: argo-workflows + spec: + serviceAccountName: argo-server + containers: + - name: argo-server + image: quay.io/argoproj/argocli:v3.7.7 + imagePullPolicy: IfNotPresent + args: + - server + - --auth-mode + - server + - --namespaced + - --namespace + - credreg-staging + - --configmap + - workflow-controller-configmap + envFrom: + - secretRef: + name: argo-postgres + ports: + - containerPort: 2746 + name: web + livenessProbe: + httpGet: + scheme: HTTPS + path: /healthz + port: web + httpHeaders: + - name: Host + value: localhost + initialDelaySeconds: 10 + periodSeconds: 30 + readinessProbe: + httpGet: + scheme: HTTPS + path: /healthz + port: web + httpHeaders: + - name: Host + value: localhost + initialDelaySeconds: 10 + periodSeconds: 15 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi +--- +apiVersion: v1 +kind: Service +metadata: + name: argo-server + namespace: credreg-staging + labels: + app.kubernetes.io/name: argo-server + app.kubernetes.io/part-of: argo-workflows +spec: + selector: + app.kubernetes.io/name: argo-server + type: ClusterIP + ports: + - name: web + port: 2746 + targetPort: web diff --git 
a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/configmap.yaml b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/configmap.yaml new file mode 100644 index 00000000..4878162c --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/configmap.yaml @@ -0,0 +1,29 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: workflow-controller-configmap + namespace: credreg-staging +data: + config: | + metricsConfig: + enabled: false + secure: true + telemetryConfig: + enabled: false + secure: true + namespace: credreg-staging + persistence: + archive: true + nodeStatusOffload: true + postgresql: + host: argo-workflows-staging.cwdkv5tua6nq.us-east-1.rds.amazonaws.com + port: 5432 + database: argo_workflows + tableName: argo_workflows + sslMode: require + userNameSecret: + name: argo-postgres + key: username + passwordSecret: + name: argo-postgres + key: password diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/externalsecret.yaml b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/externalsecret.yaml new file mode 100644 index 00000000..5e782e72 --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/externalsecret.yaml @@ -0,0 +1,38 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: argo-postgres + namespace: credreg-staging +spec: + refreshInterval: 1h + secretStoreRef: + name: aws-secret-manager + kind: ClusterSecretStore + target: + name: argo-postgres + creationPolicy: Owner + data: + - secretKey: host + remoteRef: + key: credreg-argo-workflows-staging + property: host + - secretKey: port + remoteRef: + key: credreg-argo-workflows-staging + property: port + - secretKey: database + remoteRef: + key: credreg-argo-workflows-staging + property: database + - secretKey: username + remoteRef: + key: credreg-argo-workflows-staging + property: username + - secretKey: password + remoteRef: + key: credreg-argo-workflows-staging + 
property: password + - secretKey: sslmode + remoteRef: + key: credreg-argo-workflows-staging + property: sslmode diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/index-s3-to-es-workflow-template.yaml b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/index-s3-to-es-workflow-template.yaml new file mode 100644 index 00000000..539f4af2 --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/index-s3-to-es-workflow-template.yaml @@ -0,0 +1,109 @@ +apiVersion: argoproj.io/v1alpha1 +kind: WorkflowTemplate +metadata: + name: index-s3-to-es + namespace: credreg-staging + labels: + app: credential-registry +spec: + serviceAccountName: argo-workflow-controller + entrypoint: index-s3-to-es + arguments: + parameters: + - name: api-base-url + value: "http://main-app.credreg-staging.svc.cluster.local:9292" + - name: keycloak-url + value: "https://test-ce-kc-002.credentialengine.org/realms/CE-Test/protocol/openid-connect/token" + templates: + - name: index-s3-to-es + metadata: + labels: + app: credential-registry + workflow: index-s3-to-es + inputs: + parameters: + - name: api-base-url + - name: keycloak-url + container: + image: curlimages/curl:latest + command: + - /bin/sh + - -c + - | + set -e + echo "=== S3 to Elasticsearch Indexing Workflow ===" + echo "Started at: $(date -u)" + echo "" + + # Step 1: Get access token from Keycloak + echo "Step 1: Authenticating with Keycloak..." 
+ TOKEN_RESPONSE=$(curl -sf -X POST "{{inputs.parameters.keycloak-url}}" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials" \ + -d "client_id=${KEYCLOAK_CLIENT_ID}" \ + -d "client_secret=${KEYCLOAK_CLIENT_SECRET}") + + ACCESS_TOKEN=$(echo "$TOKEN_RESPONSE" | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p') + + if [ -z "$ACCESS_TOKEN" ]; then + echo "ERROR: Failed to obtain access token from Keycloak" + echo "Response: $TOKEN_RESPONSE" + exit 1 + fi + echo "Successfully obtained access token" + echo "" + + # Step 2: Call Registry API + echo "Step 2: Calling Registry API at {{inputs.parameters.api-base-url}}" + response=$(curl -sf -X POST \ + "{{inputs.parameters.api-base-url}}/workflows/index-all-s3-to-es" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Content-Type: application/json" \ + -w "\n%{http_code}" \ + --max-time 43200) + + # Extract HTTP status code (last line) + http_code=$(echo "$response" | tail -n1) + body=$(echo "$response" | sed '$d') + + echo "" + echo "=== API Response ===" + echo "HTTP Status: $http_code" + echo "Body:" + echo "$body" + echo "" + + # Exit with error if not success (200 or 207) + if [ "$http_code" != "200" ] && [ "$http_code" != "207" ]; then + echo "ERROR: API call failed with status $http_code" + exit 1 + fi + + echo "=== Workflow Completed Successfully ===" + echo "Finished at: $(date -u)" + env: + - name: KEYCLOAK_CLIENT_ID + valueFrom: + secretKeyRef: + name: argo-keycloak-credentials + key: client_id + - name: KEYCLOAK_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: argo-keycloak-credentials + key: client_secret + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "200m" + memory: "256Mi" + retryStrategy: + limit: 2 + retryPolicy: OnFailure + backoff: + duration: "60s" + factor: 2 + maxDuration: "10m" + activeDeadlineSeconds: 43200 diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/rbac.yaml 
b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/rbac.yaml new file mode 100644 index 00000000..e7260839 --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/rbac.yaml @@ -0,0 +1,85 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: argo-workflow-controller + namespace: credreg-staging + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/part-of: argo-workflows +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: argo-server + namespace: credreg-staging + labels: + app.kubernetes.io/component: server + app.kubernetes.io/part-of: argo-workflows +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: argo-workflow-controller + labels: + app.kubernetes.io/part-of: argo-workflows +rules: + - apiGroups: ["argoproj.io"] + resources: ["workflowtasksets", "workflowtasksets/status", "workflowartifactgctasks", "workflows", "workflows/finalizers", "workflows/status", "workflowtemplates", "cronworkflows", "clusterworkflowtemplates", "clusterworkflowtemplates/finalizers", "workflowtaskresults"] + verbs: ["*"] + - apiGroups: [""] + resources: ["configmaps", "persistentvolumeclaims", "pods", "pods/log", "pods/exec", "secrets", "serviceaccounts", "services", "events"] + verbs: ["*"] + - apiGroups: ["apps"] + resources: ["deployments", "replicasets", "statefulsets"] + verbs: ["get", "list", "watch"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["get", "create", "update", "patch"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create", "delete", "get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: argo-workflow-controller + labels: + app.kubernetes.io/part-of: argo-workflows +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: argo-workflow-controller +subjects: + - kind: ServiceAccount + name: argo-workflow-controller + namespace: credreg-staging 
+--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: argo-server + namespace: credreg-staging +rules: + - apiGroups: ["argoproj.io"] + resources: ["workflows", "workflowtemplates", "cronworkflows"] + verbs: ["*"] + - apiGroups: [""] + resources: ["configmaps", "secrets", "pods", "pods/log", "services"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: argo-server + namespace: credreg-staging +subjects: + - kind: ServiceAccount + name: argo-server + namespace: credreg-staging +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: argo-server diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/wf.json b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/wf.json new file mode 100644 index 00000000..11fc609d --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/wf.json @@ -0,0 +1,28 @@ +{ + "workflow": { + "apiVersion": "argoproj.io/v1alpha1", + "kind": "Workflow", + "metadata": { + "generateName": "rest-test-" + }, + "spec": { + "serviceAccountName": "argo-workflow-controller", + "entrypoint": "hello", + "templates": [ + { + "name": "hello", + "container": { + "image": "public.ecr.aws/docker/library/debian:stable-slim", + "command": [ + "bash", + "-c" + ], + "args": [ + "apt-get update >/dev/null && DEBIAN_FRONTEND=noninteractive apt-get install -y cowsay >/dev/null && /usr/games/cowsay \"hello from REST\"" + ] + } + } + ] + } + } +} diff --git a/terraform/environments/eks/k8s-manifests-staging/argo-workflow/workflow-controller-deployment.yaml b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/workflow-controller-deployment.yaml new file mode 100644 index 00000000..127fb2e3 --- /dev/null +++ b/terraform/environments/eks/k8s-manifests-staging/argo-workflow/workflow-controller-deployment.yaml @@ -0,0 
+1,90 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: workflow-controller + namespace: credreg-staging + labels: + app.kubernetes.io/name: workflow-controller + app.kubernetes.io/part-of: argo-workflows +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: workflow-controller + template: + metadata: + labels: + app.kubernetes.io/name: workflow-controller + app.kubernetes.io/part-of: argo-workflows + spec: + serviceAccountName: argo-workflow-controller + containers: + - name: workflow-controller + image: quay.io/argoproj/workflow-controller:v3.7.7 + imagePullPolicy: IfNotPresent + args: + - --configmap + - workflow-controller-configmap + - --executor-image + - quay.io/argoproj/argoexec:v3.7.7 + env: + - name: LEADER_ELECTION_IDENTITY + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: ARGO_POSTGRES_HOST + valueFrom: + secretKeyRef: + name: argo-postgres + key: host + - name: ARGO_POSTGRES_PORT + valueFrom: + secretKeyRef: + name: argo-postgres + key: port + - name: ARGO_POSTGRES_DB + valueFrom: + secretKeyRef: + name: argo-postgres + key: database + - name: ARGO_POSTGRES_USERNAME + valueFrom: + secretKeyRef: + name: argo-postgres + key: username + - name: ARGO_POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: argo-postgres + key: password + - name: ARGO_POSTGRES_SSLMODE + valueFrom: + secretKeyRef: + name: argo-postgres + key: sslmode + ports: + - containerPort: 9090 + name: metrics + livenessProbe: + httpGet: + port: 6060 + path: /healthz + failureThreshold: 3 + initialDelaySeconds: 90 + periodSeconds: 60 + timeoutSeconds: 30 + readinessProbe: + httpGet: + port: 6060 + path: /healthz + failureThreshold: 3 + initialDelaySeconds: 90 + periodSeconds: 60 + timeoutSeconds: 30 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi