diff --git a/.env.splunk-poc.example b/.env.splunk-poc.example
new file mode 100644
index 0000000..66fb18a
--- /dev/null
+++ b/.env.splunk-poc.example
@@ -0,0 +1,18 @@
+# Splunk Distribution POC credentials. Copy to .env.splunk-poc (gitignored).
+# Locations in Splunk Observability Cloud:
+#   SPLUNK_ACCESS_TOKEN: Organization > Access Tokens (ingest scope)
+#   SPLUNK_REALM: visible in the Splunk Observability Cloud URL
+#   SPLUNK_HEC_TOKEN: Data Management > Connect Data > HTTP Event Collector
+#     (may not be available on all tiers; see POC README caveat)
+
+SPLUNK_ACCESS_TOKEN=
+SPLUNK_REALM=us1
+SPLUNK_HEC_TOKEN=
+
+# Redirect otel-demo apps through the Splunk collector.
+OTEL_COLLECTOR_HOST=splunk-otel-collector
+
+# Activate the otel-demo overlay via the base compose's include directive.
+INCLUDE_COMPOSE_OTEL_DEMO=docker-compose.otel-demo.yml
+
+SPLUNK_COLLECTOR_VERSION=latest
diff --git a/.gitignore b/.gitignore
index 7630b3b..2ab7482 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,7 @@ charts/*/charts/*.tgz
 
 # Terraform plan files
 *.plan
+
+# Splunk distribution POC
+.env.splunk-poc
+docker-compose/splunk-otel-collector/*.local.*
diff --git a/docker-compose.splunk-demo.yml b/docker-compose.splunk-demo.yml
new file mode 100644
index 0000000..582b6e9
--- /dev/null
+++ b/docker-compose.splunk-demo.yml
@@ -0,0 +1,40 @@
+# Splunk OTel distribution overlay for the side-by-side POC.
+# See docker-compose/splunk-otel-collector/README.md to run.
+
+services:
+  splunk-otel-collector:
+    image: quay.io/signalfx/splunk-otel-collector:${SPLUNK_COLLECTOR_VERSION:-latest}
+    container_name: splunk-otel-collector
+    pull_policy: always  # the tag defaults to a floating `latest`, so re-pull on every `up`
+    command: ["--config=/etc/otelcol-config.yml"]
+    volumes:  # mounted read-only so the container cannot modify the host config file
+      - ./docker-compose/splunk-otel-collector/config.yaml:/etc/otelcol-config.yml:ro
+    ports:
+      # Non-default host ports to avoid collision with the base collector's 4317/4318.
+      - "14317:4317"
+      - "14318:4318"
+      - "13133:13133"
+    networks:
+      - observability-stack-network
+    restart: unless-stopped
+    environment:  # bare names carry no value here; the *_URL values are derived from SPLUNK_REALM
+      - SPLUNK_ACCESS_TOKEN
+      - SPLUNK_REALM
+      - SPLUNK_API_URL=https://api.${SPLUNK_REALM}.observability.splunkcloud.com
+      - SPLUNK_INGEST_URL=https://ingest.${SPLUNK_REALM}.observability.splunkcloud.com
+      - SPLUNK_HEC_TOKEN
+      - SPLUNK_HEC_URL=https://ingest.${SPLUNK_REALM}.observability.splunkcloud.com/v1/log
+      - SPLUNK_LISTEN_INTERFACE=0.0.0.0
+      - SPLUNK_MEMORY_TOTAL_MIB=512
+    deploy:
+      resources:
+        limits:
+          memory: 768M  # container cap sits above SPLUNK_MEMORY_TOTAL_MIB=512
+    depends_on:  # the base collector is the target of this collector's otlp/tee exporter
+      otel-collector:
+        condition: service_started
+    healthcheck:  # NOTE(review): assumes `wget` exists inside the image - confirm
+      test: ["CMD", "wget", "-qO-", "http://localhost:13133"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
diff --git a/docker-compose/splunk-otel-collector/README.md b/docker-compose/splunk-otel-collector/README.md
new file mode 100644
index 0000000..d3d8a04
--- /dev/null
+++ b/docker-compose/splunk-otel-collector/README.md
@@ -0,0 +1,69 @@
+# Splunk OTel Distribution POC
+
+Local POC. Inserts Splunk's [OpenTelemetry Collector distribution](https://github.com/signalfx/splunk-otel-collector) between the otel-demo apps and the base `otel-collector`. Demo telemetry fans out to Splunk Observability Cloud and OpenSearch at the same time.
+
+Branch: `feat/splunk-distribution-poc`. Not intended for upstream.
+
+## Dataflow
+
+```
+otel-demo apps
+   │  OTLP (OTEL_COLLECTOR_HOST=splunk-otel-collector)
+   ▼
+splunk-otel-collector
+   ├── signalfx exporter     → Splunk Infrastructure Monitoring
+   ├── otlphttp/splunk-apm   → Splunk APM (ingest/v2/trace/otlp)
+   ├── splunk_hec            → Splunk Log Observer (see caveats)
+   └── otlp/tee              → base otel-collector (unchanged OpenSearch path)
+```
+
+## Run
+
+```bash
+cp .env.splunk-poc.example .env.splunk-poc
+# fill in SPLUNK_ACCESS_TOKEN, SPLUNK_REALM, SPLUNK_HEC_TOKEN
+
+# finch compose's --env-file doesn't feed ${VAR} substitution in compose files, only
+# container env. Append the POC vars into .env before `up`, restore after.
+cat .env.splunk-poc >> .env
+finch compose -f docker-compose.yml -f docker-compose.splunk-demo.yml up -d
+git checkout -- .env
+```
+
+Teardown:
+
+```bash
+finch compose -f docker-compose.yml -f docker-compose.splunk-demo.yml down
+```
+
+## Verify
+
+Counters on the Splunk collector (send success numbers; non-zero = data flowing):
+
+```bash
+finch run --rm --network observability-stack-network curlimages/curl:latest \
+  -s http://splunk-otel-collector:8888/metrics \
+  | grep -E '^otelcol_exporter_sent_(spans|metric_points)_total'
+```
+
+Host-exposed endpoints on `splunk-otel-collector`:
+
+- `localhost:13133` - health check (200 = ready)
+- `localhost:14317` - OTLP gRPC
+- `localhost:14318` - OTLP HTTP
+
+In Splunk Observability Cloud: **APM > Services** shows demo services (frontend, cart, checkout, ad, recommendation, …). **Infrastructure > Metrics Explorer** shows host metadata from `splunk-otel-collector` and app metrics from the demo.
+
+OpenSearch side is unaffected: Trace Analytics in OSD renders the same demo services via the tee.
+
+## Caveats from validation
+
+- **Splunk HEC logs return HTTP 404** on `/v1/log` with the access token reused as HEC token. The `splunk_hec` exporter retries indefinitely without blocking other pipelines. Logs still land in OpenSearch via `otlp/tee`. Cause is likely one of: access token lacks log-ingest scope, trial tier without Log Observer, or a distinct HEC token is required.
+- The `smartagent` and `hostmetrics` receivers from Splunk's default `agent_config.yaml` are not included. The first is proprietary to Splunk's distribution bundle; the second is meaningless in a container.
+
+## Reference
+
+- [Splunk OTel Collector distribution](https://github.com/signalfx/splunk-otel-collector)
+- [Splunk default `agent_config.yaml`](https://github.com/signalfx/splunk-otel-collector/blob/main/cmd/otelcol/config/collector/agent_config.yaml)
+- [signalfxexporter](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/signalfxexporter)
+- [splunkhecexporter](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/splunkhecexporter)
diff --git a/docker-compose/splunk-otel-collector/config.yaml b/docker-compose/splunk-otel-collector/config.yaml
new file mode 100644
index 0000000..543dfba
--- /dev/null
+++ b/docker-compose/splunk-otel-collector/config.yaml
@@ -0,0 +1,98 @@
+# Collector configuration for the Splunk side of the side-by-side POC.
+# Run instructions: docker-compose/splunk-otel-collector/README.md
+#
+# Derived from Splunk's default agent_config.yaml, minus the host-level receivers
+# and the smartagent/opamp/http_forwarder extensions (not viable in-container).
+#
+# Reference: https://github.com/signalfx/splunk-otel-collector/blob/main/cmd/otelcol/config/collector/agent_config.yaml
+
+extensions:
+  health_check:
+    endpoint: '0.0.0.0:13133'
+
+receivers:
+  # Same OTLP ports as the base collector: demo apps switch over by changing only OTEL_COLLECTOR_HOST.
+  otlp:
+    protocols:
+      grpc:
+        endpoint: '0.0.0.0:4317'
+      http:
+        endpoint: '0.0.0.0:4318'
+        cors:
+          allowed_origins:
+            - 'http://*'
+            - 'https://*'
+
+processors:
+  memory_limiter:
+    check_interval: 5s
+    limit_percentage: 80
+    spike_limit_percentage: 25
+
+  batch:
+    timeout: 10s
+    send_batch_size: 1024
+
+  resourcedetection:
+    detectors: [env, system]
+
+exporters:
+  # Traces -> Splunk APM over OTLP/HTTP, authenticated with the X-SF-Token header.
+  otlphttp/splunk-apm:
+    traces_endpoint: '${SPLUNK_INGEST_URL}/v2/trace/otlp'
+    headers:
+      X-SF-Token: '${SPLUNK_ACCESS_TOKEN}'
+
+  # Metrics + events -> Splunk Infrastructure Monitoring.
+  signalfx:
+    access_token: '${SPLUNK_ACCESS_TOKEN}'
+    api_url: '${SPLUNK_API_URL}'
+    ingest_url: '${SPLUNK_INGEST_URL}'
+    sync_host_metadata: true
+
+  # Logs -> Splunk Log Observer via HEC. Currently answers 404 in POC validation; see README caveats.
+  splunk_hec:
+    token: '${SPLUNK_HEC_TOKEN}'
+    endpoint: '${SPLUNK_HEC_URL}'
+    source: 'otel-demo'
+    sourcetype: 'otel'
+    profiling_data_enabled: false
+
+  # Tee: every signal is also sent to observability-stack's base otel-collector (OpenSearch path).
+  otlp/tee:
+    endpoint: 'otel-collector:4317'
+    tls:
+      insecure: true
+
+  debug:  # defined for ad-hoc troubleshooting; not referenced by any pipeline below
+    verbosity: basic
+
+service:
+  extensions: [health_check]
+
+  pipelines:
+    traces:
+      receivers: [otlp]
+      processors: [memory_limiter, resourcedetection, batch]
+      exporters: [otlphttp/splunk-apm, signalfx, otlp/tee]
+
+    metrics:
+      receivers: [otlp]
+      processors: [memory_limiter, resourcedetection, batch]
+      exporters: [signalfx, otlp/tee]
+
+    logs:
+      receivers: [otlp]
+      processors: [memory_limiter, resourcedetection, batch]
+      exporters: [splunk_hec, otlp/tee]
+
+  telemetry:
+    logs:
+      level: info
+    metrics:
+      readers:
+        - pull:
+            exporter:
+              prometheus:
+                host: '0.0.0.0'
+                port: 8888