diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml index de8e27e..376f2be 100644 --- a/.github/workflows/go-ci.yml +++ b/.github/workflows/go-ci.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 # Get tags to allow build script to get build version + fetch-depth: 0 # Get tags to allow the build script to get a build version - name: Set up Go uses: actions/setup-go@v5 diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index aa02a6a..06f8637 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 # Get tags to allow build script to get build version + fetch-depth: 0 # Get tags to allow the build script to get a build version - name: Set up Go uses: actions/setup-go@v5 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index af1b603..4b86aa0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,7 +12,8 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 # Get tags to allow build script to get build version + ref: ${{ github.ref }} # Checkout the specified tag + fetch-depth: 0 # Get tags to allow the build script to get a build version - name: Set up Go uses: actions/setup-go@v5 diff --git a/telemetry-demo/README.md b/telemetry-demo/README.md index 0a619df..55c2731 100644 --- a/telemetry-demo/README.md +++ b/telemetry-demo/README.md @@ -1,14 +1,17 @@ # SCANOSS API Telemetry Demo -This demo provides a complete, pre-configured telemetry stack to visualize SCANOSS API metrics using OpenTelemetry, Prometheus, and Grafana. +This demo provides a complete, pre-configured telemetry stack to visualize SCANOSS API metrics using +OpenTelemetry, Promtail, Prometheus, Loki, Tempo, and Grafana. 
## What's Included - -✅ **SCANOSS API** with telemetry pre-enabled -✅ **OpenTelemetry Collector** to receive and export metrics -✅ **Prometheus** to store time-series data -✅ **Grafana** for advanced dashboards -✅ **Test SCANOSS engine** (no real scanner needed) +- ✅ **SCANOSS API** with telemetry pre-enabled +- ✅ **Test SCANOSS engine** (no real scanner needed) +- ✅ **OpenTelemetry Collector** to receive and export metrics/traces +- ✅ **Promtail Collector** to receive and export logs +- ✅ **Prometheus** to store time-series data +- ✅ **Tempo** to store spans/traces data +- ✅ **Loki** to store log data +- ✅ **Grafana** for advanced dashboards ## Quick Start @@ -21,57 +24,103 @@ This demo provides a complete, pre-configured telemetry stack to visualize SCANO 2. **Wait for services to start** (about 30-60 seconds) 3. **Generate metrics by making API requests**: - +Run a continuous scan (every 2 seconds): ```bash - # Scan every 0.5 seconds - let it run to see metrics accumulate - watch -n 0.5 "echo 'file=056f9b95f439d915bd3d81ceee9ccf9a,1234,test.js' | \ + # Scan every 2 seconds - let it run to see metrics accumulate + watch -n 2.0 "echo 'file=056f9b95f439d915bd3d81ceee9ccf9a,1234,test.js' | \ curl -s -X POST 'http://localhost:5443/scan/direct' \ -F 'file=@-;filename=test.wfp'" ``` +Or run a single one-off scan: +```bash +echo 'file=056f9b95f439d915bd3d81ceee9ccf9a,1234,test.js' | curl -s -X POST 'http://localhost:5443/scan/direct' -F 'file=@-;filename=test.wfp' +``` - -## Where to See Metrics - -### Option 1: Prometheus UI (Built-in, Simple) +## Visualisation +The complete observability stack is configured to export metrics, logs, and traces. This can all be visualised in Grafana. +To login to Grafana, please browse the following URL: +- http://localhost:3000 +- No username/password required + +From there, it is possible to explore the metrics, logs, and traces. + +### Where to See Logs +To view logs, please browse the following URL: +1. 
http://localhost:3000/a/grafana-lokiexplore-app/explore/job/scanoss-api/logs +2. Select `job` from the "Labels" dropdown and either set it equal to `scanoss-api` or leave it blank +3. Select the log entries from the list below +4. From here, it is possible to filter and explore the logs. + +### Where to See Traces +To view traces, please browse the following URL: +1. http://localhost:3000/explore +2. Select "Tempo" from the "Outline" dropdown +3. Select one of the following Query types: + 3.1 Select "Search" as the `Query type`, choose a time window, and pick a trace from the list below + 3.2 Select "TraceQL" as the `Query type` and enter a `trace_id` to search for +4. These trace IDs can then be queried in the Loki logs to see detailed information about the trace. + +### Where to See Metrics +#### Option 1: Grafana (Advanced Dashboards) +1. **URL**: http://localhost:3000/a/grafana-metricsdrilldown-app/drilldown +2. Select "prometheus" from the "Data Source" dropdown +3. Type `scanoss` into the "Search metric" field to list all SCANOSS metrics +4. Select an appropriate time window + +#### Option 2: Prometheus UI (Built-in, Simple) - **URL**: http://localhost:9090 - **Usage**: - 1. Click "Graph" tab + 1. Click the "Graph" tab 2. Try queries like: - `scanoss_api_scan_file_count_total` (total files scanned) - `rate(scanoss_api_scan_file_count_total[5m])` (requests per second) -### Option 2: Grafana (Advanced Dashboards) -- **URL**: http://localhost:3000 -- **Login**: admin / admin -- **Setup**: - 1. Connections -> Add New Connections → Prometheus - 2. URL: `http://prometheus:9090` - 3. Create dashboard with your metrics - -## Available Metrics to Query +### Data Sources +If any datasources are not automatically configured, please configure them manually: -Try these queries in Prometheus: +#### Prometheus +- **Setup**: + 1. Connections -> Add New Connections → Prometheus + 2. 
URL: `http://prometheus:9090` -```promql -# Total API requests over time -scanoss_api_scan_file_count_total +- A list of possible queries can be found in [Available Metrics to Query](#available-metrics-to-query) -# Request rate (requests per second) -rate(scanoss_api_scan_file_count_total[5m]) +#### Loki +- **Setup**: + 1. Connections -> Add New Connections → Loki + 2. URL: `http://loki:3100` -# Files scanned in last hour -increase(scanoss_api_scan_file_count_total[1h]) +#### Tempo +- **Setup**: + 1. Connections -> Add New Connections → Tempo + 2. URL: `http://tempo:3200` -# Total bytes scanned -scanoss_api_scan_file_size_total -# License requests -scanoss_api_license_req_count_total -``` +## Available Metrics to Query +Try these queries in Prometheus: +- Total API requests over time + ```promql + scanoss_api_scan_file_count_total + ``` +- Request rate (requests per second) + ```promql + rate(scanoss_api_scan_file_count_total[5m]) + ``` +- Files scanned in last hour + ```promql + increase(scanoss_api_scan_file_count_total[1h]) + ``` +- Total bytes scanned + ```promql + scanoss_api_scan_file_size_total + ``` +- License requests + ```promql + scanoss_api_license_req_count_total + ``` ## Stop the Demo - ```bash # Stop all services docker compose down @@ -80,17 +129,19 @@ docker compose down docker compose down -v ``` - ## Next Steps - After exploring this demo, configure telemetry for your production API using [TELEMETRY_CONFIG.md](./TELEMETRY_CONFIG.md). 
## Files in This Demo -| File | Purpose | -|------|---------| -| `docker-compose.yml` | Orchestrates the complete telemetry stack | -| `otel-collector-config.yml` | Configures OpenTelemetry Collector pipelines | -| `prometheus.yml` | Defines Prometheus scrape targets | -| `config/app-config-demo.json` | API configuration with telemetry enabled | -| `TELEMETRY_CONFIG.md` | Production telemetry configuration guide | +| File | Purpose | +|-------------------------------|----------------------------------------------| +| `docker-compose.yml` | Orchestrates the complete telemetry stack | +| `otel-collector-config.yml` | Configures OpenTelemetry Collector pipelines | +| `promtail-config.yml` | Configures Promtail log Collector pipeline | +| `loki.yaml` | Defines Loki logging setup | +| `prometheus.yml` | Defines Prometheus scrape targets | +| `tempo.yaml` | Defines Tempo traces setup | +| `grafana-datasources.yaml` | Data source configuration for Grafana | +| `config/app-config-demo.json` | API configuration with telemetry enabled | +| `TELEMETRY_CONFIG.md` | Production telemetry configuration guide | diff --git a/telemetry-demo/TELEMETRY_CONFIG.md b/telemetry-demo/TELEMETRY_CONFIG.md index 053ecaf..8224d94 100644 --- a/telemetry-demo/TELEMETRY_CONFIG.md +++ b/telemetry-demo/TELEMETRY_CONFIG.md @@ -22,14 +22,12 @@ Edit your service configuration file and change: ```json "Telemetry": { "Enabled": false, // ← Change to true - "ExtraMetrics": false, // ← Change to true "OltpExporter": "localhost:4317" } ``` ### Option 2: Environment Variables ```bash export OTEL_ENABLED=true -export OTEL_EXTRA=true export OTEL_EXPORTER_OLTP=localhost:4317 ``` @@ -47,17 +45,17 @@ OTEL_EXPORTER_OLTP=localhost:4317 The following metrics are exposed by the SCANOSS API (defined in [utils_service.go](https://github.com/scanoss/api.go/blob/main/pkg/service/utils_service.go)): -| Metric Name | Type | Description -|------------|------|------------- -| `scanoss-api.scan.file_count` | Counter | Files 
received per scan request -| `scanoss-api.scan.file_size` | Counter | Total bytes scanned -| `scanoss-api.contents.req_count` | Counter | File contents requests -| `scanoss-api.license.req_count` | Counter | License details requests -| `scanoss-api.attribution.req_count` | Counter | Attribution requests -| `scanoss-api.scan.req_time` | Histogram | Scan duration (ms) -| `scanoss-api.scan.file_time` | Histogram | Per-file scan time (ms) -| `scanoss-api.scan.req_time_sec` | Histogram | Scan duration (seconds) -| `scanoss-api.scan.file_time_sec` | Histogram | Per-file scan time (seconds) +| Metric Name | Type | Description | +|-------------------------------------|-----------|---------------------------------| +| `scanoss-api.scan.file_count` | Counter | Files received per scan request | +| `scanoss-api.scan.file_size` | Counter | Total bytes scanned | +| `scanoss-api.contents.req_count` | Counter | File contents requests | +| `scanoss-api.license.req_count` | Counter | License details requests | +| `scanoss-api.attribution.req_count` | Counter | Attribution requests | +| `scanoss-api.scan.req_time` | Histogram | Scan duration (ms) | +| `scanoss-api.scan.file_time` | Histogram | Per-file scan time (ms) | +| `scanoss-api.scan.req_time_sec` | Histogram | Scan duration (seconds) | +| `scanoss-api.scan.file_time_sec` | Histogram | Per-file scan time (seconds) | ## Metric Name Translation diff --git a/telemetry-demo/config/app-config-demo.json b/telemetry-demo/config/app-config-demo.json index 788833a..c906f74 100644 --- a/telemetry-demo/config/app-config-demo.json +++ b/telemetry-demo/config/app-config-demo.json @@ -4,20 +4,20 @@ "Port": "5443", "Addr": "", "Debug": true, - "Trace": true, - "Mode": "dev" + "Trace": false, + "Mode": "prod" }, "Logging": { - "DynamicLogging": true, + "DynamicLogging": false, "DynamicPort": "localhost:60085", "OutputPaths": [ - "stderr" + "stderr", "/var/log/scanoss/api/scanoss-api-prod.log" ], "ConfigFile": "" }, "Telemetry": { "Enabled": 
true, - "ExtraMetrics": true, + "ExtraMetrics": false, "OltpExporter": "otel-collector:4317" }, "Scanning": { diff --git a/telemetry-demo/docker-compose.yml b/telemetry-demo/docker-compose.yml index f4567cf..aa9816a 100644 --- a/telemetry-demo/docker-compose.yml +++ b/telemetry-demo/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: # SCANOSS API with telemetry enabled scanoss-api: @@ -13,6 +11,7 @@ services: volumes: - ./config:/app/config - ../test-support/scanoss.sh:/usr/local/bin/scanoss:ro + - ./logs/scanoss/api:/var/log/scanoss/api:rw command: ["-json-config", "/app/config/app-config-demo.json", "-debug"] depends_on: - otel-collector @@ -32,6 +31,21 @@ services: - "8889:8889" # Prometheus metrics from collector depends_on: - prometheus + - loki + networks: + - monitoring + restart: unless-stopped + + # Collect logs + promtail: + image: grafana/promtail:3.0.0 + command: -config.file=/etc/promtail/promtail-config.yml + volumes: + - ./promtail-config.yml:/etc/promtail/promtail-config.yml:ro + - ./logs/scanoss/api:/var/log/scanoss/api:ro + - promtail-positions:/tmp + depends_on: + - loki networks: - monitoring restart: unless-stopped @@ -46,14 +60,45 @@ services: - prometheus_data:/prometheus command: - '--config.file=/etc/prometheus/prometheus.yml' + - '--web.enable-remote-write-receiver' - '--storage.tsdb.path=/prometheus' - '--web.console.libraries=/etc/prometheus/console_libraries' - '--web.console.templates=/etc/prometheus/consoles' - '--storage.tsdb.retention.time=200h' - '--web.enable-lifecycle' + - '--enable-feature=exemplar-storage' + networks: + - monitoring + restart: unless-stopped + + # ─── Tempo ──────────────── + tempo: + image: grafana/tempo:2.4.1 + container_name: tempo + restart: unless-stopped + volumes: + - ./tempo.yaml:/etc/tempo/tempo.yaml:ro + - tempo_data:/var/tempo + ports: + - "3200:3200" # Tempo HTTP API (queried by Grafana) + - "4317" # internal OTLP gRPC (not exposed externally — collector talks to it) + command: 
["-config.file=/etc/tempo/tempo.yaml"] networks: - monitoring + + # ─── Loki ───────────────────── + loki: + image: grafana/loki:2.9.7 + container_name: loki restart: unless-stopped + volumes: + - ./loki.yaml:/etc/loki/loki.yaml:ro + - loki_data:/loki + ports: + - "3100:3100" + command: ["-config.file=/etc/loki/loki.yaml"] + networks: + - monitoring # Grafana - creates graphs from Prometheus data grafana: @@ -61,19 +106,28 @@ services: ports: - "3000:3000" environment: - - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin # dev convenience — lock down for prod +# environment: +# - GF_SECURITY_ADMIN_PASSWORD=admin volumes: - grafana_data:/var/lib/grafana + - ./grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro depends_on: - prometheus + - loki networks: - monitoring restart: unless-stopped +# Networking ───────────────────── networks: monitoring: driver: bridge - +# Volumes ───────────────────── volumes: prometheus_data: - grafana_data: \ No newline at end of file + grafana_data: + loki_data: + tempo_data: + promtail-positions: \ No newline at end of file diff --git a/telemetry-demo/grafana-datasources.yaml b/telemetry-demo/grafana-datasources.yaml new file mode 100644 index 0000000..fc7fa4f --- /dev/null +++ b/telemetry-demo/grafana-datasources.yaml @@ -0,0 +1,52 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + uid: prometheus + url: http://prometheus:9090 + isDefault: true + jsonData: + timeInterval: 15s + exemplarTraceIdDestinations: + - name: trace_id + datasourceUid: tempo + + - name: Tempo + type: tempo + uid: tempo + url: http://tempo:3200 + jsonData: + tracesToLogsV2: + datasourceUid: loki + spanStartTimeShift: "-1m" + spanEndTimeShift: "1m" + filterByTraceID: true + filterBySpanID: false + customQuery: false + tracesToMetrics: + datasourceUid: prometheus + spanStartTimeShift: "-1m" + spanEndTimeShift: "1m" + tags: + - key: service.name + value: 
service_name + serviceMap: + datasourceUid: prometheus + nodeGraph: + enabled: true + lokiSearch: + datasourceUid: loki + + - name: Loki + type: loki + uid: loki + url: http://loki:3100 + jsonData: + derivedFields: + # Automatically linkify trace_id fields in logs to Tempo + - name: trace_id + matcherRegex: '"trace_id":\s*"(\w+)"' + url: "$${__value.raw}" + datasourceUid: tempo + urlDisplayLabel: "View Trace" diff --git a/telemetry-demo/logs/scanoss/api/.gitignore b/telemetry-demo/logs/scanoss/api/.gitignore new file mode 100644 index 0000000..4ef2717 --- /dev/null +++ b/telemetry-demo/logs/scanoss/api/.gitignore @@ -0,0 +1,2 @@ + +*.log diff --git a/telemetry-demo/loki.yaml b/telemetry-demo/loki.yaml new file mode 100644 index 0000000..fb45d69 --- /dev/null +++ b/telemetry-demo/loki.yaml @@ -0,0 +1,39 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + log_level: info + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + retention_period: 72h # 3 days local; extend for prod + volume_enabled: true + allow_structured_metadata: true + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 diff --git a/telemetry-demo/otel-collector-config.yml b/telemetry-demo/otel-collector-config.yml index ae38fc7..ffbce59 100644 --- a/telemetry-demo/otel-collector-config.yml +++ b/telemetry-demo/otel-collector-config.yml @@ -12,22 +12,24 @@ processors: send_batch_size: 1024 exporters: - # Send metrics to Prometheus + # ── Metrics → Prometheus ─── prometheus: endpoint: "0.0.0.0:8889" - - # Also log to console for debugging debug: verbosity: detailed + # ── Traces → Tempo ─── + otlp/tempo: + endpoint: "tempo:4317" + tls: + 
insecure: true service: pipelines: traces: receivers: [otlp] processors: [batch] - exporters: [debug] - + exporters: [otlp/tempo, debug] metrics: receivers: [otlp] processors: [batch] - exporters: [prometheus, debug] \ No newline at end of file + exporters: [prometheus, debug] diff --git a/telemetry-demo/promtail-config.yml b/telemetry-demo/promtail-config.yml new file mode 100644 index 0000000..9f24f4e --- /dev/null +++ b/telemetry-demo/promtail-config.yml @@ -0,0 +1,37 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + log_level: info + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: scanoss-api + static_configs: + - targets: + - localhost + labels: + job: scanoss-api + __path__: /var/log/scanoss/api/*.log + + pipeline_stages: + - json: + expressions: + level: level + ts: ts + msg: msg + caller: caller + trace_id: trace_id + span_id: span_id + reqId: reqId + - timestamp: + source: ts + format: Unix + - labels: + level: + - output: + source: msg \ No newline at end of file diff --git a/telemetry-demo/tempo.yaml b/telemetry-demo/tempo.yaml new file mode 100644 index 0000000..129fe61 --- /dev/null +++ b/telemetry-demo/tempo.yaml @@ -0,0 +1,43 @@ +stream_over_http_enabled: true + +server: + http_listen_port: 3200 + log_level: info + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +ingester: + max_block_duration: 5m + +compactor: + compaction: + block_retention: 72h # keep traces for 3 days locally; extend for prod + +storage: + trace: + backend: local + local: + path: /var/tempo/blocks + wal: + path: /var/tempo/wal + +metrics_generator: + registry: + external_labels: + source: tempo + storage: + path: /var/tempo/generator/wal + remote_write: + - url: http://prometheus:9090/api/v1/write + send_exemplars: true + +#overrides: +# defaults: +# metrics_generator: +# processors: [service-graphs, span-metrics] # generates RED metrics from traces +# 
remote_write_add_org_id_header: false