diff --git a/diagnostic/build-338f0f7a.json b/diagnostic/build-338f0f7a.json new file mode 100644 index 00000000..e5e5f049 --- /dev/null +++ b/diagnostic/build-338f0f7a.json @@ -0,0 +1,24 @@ +{ + "generated_at": "2026-06-18T23:12:41.974269+00:00", + "commit": "338f0f7a", + "diagnostic_logd": null, + "diagnostic_logd_error": "encryptly binary not found (detected windows-x64; available: linux-arm64, linux-x64, macos-arm64, macos-x64, windows-arm64, windows-x64); cannot create diagnostic\\build-338f0f7a.logd", + "message_blocker": "encryptly could not create an archive. You may have timed out; try launching it in the background and waiting for it to finish with no timeout due to a bug in encryptly.", + "chunked": false, + "chunk_size_bytes": null, + "password": null, + "decrypt_command": null, + "total_modules": 1, + "passed": 0, + "failed": 1, + "modules": [ + { + "name": "encryptly-preflight", + "status": "FAIL", + "elapsed_seconds": 0.134, + "artifact": null, + "output": "encryptly could not create an archive. You may have timed out; try launching it in the background and waiting for it to finish with no timeout due to a bug in encryptly. encryptly binary not found (detected windows-x64; available: linux-arm64, linux-x64, macos-arm64, macos-x64, windows-arm64, windows-x64)" + } + ], + "pr_note": "Encrypted diagnostic logd artifact was not created; include this JSON report showing why. The encrypted .logd is the required diagnostic content for PR review; this JSON file is metadata. Maintainers may ask you to remove these diagnostic artifacts before merging." +} diff --git a/docs/OPERATIONS.md b/docs/OPERATIONS.md index 58642e7b..6ff0f81f 100644 --- a/docs/OPERATIONS.md +++ b/docs/OPERATIONS.md @@ -310,3 +310,43 @@ Audit logs are retained for 365 days and include: 2. Update Kubernetes secret: `kubectl create secret tls tot-tls --cert=new.crt --key=new.key -n tent-production --dry-run=client -o yaml | kubectl apply -f -` 3. 
Restart services: `kubectl rollout restart deployment -n tent-production` 4. Verify new certificate: `openssl s_client -connect api.example.com:443 -servername api.example.com` + +## Log Aggregator JSONL Output + +The legacy log aggregator (`tools/log_aggregator.py`) supports JSONL output for downstream machine-readable consumption. + +### Usage + +```bash +python3 tools/log_aggregator.py --input '/var/log/app/*.log' --output entries.jsonl --format jsonl +``` + +### JSONL Record Schema + +Each line in the output file is a standalone JSON object with the following fields: + +| Field | Type | Description | +|-------|------|-------------| +| `timestamp` | string or null | ISO 8601 timestamp (e.g. `2024-01-15T10:30:00+00:00`). `null` when no timestamp could be extracted. | +| `level` | string | Log level: `error`, `warn`, `info`, `debug`, `critical`, or `unknown`. | +| `source` | string | Service or component name. `unknown` when no source could be identified. | +| `message` | string | The log message text. | +| `metadata` | object | Additional fields from the parsed entry. Includes `format` (the parser that produced the record: `json`, `text`, or `nginx`) and any parser-specific fields. | + +### Ordering + +Entries with a parsed timestamp are sorted chronologically (oldest first). Entries without a timestamp appear after all timestamped entries, in the order they were read. + +### Warning Records + +Lines that cannot be parsed by any parser (JSON, text, or nginx) produce a warning record, written after all parsed entries; `raw_line` holds the stripped original line, truncated to 500 characters: + +```json +{"timestamp": null, "level": "warn", "source": "log_aggregator", "message": "Failed to parse line", "metadata": {"raw_line": "not a recognizable log line"}} +``` + +### Supported Input Formats + +- **JSON**: One JSON object per line with `timestamp`, `level`, `service`/`logger`/`app`, and `message`/`msg`/`event` fields. +- **Text**: Plain text lines with a recognizable timestamp (ISO 8601, standard, nginx, or syslog format) or a log level keyword (ERROR, WARN, INFO, DEBUG, etc.). 
#!/usr/bin/env python3
"""
Tests for log_aggregator JSONL output (issue #185).

Covers JSON and text log formats, unparseable line warnings,
and timestamp ordering.
"""

import json
import os
import sys
import tempfile
import unittest
from pathlib import Path
from unittest import mock

sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "tools"))
from log_aggregator import LogAggregator, parse_args


SAMPLE_JSON_LOGS = [
    '{"timestamp": "2024-01-15T10:30:00Z", "level": "ERROR", "service": "api", "message": "Database connection failed"}',
    '{"timestamp": "2024-01-15T10:31:00Z", "level": "INFO", "service": "api", "message": "Retry succeeded"}',
    '{"timestamp": "2024-01-15T10:29:00Z", "level": "WARN", "service": "worker", "message": "Queue depth high"}',
]

SAMPLE_TEXT_LOGS = [
    '2024-01-15 10:30:00 [api] ERROR: Database connection failed',
    '2024-01-15 10:31:00 [api] INFO: Retry succeeded',
    '2024-01-15 10:29:00 [worker] WARN: Queue depth high',
]

UNPARSEABLE_LINE = '<<>>'

# Keys every JSONL record is expected to carry.
REQUIRED_FIELDS = ('timestamp', 'level', 'source', 'message', 'metadata')


class TestJSONLOutput(unittest.TestCase):
    """Exercise ``LogAggregator.export_jsonl`` and the ``--format`` CLI flag."""

    def setUp(self):
        # All temp files of a test live in one scratch directory that is
        # removed via addCleanup, so nothing leaks when an assertion fails.
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        self._scratch_dir = scratch.name

    def _write_temp_file(self, lines, suffix=".log"):
        """Write *lines* (newline-terminated) to a new temp file; return its path."""
        fd, path = tempfile.mkstemp(suffix=suffix, dir=self._scratch_dir)
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.writelines(line + '\n' for line in lines)
        return path

    def _temp_output_path(self, suffix=".jsonl"):
        """Return a path inside the scratch directory that does not exist yet."""
        fd, path = tempfile.mkstemp(suffix=suffix, dir=self._scratch_dir)
        os.close(fd)
        # Only the unique name is wanted; the exporter creates the file itself.
        os.unlink(path)
        return path

    def _read_jsonl(self, path):
        """Parse every non-blank line of *path* as JSON and return the list."""
        with open(path, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    def _export_records(self, lines):
        """Run *lines* through a fresh aggregator and return the exported records."""
        agg = LogAggregator()
        agg.process_file(self._write_temp_file(lines))
        out = self._temp_output_path()
        agg.export_jsonl(out)
        return self._read_jsonl(out)

    def _parse_cli(self, *argv):
        """Call ``parse_args`` with ``sys.argv`` temporarily replaced."""
        with mock.patch.object(sys, 'argv', ['log_aggregator.py', *argv]):
            return parse_args()

    def _assert_schema(self, records):
        """Assert that every record carries all required top-level keys."""
        for record in records:
            for field in REQUIRED_FIELDS:
                self.assertIn(field, record)

    def test_jsonl_output_with_json_logs(self):
        records = self._export_records(SAMPLE_JSON_LOGS)
        self.assertEqual(len(records), 3)
        self._assert_schema(records)

    def test_jsonl_output_with_text_logs(self):
        records = self._export_records(SAMPLE_TEXT_LOGS)
        self.assertEqual(len(records), 3)
        self._assert_schema(records)

    def test_jsonl_sorted_by_timestamp(self):
        records = self._export_records(SAMPLE_JSON_LOGS)
        timestamps = [r['timestamp'] for r in records if r['timestamp'] is not None]
        self.assertEqual(timestamps, sorted(timestamps))
        self.assertEqual(timestamps[0], '2024-01-15T10:29:00+00:00')

    def test_unparseable_lines_produce_warning_records(self):
        records = self._export_records(SAMPLE_JSON_LOGS + [UNPARSEABLE_LINE])
        warnings = [
            r for r in records
            if r['level'] == 'warn' and r['source'] == 'log_aggregator'
        ]
        self.assertEqual(len(warnings), 1)
        self.assertIn('raw_line', warnings[0]['metadata'])
        self.assertEqual(warnings[0]['metadata']['raw_line'], UNPARSEABLE_LINE)

    def test_text_format_is_default(self):
        args = self._parse_cli('--input', 'dummy.log')
        self.assertEqual(args.format, 'text')

    def test_jsonl_format_choice_exists(self):
        args = self._parse_cli('--input', 'dummy.log', '--format', 'jsonl')
        self.assertEqual(args.format, 'jsonl')


if __name__ == '__main__':
    unittest.main()
def export_jsonl(self, output_path: str):
    """Write all parsed entries, then all unparseable lines, as JSON Lines.

    Parsed entries are written first, in the order returned by
    ``_sorted_entries_for_jsonl`` and shaped by ``_entry_to_jsonl_record``.
    One warning record per string in ``self.unparseable_lines`` follows.

    Args:
        output_path: Destination file; it is created or truncated.
    """
    records_written = 0
    with open(output_path, 'w') as f:
        for entry in self._sorted_entries_for_jsonl():
            record = self._entry_to_jsonl_record(entry)
            f.write(json.dumps(record, default=str) + '\n')
            records_written += 1
        for raw_line in self.unparseable_lines:
            warning_record = {
                'timestamp': None,
                'level': 'warn',
                'source': 'log_aggregator',
                'message': 'Failed to parse line',
                # Cap the echoed text so one huge line cannot bloat the output.
                'metadata': {'raw_line': raw_line[:500]},
            }
            f.write(json.dumps(warning_record, default=str) + '\n')
            records_written += 1
    logger.info(f"JSONL exported {records_written} records to {output_path}")


def _sorted_entries_for_jsonl(self) -> List[Dict[str, Any]]:
    """Return ``self.entries`` ordered for JSONL export.

    Entries whose ``timestamp`` is not ``None`` come first, sorted ascending
    by that value. Entries with a missing or ``None`` timestamp follow in
    their original order. ``self.entries`` itself is not modified.
    """
    with_ts: List[Dict[str, Any]] = []
    without_ts: List[Dict[str, Any]] = []
    # Single pass instead of two filtering comprehensions.
    for entry in self.entries:
        if entry.get('timestamp') is None:
            without_ts.append(entry)
        else:
            with_ts.append(entry)
    # list.sort is stable, so equal timestamps keep their read order.
    with_ts.sort(key=lambda e: e['timestamp'])
    return with_ts + without_ts


def _entry_to_jsonl_record(self, entry: Dict[str, Any]) -> Dict[str, Any]:
    """Convert one parsed entry into the JSONL record layout.

    Args:
        entry: Parsed entry; the keys read are ``timestamp``, ``level``,
            ``service``, ``message``, ``fields`` and ``format``.

    Returns:
        A new dict with ``timestamp``, ``level``, ``source``, ``message``
        and ``metadata``. The timestamp is rendered as a UTC ISO 8601
        string when it converts to epoch seconds; otherwise the raw value
        is passed through. ``entry`` is never mutated.
    """
    ts = entry.get('timestamp')
    if ts is None:
        ts_val = None
    else:
        try:
            ts_val = datetime.fromtimestamp(int(ts), tz=timezone.utc).isoformat()
        except (TypeError, ValueError, OverflowError, OSError):
            # OverflowError covers out-of-range epochs; keep the raw value so
            # one bad timestamp does not abort the whole export.
            ts_val = ts

    fields = entry.get('fields', {})
    if isinstance(fields, dict):
        # Copy: adding 'format' below must not leak into the stored entry.
        metadata = dict(fields)
    else:
        metadata = {'raw': str(fields)}
    metadata['format'] = entry.get('format', 'unknown')

    return {
        'timestamp': ts_val,
        'level': entry.get('level', 'unknown'),
        'source': entry.get('service', 'unknown'),
        'message': entry.get('message', ''),
        'metadata': metadata,
    }