diff --git a/backend/utils.py b/backend/utils.py index b5abca0..10b9c4f 100644 --- a/backend/utils.py +++ b/backend/utils.py @@ -144,18 +144,21 @@ def serialize_value(value: Any) -> Any: Returns: JSON-serializable value """ - if pd.isna(value): - return None - elif isinstance(value, (pd.Timestamp, np.datetime64)): + if isinstance(value, np.ndarray): + return value.tolist() + if isinstance(value, (pd.Timestamp, np.datetime64)): return ts_fmt(value.to_pydatetime()) if hasattr(value, 'to_pydatetime') else str(value) - elif isinstance(value, (np.integer, np.floating)): + if isinstance(value, (np.integer, np.floating)): if np.isnan(value) or np.isinf(value): return None return value.item() - elif isinstance(value, np.ndarray): - return value.tolist() - else: - return value + try: + # Scalars work with pd.isna, but arrays/other objects can raise. + if pd.isna(value): + return None + except (TypeError, ValueError): + pass + return value def clean_dict_for_json(data: dict) -> dict: diff --git a/tests/test_inbox_queue.py b/tests/test_inbox_queue.py new file mode 100644 index 0000000..8f42866 --- /dev/null +++ b/tests/test_inbox_queue.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +from datetime import datetime, timedelta, timezone +from unittest.mock import AsyncMock, patch + +import pytest + +from backend.messaging.base import MessageChannel, MessageEnvelope +from backend.messaging.inbox import InboxQueue + + +def _msg( + message_id: str, + content: str, + *, + sender_id: str = "user-1", + channel: MessageChannel = MessageChannel.TELEGRAM, + seconds: int = 0, +) -> MessageEnvelope: + base = datetime(2026, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + return MessageEnvelope( + message_id=message_id, + channel=channel, + sender_id=sender_id, + content=content, + timestamp=base + timedelta(seconds=seconds), + chat_id="chat-1", + ) + + +@pytest.mark.asyncio +async def test_push_and_pop_all_roundtrip() -> None: + queue = InboxQueue() + assert not queue.has_messages() + + await queue.push(_msg("1", "hello")) + assert queue.has_messages() + + drained = await queue.pop_all() + assert len(drained) == 1 + assert drained[0].content == "hello" + assert not queue.has_messages() + + +@pytest.mark.asyncio +async def test_push_drops_oldest_when_full() -> None: + queue = InboxQueue(maxsize=2) + + await queue.push(_msg("1", "first", sender_id="a")) + await queue.push(_msg("2", "second", sender_id="b")) + await queue.push(_msg("3", "third", sender_id="c")) + + drained = await queue.pop_all() + assert [m.message_id for m in drained] == ["2", "3"] + + +@pytest.mark.asyncio +async def test_pop_all_debounces_burst_from_same_sender() -> None: + queue = InboxQueue() + + await queue.push(_msg("1", "part-1", seconds=0)) + await queue.push(_msg("2", "part-2", seconds=3)) + await queue.push(_msg("3", "separate", seconds=9)) + + drained = await queue.pop_all() + assert len(drained) == 2 + assert drained[0].content == "part-1\npart-2" + assert drained[1].content == "separate" + + +@pytest.mark.asyncio +async def test_pop_all_does_not_merge_across_sender_or_channel() -> None: + queue = InboxQueue() + + await queue.push(_msg("1", "telegram-a", sender_id="same", channel=MessageChannel.TELEGRAM, seconds=0)) + await queue.push(_msg("2", "feishu-a", sender_id="same", channel=MessageChannel.FEISHU, seconds=1)) + await queue.push(_msg("3", "telegram-b", sender_id="other", channel=MessageChannel.TELEGRAM, seconds=2)) + + drained = await queue.pop_all() + assert [m.message_id for m in drained] == ["1", "2", "3"] + + +@pytest.mark.asyncio +async def test_wait_and_pop_all_drains_followup_messages() -> None: + queue = InboxQueue() + await queue.push(_msg("1", "first", sender_id="a")) + await queue.push(_msg("2", "second", sender_id="b")) + + with patch("backend.messaging.inbox.asyncio.sleep", new_callable=AsyncMock) as sleep_mock: + drained = await queue.wait_and_pop_all() + + sleep_mock.assert_awaited_once_with(0.3) + assert [m.message_id for m in drained] == ["1", "2"] + + +@pytest.mark.asyncio +async def test_pop_all_handles_empty_and_single_message() -> None: + queue = InboxQueue() + assert await queue.pop_all() == [] + + msg = _msg("1", "only") + await queue.push(msg) + drained = await queue.pop_all() + assert len(drained) == 1 + assert drained[0] == msg diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..d07fcf0 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +from datetime import datetime, timezone +from zoneinfo import ZoneInfo + +import numpy as np +import pandas as pd + +from backend import utils +from backend.config import settings + + +def test_ts_fmt_normalizes_to_utc() -> None: + naive = datetime(2026, 1, 2, 3, 4, 5) + aware = datetime(2026, 1, 2, 11, 4, 5, tzinfo=ZoneInfo("Asia/Shanghai")) + + assert utils.ts_fmt(naive) == "2026-01-02T03:04:05" + assert utils.ts_fmt(aware) == "2026-01-02T03:04:05" + + +def test_parse_db_iso_utc_handles_valid_invalid_and_empty() -> None: + assert utils.parse_db_iso_utc(None) is None + assert utils.parse_db_iso_utc("") is None + assert utils.parse_db_iso_utc("not-a-timestamp") is None + + parsed_naive = utils.parse_db_iso_utc("2026-01-02T03:04:05") + assert parsed_naive == datetime(2026, 1, 2, 3, 4, 5, tzinfo=timezone.utc) + + parsed_aware = utils.parse_db_iso_utc("2026-01-02T11:04:05+08:00") + assert parsed_aware == datetime(2026, 1, 2, 3, 4, 5, tzinfo=timezone.utc) + + +def test_app_timezone_uses_config_and_falls_back_to_utc(monkeypatch) -> None: + monkeypatch.setattr(settings, "TIMEZONE", "Asia/Shanghai") + assert utils.app_timezone().key == "Asia/Shanghai" + + monkeypatch.setattr(settings, "TIMEZONE", "Invalid/Timezone") + assert utils.app_timezone().key == "UTC" + + +def test_now_helpers_are_timezone_aware(monkeypatch) -> None: + monkeypatch.setattr(settings, "TIMEZONE", "Europe/London") + + assert utils.now_utc().tzinfo == timezone.utc + assert utils.now_local().tzinfo is not None + + +def test_dataframe_to_json_safe_cleans_non_serializable_values() -> None: + df = pd.DataFrame( + { + "timestamp": pd.to_datetime(["2026-01-02T03:04:05", None], utc=True), + "value": [1.5, np.inf], + "raw": [np.nan, "ok"], + "object_dt": [pd.Timestamp("2026-01-03T04:05:06Z"), pd.NA], + } + ) + + result = utils.dataframe_to_json_safe(df) + + assert result == [ + { + "timestamp": "2026-01-02T03:04:05", + "value": 1.5, + "raw": None, + "object_dt": "2026-01-03T04:05:06", + }, + { + "timestamp": None, + "value": None, + "raw": "ok", + "object_dt": None, + }, + ] + + +def test_dataframe_to_json_safe_empty_dataframe() -> None: + assert utils.dataframe_to_json_safe(pd.DataFrame()) == [] + + +def test_serialize_value_handles_numpy_datetime_scalars_and_arrays() -> None: + assert utils.serialize_value(np.int64(7)) == 7 + assert utils.serialize_value(np.float64(np.inf)) is None + assert utils.serialize_value(np.array([1, 2, 3])) == [1, 2, 3] + assert utils.serialize_value(pd.Timestamp("2026-01-02T03:04:05Z")) == "2026-01-02T03:04:05" + assert utils.serialize_value(np.datetime64("2026-01-02T03:04:05")) == "2026-01-02T03:04:05" + + +def test_clean_dict_for_json_recursively_serializes_nested_data() -> None: + data = { + "top": np.float64(1.25), + "nested": { + "when": pd.Timestamp("2026-01-02T03:04:05Z"), + "missing": np.nan, + }, + "items": [np.int64(2), np.float64(np.nan), np.array([3, 4])], + } + + cleaned = utils.clean_dict_for_json(data) + + assert cleaned == { + "top": 1.25, + "nested": { + "when": "2026-01-02T03:04:05", + "missing": None, + }, + "items": [2, None, [3, 4]], + }