From 6d2c4a80b54aec061070ca18a01b893cc175ccab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 16 Mar 2026 10:45:37 +0000 Subject: [PATCH 01/21] Add new column --- synapse/_scripts/synapse_port_db.py | 2 +- synapse/storage/schema/__init__.py | 5 ++++- .../storage/schema/main/delta/94/01_redactions_recheck.sql | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 synapse/storage/schema/main/delta/94/01_redactions_recheck.sql diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 79b2a0c528e..eedceb170e9 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -122,7 +122,7 @@ "presence_stream": ["currently_active"], "public_room_list_stream": ["visibility"], "pushers": ["enabled"], - "redactions": ["have_censored"], + "redactions": ["have_censored", "recheck"], "remote_media_cache": ["authenticated"], "room_memberships": ["participant"], "room_stats_state": ["is_federatable"], diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index c4c4d7bcc4a..e3095a9d0d0 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -19,7 +19,7 @@ # # -SCHEMA_VERSION = 93 # remember to update the list below when updating +SCHEMA_VERSION = 94 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -171,6 +171,9 @@ Changes in SCHEMA_VERSION = 93 - MSC4140: Set delayed events to be uniquely identifiable by their delay ID. + +Changes in SCHEMA_VERSION = 94 + - Add `recheck` column (boolean, default true) to the `redactions` table. """ diff --git a/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql b/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql new file mode 100644 index 00000000000..12c89b72790 --- /dev/null +++ b/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql @@ -0,0 +1 @@ +ALTER TABLE redactions ADD COLUMN recheck boolean NOT NULL DEFAULT true; From be16f976c17e5dd25a56c262d82764307c3cd151 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 16 Mar 2026 10:47:44 +0000 Subject: [PATCH 02/21] Fill out new recheck field --- synapse/storage/databases/main/events.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index cb452dbc9b1..941a5f9f3a3 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2944,6 +2944,7 @@ def _store_redaction(self, txn: LoggingTransaction, event: EventBase) -> None: values={ "redacts": event.redacts, "received_ts": self._clock.time_msec(), + "recheck": event.internal_metadata.need_to_check_redaction(), }, ) From feec319c5994f5338e9582a76b1975f4ec9d9f24 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 16 Mar 2026 11:11:05 +0000 Subject: [PATCH 03/21] Add background update --- .../databases/main/events_bg_updates.py | 62 ++++++++ .../94/02_redactions_recheck_bg_update.sql | 15 ++ synapse/types/storage/__init__.py | 2 + tests/storage/test_events_bg_updates.py | 133 ++++++++++++++++++ 4 files changed, 212 insertions(+) create mode 100644 synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index f8300e016b1..44eb1a33842 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -159,6 +159,11 @@ def __init__( "redactions_received_ts", self._redactions_received_ts ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE, + self._redactions_recheck_bg_update, + ) + # This index gets deleted in `event_fix_redactions_bytes` update self.db_pool.updates.register_background_index_update( "event_fix_redactions_bytes_create_index", @@ -747,6 +752,63 @@ def _redactions_received_ts_txn(txn: LoggingTransaction) -> int: return count + async def _redactions_recheck_bg_update( + self, progress: JsonDict, batch_size: int + ) -> int: + """Fills in the `recheck` column of the `redactions` table based on + the `recheck_redaction` field in each event's internal metadata.""" + last_event_id = progress.get("last_event_id", "") + + def _txn(txn: LoggingTransaction) -> int: + sql = """ + SELECT r.event_id, ej.internal_metadata + FROM redactions AS r + LEFT JOIN event_json AS ej USING (event_id) + WHERE r.event_id > ? + ORDER BY r.event_id ASC + LIMIT ? + """ + txn.execute(sql, (last_event_id, batch_size)) + rows = txn.fetchall() + if not rows: + return 0 + + updates = [] + for event_id, internal_metadata_json in rows: + if internal_metadata_json is not None: + internal_metadata = db_to_json(internal_metadata_json) + recheck = bool(internal_metadata.get("recheck_redaction", False)) + else: + recheck = False + updates.append((event_id, recheck)) + + self.db_pool.simple_update_many_txn( + txn, + table="redactions", + key_names=("event_id",), + key_values=[(event_id,) for event_id, _ in updates], + value_names=("recheck",), + value_values=[(recheck,) for _, recheck in updates], + ) + + upper_event_id = rows[-1][0] + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE, + {"last_event_id": upper_event_id}, + ) + + return len(rows) + + count = await self.db_pool.runInteraction("_redactions_recheck_bg_update", _txn) + + if not count: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE + ) + + return count + async def _event_fix_redactions_bytes( self, progress: JsonDict, batch_size: int ) -> int: diff --git a/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql b/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql new file mode 100644 index 00000000000..2ddbbb41402 --- /dev/null +++ b/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql @@ -0,0 +1,15 @@ +-- +-- This file is licensed under the Affero General Public License (AGPL) version 3. +-- +-- Copyright (C) 2026 New Vector, Ltd +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- See the GNU Affero General Public License for more details: +-- . + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (9402, 'redactions_recheck', '{}'); diff --git a/synapse/types/storage/__init__.py b/synapse/types/storage/__init__.py index b01653246a9..992c36cabad 100644 --- a/synapse/types/storage/__init__.py +++ b/synapse/types/storage/__init__.py @@ -64,3 +64,5 @@ class _BackgroundUpdates: ) FIXUP_MAX_DEPTH_CAP = "fixup_max_depth_cap" + + REDACTIONS_RECHECK_BG_UPDATE = "redactions_recheck" diff --git a/tests/storage/test_events_bg_updates.py b/tests/storage/test_events_bg_updates.py index d1a794c5a18..a5b53de77f2 100644 --- a/tests/storage/test_events_bg_updates.py +++ b/tests/storage/test_events_bg_updates.py @@ -14,11 +14,14 @@ # +from canonicaljson import encode_canonical_json + from twisted.internet.testing import MemoryReactor from synapse.api.constants import MAX_DEPTH from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.server import HomeServer +from synapse.types.storage import _BackgroundUpdates from synapse.util.clock import Clock from tests.unittest import HomeserverTestCase @@ -154,3 +157,133 @@ def test_fixup_max_depth_cap_bg_update_old_room_version(self) -> None: # Assert that the topological_ordering of events has not been changed # from their depth. self.assertDictEqual(event_id_to_depth, dict(rows)) + + +class TestRedactionsRecheckBgUpdate(HomeserverTestCase): + """Test the background update that backfills the `recheck` column in redactions.""" + + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + self.store = self.hs.get_datastores().main + self.db_pool = self.store.db_pool + + # Re-insert the background update, since it already ran during setup. + self.get_success( + self.db_pool.simple_insert( + table="background_updates", + values={ + "update_name": _BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.db_pool.updates._all_done = False + + def _insert_redaction( + self, + event_id: str, + redacts: str, + recheck_redaction: bool | None = None, + insert_event_json: bool = True, + ) -> None: + """Insert a row into `redactions` and optionally a matching `event_json` row. + + Args: + event_id: The event ID of the redaction event. + redacts: The event ID being redacted. + recheck_redaction: The value of `recheck_redaction` in internal metadata. + If None, the key is omitted from internal metadata. + insert_event_json: Whether to insert a corresponding row in `event_json`. + """ + self.get_success( + self.db_pool.simple_insert( + table="redactions", + values={ + "event_id": event_id, + "redacts": redacts, + "have_censored": False, + "received_ts": 0, + }, + ) + ) + + if insert_event_json: + internal_metadata: dict = {} + if recheck_redaction is not None: + internal_metadata["recheck_redaction"] = recheck_redaction + + self.get_success( + self.db_pool.simple_insert( + table="event_json", + values={ + "event_id": event_id, + "room_id": "!room:test", + "internal_metadata": encode_canonical_json( + internal_metadata + ).decode("utf-8"), + "json": "{}", + "format_version": 3, + }, + ) + ) + + def _get_recheck(self, event_id: str) -> bool: + row = self.get_success( + self.db_pool.simple_select_one( + table="redactions", + keyvalues={"event_id": event_id}, + retcols=["recheck"], + ) + ) + return bool(row[0]) + + def test_recheck_true(self) -> None: + """A redaction with recheck_redaction=True in internal metadata gets recheck=True.""" + self._insert_redaction("$redact1:test", "$target1:test", recheck_redaction=True) + + self.wait_for_background_updates() + + self.assertTrue(self._get_recheck("$redact1:test")) + + def test_recheck_false(self) -> None: + """A redaction with recheck_redaction=False in internal metadata gets recheck=False.""" + self._insert_redaction( + "$redact2:test", "$target2:test", recheck_redaction=False + ) + + self.wait_for_background_updates() + + self.assertFalse(self._get_recheck("$redact2:test")) + + def test_recheck_absent_from_metadata(self) -> None: + """A redaction with no recheck_redaction key in internal metadata gets recheck=False.""" + self._insert_redaction("$redact3:test", "$target3:test", recheck_redaction=None) + + self.wait_for_background_updates() + + self.assertFalse(self._get_recheck("$redact3:test")) + + def test_recheck_no_event_json(self) -> None: + """A redaction with no event_json row gets recheck=False.""" + self._insert_redaction( + "$redact4:test", "$target4:test", insert_event_json=False + ) + + self.wait_for_background_updates() + + self.assertFalse(self._get_recheck("$redact4:test")) + + def test_batching(self) -> None: + """The update processes rows in batches, completing when all are done.""" + self._insert_redaction("$redact5:test", "$target5:test", recheck_redaction=True) + self._insert_redaction( + "$redact6:test", "$target6:test", recheck_redaction=False + ) + self._insert_redaction("$redact7:test", "$target7:test", recheck_redaction=True) + + self.wait_for_background_updates() + + self.assertTrue(self._get_recheck("$redact5:test")) + self.assertFalse(self._get_recheck("$redact6:test")) + self.assertTrue(self._get_recheck("$redact7:test")) From e748a3a498e333d50fac7166d706e1a60bd6078f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 16 Mar 2026 14:47:52 +0000 Subject: [PATCH 04/21] Don't fetch confirmed redaction events --- .../storage/databases/main/events_worker.py | 59 ++++++++++++++----- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index cb0764feb8c..6cefc486809 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -179,7 +179,11 @@ class _EventRow: rejected_reason: if the event was rejected, the reason why. - redactions: a list of event-ids which (claim to) redact this event. + unconfirmed_redactions: a list of event-ids which (claim to) redact this event + and need to be rechecked. + + confirmed_redactions: a list of event-ids which redact this event and have been + confirmed as valid redactions. outlier: True if this event is an outlier. """ @@ -192,7 +196,8 @@ class _EventRow: format_version: int | None room_version_id: str | None rejected_reason: str | None - redactions: list[str] + unconfirmed_redactions: list[str] + confirmed_redactions: list[str] outlier: bool @@ -1359,14 +1364,20 @@ async def _fetch_event_ids_and_get_outstanding_redactions( ) row_map = await self._enqueue_events(event_ids_to_fetch) - # we need to recursively fetch any redactions of those events + # we need to recursively fetch redaction events that require + # rechecking, so we can validate them redaction_ids: set[str] = set() for event_id in event_ids_to_fetch: row = row_map.get(event_id) fetched_event_ids.add(event_id) if row: fetched_events[event_id] = row - redaction_ids.update(row.redactions) + + # If this event only has uncofirmend redactions we fetch + # them from the DB so that we check them to see if any are + # valid. + if not row.confirmed_redactions: + redaction_ids.update(row.unconfirmed_redactions) event_ids_to_fetch = redaction_ids.difference(fetched_event_ids) return event_ids_to_fetch @@ -1510,9 +1521,12 @@ async def _fetch_event_ids_and_get_outstanding_redactions( # the cache entries. result_map: dict[str, EventCacheEntry] = {} for event_id, original_ev in event_map.items(): - redactions = fetched_events[event_id].redactions + row = fetched_events[event_id] redacted_event = self._maybe_redact_event_row( - original_ev, redactions, event_map + original_ev, + row.unconfirmed_redactions, + row.confirmed_redactions, + event_map, ) cache_entry = EventCacheEntry( @@ -1606,21 +1620,25 @@ def _fetch_event_rows( format_version=row[5], room_version_id=row[6], rejected_reason=row[7], - redactions=[], + unconfirmed_redactions=[], + confirmed_redactions=[], outlier=bool(row[8]), # This is an int in SQLite3 ) # check for redactions - redactions_sql = "SELECT event_id, redacts FROM redactions WHERE " + redactions_sql = "SELECT event_id, redacts, recheck FROM redactions WHERE " clause, args = make_in_list_sql_clause(txn.database_engine, "redacts", evs) txn.execute(redactions_sql + clause, args) - for redacter, redacted in txn: + for redacter, redacted, recheck in txn: d = event_dict.get(redacted) if d: - d.redactions.append(redacter) + if recheck: + d.unconfirmed_redactions.append(redacter) + else: + d.confirmed_redactions.append(redacter) # check for MSC4293 redactions to_check = [] @@ -1669,24 +1687,28 @@ def _fetch_event_rows( # backfilled events, as they have a negative stream ordering if e_row.stream_ordering >= redact_end_ordering: continue - e_row.redactions.append(redacting_event_id) + e_row.unconfirmed_redactions.append(redacting_event_id) return event_dict def _maybe_redact_event_row( self, original_ev: EventBase, - redactions: Iterable[str], + unconfirmed_redactions: Iterable[str], + confirmed_redactions: Iterable[str], event_map: dict[str, EventBase], ) -> EventBase | None: - """Given an event object and a list of possible redacting event ids, + """Given an event object and lists of possible redacting event ids, determine whether to honour any of those redactions and if so return a redacted event. Args: original_ev: The original event. - redactions: list of event ids of potential redaction events + unconfirmed_redactions: list of event ids of redaction events that need + domain rechecking (room v3+). + confirmed_redactions: list of event ids of redaction events that have + already been validated and do not need rechecking. event_map: other events which have been fetched, in which we can - look up the redaaction events. Map from event id to event. + look up the redaction events. Map from event id to event. Returns: If the event should be redacted, a pruned event object. Otherwise, None. @@ -1695,7 +1717,12 @@ def _maybe_redact_event_row( # we choose to ignore redactions of m.room.create events. return None - for redaction_id in redactions: + for redaction_id in confirmed_redactions: + redacted_event = prune_event(original_ev) + redacted_event.unsigned["redacted_by"] = redaction_id + return redacted_event + + for redaction_id in unconfirmed_redactions: redaction_event = event_map.get(redaction_id) if not redaction_event or redaction_event.rejected_reason: # we don't have the redaction event, or the redaction event was not From 9016c5c0426a44b69638227b643846fcca3d31b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 16 Mar 2026 11:51:58 +0000 Subject: [PATCH 05/21] Always use EventClientSerializer --- synapse/appservice/api.py | 45 +++++++++++++++--------------------- synapse/events/utils.py | 6 ++--- synapse/rest/admin/events.py | 8 +++++-- synapse/rest/client/room.py | 4 ++-- tests/events/test_utils.py | 28 ++++++++++++++-------- 5 files changed, 48 insertions(+), 43 deletions(-) diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 2bbf77a352c..b535ab76d02 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -32,7 +32,7 @@ from prometheus_client import Counter from typing_extensions import ParamSpec, TypeGuard -from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind +from synapse.api.constants import ThirdPartyEntityKind from synapse.api.errors import CodeMessageException, HttpResponseException from synapse.appservice import ( ApplicationService, @@ -40,7 +40,7 @@ TransactionUnusedFallbackKeys, ) from synapse.events import EventBase -from synapse.events.utils import SerializeEventConfig, serialize_event +from synapse.events.utils import SerializeEventConfig from synapse.http.client import SimpleHttpClient, is_unknown_endpoint from synapse.logging import opentracing from synapse.metrics import SERVER_NAME_LABEL @@ -128,6 +128,7 @@ def __init__(self, hs: "HomeServer"): self.server_name = hs.hostname self.clock = hs.get_clock() self.config = hs.config.appservice + self._event_serializer = hs.get_event_client_serializer() self.protocol_meta_cache: ResponseCache[tuple[str, str]] = ResponseCache( clock=hs.get_clock(), @@ -343,7 +344,7 @@ async def push_bulk( # This is required by the configuration. assert service.hs_token is not None - serialized_events = self._serialize(service, events) + serialized_events = await self._serialize(service, events) if txn_id is None: logger.warning( @@ -539,30 +540,20 @@ async def query_keys( return response - def _serialize( + async def _serialize( self, service: "ApplicationService", events: Iterable[EventBase] ) -> list[JsonDict]: time_now = self.clock.time_msec() - return [ - serialize_event( - e, - time_now, - config=SerializeEventConfig( - as_client_event=True, - # If this is an invite or a knock membership event, and we're interested - # in this user, then include any stripped state alongside the event. - include_stripped_room_state=( - e.type == EventTypes.Member - and ( - e.membership == Membership.INVITE - or e.membership == Membership.KNOCK - ) - and service.is_interested_in_user(e.state_key) - ), - # Appservices are considered 'trusted' by the admin and should have - # applicable metadata on their events. - include_admin_metadata=True, - ), - ) - for e in events - ] + return await self._event_serializer.serialize_events( + list(events), + time_now, + config=SerializeEventConfig( + as_client_event=True, + # If this is an invite or a knock membership event, and we're interested + # in this user, then include any stripped state alongside the event. + include_stripped_room_state=True, + # Appservices are considered 'trusted' by the admin and should have + # applicable metadata on their events. + include_admin_metadata=True, + ), + ) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 89eb2182afd..a105a72d1c9 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -444,7 +444,7 @@ def make_config_for_admin(existing: SerializeEventConfig) -> SerializeEventConfi return attr.evolve(existing, include_admin_metadata=True) -def serialize_event( +def _serialize_event( e: JsonDict | EventBase, time_now_ms: int, *, @@ -477,7 +477,7 @@ def serialize_event( del d["unsigned"]["age_ts"] if "redacted_because" in e.unsigned: - d["unsigned"]["redacted_because"] = serialize_event( + d["unsigned"]["redacted_because"] = _serialize_event( e.unsigned["redacted_because"], time_now_ms, config=config, @@ -617,7 +617,7 @@ async def serialize_event( ): config = make_config_for_admin(config) - serialized_event = serialize_event(event, time_now, config=config) + serialized_event = _serialize_event(event, time_now, config=config) new_unsigned = {} for callback in self._add_extra_fields_to_unsigned_client_event_callbacks: diff --git a/synapse/rest/admin/events.py b/synapse/rest/admin/events.py index 1c39d5caf30..8da7a67820a 100644 --- a/synapse/rest/admin/events.py +++ b/synapse/rest/admin/events.py @@ -5,7 +5,6 @@ from synapse.events.utils import ( SerializeEventConfig, format_event_raw, - serialize_event, ) from synapse.http.servlet import RestServlet from synapse.http.site import SynapseRequest @@ -40,6 +39,7 @@ def __init__(self, hs: "HomeServer"): self._auth = hs.get_auth() self._store = hs.get_datastores().main self._clock = hs.get_clock() + self._event_serializer = hs.get_event_client_serializer() async def on_GET( self, request: SynapseRequest, event_id: str @@ -64,6 +64,10 @@ async def on_GET( include_stripped_room_state=True, include_admin_metadata=True, ) - res = {"event": serialize_event(event, self._clock.time_msec(), config=config)} + res = { + "event": await self._event_serializer.serialize_event( + event, self._clock.time_msec(), config=config + ) + } return HTTPStatus.OK, res diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 9172bfcb4ed..65d9c130efc 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -55,7 +55,6 @@ EventClientSerializer, SerializeEventConfig, format_event_for_client_v2, - serialize_event, ) from synapse.handlers.pagination import GetMessagesResult from synapse.http.server import HttpServer @@ -214,6 +213,7 @@ def __init__(self, hs: "HomeServer"): self.delayed_events_handler = hs.get_delayed_events_handler() self.auth = hs.get_auth() self.clock = hs.get_clock() + self._event_serializer = hs.get_event_client_serializer() self._max_event_delay_ms = hs.config.server.max_event_delay_ms self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker self._msc4354_enabled = hs.config.experimental.msc4354_enabled @@ -285,7 +285,7 @@ async def on_GET( raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) if format == "event": - event = serialize_event( + event = await self._event_serializer.serialize_event( data, self.clock.time_msec(), config=SerializeEventConfig( diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py index 9ea015e1381..499417e0e88 100644 --- a/tests/events/test_utils.py +++ b/tests/events/test_utils.py @@ -20,7 +20,7 @@ # import unittest as stdlib_unittest -from typing import Any, Mapping +from typing import TYPE_CHECKING, Any, Mapping import attr from parameterized import parameterized @@ -38,11 +38,15 @@ make_config_for_admin, maybe_upsert_event_field, prune_event, - serialize_event, ) from synapse.types import JsonDict, create_requester from synapse.util.frozenutils import freeze +from tests.unittest import HomeserverTestCase + +if TYPE_CHECKING: + from synapse.server import HomeServer + def MockEvent(**kwargs: Any) -> EventBase: if "event_id" not in kwargs: @@ -644,19 +648,25 @@ def test_unsigned_is_copied(self) -> None: self.assertEqual(cloned.internal_metadata.txn_id, "txn") -class SerializeEventTestCase(stdlib_unittest.TestCase): +class SerializeEventTestCase(HomeserverTestCase): + def prepare(self, reactor: Any, clock: Any, hs: "HomeServer") -> None: + self._event_serializer = hs.get_event_client_serializer() + def serialize( self, ev: EventBase, fields: list[str] | None, include_admin_metadata: bool = False, ) -> JsonDict: - return serialize_event( - ev, - 1479807801915, - config=SerializeEventConfig( - only_event_fields=fields, include_admin_metadata=include_admin_metadata - ), + return self.get_success( + self._event_serializer.serialize_event( + ev, + 1479807801915, + config=SerializeEventConfig( + only_event_fields=fields, + include_admin_metadata=include_admin_metadata, + ), + ) ) def test_event_fields_works_with_keys(self) -> None: From 41f0c7f6ac4a5968c90bdf6ab9432c13348fe44a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 16 Mar 2026 13:38:48 +0000 Subject: [PATCH 06/21] Remove 'redacted_because' from internal unsigned --- synapse/events/utils.py | 30 +++++++++++++---- .../storage/databases/main/events_worker.py | 4 --- tests/events/test_utils.py | 13 +++++--- tests/rest/client/test_rooms.py | 8 ++--- tests/storage/test_redaction.py | 33 ++++--------------- 5 files changed, 42 insertions(+), 46 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index a105a72d1c9..88142de3128 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -476,13 +476,6 @@ def _serialize_event( d["unsigned"]["age"] = time_now_ms - d["unsigned"]["age_ts"] del d["unsigned"]["age_ts"] - if "redacted_because" in e.unsigned: - d["unsigned"]["redacted_because"] = _serialize_event( - e.unsigned["redacted_because"], - time_now_ms, - config=config, - ) - # If we have applicable fields saved in the internal_metadata, include them in the # unsigned section of the event if the event was sent by the same session (or when # appropriate, just the same sender) as the one requesting the event. @@ -619,6 +612,29 @@ async def serialize_event( serialized_event = _serialize_event(event, time_now, config=config) + # If the event was redacted, fetch the redaction event from the database + # and include it in the serialized event's unsigned section. + redacted_by = event.unsigned.get("redacted_by") + if redacted_by is not None: + redaction_event = await self._store.get_event( + redacted_by, + allow_none=True, + ) + if redaction_event is not None: + serialized_redaction = _serialize_event( + redaction_event, time_now, config=config + ) + serialized_event.setdefault("unsigned", {})["redacted_because"] = ( + serialized_redaction + ) + # format_event_for_client_v1 copies redacted_because to the + # top level, but since we add it after that runs, do it here. + if ( + config.as_client_event + and config.event_format is format_event_for_client_v1 + ): + serialized_event["redacted_because"] = serialized_redaction + new_unsigned = {} for callback in self._add_extra_fields_to_unsigned_client_event_callbacks: u = await callback(event) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 6cefc486809..3041c3797c6 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1765,10 +1765,6 @@ def _maybe_redact_event_row( redacted_event = prune_event(original_ev) redacted_event.unsigned["redacted_by"] = redaction_id - # It's fine to add the event directly, since get_pdu_json - # will serialise this field correctly - redacted_event.unsigned["redacted_because"] = redaction_event - return redacted_event # no valid redaction found for this event diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py index 499417e0e88..23eab6c50bf 100644 --- a/tests/events/test_utils.py +++ b/tests/events/test_utils.py @@ -779,11 +779,16 @@ def test_event_fields_all_fields_if_empty(self) -> None: ) def test_event_fields_fail_if_fields_not_str(self) -> None: - with self.assertRaises(TypeError): - self.serialize( + self.get_failure( + self._event_serializer.serialize_event( MockEvent(room_id="!foo:bar", content={"foo": "bar"}), - ["room_id", 4], # type: ignore[list-item] - ) + 1479807801915, + config=SerializeEventConfig( + only_event_fields=["room_id", 4], # type: ignore[list-item] + ), + ), + TypeError, + ) def test_default_serialize_config_excludes_admin_metadata(self) -> None: # We just really don't want this to be set to True accidentally diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index f85c9939ce4..498022fcf3e 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -4746,10 +4746,10 @@ def test_banning_remote_member_with_flag_redacts_their_events(self) -> None: original = self.get_success(self.store.get_event(message.event_id)) if not original: self.fail("Expected to find remote message in DB") - redacted_because = original.unsigned.get("redacted_because") - if not redacted_because: - self.fail("Did not find redacted_because field") - self.assertEqual(redacted_because.event_id, ban_event_id) + redacted_by = original.unsigned.get("redacted_by") + if not redacted_by: + self.fail("Did not find redacted_by field") + self.assertEqual(redacted_by, ban_event_id) def test_unbanning_remote_user_stops_redaction_action(self) -> None: bad_user = "@remote_bad_user:" + self.OTHER_SERVER_NAME diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 92eb99f1d50..1dc1aeb0434 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -158,7 +158,7 @@ def test_redact(self) -> None: event, ) - self.assertFalse("redacted_because" in event.unsigned) + self.assertNotIn("redacted_by", event.unsigned) # Redact event reason = "Because I said so" @@ -168,7 +168,7 @@ def test_redact(self) -> None: self.assertEqual(msg_event.event_id, event.event_id) - self.assertTrue("redacted_because" in event.unsigned) + self.assertIn("redacted_by", event.unsigned) self.assertObjectHasAttributes( { @@ -179,15 +179,6 @@ def test_redact(self) -> None: event, ) - self.assertObjectHasAttributes( - { - "type": EventTypes.Redaction, - "user_id": self.u_alice.to_string(), - "content": {"reason": reason}, - }, - event.unsigned["redacted_because"], - ) - def test_redact_join(self) -> None: self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) @@ -206,7 +197,7 @@ def test_redact_join(self) -> None: event, ) - self.assertFalse(hasattr(event, "redacted_because")) + self.assertNotIn("redacted_by", event.unsigned) # Redact event reason = "Because I said so" @@ -216,7 +207,7 @@ def test_redact_join(self) -> None: event = self.get_success(self.store.get_event(msg_event.event_id)) - self.assertTrue("redacted_because" in event.unsigned) + self.assertIn("redacted_by", event.unsigned) self.assertObjectHasAttributes( { @@ -227,15 +218,6 @@ def test_redact_join(self) -> None: event, ) - self.assertObjectHasAttributes( - { - "type": EventTypes.Redaction, - "user_id": self.u_alice.to_string(), - "content": {"reason": reason}, - }, - event.unsigned["redacted_because"], - ) - def test_circular_redaction(self) -> None: redaction_event_id1 = "$redaction1_id:test" redaction_event_id2 = "$redaction2_id:test" @@ -332,9 +314,6 @@ def internal_metadata(self) -> EventInternalMetadata: # it should have been redacted self.assertEqual(fetched.unsigned["redacted_by"], redaction_event_id2) - self.assertEqual( - fetched.unsigned["redacted_because"].event_id, redaction_event_id2 - ) def test_redact_censor(self) -> None: """Test that a redacted event gets censored in the DB after a month""" @@ -355,7 +334,7 @@ def test_redact_censor(self) -> None: event, ) - self.assertFalse("redacted_because" in event.unsigned) + self.assertNotIn("redacted_by", event.unsigned) # Redact event reason = "Because I said so" @@ -363,7 +342,7 @@ def test_redact_censor(self) -> None: event = self.get_success(self.store.get_event(msg_event.event_id)) - self.assertTrue("redacted_because" in event.unsigned) + self.assertIn("redacted_by", event.unsigned) self.assertObjectHasAttributes( { From 5f4d707e347e8895193b85b2382115c5df280ed7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Mar 2026 11:23:27 +0000 Subject: [PATCH 07/21] Batch fetch redaction events from the DB --- synapse/events/utils.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 88142de3128..759e21d89f8 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -584,6 +584,7 @@ async def serialize_event( *, config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG, bundle_aggregations: dict[str, "BundledAggregations"] | None = None, + redaction_map: dict[str, "EventBase"] | None = None, ) -> JsonDict: """Serializes a single event. @@ -593,6 +594,8 @@ async def serialize_event( config: Event serialization config bundle_aggregations: A map from event_id to the aggregations to be bundled into the event. + redaction_map: Optional pre-fetched map from redaction event_id to event, + used to avoid per-event DB lookups when serializing many events. Returns: The serialized event @@ -614,12 +617,15 @@ async def serialize_event( # If the event was redacted, fetch the redaction event from the database # and include it in the serialized event's unsigned section. - redacted_by = event.unsigned.get("redacted_by") + redacted_by: str | None = event.unsigned.get("redacted_by") if redacted_by is not None: - redaction_event = await self._store.get_event( - redacted_by, - allow_none=True, - ) + if redaction_map is not None: + redaction_event: EventBase | None = redaction_map.get(redacted_by) + else: + redaction_event = await self._store.get_event( + redacted_by, + allow_none=True, + ) if redaction_event is not None: serialized_redaction = _serialize_event( redaction_event, time_now, config=config @@ -761,12 +767,23 @@ async def serialize_events( str(len(events)), ) + # Batch-fetch all redaction events in one go rather than one per event. + redaction_ids = { + e.unsigned["redacted_by"] + for e in events + if isinstance(e, EventBase) and "redacted_by" in e.unsigned + } + redaction_map = ( + await self._store.get_events(redaction_ids) if redaction_ids else {} + ) + return [ await self.serialize_event( event, time_now, config=config, bundle_aggregations=bundle_aggregations, + redaction_map=redaction_map, ) for event in events ] From b99fb4e6c400258f3727de5bf5c2ff6d2a55aacb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Mar 2026 12:04:10 +0000 Subject: [PATCH 08/21] Newsfile --- changelog.d/19581.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/19581.misc diff --git a/changelog.d/19581.misc b/changelog.d/19581.misc new file mode 100644 index 00000000000..02856f34978 --- /dev/null +++ b/changelog.d/19581.misc @@ -0,0 +1 @@ +Remove `redacted_because` from internal unsigned. From d10a6b3e535f540e66e1bec22ec746bf9abeba5e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Mar 2026 10:18:27 +0000 Subject: [PATCH 09/21] Remove redundant unsigned.redacted_because --- tests/replication/storage/test_events.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/replication/storage/test_events.py b/tests/replication/storage/test_events.py index 28bfb8b8ea4..b0e3cc8db4a 100644 --- a/tests/replication/storage/test_events.py +++ b/tests/replication/storage/test_events.py @@ -102,7 +102,6 @@ def test_redactions(self) -> None: msg_dict = msg.get_dict() msg_dict["content"] = {} msg_dict["unsigned"]["redacted_by"] = redaction.event_id - msg_dict["unsigned"]["redacted_because"] = redaction redacted = make_event_from_dict( msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict() ) @@ -126,7 +125,6 @@ def test_backfilled_redactions(self) -> None: msg_dict = msg.get_dict() msg_dict["content"] = {} msg_dict["unsigned"]["redacted_by"] = redaction.event_id - msg_dict["unsigned"]["redacted_because"] = redaction redacted = make_event_from_dict( msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict() ) From 1d694c53706b96d69f7a0ffe710ef6a20b534302 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Mar 2026 10:23:20 +0000 Subject: [PATCH 10/21] Move 'redacted_by' to internal metadata --- rust/src/events/internal_metadata.rs | 6 ++++++ synapse/events/utils.py | 9 ++++++--- synapse/storage/databases/main/events_worker.py | 4 ++-- synapse/synapse_rust/events.pyi | 2 ++ tests/replication/storage/test_events.py | 4 ++-- tests/rest/client/test_rooms.py | 4 ++-- tests/storage/test_redaction.py | 14 +++++++------- 7 files changed, 27 insertions(+), 16 deletions(-) diff --git a/rust/src/events/internal_metadata.rs b/rust/src/events/internal_metadata.rs index 595f9cf7eb7..a53c1e771b6 100644 --- a/rust/src/events/internal_metadata.rs +++ b/rust/src/events/internal_metadata.rs @@ -261,6 +261,11 @@ pub struct EventInternalMetadata { #[pyo3(get, set)] instance_name: Option, + /// The event ID of the redaction event, if this event has been redacted. + /// This is set dynamically at load time and is not persisted to the database. + #[pyo3(get, set)] + redacted_by: Option, + /// whether this event is an outlier (ie, whether we have the state at that /// point in the DAG) #[pyo3(get, set)] @@ -289,6 +294,7 @@ impl EventInternalMetadata { data, stream_ordering: None, instance_name: None, + redacted_by: None, outlier: false, }) } diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 759e21d89f8..3919e752ac5 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -88,6 +88,7 @@ def prune_event(event: EventBase) -> EventBase: ) pruned_event.internal_metadata.instance_name = event.internal_metadata.instance_name pruned_event.internal_metadata.outlier = event.internal_metadata.outlier + pruned_event.internal_metadata.redacted_by = event.internal_metadata.redacted_by # Mark the event as redacted pruned_event.internal_metadata.redacted = True @@ -123,6 +124,7 @@ def clone_event(event: EventBase) -> EventBase: ) new_event.internal_metadata.instance_name = event.internal_metadata.instance_name new_event.internal_metadata.outlier = event.internal_metadata.outlier + new_event.internal_metadata.redacted_by = event.internal_metadata.redacted_by return new_event @@ -617,8 +619,9 @@ async def serialize_event( # If the event was redacted, fetch the redaction event from the database # and include it in the serialized event's unsigned section. - redacted_by: str | None = event.unsigned.get("redacted_by") + redacted_by: str | None = event.internal_metadata.redacted_by if redacted_by is not None: + serialized_event.setdefault("unsigned", {})["redacted_by"] = redacted_by if redaction_map is not None: redaction_event: EventBase | None = redaction_map.get(redacted_by) else: @@ -769,9 +772,9 @@ async def serialize_events( # Batch-fetch all redaction events in one go rather than one per event. redaction_ids = { - e.unsigned["redacted_by"] + e.internal_metadata.redacted_by for e in events - if isinstance(e, EventBase) and "redacted_by" in e.unsigned + if isinstance(e, EventBase) and e.internal_metadata.redacted_by is not None } redaction_map = ( await self._store.get_events(redaction_ids) if redaction_ids else {} diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 3041c3797c6..4d68f7bea56 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1719,7 +1719,7 @@ def _maybe_redact_event_row( for redaction_id in confirmed_redactions: redacted_event = prune_event(original_ev) - redacted_event.unsigned["redacted_by"] = redaction_id + redacted_event.internal_metadata.redacted_by = redaction_id return redacted_event for redaction_id in unconfirmed_redactions: @@ -1763,7 +1763,7 @@ def _maybe_redact_event_row( # we found a good redaction event. Redact! redacted_event = prune_event(original_ev) - redacted_event.unsigned["redacted_by"] = redaction_id + redacted_event.internal_metadata.redacted_by = redaction_id return redacted_event diff --git a/synapse/synapse_rust/events.pyi b/synapse/synapse_rust/events.pyi index 185f29694b0..e6753bbbad2 100644 --- a/synapse/synapse_rust/events.pyi +++ b/synapse/synapse_rust/events.pyi @@ -21,6 +21,8 @@ class EventInternalMetadata: """the stream ordering of this event. None, until it has been persisted.""" instance_name: str | None """the instance name of the server that persisted this event. None, until it has been persisted.""" + redacted_by: str | None + """the event ID of the redaction event, if this event has been redacted. Set dynamically at load time, not persisted.""" outlier: bool """whether this event is an outlier (ie, whether we have the state at that diff --git a/tests/replication/storage/test_events.py b/tests/replication/storage/test_events.py index b0e3cc8db4a..b7b94482ef0 100644 --- a/tests/replication/storage/test_events.py +++ b/tests/replication/storage/test_events.py @@ -101,10 +101,10 @@ def test_redactions(self) -> None: msg_dict = msg.get_dict() msg_dict["content"] = {} - msg_dict["unsigned"]["redacted_by"] = redaction.event_id redacted = make_event_from_dict( msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict() ) + redacted.internal_metadata.redacted_by = redaction.event_id self.check( "get_event", [msg.event_id], redacted, asserter=self.assertEventsEqual ) @@ -124,10 +124,10 @@ def test_backfilled_redactions(self) -> None: msg_dict = msg.get_dict() msg_dict["content"] = {} - msg_dict["unsigned"]["redacted_by"] = redaction.event_id redacted = make_event_from_dict( msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict() ) + redacted.internal_metadata.redacted_by = redaction.event_id self.check( "get_event", [msg.event_id], redacted, asserter=self.assertEventsEqual ) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 498022fcf3e..221121007da 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -4746,7 +4746,7 @@ def test_banning_remote_member_with_flag_redacts_their_events(self) -> None: original = self.get_success(self.store.get_event(message.event_id)) if not original: self.fail("Expected to find remote message in DB") - redacted_by = original.unsigned.get("redacted_by") + redacted_by = original.internal_metadata.redacted_by if not redacted_by: self.fail("Did not find redacted_by field") self.assertEqual(redacted_by, ban_event_id) @@ -5111,7 +5111,7 @@ def test_kicking_remote_member_with_flag_redacts_their_events(self) -> None: original = self.get_success(self.store.get_event(message.event_id)) if not original: self.fail("Expected to find remote message in DB") - self.assertEqual(original.unsigned["redacted_by"], ban_event_id) + self.assertEqual(original.internal_metadata.redacted_by, ban_event_id) def test_rejoining_kicked_remote_user_stops_redaction_action(self) -> None: bad_user = "@remote_bad_user:" + self.OTHER_SERVER_NAME diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 1dc1aeb0434..c82ccf1600b 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -158,7 +158,7 @@ def test_redact(self) -> None: event, ) - self.assertNotIn("redacted_by", event.unsigned) + self.assertIsNone(event.internal_metadata.redacted_by) # Redact event reason = "Because I said so" @@ -168,7 +168,7 @@ def test_redact(self) -> None: self.assertEqual(msg_event.event_id, event.event_id) - self.assertIn("redacted_by", event.unsigned) + self.assertIsNotNone(event.internal_metadata.redacted_by) self.assertObjectHasAttributes( { @@ -197,7 +197,7 @@ def test_redact_join(self) -> None: event, ) - self.assertNotIn("redacted_by", event.unsigned) + self.assertIsNone(event.internal_metadata.redacted_by) # Redact event reason = "Because I said so" @@ -207,7 +207,7 @@ def test_redact_join(self) -> None: event = self.get_success(self.store.get_event(msg_event.event_id)) - self.assertIn("redacted_by", event.unsigned) + self.assertIsNotNone(event.internal_metadata.redacted_by) self.assertObjectHasAttributes( { @@ -313,7 +313,7 @@ def internal_metadata(self) -> EventInternalMetadata: fetched = self.get_success(self.store.get_event(redaction_event_id1)) # it should have been redacted - self.assertEqual(fetched.unsigned["redacted_by"], redaction_event_id2) + self.assertEqual(fetched.internal_metadata.redacted_by, redaction_event_id2) def test_redact_censor(self) -> None: """Test that a redacted event gets censored in the DB after a month""" @@ -334,7 +334,7 @@ def test_redact_censor(self) -> None: event, ) - self.assertNotIn("redacted_by", event.unsigned) + self.assertIsNone(event.internal_metadata.redacted_by) # Redact event reason = "Because I said so" @@ -342,7 +342,7 @@ def test_redact_censor(self) -> None: event = self.get_success(self.store.get_event(msg_event.event_id)) - self.assertIn("redacted_by", event.unsigned) + self.assertIsNotNone(event.internal_metadata.redacted_by) self.assertObjectHasAttributes( { From 52a79591d5e189dfc4dec90fbbaf2cf7f0d4aa4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 20 Mar 2026 09:10:14 +0000 Subject: [PATCH 11/21] Fix licensing header --- .../schema/main/delta/94/01_redactions_recheck.sql | 14 ++++++++++++++ .../delta/94/02_redactions_recheck_bg_update.sql | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql b/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql index 12c89b72790..e99aa52f1f9 100644 --- a/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql +++ b/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql @@ -1 +1,15 @@ +-- +-- This file is licensed under the Affero General Public License (AGPL) version 3. +-- +-- Copyright (C) 2026 Element Creations, Ltd +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- See the GNU Affero General Public License for more details: +-- . + + ALTER TABLE redactions ADD COLUMN recheck boolean NOT NULL DEFAULT true; diff --git a/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql b/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql index 2ddbbb41402..6367afd318e 100644 --- a/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql +++ b/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql @@ -1,7 +1,7 @@ -- -- This file is licensed under the Affero General Public License (AGPL) version 3. -- --- Copyright (C) 2026 New Vector, Ltd +-- Copyright (C) 2026 Element Creations, Ltd -- -- This program is free software: you can redistribute it and/or modify -- it under the terms of the GNU Affero General Public License as From c047332dcae0a92e7617d85efd6e162bcf5bf2bd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2026 13:45:59 +0000 Subject: [PATCH 12/21] Only update recheck if it is false --- synapse/storage/databases/main/events_bg_updates.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 44eb1a33842..934cd157caf 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -780,7 +780,10 @@ def _txn(txn: LoggingTransaction) -> int: recheck = bool(internal_metadata.get("recheck_redaction", False)) else: recheck = False - updates.append((event_id, recheck)) + if not recheck: + # Column defaults to true, so we only need to update rows + # where recheck should be false. + updates.append((event_id, recheck)) self.db_pool.simple_update_many_txn( txn, From 14f5b1bc474756978b5660b9d0639bcf0f4c28bd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2026 13:55:07 +0000 Subject: [PATCH 13/21] Update appservice comment --- synapse/appservice/api.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index b535ab76d02..d4e9d50b966 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -549,8 +549,11 @@ async def _serialize( time_now, config=SerializeEventConfig( as_client_event=True, - # If this is an invite or a knock membership event, and we're interested - # in this user, then include any stripped state alongside the event. + # If this is an invite or a knock membership event, then include + # any stripped state alongside the event. We could narrow this + # down to only users the appservice is "interested in", however + # it's not worth the complexity of doing so, and it's simpler to + # just include it for all users. include_stripped_room_state=True, # Appservices are considered 'trusted' by the admin and should have # applicable metadata on their events. From 1e38331d39cb6713806690c43dbd2b767471891b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2026 13:55:53 +0000 Subject: [PATCH 14/21] Update synapse/storage/databases/main/events_worker.py Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- synapse/storage/databases/main/events_worker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 4d68f7bea56..d12dd4f2fb0 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1765,6 +1765,8 @@ def _maybe_redact_event_row( redacted_event = prune_event(original_ev) redacted_event.internal_metadata.redacted_by = redaction_id + # Note: The `redacted_because` field will later be populated by + # `EventClientSerializer.serialize_event`. return redacted_event # no valid redaction found for this event From bf5812d96484fcbba94104d82d47d94751370520 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2026 14:04:31 +0000 Subject: [PATCH 15/21] Filter event fields after --- synapse/events/utils.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 3919e752ac5..55ed60013c8 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -436,6 +436,13 @@ class SerializeEventConfig: # whether an event was soft failed by the server. include_admin_metadata: bool = False + @only_event_fields.validator + def _validate_only_event_fields( + self, attribute: attr.Attribute, value: list[str] + ) -> None: + if not isinstance(value, list) or not all(isinstance(f, str) for f in value): + raise TypeError("only_event_fields must be a list of strings") + _DEFAULT_SERIALIZE_EVENT_CONFIG = SerializeEventConfig() @@ -554,14 +561,6 @@ def _serialize_event( if e.internal_metadata.policy_server_spammy: d["unsigned"]["io.element.synapse.policy_server_spammy"] = True - only_event_fields = config.only_event_fields - if only_event_fields: - if not isinstance(only_event_fields, list) or not all( - isinstance(f, str) for f in only_event_fields - ): - raise TypeError("only_event_fields must be a list of strings") - d = only_fields(d, only_event_fields) - return d @@ -606,6 +605,11 @@ async def serialize_event( if not isinstance(event, EventBase): return event + if not isinstance(config.only_event_fields, list) or not all( + isinstance(f, str) for f in config.only_event_fields + ): + raise TypeError("only_event_fields must be a list of strings") + # Force-enable server admin metadata because the only time an event with # relevant metadata will be when the admin requested it via their admin # client config account data. Also, it's "just" some `unsigned` fields, so @@ -666,6 +670,11 @@ async def serialize_event( serialized_event, ) + # Only include fields that are the client has requested. + only_event_fields = config.only_event_fields + if only_event_fields: + serialized_event = only_fields(serialized_event, only_event_fields) + return serialized_event async def _inject_bundled_aggregations( From 199c049ff3d011fb39a711754a619da24f78e836 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2026 14:08:27 +0000 Subject: [PATCH 16/21] Fix validation --- synapse/events/utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 55ed60013c8..24e3fce3ecb 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -425,7 +425,7 @@ class SerializeEventConfig: # the transaction_id and delay_id in the unsigned section of the event. requester: Requester | None = None # List of event fields to include. If empty, all fields will be returned. - only_event_fields: list[str] | None = None + only_event_fields: list[str] | None = attr.field(default=None) # Some events can have stripped room state stored in the `unsigned` field. # This is required for invite and knock functionality. If this option is # False, that state will be removed from the event before it is returned. @@ -438,8 +438,11 @@ class SerializeEventConfig: @only_event_fields.validator def _validate_only_event_fields( - self, attribute: attr.Attribute, value: list[str] + self, attribute: attr.Attribute, value: Any ) -> None: + if value is None: + return + if not isinstance(value, list) or not all(isinstance(f, str) for f in value): raise TypeError("only_event_fields must be a list of strings") From b8915ef1f8c77c27ab70a02cb6b30f131b9280fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2026 14:27:49 +0000 Subject: [PATCH 17/21] Add unit test --- synapse/events/utils.py | 7 +---- tests/events/test_utils.py | 62 ++++++++++++++++++++++++++++++++------ 2 files changed, 53 insertions(+), 16 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 24e3fce3ecb..0e3c97f1ee1 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -588,7 +588,7 @@ async def serialize_event( *, config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG, bundle_aggregations: dict[str, "BundledAggregations"] | None = None, - redaction_map: dict[str, "EventBase"] | None = None, + redaction_map: Mapping[str, "EventBase"] | None = None, ) -> JsonDict: """Serializes a single event. @@ -608,11 +608,6 @@ async def serialize_event( if not isinstance(event, EventBase): return event - if not isinstance(config.only_event_fields, list) or not all( - isinstance(f, str) for f in config.only_event_fields - ): - raise TypeError("only_event_fields must be a list of strings") - # Force-enable server admin metadata because the only time an event with # relevant metadata will be when the admin requested it via their admin # client config account data. Also, it's "just" some `unsigned` fields, so diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py index 23eab6c50bf..e090e64e0e9 100644 --- a/tests/events/test_utils.py +++ b/tests/events/test_utils.py @@ -657,6 +657,7 @@ def serialize( ev: EventBase, fields: list[str] | None, include_admin_metadata: bool = False, + redaction_map: Mapping[str, EventBase] | None = None, ) -> JsonDict: return self.get_success( self._event_serializer.serialize_event( @@ -666,6 +667,7 @@ def serialize( only_event_fields=fields, include_admin_metadata=include_admin_metadata, ), + redaction_map=redaction_map, ) ) @@ -779,16 +781,10 @@ def test_event_fields_all_fields_if_empty(self) -> None: ) def test_event_fields_fail_if_fields_not_str(self) -> None: - self.get_failure( - self._event_serializer.serialize_event( - MockEvent(room_id="!foo:bar", content={"foo": "bar"}), - 1479807801915, - config=SerializeEventConfig( - only_event_fields=["room_id", 4], # type: ignore[list-item] - ), - ), - TypeError, - ) + with self.assertRaises(TypeError): + SerializeEventConfig( + only_event_fields=["room_id", 4], # type: ignore[list-item] + ) def test_default_serialize_config_excludes_admin_metadata(self) -> None: # We just really don't want this to be set to True accidentally @@ -888,6 +884,52 @@ def test_make_serialize_config_for_admin_retains_other_fields(self) -> None: ) self.assertTrue(admin_config.include_admin_metadata) + def test_redacted_because_is_filtered_out(self) -> None: + """If an event has a unsigned has a `redacted_by` field, then the + `redacted_because` should be filtered out if not specified in + `only_event_fields`.""" + + redaction_id = "$redaction_event_id" + + event = MockEvent( + type="foo", + event_id="test", + room_id="!foo:bar", + content={"foo": "bar"}, + ) + event.internal_metadata.redacted_by = redaction_id + + redaction_event = MockEvent( + type="m.room.redaction", + event_id=redaction_id, + content={"redacts": "test"}, + ) + + self.assertEqual( + self.serialize( + event, + ["content.foo"], + redaction_map={redaction_id: redaction_event}, + ), + { + "content": {"foo": "bar"}, + }, + ) + + self.assertEqual( + self.serialize( + event, + ["content.foo", "unsigned.redacted_because"], + redaction_map={redaction_id: redaction_event}, + ), + { + "content": {"foo": "bar"}, + "unsigned": { + "redacted_because": self.serialize(redaction_event, fields=None), + }, + }, + ) + class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase): def setUp(self) -> None: From 74a3a79d8c274d943e5ea587c73ed48aae524b5e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2026 14:28:26 +0000 Subject: [PATCH 18/21] Update synapse/storage/databases/main/events_worker.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- synapse/storage/databases/main/events_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index d12dd4f2fb0..cc79b8042bd 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1373,7 +1373,7 @@ async def _fetch_event_ids_and_get_outstanding_redactions( if row: fetched_events[event_id] = row - # If this event only has uncofirmend redactions we fetch + # If this event only has unconfirmed redactions we fetch # them from the DB so that we check them to see if any are # valid. if not row.confirmed_redactions: From 0bddc9aa416ab62cbf135c11c6bc2d5f3f3fcec6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2026 14:43:01 +0000 Subject: [PATCH 19/21] Fix old tests --- synapse/events/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 0e3c97f1ee1..284c68c57c5 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -425,7 +425,7 @@ class SerializeEventConfig: # the transaction_id and delay_id in the unsigned section of the event. requester: Requester | None = None # List of event fields to include. If empty, all fields will be returned. - only_event_fields: list[str] | None = attr.field(default=None) + only_event_fields: list[str] | None = attr.ib(default=None) # Some events can have stripped room state stored in the `unsigned` field. # This is required for invite and knock functionality. If this option is # False, that state will be removed from the event before it is returned. From ecbe4eef1f1f8c23a717d532dd0cf4d427032e5d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2026 16:23:40 +0000 Subject: [PATCH 20/21] Always include bundled aggregations --- synapse/events/utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 284c68c57c5..8b313631dba 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -657,6 +657,13 @@ async def serialize_event( new_unsigned.update(serialized_event["unsigned"]) serialized_event["unsigned"] = new_unsigned + # Only include fields that are the client has requested. + # + # Note: we always return bundled aggregations, though it is unclear why. + only_event_fields = config.only_event_fields + if only_event_fields: + serialized_event = only_fields(serialized_event, only_event_fields) + # Check if there are any bundled aggregations to include with the event. if bundle_aggregations: if event.event_id in bundle_aggregations: @@ -668,11 +675,6 @@ async def serialize_event( serialized_event, ) - # Only include fields that are the client has requested. - only_event_fields = config.only_event_fields - if only_event_fields: - serialized_event = only_fields(serialized_event, only_event_fields) - return serialized_event async def _inject_bundled_aggregations( From 441d6f743a8223e24c5abbfea0683b8ec5d0746a Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 25 Mar 2026 18:15:29 +0000 Subject: [PATCH 21/21] Fix typos Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- synapse/events/utils.py | 2 +- tests/events/test_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 8b313631dba..76ebac8b17c 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -657,7 +657,7 @@ async def serialize_event( new_unsigned.update(serialized_event["unsigned"]) serialized_event["unsigned"] = new_unsigned - # Only include fields that are the client has requested. + # Only include fields that the client has requested. # # Note: we always return bundled aggregations, though it is unclear why. only_event_fields = config.only_event_fields diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py index e090e64e0e9..9e06f773692 100644 --- a/tests/events/test_utils.py +++ b/tests/events/test_utils.py @@ -885,7 +885,7 @@ def test_make_serialize_config_for_admin_retains_other_fields(self) -> None: self.assertTrue(admin_config.include_admin_metadata) def test_redacted_because_is_filtered_out(self) -> None: - """If an event has a unsigned has a `redacted_by` field, then the + """If an event's unsigned dict has a `redacted_by` field, then the `redacted_because` should be filtered out if not specified in `only_event_fields`."""