From 65acb3cc2e76c238f5aee38d22626d92171a2f7c Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 18 Jun 2026 14:22:30 -0400 Subject: [PATCH 1/6] Fixed #37101 -- Used netstring delimiter between vary on headers for cached pages. This prevents collisions between header values that concatenate to the same thing. --- django/utils/cache.py | 7 +++++-- tests/cache/tests.py | 23 +++++++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/django/utils/cache.py b/django/utils/cache.py index e797e9ece5c9..22c72c74df3f 100644 --- a/django/utils/cache.py +++ b/django/utils/cache.py @@ -356,8 +356,11 @@ def _generate_cache_key(request, method, headerlist, key_prefix): ctx = md5(usedforsecurity=False) for header in headerlist: value = request.META.get(header) - if value is not None: - ctx.update(value.encode()) + if value is None: + value = "" + data = value.encode() + # Use the netstring delimiter (with trailing comma). + ctx.update(b"%d:%s," % (len(data), data)) url = md5(request.build_absolute_uri().encode("ascii"), usedforsecurity=False) cache_key = "views.decorators.cache.cache_page.%s.%s.%s.%s" % ( key_prefix, diff --git a/tests/cache/tests.py b/tests/cache/tests.py index f65603002c4b..4e718f4b938f 100644 --- a/tests/cache/tests.py +++ b/tests/cache/tests.py @@ -2387,7 +2387,7 @@ def test_learn_cache_key(self): self.assertEqual( get_cache_key(request), "views.decorators.cache.cache_page.settingsprefix.GET." - "18a03f9c9649f7d684af5db3524f5c99.d41d8cd98f00b204e9800998ecf8427e", + "18a03f9c9649f7d684af5db3524f5c99.3b59035bd3b34e30981dc990dd93acbb", ) def test_learn_cache_key_strips_whitespace(self): @@ -2414,6 +2414,25 @@ def test_learn_cache_key_strips_whitespace(self): self.assertIsNotNone(key_b) self.assertNotEqual(key_a, key_b) + def test_learn_cache_key_no_header_collision(self): + tests = [ + ({"X-Region": "EU", "X-Tenant": ""}, {"X-Region": "", "X-Tenant": "EU"}), + ({"X-Region": "EU"}, {"X-Tenant": "EU"}), + ] + for headers_a, headers_b in tests: + with self.subTest(headers=(headers_a, headers_b)): + request_a = self.factory.get(self.path, headers=headers_a) + request_b = self.factory.get(self.path, headers=headers_b) + response = HttpResponse() + response.headers["Vary"] = "X-Region, X-Tenant" + learn_cache_key(request_a, response) + # Potentially colliding values result in different cache keys. + key_a = get_cache_key(request_a) + key_b = get_cache_key(request_b) + self.assertIsNotNone(key_a) + self.assertIsNotNone(key_b) + self.assertNotEqual(key_a, key_b) + def test_patch_cache_control(self): tests = ( # Initial Cache-Control, kwargs to patch_cache_control, expected @@ -2615,7 +2634,7 @@ def test_cache_key_i18n_translation_accept_language(self): request = self.factory.get(self.path) request.META["HTTP_ACCEPT_ENCODING"] = "gzip;q=1.0, identity; q=0.5, *;q=0" response = HttpResponse() - response.headers["Vary"] = "accept-encoding" + response.headers["Vary"] = "cookie, accept-encoding" key = learn_cache_key(request, response) self.assertIn( lang, From 8acd000725838f73c5e6781114f11866f6674c46 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 18 Jun 2026 14:22:54 -0400 Subject: [PATCH 2/6] Fixed #37174 -- Used netstring delimiter between vary on arguments for cached template fragments. This algorithm used a delimiter, but because it didn't use a length, argument values containing the delimiter could still cause collisions. --- django/core/cache/utils.py | 5 +++-- tests/cache/tests.py | 17 +++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/django/core/cache/utils.py b/django/core/cache/utils.py index 87f0f9cb0905..9c18dc8fb73d 100644 --- a/django/core/cache/utils.py +++ b/django/core/cache/utils.py @@ -7,6 +7,7 @@ def make_template_fragment_key(fragment_name, vary_on=None): hasher = md5(usedforsecurity=False) if vary_on is not None: for arg in vary_on: - hasher.update(str(arg).encode()) - hasher.update(b":") + data = str(arg).encode() + # Use the netstring delimiter (with trailing comma). + hasher.update(b"%d:%s," % (len(data), data)) return TEMPLATE_FRAGMENT_KEY_TEMPLATE % (fragment_name, hasher.hexdigest()) diff --git a/tests/cache/tests.py b/tests/cache/tests.py index 4e718f4b938f..45e47854ab48 100644 --- a/tests/cache/tests.py +++ b/tests/cache/tests.py @@ -3361,27 +3361,32 @@ def test_without_vary_on(self): def test_with_one_vary_on(self): key = make_template_fragment_key("foo", ["abc"]) - self.assertEqual(key, "template.cache.foo.493e283d571a73056196f1a68efd0f66") + self.assertEqual(key, "template.cache.foo.a6360ec2c58ecc4b23fd5bd00216fccd") def test_with_many_vary_on(self): key = make_template_fragment_key("bar", ["abc", "def"]) - self.assertEqual(key, "template.cache.bar.17c1a507a0cb58384f4c639067a93520") + self.assertEqual(key, "template.cache.bar.250310c146db454966b64f5fc265a540") def test_proper_escaping(self): key = make_template_fragment_key("spam", ["abc:def%"]) - self.assertEqual(key, "template.cache.spam.06c8ae8e8c430b69fb0a6443504153dc") + self.assertEqual(key, "template.cache.spam.bf6c24ef2576004284e0522c15314d8c") def test_with_ints_vary_on(self): key = make_template_fragment_key("foo", [1, 2, 3, 4, 5]) - self.assertEqual(key, "template.cache.foo.7ae8fd2e0d25d651c683bdeebdb29461") + self.assertEqual(key, "template.cache.foo.087c006c1b99e0d147f624b4921f8a13") def test_with_unicode_vary_on(self): key = make_template_fragment_key("foo", ["42ยบ", "๐Ÿ˜€"]) - self.assertEqual(key, "template.cache.foo.7ced1c94e543668590ba39b3c08b0237") + self.assertEqual(key, "template.cache.foo.ab66482052ab2084b9d25bdd04bc9b10") def test_long_vary_on(self): key = make_template_fragment_key("foo", ["x" * 10000]) - self.assertEqual(key, "template.cache.foo.3670b349b5124aa56bdb50678b02b23a") + self.assertEqual(key, "template.cache.foo.abff8a6702abde497feae7f61de2ef1e") + + def test_collision_vary_on(self): + key1 = make_template_fragment_key("foo", ["a:b", "c"]) + key2 = make_template_fragment_key("foo", ["a", "b:c"]) + self.assertNotEqual(key1, key2) class CacheHandlerTest(SimpleTestCase): From 02f94d2a899b6573f90dee5417afb8a763ee7f25 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 18 Jun 2026 14:24:42 -0400 Subject: [PATCH 3/6] Refs #37101, #37174 -- Added release note for cache misses when varying on arguments. Thanks Natalia Bidart for the review. --- docs/releases/6.1.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/releases/6.1.txt b/docs/releases/6.1.txt index 43c770e5c169..cac33db5ca4a 100644 --- a/docs/releases/6.1.txt +++ b/docs/releases/6.1.txt @@ -535,6 +535,16 @@ Miscellaneous * The default value of the transitional setting :setting:`SIGNED_COOKIE_LEGACY_SALT_FALLBACK` is now ``False``. +* In cases where cached pages or template fragments varied on arguments, e.g. + :ref:`vary headers ` for + :func:`~django.views.decorators.cache.cache_page` and + :class:`~django.middleware.cache.UpdateCacheMiddleware`, or the ``vary_on`` + arguments to the :ttag:`cache` template tag (generated by + :func:`~django.core.cache.utils.make_template_fragment_key`), the cache keys + are different from the keys generated by older versions of Django. After + upgrading to Django 6.1, the first request to any previously cached page or + template fragment that varies on additional information will be a cache miss. + * :class:`~django.contrib.contenttypes.fields.GenericForeignKey` now uses a separate descriptor class: the private ``GenericForeignKeyDescriptor``. From 0383d12ea3451a75be184bd1783f11031903e7ee Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 23 Jun 2026 13:04:38 -0400 Subject: [PATCH 4/6] Refs #37174 -- Doc'd that args to the {% cache %} tag are stringified. --- docs/topics/cache.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/topics/cache.txt b/docs/topics/cache.txt index d9f3009b0dc8..2449120eba2d 100644 --- a/docs/topics/cache.txt +++ b/docs/topics/cache.txt @@ -796,7 +796,8 @@ some dynamic data that appears inside the fragment. For example, you might want a separate cached copy of the sidebar used in the previous example for every user of your site. Do this by passing one or more additional arguments, which may be variables with or without filters, to the ``{% cache %}`` template tag -to uniquely identify the cache fragment: +that will uniquely identify the cache fragment when those values are converted +to strings: .. code-block:: html+django @@ -805,6 +806,15 @@ to uniquely identify the cache fragment: .. sidebar for logged-in user .. {% endcache %} +.. admonition:: Argument values converted to strings + + Because argument values are converted to strings when creating the cache + key, consider uniqueness carefully to avoid serving one fragment for two + distinct values that share a string representation, such as ``None`` and + ``"None"``. Avoid supplying a model instance, since its ``__str__()`` is + not guaranteed to be unique. Conveniently, in the example above, + ``request.user.username`` is unique for the default ``User`` model. + If :setting:`USE_I18N` is set to ``True`` the per-site middleware cache will :ref:`respect the active language`. For the ``cache`` template tag you could use one of the From 9dce456105e8e182dde6db4774ced79b222a68f9 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 24 Jun 2026 10:44:24 -0400 Subject: [PATCH 5/6] Refs #35870 -- Mentioned transitional setting in BLANK_CHOICE_DASH release note. --- docs/releases/6.1.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/releases/6.1.txt b/docs/releases/6.1.txt index cac33db5ca4a..523ef542c11a 100644 --- a/docs/releases/6.1.txt +++ b/docs/releases/6.1.txt @@ -657,8 +657,9 @@ Miscellaneous the top-level value. :lookup:`Key and index lookups ` are unaffected by this deprecation. -* The ``django.db.models.fields.BLANK_CHOICE_DASH`` constant is deprecated - in favor of the new constant ``django.db.models.fields.BLANK_CHOICE_LABEL``. +* The undocumented ``django.db.models.fields.BLANK_CHOICE_DASH`` constant is + deprecated. See the :setting:`USE_BLANK_CHOICE_DASH` transitional setting for + migration advice. * The :setting:`USE_BLANK_CHOICE_DASH` transitional setting is deprecated. From 67c407585ccdc01b76d78e33c082f23d46346747 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 23 Jun 2026 12:13:40 -0400 Subject: [PATCH 6/6] Fixed #37183 -- Prevented writing control characters into XML attributes. `SimplerXMLGenerator` already rejected control characters (unsupported in XML 1.0) in element content, but attribute values were written unchecked, so unsanitized input could still produce an unparseable XML document. The existing check now runs over attribute values in `startElement()`. This covers both syndication feeds, where item data may flow into attributes such as `Atom` category terms, and the XML serializer, which wraps the error to identify the offending object. --- django/core/serializers/xml_serializer.py | 8 +++++++- django/utils/xmlutils.py | 17 +++++++++++------ tests/serializers/test_xml.py | 7 +++++++ tests/syndication_tests/feeds.py | 6 ++++++ tests/syndication_tests/tests.py | 6 ++++++ tests/syndication_tests/urls.py | 1 + 6 files changed, 38 insertions(+), 7 deletions(-) diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py index 366e077c7847..b3725e97486d 100644 --- a/django/core/serializers/xml_serializer.py +++ b/django/core/serializers/xml_serializer.py @@ -63,7 +63,13 @@ def start_object(self, obj): if obj_pk is not None: attrs["pk"] = obj._meta.pk.value_to_string(obj) - self.xml.startElement("object", attrs) + try: + self.xml.startElement("object", attrs) + except UnserializableContentError: + raise ValueError( + "%s (pk:%s) contains unserializable characters" + % (obj.__class__.__name__, obj.pk) + ) def end_object(self, obj): """ diff --git a/django/utils/xmlutils.py b/django/utils/xmlutils.py index 5c65e580ed19..bc35e0609d80 100644 --- a/django/utils/xmlutils.py +++ b/django/utils/xmlutils.py @@ -2,12 +2,16 @@ Utilities for XML generation/parsing. """ -import re from xml.sax.saxutils import XMLGenerator +from django.utils.regex_helper import _lazy_re_compile + +_xml_control_chars_re = _lazy_re_compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]") + class UnserializableContentError(ValueError): - pass + def __init__(self, msg="Control characters are not supported in XML 1.0"): + super().__init__(msg) class SimplerXMLGenerator(XMLGenerator): @@ -21,15 +25,16 @@ def addQuickElement(self, name, contents=None, attrs=None): self.endElement(name) def characters(self, content): - if content and re.search(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", content): + if content and _xml_control_chars_re.search(content): # Fail loudly when content has control chars (unsupported in XML # 1.0) See https://www.w3.org/International/questions/qa-controls - raise UnserializableContentError( - "Control characters are not supported in XML 1.0" - ) + raise UnserializableContentError XMLGenerator.characters(self, content) def startElement(self, name, attrs): + for value in attrs.values(): + if isinstance(value, str) and _xml_control_chars_re.search(value): + raise UnserializableContentError # Sort attrs for a deterministic output. sorted_attrs = dict(sorted(attrs.items())) if attrs else attrs super().startElement(name, sorted_attrs) diff --git a/tests/serializers/test_xml.py b/tests/serializers/test_xml.py index 884746dfac25..6235173c9952 100644 --- a/tests/serializers/test_xml.py +++ b/tests/serializers/test_xml.py @@ -4,6 +4,7 @@ from django.core.serializers.xml_serializer import DTDForbidden from django.test import TestCase, TransactionTestCase +from .models import Actor from .tests import SerializersTestBase, SerializersTransactionTestBase @@ -79,6 +80,12 @@ def test_control_char_failure(self): serializers.serialize(self.serializer_name, [self.a1]), ) + def test_control_char_failure_attribute(self): + actor = Actor.objects.create(pk="\u0001") + msg = "Actor (pk:%s) contains unserializable characters" % actor.pk + with self.assertRaisesMessage(ValueError, msg): + serializers.serialize(self.serializer_name, [actor]) + def test_no_dtd(self): """ The XML deserializer shouldn't allow a DTD. diff --git a/tests/syndication_tests/feeds.py b/tests/syndication_tests/feeds.py index 56e540c63385..5b884755b5ba 100644 --- a/tests/syndication_tests/feeds.py +++ b/tests/syndication_tests/feeds.py @@ -321,3 +321,9 @@ def item_enclosures(self, item): feedgenerator.Enclosure("http://example.com/hello.png", "0", "image/png"), feedgenerator.Enclosure("http://example.com/goodbye.png", "0", "image/png"), ] + + +class TestInvalidAtomFeed(TestAtomFeed): + def item_categories(self): + # This control character is not serializable. + return ["\x00"] diff --git a/tests/syndication_tests/tests.py b/tests/syndication_tests/tests.py index 739139db632a..859c61dbf1d3 100644 --- a/tests/syndication_tests/tests.py +++ b/tests/syndication_tests/tests.py @@ -17,6 +17,7 @@ rfc2822_date, rfc3339_date, ) +from django.utils.xmlutils import UnserializableContentError from .models import Article, Entry @@ -526,6 +527,11 @@ def test_title_escaping(self): title = item.getElementsByTagName("title")[0] self.assertEqual(title.firstChild.wholeText, "A & B < C > D") + def test_no_control_chars_in_attributes(self): + msg = "Control characters are not supported in XML 1.0" + with self.assertRaisesMessage(UnserializableContentError, msg): + self.client.get("/syndication/atom/invalid/") + def test_naive_datetime_conversion(self): """ Datetimes are correctly converted to the local time zone. diff --git a/tests/syndication_tests/urls.py b/tests/syndication_tests/urls.py index bb1d3d990df2..3a0d0ff07446 100644 --- a/tests/syndication_tests/urls.py +++ b/tests/syndication_tests/urls.py @@ -41,6 +41,7 @@ path("syndication/rss2/multiple-enclosure/", feeds.TestMultipleEnclosureRSSFeed()), path("syndication/atom/single-enclosure/", feeds.TestSingleEnclosureAtomFeed()), path("syndication/atom/multiple-enclosure/", feeds.TestMultipleEnclosureAtomFeed()), + path("syndication/atom/invalid/", feeds.TestInvalidAtomFeed()), path( "syndication/stylesheet.xsl", lambda request: None,