diff --git a/django/core/cache/utils.py b/django/core/cache/utils.py index 87f0f9cb0905..9c18dc8fb73d 100644 --- a/django/core/cache/utils.py +++ b/django/core/cache/utils.py @@ -7,6 +7,7 @@ def make_template_fragment_key(fragment_name, vary_on=None): hasher = md5(usedforsecurity=False) if vary_on is not None: for arg in vary_on: - hasher.update(str(arg).encode()) - hasher.update(b":") + data = str(arg).encode() + # Use the netstring delimiter (with trailing comma). + hasher.update(b"%d:%s," % (len(data), data)) return TEMPLATE_FRAGMENT_KEY_TEMPLATE % (fragment_name, hasher.hexdigest()) diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py index 366e077c7847..b3725e97486d 100644 --- a/django/core/serializers/xml_serializer.py +++ b/django/core/serializers/xml_serializer.py @@ -63,7 +63,13 @@ def start_object(self, obj): if obj_pk is not None: attrs["pk"] = obj._meta.pk.value_to_string(obj) - self.xml.startElement("object", attrs) + try: + self.xml.startElement("object", attrs) + except UnserializableContentError: + raise ValueError( + "%s (pk:%s) contains unserializable characters" + % (obj.__class__.__name__, obj.pk) + ) def end_object(self, obj): """ diff --git a/django/utils/cache.py b/django/utils/cache.py index e797e9ece5c9..22c72c74df3f 100644 --- a/django/utils/cache.py +++ b/django/utils/cache.py @@ -356,8 +356,11 @@ def _generate_cache_key(request, method, headerlist, key_prefix): ctx = md5(usedforsecurity=False) for header in headerlist: value = request.META.get(header) - if value is not None: - ctx.update(value.encode()) + if value is None: + value = "" + data = value.encode() + # Use the netstring delimiter (with trailing comma). + ctx.update(b"%d:%s," % (len(data), data)) url = md5(request.build_absolute_uri().encode("ascii"), usedforsecurity=False) cache_key = "views.decorators.cache.cache_page.%s.%s.%s.%s" % ( key_prefix, diff --git a/django/utils/xmlutils.py b/django/utils/xmlutils.py index 5c65e580ed19..bc35e0609d80 100644 --- a/django/utils/xmlutils.py +++ b/django/utils/xmlutils.py @@ -2,12 +2,16 @@ Utilities for XML generation/parsing. """ -import re from xml.sax.saxutils import XMLGenerator +from django.utils.regex_helper import _lazy_re_compile + +_xml_control_chars_re = _lazy_re_compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]") + class UnserializableContentError(ValueError): - pass + def __init__(self, msg="Control characters are not supported in XML 1.0"): + super().__init__(msg) class SimplerXMLGenerator(XMLGenerator): @@ -21,15 +25,16 @@ def addQuickElement(self, name, contents=None, attrs=None): self.endElement(name) def characters(self, content): - if content and re.search(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", content): + if content and _xml_control_chars_re.search(content): # Fail loudly when content has control chars (unsupported in XML # 1.0) See https://www.w3.org/International/questions/qa-controls - raise UnserializableContentError( - "Control characters are not supported in XML 1.0" - ) + raise UnserializableContentError XMLGenerator.characters(self, content) def startElement(self, name, attrs): + for value in attrs.values(): + if isinstance(value, str) and _xml_control_chars_re.search(value): + raise UnserializableContentError # Sort attrs for a deterministic output. sorted_attrs = dict(sorted(attrs.items())) if attrs else attrs super().startElement(name, sorted_attrs) diff --git a/docs/releases/6.1.txt b/docs/releases/6.1.txt index 43c770e5c169..523ef542c11a 100644 --- a/docs/releases/6.1.txt +++ b/docs/releases/6.1.txt @@ -535,6 +535,16 @@ Miscellaneous * The default value of the transitional setting :setting:`SIGNED_COOKIE_LEGACY_SALT_FALLBACK` is now ``False``. +* In cases where cached pages or template fragments varied on arguments, e.g. + :ref:`vary headers ` for + :func:`~django.views.decorators.cache.cache_page` and + :class:`~django.middleware.cache.UpdateCacheMiddleware`, or the ``vary_on`` + arguments to the :ttag:`cache` template tag (generated by + :func:`~django.core.cache.utils.make_template_fragment_key`), the cache keys + are different from the keys generated by older versions of Django. After + upgrading to Django 6.1, the first request to any previously cached page or + template fragment that varies on additional information will be a cache miss. + * :class:`~django.contrib.contenttypes.fields.GenericForeignKey` now uses a separate descriptor class: the private ``GenericForeignKeyDescriptor``. @@ -647,8 +657,9 @@ Miscellaneous the top-level value. :lookup:`Key and index lookups ` are unaffected by this deprecation. -* The ``django.db.models.fields.BLANK_CHOICE_DASH`` constant is deprecated - in favor of the new constant ``django.db.models.fields.BLANK_CHOICE_LABEL``. +* The undocumented ``django.db.models.fields.BLANK_CHOICE_DASH`` constant is + deprecated. See the :setting:`USE_BLANK_CHOICE_DASH` transitional setting for + migration advice. * The :setting:`USE_BLANK_CHOICE_DASH` transitional setting is deprecated. diff --git a/docs/topics/cache.txt b/docs/topics/cache.txt index d9f3009b0dc8..2449120eba2d 100644 --- a/docs/topics/cache.txt +++ b/docs/topics/cache.txt @@ -796,7 +796,8 @@ some dynamic data that appears inside the fragment. For example, you might want a separate cached copy of the sidebar used in the previous example for every user of your site. Do this by passing one or more additional arguments, which may be variables with or without filters, to the ``{% cache %}`` template tag -to uniquely identify the cache fragment: +that will uniquely identify the cache fragment when those values are converted +to strings: .. code-block:: html+django @@ -805,6 +806,15 @@ to uniquely identify the cache fragment: .. sidebar for logged-in user .. {% endcache %} +.. admonition:: Argument values converted to strings + + Because argument values are converted to strings when creating the cache + key, consider uniqueness carefully to avoid serving one fragment for two + distinct values that share a string representation, such as ``None`` and + ``"None"``. Avoid supplying a model instance, since its ``__str__()`` is + not guaranteed to be unique. Conveniently, in the example above, + ``request.user.username`` is unique for the default ``User`` model. + If :setting:`USE_I18N` is set to ``True`` the per-site middleware cache will :ref:`respect the active language`. For the ``cache`` template tag you could use one of the diff --git a/tests/cache/tests.py b/tests/cache/tests.py index f65603002c4b..45e47854ab48 100644 --- a/tests/cache/tests.py +++ b/tests/cache/tests.py @@ -2387,7 +2387,7 @@ def test_learn_cache_key(self): self.assertEqual( get_cache_key(request), "views.decorators.cache.cache_page.settingsprefix.GET." - "18a03f9c9649f7d684af5db3524f5c99.d41d8cd98f00b204e9800998ecf8427e", + "18a03f9c9649f7d684af5db3524f5c99.3b59035bd3b34e30981dc990dd93acbb", ) def test_learn_cache_key_strips_whitespace(self): @@ -2414,6 +2414,25 @@ def test_learn_cache_key_strips_whitespace(self): self.assertIsNotNone(key_b) self.assertNotEqual(key_a, key_b) + def test_learn_cache_key_no_header_collision(self): + tests = [ + ({"X-Region": "EU", "X-Tenant": ""}, {"X-Region": "", "X-Tenant": "EU"}), + ({"X-Region": "EU"}, {"X-Tenant": "EU"}), + ] + for headers_a, headers_b in tests: + with self.subTest(headers=(headers_a, headers_b)): + request_a = self.factory.get(self.path, headers=headers_a) + request_b = self.factory.get(self.path, headers=headers_b) + response = HttpResponse() + response.headers["Vary"] = "X-Region, X-Tenant" + learn_cache_key(request_a, response) + # Potentially colliding values result in different cache keys. + key_a = get_cache_key(request_a) + key_b = get_cache_key(request_b) + self.assertIsNotNone(key_a) + self.assertIsNotNone(key_b) + self.assertNotEqual(key_a, key_b) + def test_patch_cache_control(self): tests = ( # Initial Cache-Control, kwargs to patch_cache_control, expected @@ -2615,7 +2634,7 @@ def test_cache_key_i18n_translation_accept_language(self): request = self.factory.get(self.path) request.META["HTTP_ACCEPT_ENCODING"] = "gzip;q=1.0, identity; q=0.5, *;q=0" response = HttpResponse() - response.headers["Vary"] = "accept-encoding" + response.headers["Vary"] = "cookie, accept-encoding" key = learn_cache_key(request, response) self.assertIn( lang, @@ -3342,27 +3361,32 @@ def test_without_vary_on(self): def test_with_one_vary_on(self): key = make_template_fragment_key("foo", ["abc"]) - self.assertEqual(key, "template.cache.foo.493e283d571a73056196f1a68efd0f66") + self.assertEqual(key, "template.cache.foo.a6360ec2c58ecc4b23fd5bd00216fccd") def test_with_many_vary_on(self): key = make_template_fragment_key("bar", ["abc", "def"]) - self.assertEqual(key, "template.cache.bar.17c1a507a0cb58384f4c639067a93520") + self.assertEqual(key, "template.cache.bar.250310c146db454966b64f5fc265a540") def test_proper_escaping(self): key = make_template_fragment_key("spam", ["abc:def%"]) - self.assertEqual(key, "template.cache.spam.06c8ae8e8c430b69fb0a6443504153dc") + self.assertEqual(key, "template.cache.spam.bf6c24ef2576004284e0522c15314d8c") def test_with_ints_vary_on(self): key = make_template_fragment_key("foo", [1, 2, 3, 4, 5]) - self.assertEqual(key, "template.cache.foo.7ae8fd2e0d25d651c683bdeebdb29461") + self.assertEqual(key, "template.cache.foo.087c006c1b99e0d147f624b4921f8a13") def test_with_unicode_vary_on(self): key = make_template_fragment_key("foo", ["42ยบ", "๐Ÿ˜€"]) - self.assertEqual(key, "template.cache.foo.7ced1c94e543668590ba39b3c08b0237") + self.assertEqual(key, "template.cache.foo.ab66482052ab2084b9d25bdd04bc9b10") def test_long_vary_on(self): key = make_template_fragment_key("foo", ["x" * 10000]) - self.assertEqual(key, "template.cache.foo.3670b349b5124aa56bdb50678b02b23a") + self.assertEqual(key, "template.cache.foo.abff8a6702abde497feae7f61de2ef1e") + + def test_collision_vary_on(self): + key1 = make_template_fragment_key("foo", ["a:b", "c"]) + key2 = make_template_fragment_key("foo", ["a", "b:c"]) + self.assertNotEqual(key1, key2) class CacheHandlerTest(SimpleTestCase): diff --git a/tests/serializers/test_xml.py b/tests/serializers/test_xml.py index 884746dfac25..6235173c9952 100644 --- a/tests/serializers/test_xml.py +++ b/tests/serializers/test_xml.py @@ -4,6 +4,7 @@ from django.core.serializers.xml_serializer import DTDForbidden from django.test import TestCase, TransactionTestCase +from .models import Actor from .tests import SerializersTestBase, SerializersTransactionTestBase @@ -79,6 +80,12 @@ def test_control_char_failure(self): serializers.serialize(self.serializer_name, [self.a1]), ) + def test_control_char_failure_attribute(self): + actor = Actor.objects.create(pk="\u0001") + msg = "Actor (pk:%s) contains unserializable characters" % actor.pk + with self.assertRaisesMessage(ValueError, msg): + serializers.serialize(self.serializer_name, [actor]) + def test_no_dtd(self): """ The XML deserializer shouldn't allow a DTD. diff --git a/tests/syndication_tests/feeds.py b/tests/syndication_tests/feeds.py index 56e540c63385..5b884755b5ba 100644 --- a/tests/syndication_tests/feeds.py +++ b/tests/syndication_tests/feeds.py @@ -321,3 +321,9 @@ def item_enclosures(self, item): feedgenerator.Enclosure("http://example.com/hello.png", "0", "image/png"), feedgenerator.Enclosure("http://example.com/goodbye.png", "0", "image/png"), ] + + +class TestInvalidAtomFeed(TestAtomFeed): + def item_categories(self): + # This control character is not serializable. + return ["\x00"] diff --git a/tests/syndication_tests/tests.py b/tests/syndication_tests/tests.py index 739139db632a..859c61dbf1d3 100644 --- a/tests/syndication_tests/tests.py +++ b/tests/syndication_tests/tests.py @@ -17,6 +17,7 @@ rfc2822_date, rfc3339_date, ) +from django.utils.xmlutils import UnserializableContentError from .models import Article, Entry @@ -526,6 +527,11 @@ def test_title_escaping(self): title = item.getElementsByTagName("title")[0] self.assertEqual(title.firstChild.wholeText, "A & B < C > D") + def test_no_control_chars_in_attributes(self): + msg = "Control characters are not supported in XML 1.0" + with self.assertRaisesMessage(UnserializableContentError, msg): + self.client.get("/syndication/atom/invalid/") + def test_naive_datetime_conversion(self): """ Datetimes are correctly converted to the local time zone. diff --git a/tests/syndication_tests/urls.py b/tests/syndication_tests/urls.py index bb1d3d990df2..3a0d0ff07446 100644 --- a/tests/syndication_tests/urls.py +++ b/tests/syndication_tests/urls.py @@ -41,6 +41,7 @@ path("syndication/rss2/multiple-enclosure/", feeds.TestMultipleEnclosureRSSFeed()), path("syndication/atom/single-enclosure/", feeds.TestSingleEnclosureAtomFeed()), path("syndication/atom/multiple-enclosure/", feeds.TestMultipleEnclosureAtomFeed()), + path("syndication/atom/invalid/", feeds.TestInvalidAtomFeed()), path( "syndication/stylesheet.xsl", lambda request: None,