diff --git a/ckanext/contact/routes/_helpers.py b/ckanext/contact/routes/_helpers.py index 759d712..984ab70 100644 --- a/ckanext/contact/routes/_helpers.py +++ b/ckanext/contact/routes/_helpers.py @@ -13,7 +13,7 @@ from ckan.lib.navl.dictization_functions import unflatten from ckan.plugins import PluginImplementations, toolkit from pyisemail import is_email -from urllib.parse import urlparse +from urllib.parse import parse_qs, urlencode, urlparse, urlunparse from ckanext.contact import recaptcha from ckanext.contact.interfaces import IContact @@ -21,6 +21,33 @@ log = logging.getLogger(__name__) +def clean_referrer_url(url, contact_path='/contact'): + """ + Remove Cloudflare challenge query parameters from a URL. If the cleaned URL + is just the contact page itself return an empty string. + + :param url: the referrer URL to clean + :param contact_path: the path of the contact page to detect self-references + :returns: the cleaned URL, or empty string if the URL is empty or self-referencing + """ + if not url: + return '' + try: + parsed = urlparse(url) + params = parse_qs(parsed.query, keep_blank_values=True) + cleaned_params = {k: v for k, v in params.items() if not k.startswith('__cf_chl_')} + cleaned_url = urlunparse(parsed._replace(query=urlencode(cleaned_params, doseq=True))) + cleaned_parsed = urlparse(cleaned_url) + if ( + cleaned_parsed.path.rstrip('/') == contact_path.rstrip('/') + and not cleaned_params + ): + return '' + return cleaned_url + except Exception: + return '' + + def validate(data_dict): """ Validates the given data and recaptcha if necessary. @@ -55,13 +82,13 @@ def validate(data_dict): errors['email'] = ['Email address appears to be invalid'] error_summary['email'] = 'Email address appears to be invalid' - # check if referrer_url starts with site_url - referrer_url = data_dict.get('referrer_url', '').strip() + # clean and validate referrer_url + referrer_url = clean_referrer_url(data_dict.get('referrer_url', '').strip()) if referrer_url: site_url = toolkit.config.get('ckan.site_url', '') - if site_url: - if not referrer_url.startswith(site_url): - data_dict.pop('referrer_url', None) + if site_url and not referrer_url.startswith(site_url): + referrer_url = '' + data_dict['referrer_url'] = referrer_url # only check the recaptcha if there are no errors if not errors: diff --git a/ckanext/contact/routes/contact.py b/ckanext/contact/routes/contact.py index cb0a2d8..484cb8a 100644 --- a/ckanext/contact/routes/contact.py +++ b/ckanext/contact/routes/contact.py @@ -73,6 +73,14 @@ def form(): except AttributeError: extra_vars['data']['name'] = extra_vars['data']['email'] = None + # Get the referrer URL from the request + raw_referrer = ( + toolkit.request.args.get('referrer_url') + or toolkit.request.referrer + or '' + ) + extra_vars['data']['referrer_url'] = _helpers.clean_referrer_url(raw_referrer) + return toolkit.render('contact/form.html', extra_vars=extra_vars) diff --git a/ckanext/contact/tests/unit/test_helpers.py b/ckanext/contact/tests/unit/test_helpers.py index 00e7cfa..590451a 100644 --- a/ckanext/contact/tests/unit/test_helpers.py +++ b/ckanext/contact/tests/unit/test_helpers.py @@ -4,7 +4,11 @@ from mock import patch, MagicMock from freezegun import freeze_time -from ckanext.contact.routes._helpers import build_subject, get_dataset_title_from_url +from ckanext.contact.routes._helpers import ( + build_subject, + clean_referrer_url, + get_dataset_title_from_url, +) class TestBuildSubject: @@ -86,6 +90,59 @@ def test_prefix_provided(self): assert subject == 'PREFIX: TEST' +class TestCleanReferrerUrl: + def test_empty_string_returns_empty(self): + assert clean_referrer_url('') == '' + + def test_none_returns_empty(self): + assert clean_referrer_url(None) == '' + + def test_clean_url_passes_through(self): + url = 'https://example.com/dataset/my-dataset' + assert clean_referrer_url(url) == url + + def test_strips_cf_chl_tk_param(self): + url = 'https://example.com/dataset/foo?__cf_chl_tk=abc123' + assert clean_referrer_url(url) == 'https://example.com/dataset/foo' + + def test_strips_multiple_cf_params(self): + url = ( + 'https://example.com/dataset/foo' + '?__cf_chl_tk=abc&__cf_chl_rt_tk=def&__cf_chl_f_tk=ghi' + ) + assert clean_referrer_url(url) == 'https://example.com/dataset/foo' + + def test_preserves_non_cf_params(self): + url = 'https://example.com/dataset/foo?page=2&__cf_chl_tk=abc&sort=name' + result = clean_referrer_url(url) + assert '__cf_chl_tk' not in result + assert 'page=2' in result + assert 'sort=name' in result + + def test_contact_self_reference_returns_empty(self): + url = 'https://example.com/contact?__cf_chl_tk=abc123' + assert clean_referrer_url(url) == '' + + def test_contact_self_reference_with_trailing_slash(self): + url = 'https://example.com/contact/?__cf_chl_tk=abc123' + assert clean_referrer_url(url, contact_path='/contact/') == '' + + def test_contact_with_real_params_not_discarded(self): + url = 'https://example.com/contact?subject=hello&__cf_chl_tk=abc' + result = clean_referrer_url(url) + assert result != '' + assert 'subject=hello' in result + assert '__cf_chl_tk' not in result + + def test_custom_contact_path(self): + url = 'https://example.com/feedback?__cf_chl_tk=abc' + assert clean_referrer_url(url, contact_path='/feedback') == '' + + def test_non_contact_page_with_only_cf_params(self): + url = 'https://example.com/dataset/foo?__cf_chl_tk=abc' + assert clean_referrer_url(url) == 'https://example.com/dataset/foo' + + class TestGetDatasetTitleFromUrl: @patch('ckanext.contact.routes._helpers.toolkit.get_action') def test_valid_dataset_url_returns_title(self, mock_get_action): diff --git a/ckanext/contact/theme/assets/modules/form-contact.js b/ckanext/contact/theme/assets/modules/form-contact.js index a1e36e0..9c1180a 100644 --- a/ckanext/contact/theme/assets/modules/form-contact.js +++ b/ckanext/contact/theme/assets/modules/form-contact.js @@ -18,10 +18,29 @@ ckan.module('form-contact', function ($, _) { initialize: function () { self = this; - // populate the referrer URL field with document.referrer + // populate the referrer URL field only if not already set by the server const referrerField = self.el.find('#field-referrer-url'); - if (referrerField.length) { - referrerField.val(document.referrer || ''); + if (referrerField.length && !referrerField.val()) { + var referrer = document.referrer || ''; + if (referrer) { + try { + var url = new URL(referrer); + var keys = Array.from(url.searchParams.keys()); + keys.forEach(function (key) { + if (key.startsWith('__cf_chl_')) { + url.searchParams.delete(key); + } + }); + var isSelfRef = + url.pathname.replace(/\/$/, '') === + window.location.pathname.replace(/\/$/, '') && + !url.searchParams.toString(); + referrer = isSelfRef ? '' : url.toString(); + } catch (e) { + // invalid URL, leave referrer as-is + } + } + referrerField.val(referrer); } // setup the recaptcha context diff --git a/ckanext/contact/theme/assets/modules/modal-contact.js b/ckanext/contact/theme/assets/modules/modal-contact.js index 6ace960..cdd065b 100644 --- a/ckanext/contact/theme/assets/modules/modal-contact.js +++ b/ckanext/contact/theme/assets/modules/modal-contact.js @@ -50,10 +50,23 @@ ckan.module('modal-contact', function ($, _) { ); self.modal = $(html); - // populate the referrer URL field with the current page URL + // populate the referrer URL field with the current page URL, stripping CF params const referrerField = self.modal.find('#field-referrer-url'); if (referrerField.length) { - referrerField.val(window.location.href || ''); + var href = window.location.href || ''; + try { + var url = new URL(href); + var keys = Array.from(url.searchParams.keys()); + keys.forEach(function (key) { + if (key.startsWith('__cf_chl_')) { + url.searchParams.delete(key); + } + }); + href = url.toString(); + } catch (e) { + // invalid URL, leave href as-is + } + referrerField.val(href); } // add a close button to the modal self.modal diff --git a/ckanext/contact/theme/templates/contact/snippets/form.html b/ckanext/contact/theme/templates/contact/snippets/form.html index 92bb533..7f0db77 100644 --- a/ckanext/contact/theme/templates/contact/snippets/form.html +++ b/ckanext/contact/theme/templates/contact/snippets/form.html @@ -34,7 +34,7 @@ value=data.content, error=errors.content, placeholder=_('What do you have to tell us?'), is_required=true) }} - + {% endblock %} diff --git a/ckanext/contact/theme/templates/header.html b/ckanext/contact/theme/templates/header.html index 965faa1..9f94216 100644 --- a/ckanext/contact/theme/templates/header.html +++ b/ckanext/contact/theme/templates/header.html @@ -2,7 +2,7 @@ {% block contact_link %}
  • - + {% if c.userobj %} {% else %}