Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ psutil = "^7.2.2"
charm-refresh = "^3.1.0.2"
charmlibs-snap = "^1.0.1"
charmlibs-interfaces-tls-certificates = "^1.8.1"
postgresql-charms-single-kernel = {extras = ["postgresql"], version="16.2.1"}
postgresql-charms-single-kernel = {url = "https://github.com/canonical/postgresql-single-kernel-library/archive/e763386979329d313cf303d03d504848e0066ff6.zip"}

[tool.poetry.group.charm-libs.dependencies]
# data_platform_libs/v0/data_interfaces.py
Expand Down
2 changes: 1 addition & 1 deletion scripts/cluster_topology_observer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ async def _httpx_get_request(url: str):
ssl_ctx = create_default_context()
with suppress(FileNotFoundError):
ssl_ctx.load_verify_locations(cafile=f"{PATRONI_CONF_PATH}/{TLS_CA_BUNDLE_FILE}")
async with AsyncClient(timeout=API_REQUEST_TIMEOUT, verify=ssl_ctx) as client:
async with AsyncClient(timeout=API_REQUEST_TIMEOUT, verify=ssl_ctx, trust_env=False) as client:
try:
return (await client.get(url)).json()
except Exception as e:
Expand Down
35 changes: 21 additions & 14 deletions src/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,13 @@ def _patroni_url(self) -> str:
"""Patroni REST API URL."""
return f"https://{self.unit_ip}:8008"

@cached_property
def _session(self) -> requests.Session:
    """Return the HTTP session used for Patroni REST API calls.

    The session is created on first access and then reused. Its ``trust_env``
    flag is switched off because Patroni API calls are always intra-cluster
    and must not go through HTTP proxies.
    """
    session = requests.Session()
    session.trust_env = False
    return session

@staticmethod
def _dict_to_hba_string(_dict: dict[str, Any]) -> str:
"""Transform a dictionary into a Host Based Authentication valid string."""
Expand Down Expand Up @@ -411,7 +418,7 @@ def get_patroni_health(self) -> dict[str, str]:
"""Gets, retries and parses the Patroni health endpoint."""
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(7)):
with attempt:
r = requests.get(
r = self._session.get(
f"{self._patroni_url}/health",
verify=self.verify,
timeout=API_REQUEST_TIMEOUT,
Expand Down Expand Up @@ -454,7 +461,7 @@ def is_replication_healthy(self) -> bool:
for members_ip in members_ips:
endpoint = "leader" if members_ip == primary_ip else "replica?lag=16kB"
url = self._patroni_url.replace(self.unit_ip, members_ip)
r = requests.get(
r = self._session.get(
f"{url}/{endpoint}",
verify=self.verify,
auth=self._patroni_auth,
Expand Down Expand Up @@ -520,7 +527,7 @@ def is_member_isolated(self) -> bool:
try:
for attempt in Retrying(stop=stop_after_delay(10), wait=wait_fixed(3)):
with attempt:
r = requests.get(
r = self._session.get(
f"{self._patroni_url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}",
verify=self.verify,
timeout=API_REQUEST_TIMEOUT,
Expand Down Expand Up @@ -582,7 +589,7 @@ def are_replicas_up(self) -> dict[str, bool] | None:

def promote_standby_cluster(self) -> None:
"""Promote a standby cluster to be a regular cluster."""
config_response = requests.get(
config_response = self._session.get(
f"{self._patroni_url}/config",
verify=self.verify,
auth=self._patroni_auth,
Expand All @@ -595,7 +602,7 @@ def promote_standby_cluster(self) -> None:
)
if "standby_cluster" not in config_response.json():
raise StandbyClusterAlreadyPromotedError("standby cluster is already promoted")
r = requests.patch(
r = self._session.patch(
f"{self._patroni_url}/config",
verify=self.verify,
json={"standby_cluster": None},
Expand All @@ -610,7 +617,7 @@ def promote_standby_cluster(self) -> None:

def set_max_timelines_history(self) -> None:
"""Patch the DCS with max_timelines_history limit."""
requests.patch(
self._session.patch(
f"{self._patroni_url}/config",
verify=self.verify,
json={"max_timelines_history": 50},
Expand Down Expand Up @@ -806,7 +813,7 @@ def switchover(self, candidate: str | None = None, async_cluster: bool = False)
body = {"leader": current_primary}
if candidate:
body["candidate"] = candidate
r = requests.post(
r = self._session.post(
f"{self._patroni_url}/switchover",
json=body,
verify=self.verify,
Expand Down Expand Up @@ -1018,7 +1025,7 @@ def remove_raft_member(self, member_address: str | None) -> None:
def reload_patroni_configuration(self):
"""Reload Patroni configuration after it was changed."""
logger.debug("Reloading Patroni configuration...")
r = requests.post(
r = self._session.post(
f"{self._patroni_url}/reload",
verify=self.verify,
auth=self._patroni_auth,
Expand Down Expand Up @@ -1057,7 +1064,7 @@ def restart_patroni(self) -> bool:
def restart_postgresql(self) -> None:
"""Restart PostgreSQL."""
logger.debug("Restarting PostgreSQL...")
r = requests.post(
r = self._session.post(
f"{self._patroni_url}/restart",
verify=self.verify,
auth=self._patroni_auth,
Expand All @@ -1069,7 +1076,7 @@ def restart_postgresql(self) -> None:
def reinitialize_postgresql(self) -> None:
"""Reinitialize PostgreSQL."""
logger.debug("Reinitializing PostgreSQL...")
r = requests.post(
r = self._session.post(
f"{self._patroni_url}/reinitialize",
verify=self.verify,
auth=self._patroni_auth,
Expand All @@ -1087,7 +1094,7 @@ def bulk_update_parameters_controller_by_patroni(
"""
if not base_parameters:
base_parameters = {}
r = requests.patch(
r = self._session.patch(
f"{self._patroni_url}/config",
verify=self.verify,
json={
Expand Down Expand Up @@ -1116,7 +1123,7 @@ def ensure_slots_controller_by_patroni(self, slots: dict[str, str]) -> None:
"""
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3), reraise=True):
with attempt:
current_config = requests.get(
current_config = self._session.get(
f"{self._patroni_url}/config",
verify=self.verify,
timeout=PATRONI_TIMEOUT,
Expand All @@ -1140,7 +1147,7 @@ def ensure_slots_controller_by_patroni(self, slots: dict[str, str]) -> None:
"plugin": "pgoutput",
"type": "logical",
}
r = requests.patch(
r = self._session.patch(
f"{self._patroni_url}/config",
verify=self.verify,
json={"slots": slots_patch},
Expand Down Expand Up @@ -1183,7 +1190,7 @@ def update_synchronous_node_count(self) -> None:
"""Update synchronous_node_count to the minority of the planned cluster."""
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)):
with attempt:
r = requests.patch(
r = self._session.patch(
f"{self._patroni_url}/config",
json=self.synchronous_configuration,
verify=self.verify,
Expand Down
110 changes: 110 additions & 0 deletions tests/integration/test_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/usr/bin/env python3
# Copyright 2026 Canonical Ltd.
# See LICENSE file for licensing details.

"""Integration test: charm deploys and operates correctly behind an HTTP proxy.

Regression test for https://github.com/canonical/postgresql-operator/issues/1714.
When Juju model-config sets an HTTP proxy, proxy environment variables leak into
all unit processes. Patroni REST API calls (intra-cluster, on private IPs) must
bypass the proxy — otherwise the charm gets stuck in "awaiting start of the
primary".

Reproduces the exact scenario from the issue: both Juju model-config proxy
settings AND cloudinit-userdata writing proxy vars to /etc/environment, with
a real Squid proxy running on the LXD host.
"""

import logging
import subprocess
import textwrap

import pytest
import requests

from .adapters import JujuFixture
from .jubilant_helpers import (
DATABASE_APP_NAME,
get_primary,
get_unit_address,
)

logger = logging.getLogger(__name__)


def _get_lxd_bridge_ip() -> str:
    """Return the IPv4 address of the ``lxdbr0`` bridge.

    The address is read from ``ip -4 -o addr show lxdbr0``; it is the address
    at which containers can reach a proxy running on the LXD host.

    Returns:
        The bridge address without its ``/prefix`` suffix.

    Raises:
        subprocess.CalledProcessError: if the ``ip`` command exits non-zero.
        RuntimeError: if the command output contains no ``inet`` address.
    """
    output = subprocess.run(
        ["ip", "-4", "-o", "addr", "show", "lxdbr0"],
        check=True,
        capture_output=True,
        text=True,
    ).stdout
    # Everything after the first "inet " marker starts with "ADDRESS/PREFIX".
    _, marker, remainder = output.partition("inet ")
    if not marker:
        # Fail with an actionable message instead of a bare IndexError.
        raise RuntimeError(
            "lxdbr0 has no IPv4 address; cannot determine the proxy host "
            f"(ip output: {output!r})"
        )
    return remainder.split("/")[0]


# Resolved at import time: importing this module runs `ip` on the test host.
PROXY_HOST = _get_lxd_bridge_ip()
# Plain-HTTP proxy URL on port 3128 — presumably the port the Squid proxy on the
# LXD host listens on; confirm against the test environment setup.
PROXY_URL = f"http://{PROXY_HOST}:3128"

# cloud-init user data that writes lower- and upper-case proxy variables to
# /etc/environment. The literal is dedented first and then every `{proxy}`
# placeholder is filled in by str.format, so any literal brace added to the
# content would have to be doubled.
CLOUDINIT_USERDATA = textwrap.dedent("""\
#cloud-config
write_files:
- path: /etc/environment
permissions: '0644'
owner: root:root
content: |
http_proxy={proxy}
https_proxy={proxy}
HTTP_PROXY={proxy}
HTTPS_PROXY={proxy}
no_proxy=localhost,127.0.0.1,10.0.0.0/8
NO_PROXY=localhost,127.0.0.1,10.0.0.0/8
""").format(proxy=PROXY_URL)

# Model configuration applied before deployment.
# NOTE: the model-level "no-proxy" list below does not contain the 10.0.0.0/8
# range that the /etc/environment no_proxy entries above do.
PROXY_CONFIG = {
"http-proxy": PROXY_URL,
"https-proxy": PROXY_URL,
"no-proxy": "127.0.0.1,localhost,::1",
"cloudinit-userdata": CLOUDINIT_USERDATA,
}


@pytest.mark.abort_on_fail
def test_deploy_with_proxy(juju: JujuFixture, charm: str):
    """Bring up a three-unit PostgreSQL application in a proxy-configured model."""
    model = juju.ext.model

    # The proxy settings must be in place before any machine exists:
    # cloudinit-userdata only affects machines created after it is set.
    model.set_config(PROXY_CONFIG)
    model.deploy(
        charm,
        application_name=DATABASE_APP_NAME,
        num_units=3,
        config={"profile": "testing"},
    )

    fast_update_status = {"update-status-hook-interval": "10s"}
    model.set_config(fast_update_status)
    model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1500)


def test_proxy_env_vars_present_on_units(juju: JujuFixture):
    """Precondition check: /etc/environment on a unit mentions HTTPS_PROXY."""
    app_units = juju.status().get_units(DATABASE_APP_NAME)
    first_unit = next(iter(app_units))
    environment_file = juju.ssh(first_unit, "cat /etc/environment")
    assert "HTTPS_PROXY" in environment_file, (
        "Proxy env vars not found in /etc/environment — cloudinit-userdata not applied"
    )


def test_patroni_api_reachable(juju: JujuFixture):
    """Patroni REST API responds on every unit despite proxy env vars.

    Each unit's ``/health`` endpoint on port 8008 is queried from the test
    host and must answer with HTTP 200.
    """
    units = juju.status().get_units(DATABASE_APP_NAME)
    for unit_name in units:
        host = get_unit_address(juju, unit_name)
        # Certificate verification is disabled, as in the original test.
        # The timeout bounds the call: without it an unresponsive unit would
        # block the test run indefinitely instead of failing.
        result = requests.get(
            f"https://{host}:8008/health",
            verify=False,
            timeout=30,
        )
        assert result.status_code == 200, f"Patroni API unreachable on {unit_name}"


def test_get_primary_works(juju: JujuFixture):
    """Check that ``get_primary`` yields a non-empty result for the application.

    Per the original author's note, this exercises the charm's internal
    Patroni client.
    """
    app_units = juju.status().get_units(DATABASE_APP_NAME)
    any_unit = next(iter(app_units))
    assert get_primary(juju, any_unit), "get-primary returned empty result"
assert primary, "get-primary returned empty result"
17 changes: 17 additions & 0 deletions tests/spread/test_proxy.py/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Spread task that runs the proxy integration test module with a Squid proxy
# installed on the host.
summary: test_proxy.py
environment:
TEST_MODULE: test_proxy.py
# Install Squid, bind it to the lxdbr0 bridge address on port 3128, allow
# clients from 10.0.0.0/8 and localhost only, then check the service is active.
prepare: |
apt-get update -qq
apt-get install -y -qq squid
BRIDGE_IP=$(ip -4 -o addr show lxdbr0 | awk '{print $4}' | cut -d/ -f1)
printf 'http_port %s:3128\nacl localnet src 10.0.0.0/8\nhttp_access allow localnet\nhttp_access allow localhost\nhttp_access deny all\n' "$BRIDGE_IP" > /etc/squid/squid.conf
systemctl restart squid
systemctl is-active --quiet squid
# Run the selected integration test module through tox and collect Allure results.
execute: |
tox run -e integration -- "tests/integration/$TEST_MODULE" --model testing --alluredir="$SPREAD_TASK/allure-results"
# Stop and uninstall Squid again.
restore: |
systemctl stop squid
apt-get remove -y -qq squid
artifacts:
- allure-results
Loading
Loading