Steps to reproduce
Creates a playground VM
sudo lxc launch ubuntu:24.04 pg --vm -c limits.cpu=8 -c limits.memory=16GiB -d root,size=64GiB
Connects to it.
sudo lxc exec pg -- sudo --login --user ubuntu
Installs pre-requisites
sudo apt -y update && sudo apt -y upgrade
sudo snap install juju --channel=3.6/stable
sudo snap install lxd --channel=5.21/stable
Configures networking and storage
sudo lxd init --auto
sudo lxc network set lxdbr0 ipv6.address none
sudo iptables -P FORWARD ACCEPT
Bootstraps the controller
juju bootstrap localhost localhost
Prepares site1
juju add-model site1
juju deploy postgresql db1 --channel 16/edge --config profile=testing --base ubuntu@24.04
juju deploy data-integrator di1 --config database-name=testdb --base ubuntu@24.04
juju relate db1 di1
juju add-unit db1 -n 1
juju config db1 synchronous-mode-strict=false
juju offer db1:replication-offer replication-offer
Prepares site2
juju add-model site2
juju deploy postgresql db2 --channel 16/edge --config profile=testing --base ubuntu@24.04
juju add-unit db2 -n 1
juju config db2 synchronous-mode-strict=false
juju consume site1.replication-offer
juju integrate replication-offer db2:replication
Injects data and configures replication
juju switch site1
sudo apt install -y sysbench
juju run di1/leader get-credentials
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --table-size='1000000' oltp_read_only prepare
juju run -m site1 db1/leader create-replication
#########################
########
Test watcher addition under load
######## - Both sites
########
#########################
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
juju deploy postgresql-watcher w1 --channel 16/edge --config profile=testing --base ubuntu@24.04 -m site1
juju deploy postgresql-watcher w2 --channel 16/edge --config profile=testing --base ubuntu@24.04 -m site2
juju relate db1 w1 -m site1
juju relate db2 w2 -m site2
#########################
########
Test units addition under load
######## - Both sites
########
#########################
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
juju add-unit db1 -n 2 -m site1
juju add-unit db2 -n 1 -m site2
#########################
########
Node loss test
######## - site1
########
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
lxc stop site1::primary::machineId --force
#########################
########
Consecutive Node loss test
######## - site1
########
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
lxc stop site1::primary::machineId --force
#########################
########
2nd consecutive Node loss test
######## - site1
########
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
lxc stop site1::primary::machineId --force
Expected behavior for every node loss test
- Only workloads A and B fail
- A new primary is elected
- Watcher updates its topology
- Site2 remains healthy
Actual behavior
- First failover works properly
- The second and last failovers do not work
- The watcher does not seem to update its topology following an abrupt shutdown of nodes
Screenshot-1

Screenshot-2

Versions
Operating system: Ubuntu 24.04.4 LTS
Juju CLI: 3.6.21-genericlinux-amd64
Juju agent: Same as Juju
Charm revision: 1122
LXD: 5.21.4 LTS
Log output
Juju debug log:
Additional context
Steps to reproduce
Creates a playground VM
sudo lxc launch ubuntu:24.04 pg --vm -c limits.cpu=8 -c limits.memory=16GiB -d root,size=64GiB
Connects to it.
sudo lxc exec pg -- sudo --login --user ubuntu
Installs pre-requisites
sudo apt -y update && sudo apt -y upgrade
sudo snap install juju --channel=3.6/stable
sudo snap install lxd --channel=5.21/stable
Configures networking and storage
sudo lxd init --auto
sudo lxc network set lxdbr0 ipv6.address none
sudo iptables -P FORWARD ACCEPT
Bootstraps the controller
juju bootstrap localhost localhost
Prepares site1
juju add-model site1
juju deploy postgresql db1 --channel 16/edge --config profile=testing --base ubuntu@24.04
juju deploy data-integrator di1 --config database-name=testdb --base ubuntu@24.04
juju relate db1 di1
juju add-unit db1 -n 1
juju config db1 synchronous-mode-strict=false
juju offer db1:replication-offer replication-offer
Prepares site2
juju add-model site2
juju deploy postgresql db2 --channel 16/edge --config profile=testing --base ubuntu@24.04
juju add-unit db2 -n 1
juju config db2 synchronous-mode-strict=false
juju consume site1.replication-offer
juju integrate replication-offer db2:replication
Injects data and configures replication
juju switch site1
sudo apt install -y sysbench
juju run di1/leader get-credentials
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --table-size='1000000' oltp_read_only prepare
juju run -m site1 db1/leader create-replication
#########################
########
Test watcher addition under load
######## - Both sites
########
#########################
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
juju deploy postgresql-watcher w1 --channel 16/edge --config profile=testing --base ubuntu@24.04 -m site1
juju deploy postgresql-watcher w2 --channel 16/edge --config profile=testing --base ubuntu@24.04 -m site2
juju relate db1 w1 -m site1
juju relate db2 w2 -m site2
#########################
########
Test units addition under load
######## - Both sites
########
#########################
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
juju add-unit db1 -n 2 -m site1
juju add-unit db2 -n 1 -m site2
#########################
########
Node loss test
######## - site1
########
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
lxc stop site1::primary::machineId --force
#########################
########
Consecutive Node loss test
######## - site1
########
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
lxc stop site1::primary::machineId --force
#########################
########
2nd consecutive Node loss test
######## - site1
########
At the same time
A) Write traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_write_only run
B) Read-only traffic on site1::primary
sysbench --pgsql-host=site1::primary::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
C) Read-only traffic on site1::standby
sysbench --pgsql-host=site1::standby::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
D) Read-only traffic on site2::standby1 (primary member)
sysbench --pgsql-host=site2::standby1::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
E) Read-only traffic on site2::standby2
sysbench --pgsql-host=site2::standby2::ip --pgsql-user= --pgsql-password= --pgsql-port=5432 --pgsql-db=testdb --db-driver='pgsql' --threads='1' --tables='5' --report-interval=1 --time=373 oltp_read_only run
lxc stop site1::primary::machineId --force
Expected behavior for every node loss test
Actual behavior
Screenshot-1

Screenshot-2

Versions
Operating system: Ubuntu 24.04.4 LTS
Juju CLI: 3.6.21-genericlinux-amd64
Juju agent: Same as Juju
Charm revision: 1122
LXD: 5.21.4 LTS
Log output
Juju debug log:
Additional context