我有一个正在运行的三节点patroni
postgres-11
集群。patroni集群设置为:
consul:
url: <consul URL>
bootstrap:
initdb:
encoding: UTF8
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
postgresql:
use_pg_rewind: true
use_slots: true
remove_data_directory_on_diverged_timelines: false
remove_data_directory_on_rewind_failure: true
parameters:
archive_command: /bin/true
archive_mode: 'on'
archive_timeout: 1800s
checkpoint_completion_target: 0.9
default_statistics_target: 100
effective_cache_size: 24GB
effective_io_concurrency: 200
hot_standby: 'on'
log_directory: /var/log/postgresql
log_filename: postgresql-%a.log
logging_collector: 'on'
maintenance_work_mem: 2GB
max_connections: 2000
max_parallel_maintenance_workers: 4
max_parallel_workers: 8
max_parallel_workers_per_gather: 4
max_replication_slots: 10
max_wal_senders: 10
max_wal_size: 8GB
max_worker_processes: 8
min_wal_size: 2GB
random_page_cost: 1.1
shared_buffers: 8GB
ssl: 'on'
ssl_cert_file: /etc/ssl/certs/ssl-cert-snakeoil.pem
ssl_key_file: /etc/ssl/private/ssl-cert-snakeoil.key
track_commit_timestamp: 'on'
wal_buffers: 16MB
wal_keep_segments: 64
wal_level: logical
wal_log_hints: 'on'
work_mem: 5242kB
postgresql:
listen: 0.0.0.0:5432
connect_address: <IP Address of the node>:5432
data_dir: /var/lib/postgresql/11/<db name>
pgpass: /tmp/pgpass0
authentication:
superuser:
username: postgres
password: postgres
replication:
username: replicator
password: replicator
rewind:
username: rewind_user
password: rewind_password
pg_hba:
- local all all peer
- host all all 127.0.0.1/32 md5
- hostssl all postgres 0.0.0.0/0 md5
- host all postgres 0.0.0.0/0 reject
- host all all 0.0.0.0/0 md5
- hostssl all postgres ::0/0 md5
- host all postgres ::0/0 reject
- host all all ::0/0 md5
- local replication all peer
- host replication all 127.0.0.1/32 md5
- hostssl replication replicator 0.0.0.0/0 md5
- hostssl replication replicator ::0/0 md5
- host replication replicator 0.0.0.0/0 md5
- host replication replicator 127.0.0.1/32 trust
- local postgres postgres peer
- host all all 0.0.0.0/0 md5
create_replica_methods:
- basebackup
basebackup:
max-rate: '250M'
tags:
nofailover: false
noloadbalance: false
clonefrom: false
nosync: false
restapi:
connect_address: <IP Address of the node>:8008
listen: 0.0.0.0:8008
当我关闭主控室时,我希望synchronous standby
能成为主控室。但在两个备用节点上,我都看到了以下错误:
INFO: following a different leader because i am not the healthiest node
不确定为什么同步备用未成为主要备用。我在Ubuntu 18.04
上使用Patroni 1.6.1
我解决了这个问题。由于某种原因,我的节点名称中的大写字母被转换成了小写字母,因此patroni无法找到备用节点。如下所示:
{"leader":"Pa-db1","sync_standby":"pa-db2"}
节点的正确名称是Pa-db2,第二个备用节点名称类似地是Pa-db3。我把所有节点的名称都改成了小写字母,即pa-db1, pa-db2, pa-db3
和故障切换成功了!