From 0ea1a4eac9b1719d0986af5b8cdc01ce70c7c2d1 Mon Sep 17 00:00:00 2001
From: n
Date: Tue, 14 May 2019 11:22:04 +0200
Subject: [PATCH] check_postgresql_replication.sh

---
 README.md                                  |  2 +-
 monitoring/check_postgresql_replication.sh | 79 ++++++++++++++++++++++
 2 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100755 monitoring/check_postgresql_replication.sh

diff --git a/README.md b/README.md
index 86e1d36..bd612c1 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,6 @@ Scripts
  * [secupdate](https://forge.tourmentine.com/n/scripts/src/master/secupdate) => apply security updates & recompile kernel (FreeBSD)
  * [superscreen](https://forge.tourmentine.com/n/scripts/src/master/superscreen) => open multiple ssh sessions to a bunch of servers inside a screen
  * [zfsync.sh](https://forge.tourmentine.com/n/scripts/src/master/zfsync.sh) => sync some ZFS pools between two machines
- * [pivotroot.sh](https://forge.tourmentine.com/n/scripts/src/master/pivotroot.sh) => pivot root to tmpfs on a live server in order to wipe it
 
 Greasemonkey/Userscripts
 ------------------------
@@ -36,6 +35,7 @@ Monitoring
 
  * [check_ntppool_score](https://forge.tourmentine.com/n/scripts/src/master/monitoring/check_ntppool_score) => Simple shell script to check score on NTP Pool Project (score must be over 10 to serve NTP). Requires Curl and bc, fits for Nagios/Shinken/Icinga/etc.
  * [collectd_ntppool_score](https://forge.tourmentine.com/n/scripts/src/master/monitoring/collectd_ntppool_score) => Same as [check_ntppool_score](https://forge.tourmentine.com/n/scripts/src/master/monitoring/check_ntppool_score) but for collectd graphing.
+ * [check_postgresql_replication.sh](https://forge.tourmentine.com/n/scripts/src/master/monitoring/check_postgresql_replication.sh) => check postgresql's replication lag.
  * [GonKyrellM](https://forge.tourmentine.com/n/scripts/src/master/monitoring/GonKyrellM) => Conky, GKrellM style - with "invisible" theme (well, sort of)
 
 Attic
diff --git a/monitoring/check_postgresql_replication.sh b/monitoring/check_postgresql_replication.sh
new file mode 100755
index 0000000..824838a
--- /dev/null
+++ b/monitoring/check_postgresql_replication.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+# Nagios-style check of PostgreSQL (streaming) replication lag.
+# Tested with PostgreSQL 11.3 on FreeBSD 11.2.
+#
+# Asks the local server how long ago the last replayed transaction was
+# committed and compares that age, in seconds, with the thresholds below.
+# pg_hba.conf access should be set to trust.
+#
+# Options:
+#   -w seconds   warning threshold  (default: 600)
+#   -c seconds   critical threshold (default: 3600)
+#   -h           print usage and exit
+# On a low-traffic server don't hesitate to set the thresholds to a high value.
+#
+# Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (bad arguments, failed
+# query, or no replay timestamp available).
+
+warning=600
+critical=3600
+
+# Print the usage line; the defaults shown reflect options parsed so far.
+usage() {
+  echo "Usage $0: [-h] [-w warning (default: $warning)] [-c critical (default: $critical)]"
+}
+
+# Succeed only when $1 is a non-empty string of decimal digits.
+is_uint() {
+  case "$1" in
+    '' | *[!0-9]*) return 1 ;;
+  esac
+  return 0
+}
+
+# getopts (builtin) keeps arguments intact and rejects unknown options itself.
+while getopts hw:c: opt; do
+  case "$opt" in
+    h) usage; exit 2 ;;
+    w) warning=$OPTARG ;;
+    c) critical=$OPTARG ;;
+    *) usage; exit 3 ;;
+  esac
+done
+
+# A non-numeric threshold would make the comparisons below fail and fall
+# through to OK, so reject it up front.
+for threshold in "$warning" "$critical"; do
+  if ! is_uint "$threshold"; then
+    echo "UNKNOWN: threshold '$threshold' is not a whole number of seconds"
+    exit 3
+  fi
+done
+
+# -X skips ~/.psqlrc; -A -t print only the bare value. extract(epoch ...)
+# yields total seconds, so lags of a day or more are counted in full.
+delay=$(/usr/local/bin/psql -X -U postgres -A -t -c \
+  "SELECT trunc(extract(epoch FROM now() - pg_last_xact_replay_timestamp()))::bigint;") || {
+  echo "UNKNOWN: could not query PostgreSQL"
+  exit 3
+}
+
+# Empty output means the function returned NULL (nothing replayed yet, or
+# the server is not in recovery); never report that as OK.
+case "${delay#-}" in
+  '' | *[!0-9]*)
+    echo "UNKNOWN: no replication delay available (is this server a standby?)"
+    exit 3
+    ;;
+esac
+
+if [ "$delay" -gt "$critical" ]; then
+  echo "CRITICAL: $delay seconds behind master"
+  exit 2
+elif [ "$delay" -gt "$warning" ]; then
+  echo "WARNING: $delay seconds behind master"
+  exit 1
+else
+  echo "OK: $delay seconds behind master"
+  exit 0
+fi