Linux Cluster with HAProxy and Keepalived
Application Stack Within a Cluster
Outgoing mail is directed via a DNS record (dc1server00.home.local or dc2server00.home.local, depending on the datacenter) to the VRRP address that is maintained by keepalived.
HAProxy listens on port 25 (SMTP) on both nodes in an active/standby fashion and is configured to direct connections to Postfix on both nodes (active/active); therefore both nodes in the cluster are live and servicing email at all times.
Postfix then delivers the email to the customer via normal MX delivery.
Keepalived
Keepalived uses the VRRP(2) protocol to maintain a virtual IP address that can float between the nodes in a cluster. For this system there is a primary and a backup server, set by priority in the configuration. Keepalived is configured to monitor that HAProxy is running on the master; if it isn’t, the priority of the master is reduced and the backup, which now has the higher priority, takes over the floating address.
Setup, on both servers in the cluster:
Install keepalived via the yum unix command (as root)
#yum install keepalived
Add the following line to the bottom of /etc/sysctl.conf. This allows processes to bind to an IP address even if that address is not currently assigned to the machine.
net.ipv4.ip_nonlocal_bind = 1
#sysctl -p
Enable keepalived to be started up on boot.
#systemctl enable keepalived.service
- DC1 keepalived config
cat /etc/keepalived/keepalived.conf
##########################################################
# GLOBAL SETTINGS
##########################################################
# Notification settings: who is mailed, the sender address and the relay used.
global_defs {
   notification_email {
       support@example.com #Set from Ansible haproxy deployment
   }
   notification_email_from keepalive@dc1server.home.local
   smtp_server 172.31.101.115 # ip address for smtp_relay
   smtp_connect_timeout 20
}
# Health check for haproxy. The exit status of the script decides whether
# the check counts as succeeded or failed.
vrrp_script chk_haproxy {
	script "systemctl status haproxy" #non-zero exit status when haproxy is not running
	interval 2		    #check every 2 seconds
	weight 2		    #default priority bonus while the check passes
}
# VRRP instance that owns the floating address 10.0.10.3 on eth0.
vrrp_instance dc1server {
    state MASTER
    interface eth0
    virtual_router_id 50
    priority 101
    advert_int 1
    track_interface {
      eth0
    }
    track_script {
      # NOTE(review): the weight given here is expected to take precedence over
      # the "weight 2" in vrrp_script, i.e. a passing check adds 20 to the
      # priority - confirm against the keepalived.conf man page for your version.
      chk_haproxy weight 20
    }
    authentication {
      auth_type PASS
      # keepalived only uses the first 8 characters of auth_pass (longer values
      # are truncated with a log warning), so keep the password within 8 characters.
      auth_pass SOMEPASS
    }
    virtual_ipaddress {
      10.0.10.3 dev eth0
    }
}
- DC2 keepalived config
cat /etc/keepalived/keepalived.conf
##########################################################
# GLOBAL SETTINGS
##########################################################
# Notification settings: who is mailed, the sender address and the relay used.
global_defs {
   notification_email {
       support@example.com #Set from Ansible haproxy deployment
   }
   notification_email_from keepalive@dc2server.home.local
   smtp_server 172.31.101.115 # ip address for smtp_relay (robot)
   smtp_connect_timeout 20
}
# Health check for haproxy. The exit status of the script decides whether
# the check counts as succeeded or failed.
vrrp_script chk_haproxy {
	script "systemctl status haproxy" #non-zero exit status when haproxy is not running
	interval 2		    #check every 2 seconds
	weight 2		    #default priority bonus while the check passes
}
# VRRP instance that owns the floating address 10.0.20.3 on eth0.
vrrp_instance dc2server {
    state MASTER
    interface eth0
    virtual_router_id 50
    priority 101
    advert_int 1
    track_interface {
      eth0
    }
    track_script {
      # NOTE(review): the weight given here is expected to take precedence over
      # the "weight 2" in vrrp_script, i.e. a passing check adds 20 to the
      # priority - confirm against the keepalived.conf man page for your version.
      chk_haproxy weight 20
    }
    authentication {
      auth_type PASS
      # keepalived only uses the first 8 characters of auth_pass, so keep the
      # password within 8 characters.
      auth_pass SOMEPASS
    }
    virtual_ipaddress {
      10.0.20.3 dev eth0
    }
}
HAproxy
The function of HAProxy is to load balance connections to Postfix on either node in the cluster; this allows for an “Active/Active” setup. Because there is no way to tell the workload of what has been submitted to each node, I have opted for a simple round-robin load-balancing algorithm. HAProxy sends an SMTP HELO to check that the Postfix ports are available and listening. HAProxy is also configured to present statistics, and the ability to drain servers, via a browser at https://hostname:9000/ (the stats listener is bound with SSL).
- Setup on both servers in the cluster
- -
install haproxy via the yum command
# yum install haproxy
enable haproxy to be started up on boot.
# systemctl enable haproxy.service
- DC1 haproxy config
cat /etc/haproxy/haproxy.cfg
##########################################################
# GLOBAL SETTINGS
##########################################################
# Process-wide settings: chroot, pid file, run-as user/group, admin socket,
# remote syslog target and the default TLS cipher list for "bind ... ssl".
global
    chroot /var/lib/haproxy
    pidfile /var/run/haproxy.pid
    maxconn 4000
    user haproxy
    group haproxy
    daemon
    stats socket /var/lib/haproxy/stats
    log 172.31.0.254 local2
    # RC4 is explicitly excluded: its use in TLS is prohibited by RFC 7465.
    ssl-default-bind-ciphers  kEECDH+aRSA+AES:kRSA+AES:+AES256:!RC4:!kEDH:!LOW:!EXP:!MD5:!aNULL:!eNULL
##########################################################
# DEFAULT SETTINGS
##########################################################
# Defaults inherited by every proxy section below; plain TCP mode because
# the balanced service is SMTP.
defaults
    mode tcp
    log global
    option tcplog
    option dontlognull
    option redispatch
    retries 3
    timeout queue 1m
    timeout connect 10s
    timeout client 1m
    timeout server 1m
    timeout check 10s
    maxconn 3000
##########################################################
# LISTEN SETTINGS
##########################################################
# Statistics / admin page, served over TLS on port 9000 of every address.
listen stats
    # "crt" points at a directory; the certificate files found there are loaded.
    bind *:9000 ssl crt /etc/pki/tls/private/
    mode http
    stats enable
    stats hide-version
    stats uri /
    stats realm HAProxy\ Statistics
    # NOTE(review): looks like a placeholder credential - replace before deploying.
    stats auth admin:p@ssw0rd
    # Admin actions (e.g. draining a server) are allowed for any authenticated user.
    stats admin if TRUE
##########################################################
# FRONTEND SETTINGS
##########################################################
# SMTP entry point, bound to the DC1 floating address 10.0.10.3.
frontend smtpdc1
    bind 10.0.10.3:25
    use_backend emailservice
##########################################################
# BACKEND SETTINGS
##########################################################
# The SMTP listeners on both DC1 nodes, balanced round robin.
backend emailservice
    description Emailservice
    balance roundrobin
    option tcplog
    # Health check: greet each server with "HELO haproxy" over SMTP.
    option smtpchk HELO haproxy
    server dc1server01 10.0.10.1:25 check
    server dc1server02 10.0.10.2:25 check
- DC2 haproxy config
cat /etc/haproxy/haproxy.cfg
##########################################################
# GLOBAL SETTINGS
##########################################################
# Process-wide settings: chroot, pid file, run-as user/group, admin socket,
# remote syslog target and the default TLS cipher list for "bind ... ssl".
global
    chroot /var/lib/haproxy
    pidfile /var/run/haproxy.pid
    maxconn 4000
    user haproxy
    group haproxy
    daemon
    stats socket /var/lib/haproxy/stats
    log 172.31.0.254 local2
    # RC4 is explicitly excluded: its use in TLS is prohibited by RFC 7465.
    ssl-default-bind-ciphers  kEECDH+aRSA+AES:kRSA+AES:+AES256:!RC4:!kEDH:!LOW:!EXP:!MD5:!aNULL:!eNULL
##########################################################
# DEFAULT SETTINGS
##########################################################
# Defaults inherited by every proxy section below; plain TCP mode because
# the balanced service is SMTP.
defaults
    mode tcp
    log global
    option tcplog
    option dontlognull
    option redispatch
    retries 3
    timeout queue 1m
    timeout connect 10s
    timeout client 1m
    timeout server 1m
    timeout check 10s
    maxconn 3000
##########################################################
# LISTEN SETTINGS
##########################################################
# Statistics / admin page, served over TLS on port 9000 of every address.
listen stats
    # "crt" points at a directory; the certificate files found there are loaded.
    bind *:9000 ssl crt /etc/pki/tls/private/
    mode http
    stats enable
    stats hide-version
    stats uri /
    stats realm HAProxy\ Statistics
    # NOTE(review): looks like a placeholder credential - replace before deploying.
    stats auth admin:p@ssw0rd
    # Admin actions (e.g. draining a server) are allowed for any authenticated user.
    stats admin if TRUE
##########################################################
# FRONTEND SETTINGS
##########################################################
# SMTP entry point, bound to the DC2 floating address 10.0.20.3.
frontend smtpdc2
    bind 10.0.20.3:25
    use_backend emailservice
##########################################################
# BACKEND SETTINGS
##########################################################
# The SMTP listeners on both DC2 nodes, balanced round robin.
backend emailservice
    description Emailservice
    balance roundrobin
    option tcplog
    # Health check: greet each server with "HELO haproxy" over SMTP.
    option smtpchk HELO haproxy
    server dc2server01 10.0.20.1:25 check
    server dc2server02 10.0.20.2:25 check
Troubleshooting
[root@dc1server01 ~]# fgrep -i keep /var/log/messages* /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived[1141]: Starting Keepalived v1.2.13 (11/18,2014) /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived[1563]: Starting Healthcheck child process, pid=1564 /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived[1563]: Starting VRRP child process, pid=1565 /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Netlink reflector reports IP 10.0.10.1 added /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Netlink reflector reports IP fe80::215:5dff:fee3:5004 added /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Registering Kernel netlink reflector /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Registering Kernel netlink command channel /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Registering gratuitous ARP shared channel /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Opening file '/etc/keepalived/keepalived.conf'. /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Truncating auth_pass to 8 characters /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Configuration is using : 66367 Bytes /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Using LinkWatch kernel netlink reflector... 
/var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: VRRP sockpool: [ifindex(2), proto(112), unicast(0), fd(10,11)] /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: VRRP_Script(chk_haproxy) succeeded /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_healthcheckers[1564]: Netlink reflector reports IP 10.0.10.1 added /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_healthcheckers[1564]: Netlink reflector reports IP fe80::215:5dff:fee3:5004 added /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_healthcheckers[1564]: Registering Kernel netlink reflector /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_healthcheckers[1564]: Registering Kernel netlink command channel /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_healthcheckers[1564]: Opening file '/etc/keepalived/keepalived.conf'. /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_healthcheckers[1564]: Configuration is using : 7586 Bytes /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_healthcheckers[1564]: Using LinkWatch kernel netlink reflector... 
/var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: VRRP_Instance(dc1server) Transition to MASTER STATE /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: VRRP_Instance(dc1server) Received higher prio advert /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: VRRP_Instance(dc1server) Entering BACKUP STATE /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_healthcheckers[1564]: Netlink reflector reports IP fe80::215:5dff:fee3:5004 added /var/log/messages-20160822:Aug 22 09:30:58 dc1server01 Keepalived_vrrp[1565]: Netlink reflector reports IP fe80::215:5dff:fee3:5004 added /var/log/messages-20160822:Aug 22 09:38:29 dc1server01 Keepalived_vrrp[1565]: VRRP_Instance(dc1server) Transition to MASTER STATE /var/log/messages-20160822:Aug 22 09:38:30 dc1server01 Keepalived_vrrp[1565]: VRRP_Instance(dc1server) Entering MASTER STATE /var/log/messages-20160822:Aug 22 09:38:30 dc1server01 Keepalived_vrrp[1565]: VRRP_Instance(dc1server) setting protocol VIPs. /var/log/messages-20160822:Aug 22 09:38:30 dc1server01 Keepalived_vrrp[1565]: VRRP_Instance(dc1server) Sending gratuitous ARPs on eth0 for 10.0.10.3 /var/log/messages-20160822:Aug 22 09:38:30 dc1server01 Keepalived_healthcheckers[1564]: Netlink reflector reports IP 10.0.10.3 added /var/log/messages-20160822:Aug 22 09:38:35 dc1server01 Keepalived_vrrp[1565]: VRRP_Instance(dc1server) Sending gratuitous ARPs on eth0 for 10.0.10.3 /var/log/messages-20160822:Aug 22 09:44:53 dc1server01 Keepalived_vrrp[1565]: VRRP_Instance(dc1server) Received lower prio advert, forcing new election
[root@dc2server01 ~]# fgrep -i keep /var/log/messages* /var/log/messages:Aug 22 10:53:04 dc2server01 Keepalived_vrrp[1234]: VRRP_Script(chk_haproxy) failed /var/log/messages:Aug 22 10:53:05 dc2server01 Keepalived_vrrp[1234]: VRRP_Instance(dc2server) Received higher prio advert /var/log/messages:Aug 22 10:53:05 dc2server01 Keepalived_vrrp[1234]: VRRP_Instance(dc2server) Entering BACKUP STATE /var/log/messages:Aug 22 10:53:05 dc2server01 Keepalived_vrrp[1234]: VRRP_Instance(dc2server) removing protocol VIPs. /var/log/messages:Aug 22 10:53:05 dc2server01 Keepalived_healthcheckers[1233]: Netlink reflector reports IP 10.0.20.3 removed /var/log/messages:Aug 22 11:04:49 dc2server01 Keepalived_vrrp[1234]: VRRP_Script(chk_haproxy) succeeded
