33.0: heartbeat



0. 理论

什么是Linux-HA?

Linux-HA项目为Linux、BSD、OS X等系统提供了高可用(集群)解决方案,其特点可概括为RAS:高可靠性(Reliability)、高可用性(Availability)和可服务性(Serviceability)。

什么是heartbeat?

heartbeat是Linux-HA项目中最主要的一个软件,采用GPL许可证发布,是一个高可用集群的管理软件。

什么是corosync? 现在pacemaker默认是使用corosync来代替heartbeat,它也是一个集群消息通信层

heartbeat的特点

heartbeat发展历史

1997年开始,Linux-HA项目开始收集HA技术,最开始heartbeat,只是两个nodes的互相备份和非常简单的主备自动切换,并没有资源管理。

在2.1.4版本以后,乃至今天的3.0版本,heartbeat中的资源管理功能被拆分出来,成为独立的pacemaker;同时还拆分出了独立的resource agent和“glue”,heartbeat自身只保留了集群消息通信层的功能。

Heartbeat 3.0拆分之后的组成部分:

heartbeat的配置文件:

heartbeat扩展阅读:

1. 环境准备(所有节点均执行)

主机配置

item name node1 node2
主机名称 master cluster1
网卡 eth0 eth0
ip 10.10.180.221 10.10.180.222
防火墙 规则临时清空 规则临时清空
selinux 关闭 关闭
# 配置/etc/hosts
vim /etc/hosts
********************************
10.10.180.221 master
10.10.180.222 cluster1
********************************
 
# 安装环境包
yum install -y gcc flex bison net-snmp OpenIPMI
yum install -y glib2-devel bzip2-devel libtool-ltdl-devel libxml2-devel e2fsprogs-devel libxslt-devel  docbook-dtds docbook-style-xsl libaio-devel
yum groupinstall base "Development Tools" -y
 
# 安装a2x命令
wget http://jaist.dl.sourceforge.net/project/asciidoc/asciidoc/8.6.9/asciidoc-8.6.9.zip
unzip asciidoc-8.6.9.zip
cd asciidoc-8.6.9
./configure
make && make install

# 用户准备
groupadd haclient
useradd -g haclient hacluster

2. 安装heartbeat(所有节点均执行)

# 安装cluster glue
# glue(胶水的意思)是用来粘合Heartbeat、Pacemaker以及Resource Agent的一系列类库、工具的集合。
# 下载并解压glue
wget http://hg.linux-ha.org/glue/archive/glue-1.0.12.tar.bz2
tar jxf glue-1.0.12.tar.bz2
 
# 编译安装glue
cd Reusable-Cluster-Components-glue--glue-1.0.12/
./autogen.sh
./configure --prefix=/usr/local/heartbeat --sysconfdir=/etc/ha.d --libdir=/usr/local/heartbeat/lib64 --with-daemon-user=hacluster --with-daemon-group=haclient LIBS='/lib64/libuuid.so.1'
make
make install
  
# 安装agent
# resource-agents为集群资源的访问提供了一系列标准的接口。
wget https://github.com/ClusterLabs/resource-agents/archive/v3.9.6.tar.gz
tar zxf v3.9.6.tar.gz
cd resource-agents-3.9.6/
./autogen.sh
./configure --prefix=/usr/local/heartbeat --sysconfdir=/etc/ha.d --libdir=/usr/local/heartbeat/lib64 CFLAGS=-I/usr/local/heartbeat/include LDFLAGS=-L/usr/local/heartbeat/lib64 LIBS='/lib64/libuuid.so.1'
 
# 将自定义路径下的lib软连接到系统lib目录,避免编译时找不到库文件
ln -s /usr/local/heartbeat/lib64/* /lib64/
make
make install 

# 安装Heartbeat
# 下载解压heartbeat
wget http://hg.linux-ha.org/dev/archive/STABLE-3.0.6.tar.bz2
tar jxf STABLE-3.0.6.tar.bz2
 
# 编译安装
cd Heartbeat-3-0-STABLE-3.0.6/
./bootstrap
./ConfigureMe configure --prefix=/usr/local/heartbeat --sysconfdir=/etc/ha.d --libdir=/usr/local/heartbeat/lib64 CFLAGS=-I/usr/local/heartbeat/include LDFLAGS=-L/usr/local/heartbeat/lib64 LIBS='/lib64/libuuid.so.1'
vim /usr/local/heartbeat/include/heartbeat/glue_config.h
*******************************************
# 删除下面这行
#define HA_HBCONF_DIR "/etc/ha.d/ha.d/"
*******************************************
make
make install 

3. 配置

# master配置
cd /usr/local/heartbeat/share/doc/heartbeat/
cp authkeys ha.cf haresources /etc/ha.d/ha.d
chmod 600 /etc/ha.d/ha.d/authkeys

cd /etc/ha.d/ha.d
vim authkeys
*********************************
auth 3
#1 crc
#2 sha1 HI!
3 md5 Hello!
*********************************
 
vim haresources
*********************************
master 10.10.180.220/24/eth0:0 nginx
*********************************
 
cat ha.cf |grep -Ev "^ ?+$|^ ?+#"
*********************************
debugfile /var/log/ha-debug
logfile /var/log/ha-log
logfacility     local0
keepalive 2
deadtime 30
warntime 10
initdead 120
udpport 694
ucast eth0 10.10.180.222
auto_failback on
node master
node cluster1
ping 10.10.180.1
respawn hacluster /usr/local/heartbeat/libexec/heartbeat/ipfail
*********************************
# 配置解释
# "ping ping-node"需要和"respawn user /path/to/ipfail"搭配使用:当本节点无法ping通ping-node时,ipfail会触发资源切换到对端节点
 
# 拷贝配置到cluster1上去
scp authkeys ha.cf haresources root@cluster1:/etc/ha.d/ha.d 
# cluster1配置
vim /etc/ha.d/ha.d/ha.cf
******************************
ucast eth0 10.10.180.221
****************************** 

4. 服务启动

# 因为我们配置的是做nginx的HA,所以提前安装nginx,并确保/etc/init.d中有nginx的启动脚本,名称为nginx
yum install epel-release
yum install nginx -y
 
# 先启动master
/etc/init.d/heartbeat start
# 再启动cluster1
/etc/init.d/heartbeat start
 
# 查看master上的网卡
ifconfig
eth0      Link encap:Ethernet  HWaddr 00:0C:29:6B:32:10
          inet addr:10.10.180.221  Bcast:10.10.180.255  Mask:255.255.255.0
          inet6 addr: fe80::20c:29ff:fe6b:3210/64 Scope:Link
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:136017 errors:0 dropped:0 overruns:0 frame:0
          TX packets:57592 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:119110190 (113.5 MiB)  TX bytes:5460405 (5.2 MiB)
 
eth0:0    Link encap:Ethernet  HWaddr 00:0C:29:6B:32:10
          inet addr:10.10.180.220  Bcast:10.10.180.255  Mask:255.255.255.0
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
 
lo        Link encap:Local Loopback
          inet addr:127.0.0.1  Mask:255.0.0.0
          inet6 addr: ::1/128 Scope:Host
          UP LOOPBACK RUNNING  MTU:16436  Metric:1
          RX packets:2 errors:0 dropped:0 overruns:0 frame:0
          TX packets:2 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:0
          RX bytes:168 (168.0 b)  TX bytes:168 (168.0 b)
 
# master上会自动启动nginx服务
ps aux |grep nginx
root      46884  0.0  0.1  96468  1972 ?        Ss   09:43   0:00 nginx: master process /usr/sbin/nginx -c /etc/nginx/nginx.conf
nginx     46885  0.0  0.2  96856  2628 ?        S    09:43   0:00 nginx: worker process
root      46889  0.0  0.0 103248   840 pts/2    S+   09:44   0:00 grep nginx 

5. 切换效果检测

# 在master和cluster1上分别修改index.html文件以便识别
 
# master上
cd /usr/share/nginx/html/
mv index.html index.html.bak
echo "this is master\!" > index.html
 
# cluster1上
cd /usr/share/nginx/html/
mv index.html index.html.bak
echo "this is cluster1 \!" > index.html

访问效果:

# master上停掉heartbeat节点
/etc/init.d/heartbeat stop
# 发现master上的eth0:0停掉,nginx关闭
# 而cluster1上的eth0:0启动,nginx启动

访问效果:

重新启动master上的heartbeat服务后,因为我们配置了auto_failback on,所以master会自动把nginx接管回来。

实验:停掉master的network服务,也会自动切换到cluster1。

实验:在master上用iptables禁ping("iptables -I INPUT -p icmp -j DROP"),也会切换到cluster1。

6. 安装中的错误

glue make 错误1

错误信息:

make
Making all in include
...
gmake[1]: *** No rule to make target `all'.  Stop.
gmake[1]: Leaving directory `/usr/local/src/Reusable-Cluster-Components-glue--glue-1.0.12/libltdl'
make: *** [all-recursive] Error 1

解决办法:

yum install libtool-ltdl-devel

glue make 错误2

错误信息:

./.libs/libplumb.so: undefined reference to `uuid_parse'
./.libs/libplumb.so: undefined reference to `uuid_generate'
./.libs/libplumb.so: undefined reference to `uuid_copy'
./.libs/libplumb.so: undefined reference to `uuid_is_null'
./.libs/libplumb.so: undefined reference to `uuid_unparse'
./.libs/libplumb.so: undefined reference to `uuid_clear'
./.libs/libplumb.so: undefined reference to `uuid_compare'
collect2: ld returned 1 exit status
gmake[2]: *** [ipctest] Error 1
gmake[2]: Leaving directory `/usr/local/src/Reusable-Cluster-Components-glue--glue-1.0.12/lib/clplumbing'
gmake[1]: *** [all-recursive] Error 1
gmake[1]: Leaving directory `/usr/local/src/Reusable-Cluster-Components-glue--glue-1.0.12/lib'
make: *** [all-recursive] Error 1

解决办法:

./configure ... LIBS='/lib64/libuuid.so.1'

glue make 错误3

错误信息:

gmake[2]: a2x: Command not found
gmake[2]: *** [hb_report.8] Error 127
gmake[2]: Leaving directory `/usr/local/src/Reusable-Cluster-Components-glue--glue-1.0.12/doc'
gmake[1]: *** [all-recursive] Error 1
gmake[1]: Leaving directory `/usr/local/src/Reusable-Cluster-Components-glue--glue-1.0.12/doc'
make: *** [all-recursive] Error 1

解决办法:

# 下载安装a2x(https://sourceforge.net/projects/asciidoc/?source=dlp)
wget http://jaist.dl.sourceforge.net/project/asciidoc/asciidoc/8.6.9/asciidoc-8.6.9.zip
unzip asciidoc-8.6.9.zip
cd asciidoc-8.6.9
./configure
make && make install

heartbeat make错误

错误信息:

glue_config.h:105:1: error: "HA_HBCONF_DIR" redefined

解决办法:

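# 删除/usr/local/heartbeat/include/heartbeat/glue_config.h的最后一行,即HA_HBCONF_DIR的定义
# (具体路径取决于configure时的--sysconfdir:按本文的--sysconfdir=/etc/ha.d,该行应为"/etc/ha.d/ha.d/";其他配置下可能类似下面这样):
#define HA_HBCONF_DIR "/usr/local/heartbeat/etc/ha.d/"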