3.2cluster部署
1、在对应的机器,下载、解压redis
#详见sentinel部署内相关命令
2、创建对应的文件目录
mkdir -p /usr/local/redis5/cluster/data /usr/local/redis5/cluster/data/conf /usr/local/redis5/cluster/log /usr/local/redis5/cluster/run /usr/local/redis5/cluster/script
3、修改redis.conf配置文件(以7000端口为例)
#【network】
#端口号
port 7000
#绑定redis监听的本机网卡IP(bind指定的是本机监听地址,而不是允许访问的客户端IP;每个节点应配置自己所在机器的IP)
bind 172.32.1.59 172.32.1.91 172.32.1.140
#设置客户端连接时的超时时间,单位为秒,默认值为0,表示不关闭,生产环境按需配置
timeout 0
#【SECURITY】
#为当前redis节点配置密码,生产环境一定要设置
requirepass dt2020
#命令重命名,可将一些危险命令禁用或重命名(新命令名设为""表示禁用该命令)
rename-command <command> <new-name>
#【GENERAL】
#设置为yes表示指定Redis以守护进程的方式启动(后台启动),默认为no
daemonize yes
#pid文件路径
pidfile /usr/local/redis5/cluster/run/redis_7000.pid
#系统日志文件路径
logfile "/usr/local/redis5/cluster/log/error_7000.log"
#【SNAPSHOTTING】 //这里的配置与持久化有关
#数据目录,数据库的写入会在这个目录。rdb、aof文件以及redis cluster模式下的node.conf文件均会创建在这个目录下
dir /usr/local/redis5/cluster/data/redis_7000
#rdb的文件名
dbfilename dump_7000.rdb
#开启aof,默认不开启,生产环境一定要开启
appendonly yes
#aof的文件名
appendfilename "appendonly_7000.aof"
#当redis重写aof文件时,redis可以先读一个rdb来加快重写的速度,当这个选项打开时,重写的aof文件由两部分组成:rdb文件+aof文件。
aof-use-rdb-preamble yes
#【replication】
#设置master端的IP与端口信息(slave端需配置),cluster集群的从节点可不配置,开启集群时可指定主从关系
replicaof <master ip> <master port>
#主节点的密码,如果master设置了密码(requirepass),slave端需要配置此选项(slave端需配置),cluster集群的从节点可不配置,开启集群时可指定主从关系
masterauth <master-password>
#【slow_log】
#慢日志阈值,超过该值则记录到慢日志,单位us,如果该值=0则记录所有命令,如果<0则不记录任何命令;默认10ms
slowlog-log-slower-than 10000
#慢查询列表,表示慢日志最大能存储多少条命令,线上建议调大
slowlog-max-len 128
#【MEMORY MANAGEMENT】
#表示redis最大可用内存,可带B、K、M、G等单位,不带单位则表示字节;如果maxmemory值为0,表示不做限制;一般推荐Redis设置内存为最大物理内存的四分之三
maxmemory <bytes>
#表示当内存使用达到最大值时,redis使用的过期策略,建议设置为 volatile-lru
maxmemory-policy volatile-lru
#【cluster集群配置】
#设置为yes表示该节点将开启集群
cluster-enabled yes
#集群内部配置文件,不用人为修改,是集群节点自动维护的文件,会记录集群有哪些节点、节点状态等
cluster-config-file 'node-7000.conf'
#节点超时时间,单位毫秒,超过该时间,节点被认为故障,如果主节点超过该时间还是不可达,从节点会升为主节点
cluster-node-timeout 15000
#在部分key所在的节点不可用时,如果此参数设置为"yes"(默认值),则整个集群停止接受操作;如果此参数设置为"no",则集群依然为仍被覆盖的槽位上的key提供服务
cluster-require-full-coverage no
#如果设置成0,则无论从节点与主节点失联多久,从节点都会尝试升级成主节点,此时能保证最高的可用性,但从节点与主节点的数据可能差别较大。
#如果设置成正数,则cluster-node-timeout乘以cluster-replica-validity-factor得到的时间,是从节点与主节点失联后,此从节点数据有效的最长时间,超过这个时间,从节点不会启动故障迁移。
#注意,如果此参数配置为非0,将可能出现由于某主节点失联却没有从节点能顶上的情况,从而导致集群不能正常工作,在这种情况下,只有等到原来的主节点重新回归到集群,集群才恢复运作。
cluster-replica-validity-factor 10
#迁移临界点:从节点可以迁移至其它没有从节点的主节点(孤立主节点),该参数为1表示,某个从节点,只有在其主节点还有另外1个多余的从节点时,才可以被迁移至孤立主节点
cluster-migration-barrier 1
4、启动6个节点的redis
$redis_file/src/redis-server $redis_file/conf/redis_$port.conf
5、开启集群
#redis5可以用redis-cli --cluster命令一键开启集群
#--cluster-replicas 1指定每个主节点有一个从节点,-a指定密码,从前到后的6个IP:port,分别对应主1、主2、主3、从1、从2、从3
redis-cli --cluster create 172.32.1.59:7000 172.32.1.91:7001 172.32.1.140:7002 172.32.1.140:7011 172.32.1.59:7012 172.32.1.91:7010 --cluster-replicas 1 -a dt2020
#redis4可以用redis-trib一键开启集群
【redis-trib】
1.先安装依赖
#redis-trib.rb是由ruby语言编写的,需要安装ruby环境
yum install zlib-devel
yum install openssl-devel
#redis-trib.rb是由ruby语言编写的,需要安装ruby环境
2.安装ruby
tar zxf ruby.xxx.tar.gz
./configure --prefix=/usr/local/ruby
make
make install
3.gem安装redis接口
gem install redis
4.用redis-trib一键开启集群
redis-trib.rb create --replicas 1 172.32.1.59:7000 172.32.1.91:7001 172.32.1.140:7002 172.32.1.140:7011 172.32.1.59:7012 172.32.1.91:7010
4、故障转移
4.1 redis-sentinel故障转移
1、查看sentinel的状态信息
# redis-cli -p 26379
172.32.1.59:26379> info sentinel
# Sentinel
sentinel_masters:1#监控master的数量
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
#监控的相关信息,master节点为172.32.1.59:6379,有2个slave节点,有3个sentinel节点
master0:name=mymaster,status=ok,address=172.32.1.59:6379,slaves=2,sentinels=3
2、查看当前复制状态信息:
# redis-cli -p 6379
172.32.1.59:6379> auth dt2020
OK
172.32.1.59:6379> info replication
# Replication
role:master
connected_slaves:2
slave0:ip=172.32.1.91,port=6379,state=online,offset=85035377,lag=1
slave1:ip=172.32.1.140,port=6379,state=online,offset=85035510,lag=1
master_replid:cd6fead5e104abef2c74d41d645fc990fbfb5c4c
master_replid2:da9cb3d76a3fe842ce2ec418fd0daffe5f3d3b83
master_repl_offset:85035776
second_repl_offset:1602123
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:83987201
repl_backlog_histlen:1048576
3、模拟故障:
登录sentinel节点
redis-cli -p 26379
>sentinel failover mymaster
OK
或直接kill redis的主节点
4、查看sentinel日志:
13117:X 28 Apr 2020 17:45:00.239 # Executing user requested FAILOVER of 'mymaster'
13117:X 28 Apr 2020 17:45:00.239 # +new-epoch 3
13117:X 28 Apr 2020 17:45:00.239 # +try-failover master mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:00.281 # +vote-for-leader 6802ff42c75720ed5e818b45b5ac6168228a037e 3
13117:X 28 Apr 2020 17:45:00.281 # +elected-leader master mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:00.281 # +failover-state-select-slave master mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:00.333 # +selected-slave slave 172.32.1.140:6379 172.32.1.140 6379 @ mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:00.333 * +failover-state-send-slaveof-noone slave 172.32.1.140:6379 172.32.1.140 6379 @ mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:00.395 * +failover-state-wait-promotion slave 172.32.1.140:6379 172.32.1.140 6379 @ mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:01.332 # +promoted-slave slave 172.32.1.140:6379 172.32.1.140 6379 @ mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:01.332 # +failover-state-reconf-slaves master mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:01.380 * +slave-reconf-sent slave 172.32.1.91:6379 172.32.1.91 6379 @ mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:02.370 * +slave-reconf-inprog slave 172.32.1.91:6379 172.32.1.91 6379 @ mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:02.370 * +slave-reconf-done slave 172.32.1.91:6379 172.32.1.91 6379 @ mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:02.440 # +failover-end master mymaster mymaster 172.32.1.59 6379
13117:X 28 Apr 2020 17:45:02.440 # +switch-master mymaster 172.32.1.59 6379 172.32.1.140 6379
13117:X 28 Apr 2020 17:45:02.440 * +slave slave 172.32.1.91:6379 172.32.1.91 6379 @ mymaster 172.32.1.140 6379
13117:X 28 Apr 2020 17:45:02.440 * +slave slave 172.32.1.59:6379 172.32.1.59 6379 @ mymaster 172.32.1.140 6379
5、查看复制状态,172.32.1.59:6379已变为slave节点,172.32.1.140:6379变为master节点
172.32.1.59:6379> info replication
# Replication
role:slave
master_host:172.32.1.140
master_port:6379
master_link_status:up
master_last_io_seconds_ago:1
master_sync_in_progress:0
slave_repl_offset:85117164
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:2bcfea0c73779200d95b35bc3c8fd241a9b444a2
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:85117164
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:85088603
repl_backlog_histlen:28562
4.2redis-cluster故障转移
1、查看当前集群状态:
# redis-cli --cluster check 172.32.1.59:7000 -a dt2020
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
172.32.1.59:7000 (372c6721...) -> 0 keys | 5461 slots | 1 slaves.
172.32.1.91:7001 (f5d02386...) -> 0 keys | 5462 slots | 1 slaves.
172.32.1.140:7002 (75427ec8...) -> 0 keys | 5461 slots | 1 slaves.
[OK] 0 keys in 3 masters.
0.00 keys per slot on average.
>>> Performing Cluster Check (using node 172.32.1.59:7000)
M: 372c67215d9ebcbd3e54c80120263a7300fa173a 172.32.1.59:7000
slots:[0-5460] (5461 slots) master
1 additional replica(s)
S: 3257571369e23dd7ed6a69b79d8f40d095b5982b 172.32.1.59:7012
slots: (0 slots) slave
replicates f5d0238686f868684ff55910229cec62d64bc3fd
M: f5d0238686f868684ff55910229cec62d64bc3fd 172.32.1.91:7001
slots:[5461-10922] (5462 slots) master
1 additional replica(s)
M: 75427ec80ad8634c4f1bd95c5fb35c1349bdd23d 172.32.1.140:7002
slots:[10923-16383] (5461 slots) master
1 additional replica(s)
S: 3ae32dd1aa45786118052cfa65963b7467c68e9d 172.32.1.140:7011
slots: (0 slots) slave
replicates 372c67215d9ebcbd3e54c80120263a7300fa173a
S: c1ae0f9c1b2ccdbed0c054d83653e1aeae34a408 172.32.1.91:7010
slots: (0 slots) slave
replicates 75427ec80ad8634c4f1bd95c5fb35c1349bdd23d
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
可以发现当前集群中主从的关系是
172.32.1.59:7000(master)---172.32.1.140:7011(slave)
172.32.1.91:7001(master)---172.32.1.59:7012(slave)
172.32.1.140:7002(master)---172.32.1.91:7010(slave)
2、手动模拟故障
CLUSTER FAILOVER命令进行手动故障转移,会将主从节点的关系进行互换,需要在要进行故障转移的主服务器的其中一个从服务器执行
登录7011从节点,执行cluster failover,再次查看集群状态信息,发现7011与7000的主从关系发生互换,7011变为主节点
# redis-cli --cluster check 172.32.1.59:7000 -a dt2020
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
172.32.1.91:7001 (f5d02386...) -> 0 keys | 5462 slots | 1 slaves.
172.32.1.140:7002 (75427ec8...) -> 0 keys | 5461 slots | 1 slaves.
172.32.1.140:7011 (3ae32dd1...) -> 0 keys | 5461 slots | 1 slaves.
[OK] 0 keys in 3 masters.
0.00 keys per slot on average.
>>> Performing Cluster Check (using node 172.32.1.59:7000)
S: 372c67215d9ebcbd3e54c80120263a7300fa173a 172.32.1.59:7000
slots: (0 slots) slave
replicates 3ae32dd1aa45786118052cfa65963b7467c68e9d
S: 3257571369e23dd7ed6a69b79d8f40d095b5982b 172.32.1.59:7012
slots: (0 slots) slave
replicates f5d0238686f868684ff55910229cec62d64bc3fd
M: f5d0238686f868684ff55910229cec62d64bc3fd 172.32.1.91:7001
slots:[5461-10922] (5462 slots) master
1 additional replica(s)
M: 75427ec80ad8634c4f1bd95c5fb35c1349bdd23d 172.32.1.140:7002
slots:[10923-16383] (5461 slots) master
1 additional replica(s)
M: 3ae32dd1aa45786118052cfa65963b7467c68e9d 172.32.1.140:7011
slots:[0-5460] (5461 slots) master
1 additional replica(s)
S: c1ae0f9c1b2ccdbed0c054d83653e1aeae34a408 172.32.1.91:7010
slots: (0 slots) slave
replicates 75427ec80ad8634c4f1bd95c5fb35c1349bdd23d
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
当前集群主从关系是:
172.32.1.140:7011(master)---172.32.1.59:7000(slave)
172.32.1.91:7001(master)---172.32.1.59:7012(slave)
172.32.1.140:7002(master)---172.32.1.91:7010(slave)
3、模拟主节点故障
目前集群中7001是主节点,7012是该节点的从节点,模拟7001主节点故障
# redis-cli -p 7001 -a dt2020 debug segfault  (或直接kill掉7001节点的redis进程)
4、7001主节点故障后,再次检查集群状态,发现原slave节点7012变为主节点,整个集群还是可以提供正常的服务
# redis-cli --cluster check 172.32.1.59:7000 -a dt2020
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
Could not connect to Redis at 127.0.0.1:7001: Connection refused
172.32.1.59:7012 (32575713...) -> 0 keys | 5462 slots | 0 slaves.
172.32.1.140:7002 (75427ec8...) -> 0 keys | 5461 slots | 1 slaves.
172.32.1.140:7011 (3ae32dd1...) -> 0 keys | 5461 slots | 1 slaves.
[OK] 0 keys in 3 masters.
0.00 keys per slot on average.
>>> Performing Cluster Check (using node 172.32.1.59:7000)
S: 372c67215d9ebcbd3e54c80120263a7300fa173a 172.32.1.59:7000
slots: (0 slots) slave
replicates 3ae32dd1aa45786118052cfa65963b7467c68e9d
M: 3257571369e23dd7ed6a69b79d8f40d095b5982b 172.32.1.59:7012
slots:[5461-10922] (5462 slots) master
M: 75427ec80ad8634c4f1bd95c5fb35c1349bdd23d 172.32.1.140:7002
slots:[10923-16383] (5461 slots) master
1 additional replica(s)
M: 3ae32dd1aa45786118052cfa65963b7467c68e9d 172.32.1.140:7011
slots:[0-5460] (5461 slots) master
1 additional replica(s)
S: c1ae0f9c1b2ccdbed0c054d83653e1aeae34a408 172.32.1.91:7010
slots: (0 slots) slave
replicates 75427ec80ad8634c4f1bd95c5fb35c1349bdd23d
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
5、重新启动 7001 节点
redis-server redis_7001.conf
再次观察集群状态,发现7001节点已变为7012的slave节点
# redis-cli --cluster check 172.32.1.59:7000 -a dt2020
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
172.32.1.59:7012 (32575713...) -> 0 keys | 5462 slots | 1 slaves.
172.32.1.140:7002 (75427ec8...) -> 0 keys | 5461 slots | 1 slaves.
172.32.1.140:7011 (3ae32dd1...) -> 0 keys | 5461 slots | 1 slaves.
[OK] 0 keys in 3 masters.
0.00 keys per slot on average.
>>> Performing Cluster Check (using node 172.32.1.59:7000)
S: 372c67215d9ebcbd3e54c80120263a7300fa173a 172.32.1.59:7000
slots: (0 slots) slave
replicates 3ae32dd1aa45786118052cfa65963b7467c68e9d
M: 3257571369e23dd7ed6a69b79d8f40d095b5982b 172.32.1.59:7012
slots:[5461-10922] (5462 slots) master
1 additional replica(s)
S: f5d0238686f868684ff55910229cec62d64bc3fd 172.32.1.91:7001
slots: (0 slots) slave
replicates 3257571369e23dd7ed6a69b79d8f40d095b5982b
M: 75427ec80ad8634c4f1bd95c5fb35c1349bdd23d 172.32.1.140:7002
slots:[10923-16383] (5461 slots) master
1 additional replica(s)
M: 3ae32dd1aa45786118052cfa65963b7467c68e9d 172.32.1.140:7011
slots:[0-5460] (5461 slots) master
1 additional replica(s)
S: c1ae0f9c1b2ccdbed0c054d83653e1aeae34a408 172.32.1.91:7010
slots: (0 slots) slave
replicates 75427ec80ad8634c4f1bd95c5fb35c1349bdd23d
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
当前集群主从关系是:
172.32.1.140:7011(master)---172.32.1.59:7000(slave)
172.32.1.59:7012(master)---172.32.1.91:7001(slave)
172.32.1.140:7002(master)---172.32.1.91:7010(slave)