[root@cs-node02 ~]# /etc/init.d/cman stop

Stopping cluster:
Leaving fence domain... found dlm lockspace /sys/kernel/dlm/rgmanager
fence_tool: cannot leave due to active systems
[FAILED]

[root@cs-node02 ~]# /etc/init.d/rgmanager stop

[root@cs-node02 ~]# /etc/init.d/gfs2 stop

[root@cs-node02 ~]# /etc/init.d/clvm stop

[root@cs-node02 ~]# /etc/init.d/cman stop

[root@cs-node02 ~]# dlm_tool leave rgmanager

[root@cs-node02 ~]# /etc/init.d/cman start

[root@cs-node02 ~]# /etc/init.d/clvm start

[root@cs-node02 ~]# /etc/init.d/gfs2 start

[root@cs-node02 ~]# /etc/init.d/rgmanager start

[root@cs-node02 ~]# clusvcadm -e svc01
or
[root@cs-node02 ~]# clusvcadm -e svc01 -m node01-hb

/etc/init.d/rgmanager stop ; /etc/init.d/cman stop
/etc/init.d/cman start ; /etc/init.d/rgmanager start

rm -f /var/lib/ricci/queue/597545572

####################################################################################
-kdump 발생으로 인하여 절체되는 경우 : 정남기 과정이 전달해줌 (cluster.conf 에서 kdump_fence는 메소드를 분리)
파란색 표시와 같이 응답대기시간을 60초로 하였을 경우
60초 이전에 received 메시지를 받으면 fence se_ossdb2-HB success 라는 메시지가 나오며,
실제 fence는 이루어지지 않지만 서비스는 정상적으로 이관되고, kdump를 수행하는 대상 서버에서는
kdump core 파일 생성이 완료되면 정상적으로 reboot 하여 cluster 멤버의 일원이 된다.

Jul 4 15:07:41 se_ossdb1 fenced[2151]: fencing node se_ossdb2-HB
Jul 4 15:07:41 se_ossdb1 fence_kdump[3792]: waiting for message from '192.168.10.72' ---> 핫빗 아이피
Jul 4 15:07:48 se_ossdb1 fence_kdump[3792]: received valid message from '192.168.10.72'
Jul 4 15:07:48 se_ossdb1 fenced[2151]: fence se_ossdb2-HB success
Jul 4 15:07:49 se_ossdb1 rgmanager[2526]: Taking over service service:DB from down member se_ossdb2-HB

[root@se_ossdb1 cluster]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.0.71 RHCS-NODE01
192.168.0.72 RHCS-NODE02
192.168.10.11 fence1
192.168.10.12 fence2
192.168.10.71 RHCS-NODE01-HB
192.168.10.72 RHCS-NODE02-HB
#192.168.20.71 RHCS-NODE01-HB02
#192.168.20.72 RHCS-NODE02-HB02
192.168.0.155 eusamsdb01-HB
192.168.0.156 eusamsdb02-HB

####################################################################################

 

## Method 통합 시 아래와 같이 동작한다. 하지만 node01로부터 kdump 관련 메시지(receive)를 받지 못함.

Jan 15 15:12:02 cs-node02 corosync[8181]: [TOTEM ] A processor failed, forming new configuration.
Jan 15 15:12:04 cs-node02 corosync[8181]: [QUORUM] Members[1]: 2
Jan 15 15:12:04 cs-node02 corosync[8181]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
Jan 15 15:12:04 cs-node02 corosync[8181]: [CPG ] chosen downlist: sender r(0) ip(192.168.26.102) ; members(old:2 left:1)
Jan 15 15:12:04 cs-node02 corosync[8181]: [MAIN ] Completed service synchronization, ready to provide service.
Jan 15 15:12:04 cs-node02 rgmanager[8637]: State change: node01-hb DOWN
Jan 15 15:12:04 cs-node02 kernel: dlm: closing connection to node 1
Jan 15 15:12:04 cs-node02 fenced[8245]: fencing node node01-hb
Jan 15 15:12:52 cs-node02 fence_kdump[14843]: waiting for message from '192.168.26.101'  -> 한참 뒤 waiting을 시작한다.
Jan 15 15:13:51 cs-node02 corosync[8181]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
Jan 15 15:13:51 cs-node02 corosync[8181]: [QUORUM] Members[2]: 1 2
Jan 15 15:13:51 cs-node02 corosync[8181]: [QUORUM] Members[2]: 1 2
Jan 15 15:13:51 cs-node02 corosync[8181]: [CPG ] chosen downlist: sender r(0) ip(192.168.26.101) ; members(old:1 left:0)
Jan 15 15:13:51 cs-node02 corosync[8181]: [MAIN ] Completed service synchronization, ready to provide service.
Jan 15 15:15:52 cs-node02 fence_kdump[14843]: timeout after 180 seconds
Jan 15 15:15:52 cs-node02 fenced[8245]: fence node01-hb dev 0.1 agent fence_kdump result: error from agent
Jan 15 15:15:52 cs-node02 fenced[8245]: fence node01-hb failed
Jan 15 15:15:56 cs-node02 rgmanager[8637]: Waiting for node 1 to reboot

## Method 분리 시 아래와 같이 동작한다. waiting for message를 시도하지 않으며, node01로부터 kdump 관련 메시지(receive)를 받지 못함.
Jan 15 15:24:31 cs-node02 corosync[25304]: [TOTEM ] A processor failed, forming new configuration.
Jan 15 15:24:33 cs-node02 corosync[25304]: [QUORUM] Members[1]: 2
Jan 15 15:24:33 cs-node02 corosync[25304]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
Jan 15 15:24:33 cs-node02 corosync[25304]: [CPG ] chosen downlist: sender r(0) ip(192.168.26.102) ; members(old:2 left:1)
Jan 15 15:24:33 cs-node02 corosync[25304]: [MAIN ] Completed service synchronization, ready to provide service.
Jan 15 15:24:33 cs-node02 rgmanager[25529]: State change: node01-hb DOWN
Jan 15 15:24:33 cs-node02 kernel: dlm: closing connection to node 1
Jan 15 15:24:34 cs-node02 fenced[25358]: fencing node node01-hb
Jan 15 15:25:19 cs-node02 fenced[25358]: fence node01-hb success
Jan 15 15:25:20 cs-node02 rgmanager[25529]: Taking over service service:svc01 from down member node01-hb
Jan 15 15:25:22 cs-node02 rgmanager[26710]: [fs] mounting /dev/dm-2 on /share
Jan 15 15:25:23 cs-node02 kernel: EXT4-fs (dm-2): mounted filesystem with ordered data mode. Opts:
Jan 15 15:25:24 cs-node02 rgmanager[26816]: [ip] Adding IPv4 address 192.168.22.103/24 to eth0
Jan 15 15:25:27 cs-node02 ntpd[14762]: Listen normally on 11 eth0 192.168.22.103 UDP 123
Jan 15 15:25:27 cs-node02 ntpd[14762]: peers refreshed
Jan 15 15:25:27 cs-node02 rgmanager[26901]: [script] Executing /etc/init.d/mysql start
Jan 15 15:25:32 cs-node02 rgmanager[25529]: Service service:svc01 started

 

 

한참 뒤에야 waiting을 시작하는 문제 때문에, kdump 소요 시간을 늘리기 위해 아래와 같이 진행했다.

1) kdump 시간을 늘리기 위해 kdump.conf를 수정하여 덤프 레벨(-d)을 낮춘다. (제외되는 페이지가 없어져 덤프 크기가 커진다.)

$ vim /etc/kdump.conf

core_collector makedumpfile -c --message-level 1 -d 0 (기존 31에서 0으로 수정)

 

2) 메모리 사용량 늘리기

아래는 메모리 사용량을 강제로 올려 덤프 시간을 늘려주기 위한 코드이다.

$ vim memory.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Memory hog: allocates 1 MiB blocks one at a time, fills each with zeros,
 * then sleeps before exiting.
 *
 * Usage: memory [max_mb [sleep_seconds]]
 *   argv[1]  number of 1 MiB blocks to allocate. Defaults to -1; a negative
 *            value never equals the block counter, so allocation continues
 *            until malloc() returns NULL.
 *   argv[2]  seconds to sleep after the allocation loop. Defaults to 2.
 *
 * Both arguments are parsed with atoi(), so non-numeric text is read as 0.
 * Allocated blocks are deliberately never freed: the pointer is overwritten
 * on every iteration and the memory stays held until the process exits.
 *
 * Always returns 0.
 */
int main(int argc, char** argv) {
    int max = -1;
    int sleep_interval = 2;
    int mb = 0;
    char* buffer;

    if(argc > 1)
        max = atoi(argv[1]);
    if(argc > 2)
        sleep_interval = atoi(argv[2]);

    /*
     * Test the limit BEFORE calling malloc(). With the operands the other
     * way round, reaching the limit still allocated one extra block that
     * was never written or counted (and max == 0 allocated a block too).
     */
    while(mb != max && (buffer=malloc(1024*1024)) != NULL) {
        /* Write every byte of the block (presumably so the pages are
         * actually committed rather than merely reserved). */
        memset(buffer, 0, 1024*1024);
        mb++;
        printf("Allocated %d MB\n", mb);
    }
    printf("sleeping for %d\n",sleep_interval);
    sleep(sleep_interval);
    return 0;
}

 

3) 컴파일하고 실행하여 메모리 사용량을 올린다.

$ gcc -o memory memory.c

$ ./memory

 

4) 패닉 상황 만들어서 kdump가 동작되게 함.

$ echo c > /proc/sysrq-trigger

rhcs kdump 테스트

답글 남기기

이메일 주소는 공개되지 않습니다. 필수 필드는 *로 표시됩니다