diff --git a/SAPHanaSR-angi.spec b/SAPHanaSR-angi.spec index a70bfa5f..ae238c17 100644 --- a/SAPHanaSR-angi.spec +++ b/SAPHanaSR-angi.spec @@ -21,7 +21,7 @@ License: GPL-2.0 Group: Productivity/Clustering/HA AutoReqProv: on Summary: Resource agents to control the HANA database in system replication setup -Version: 1.2.4 +Version: 1.2.5 Release: 0 Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/ diff --git a/man/SAPHanaSR-replay-archive.8 b/man/SAPHanaSR-replay-archive.8 index 48d03ad3..b6e3312a 100644 --- a/man/SAPHanaSR-replay-archive.8 +++ b/man/SAPHanaSR-replay-archive.8 @@ -1,13 +1,15 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-replay-archive 8 "13 Apr 2023" "" "SAPHanaSR" +.TH SAPHanaSR-replay-archive 8 "09 Feb 2024" "" "SAPHanaSR" .\" .SH SYNOPSIS \fBSAPHanaSR-replay-archive\fR [ --help | --version ] .br -\fBSAPHanaSR-replay-archive\fR [--sid=LIST_OF_SIDS] CRM_REPORT_ARCHIVE +\fBSAPHanaSR-replay-archive\fR --sid \fISID\fR --cib \fICIB\fR [\fICIB\fR [...]] +[--format \fIFORMAT\fR] [--select \fICOLUMN\fR] [--sort \fISORT\fR] +[--properties \fIPROPERTIES\fR] [--from \fIFROM\fR] [--to \fITO\fR] .br -\fBSAPHanaSR-replay-archive\fR [--sid=LIST_OF_SIDS] --pengine=PATH_TO_PENGINE_FILES + .\" .SH DESCRIPTION SAPHanaSR-replay-archive can be used to re-play SAPHanaSR status changes info @@ -18,18 +20,36 @@ files and shows status information in historical order. Basically SAPHanaSR-show is called for each pe-input file. .\" .SH OPTIONS -.HP -\fB --help\fR - show help. -.HP -\fB --version\fR - show version. -.HP -\fB --sid=\fRLIST_OF_SIDS - Option to specify a list of SIDs -.HP -\fB --format=\fRFORMAT - Output format (tables, book or script). +.TP +\fB--help\fR +show help +.TP +\fB--version\fR +show version +.TP +\fB--sid\fR \fISID\fR +specify the SID to check for +.TP +\fB--cib\fR [ \fICIB\fR [\fICIB\fR [...]] | ./pengine/* ] +specify the path to CIB file(s) +.TP +\fB--format\fR [ table | path | script | json ] +output format, default is table. +.TP +\fB--from\fR "\fIYYYY-MM-DD HH:MM:SS\fR" +from which point in time to show +.TP +\fB--to\fR "\fIYYYY-MM-DD HH:MM:SS\fR" +to which point in time to show +.TP +\fB--sort\fR \fICOLUMN\fR +specify the column name to sort by +.TP +\fB--select\fR [ default | test | minimal | sr | all ] +selecton of attributes to be printed, default is default +.TP +\fB--properties\fR \fIPROPERTIES\fR + specify the properties file .\" .SH RETURN CODES .B 0 @@ -39,25 +59,25 @@ Successful program execution. Usage, syntax or execution errors. .\" .SH EXAMPLES -.TP -* Replay SAPHanaSR-showAttr data from all pe-input files in hb_report-06-06-2019.tar.bz2 . -# SAPHanaSR-replay-archive hb_report-06-06-2019.tar.bz2 -.TP -* Show system replication status history from an HANA scale-out cluster. -# SAPHanaSR-replay-archive hb_report-06-06-2019.tar.bz2 | grep ^global -.\".TP -.\"* Show system replication status history from an HANA scale-up cluster. -.\"# SAPHanaSR-replay-archive hb_report-08-05-2019.tar.bz2 | grep -A13 ^global SAPHanaSR-replay-archive.txt | awk '$1=="global"{print "@",$0}; $11=="SOK"||$11=="SFAIL"||$11=="SWAIT"||$11=="WAIT4PRIM"{print $11}' | tr -d "\\n" | tr "@" "\\n" -.\".TP -.\"* Show system replication primary from an HANA scale-up cluster. -.\"# SAPHanaSR-showAttr hb_report-08-05-2019.tar.bz2 | grep -A13 ^global SAPHanaSR-replay-archive.txt | awk '$1=="global"{print "@",$0}; $2=="PROMOTED"{print $1,$2}' | tr -d "\\n" | tr "@" "\\n" -.TP -* Filter replay of SAPHanaSR actions from hb_report for a specific host. 
-# SAPHanaSR-replay-archive --format=script hb_suse.tar.bz2 |\\ - SAPHanaSR-filter --search='Hosts/lv9054/role' --filterDouble -.TP -* Replay SAPHanaSR-showAttr data from all pe-input files in local directory /var/lib/pacemaker/pengine/. -# SAPHanaSR-replay-archive --pengine=/var/lib/pacemaker/pengine/ +.PP +* Replay SAPHanaSR-showAttr data from crm_report. +.PP +Report has been extracted into directory crm_report-08-05-2024, SID is EVA, +DC node is node01. +.PP +.RS 2 +# SAPHanaSR-replay-archive --sid EVA --cib crm_report-08-05-2024/node1/pengine/* +.RE +.PP +* Replay SAPHanaSR-showAttr data from crm_report, use parseable output format. +.PP +Report has been extracted into directory crm_report-08-05-2024, SID is EVA, +DC node is node01, output format is "script". +.PP +.RS 2 +# SAPHanaSR-replay-archive --sid EVA --cib crm_report-08-05-2024/node1/pengine/* --format script +.RE +.PP .\" .SH FILES .TP @@ -70,26 +90,23 @@ the working horse. /var/lib/pacemaker/pengine/ usual local pengine log directory. .TP --tmp/ -per call created working directory. -.TP --tmp/hb_report_log//pengine/pe-input-.bz2 +//pengine/pe-input-.bz2 extracted pe-input files. .TP --tmp/hb_report_log//cib.xml +/cib.xml extracted CIB file. .\" .SH BUGS Feedback is welcome, please mail to feedback@suse.com. .SH SEE ALSO - \fBSAPHanaSR\fP(7), \fBSAPHanaSR-ScaleOut\fP(7) , \fBSAPHanaSR-showAttr\fP(8) , -\fBSAPHanaSR-filter\fP(8) , \fBcrm_report\fP(8) , \fBcrm_simulate\fP(8) +\fBSAPHanaSR-angi\fP(7), \fBSAPHanaSR-showAttr\fP(8) , +\fBcrm_report\fP(8) , \fBcrm_simulate\fP(8) .\" .SH AUTHORS A.Briel, F.Herschel, L.Pinne. .\" .SH COPYRIGHT -(c) 2019-2023 SUSE LLC +(c) 2019-2024 SUSE LLC .br SAPHanaSR-replay-archive comes with ABSOLUTELY NO WARRANTY. .br diff --git a/man/SAPHanaSR-showAttr.8 b/man/SAPHanaSR-showAttr.8 index 497677c4..6a5411ca 100644 --- a/man/SAPHanaSR-showAttr.8 +++ b/man/SAPHanaSR-showAttr.8 @@ -1,14 +1,15 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-showAttr 8 "20 Nov 2023" "" "SAPHanaSR" +.TH SAPHanaSR-showAttr 8 "24 Jan 2024" "" "SAPHanaSR" .\" .SH NAME SAPHanaSR-showAttr \- Shows Linux cluster attributes for SAP HANA system replication. .\" .SH SYNOPSIS -\fBSAPHanaSR-showAttr\fR [ --help | --version | --path2table ] +\fBSAPHanaSR-showAttr\fR [ --help | --version ] .br -\fBSAPHanaSR-showAttr\fR [ --sid=SID[:INO] ] [ --select=SELECTION ] [ --sort=FIELD ] [ --format=FORMAT ] [ --cib=OFFLINE_CIB_FILE ] +\fBSAPHanaSR-showAttr\fR [ --sid \fISID\fR ] [ --select \fISELECT\fR ] +[ --sort \fISORT\fR ] [ --format \fIFORMAT\fR ] [ --cib \fIOFFLINE_CIB_FILE\fR ] .\" .SH DESCRIPTION SAPHanaSR-showAttr shows Linux cluster attributes for SAP HANA system replication automation. 
@@ -55,7 +56,7 @@ maintenance status of Linux cluster resources (\fBmaintenance\fP) .br promotability of Linux cluster clone resources (\fBpromotable\fP) .br -target role of the Linux cluster promotable clone resources (\fBtarget-role\fP) +target role of the Linux cluster resources (\fBtarget-role\fP) .TP Sites section HANA site name (\fBSites\fP) @@ -68,7 +69,7 @@ HANA current master nameserver for that site (\fBmns\fP) .br HANA replication operation mode (\fBopMode\fP) .br -HANA system replication status from HA/DR provider hook (\fBsrHook\fP) +HANA system replication status from srConnectionChanged() (\fBsrHook\fP) .br HANA system replication mode (\fBsrMode\fP) .br @@ -96,9 +97,9 @@ HANA site where the host sits (\fBsite\fP) .br maintenance state of Linux cluster node (\fBstandby\fP) .br -HANA system replication takeover action, indicated by preTakeover() (\fBsra\fP) +HANA system replication takeover action from preTakeover() (\fBsra\fP) .br -HANA system replication takeover action history, (\fBsrah\fP) +HANA system replication takeover action history (\fBsrah\fP) .br Linux cluster node fence status (\fBterminate\fP) .br @@ -255,7 +256,9 @@ also ocf_suse_SAPHanaController(7). Value: [ started | disabled ] -Should the resource be started or stopped (disableld) by the Linux cluster. +Should the resource be started or stopped (disabled) by the Linux cluster. +The attribute is shown after it has been changed from the default. +The field might appear or disappear, depending on cluster maintenance tasks. .PP .B filter - SAPHanaController filter for logging @@ -527,24 +530,21 @@ show help. \fB --version\fR show version. .TP 4 -\fB --path2table\fR -convert script-style input pipe back into normal output. -.TP 4 -\fB --select=\fISELECTION\fR +\fB --select\fR \fISELECT\fR .\" TODO explain meaning of values show selected information only. Allowed values: [ all | default | minimal | sr | cluster | cluster2 | cluster3 ]. Default is default. .TP 4 -\fB --sid=\fISID\fR[:\fIINO\fR] +\fB --sid\fR \fISID\fR use SAP system ID \fISID\fR. Should be autodetected, if there is only one SAP HANA instance installed on the local cluster node. The SAP system ID is a 3 alphanum string with a valid SAP system name like SLE, HAE, FH1, C11, or P42. -Optional: Use SAP instance number \fIINO\fR. Should be autodetected, if there is only one SAP HANA instance installed on the local cluster node. The SAP instance number must be represented by a two digit numer like 00, 05 or 42. Some numbers ares not allowed, e.g. 98. +\." Optional: Use SAP instance number \fIINO\fR. Should be autodetected, if there is only one SAP HANA instance installed on the local cluster node. The SAP instance number must be represented by a two digit numer like 00, 05 or 42. Some numbers ares not allowed, e.g. 98. .TP 4 -\fB --sort=\fIFIELD\fR +\fB --sort\fR \fIFIELD\fR sort Hosts section table by field. Allowed values: [ roles | site ]. Default is sort by hostnames. .TP 4 -\fB --format=\fIFORMAT\fR +\fB --format\fR \fIFORMAT\fR output format. Allowed values: [ script | tables | json | tester ]. Default is tables. .TP 4 -\fB --cib=\fIOFFLINE_CIB_FILE\fR +\fB --cib\fR \fIOFFLINE_CIB_FILE\fR read data from given offline CIB file. .\" .SH RETURN CODES @@ -561,16 +561,16 @@ show all SAPHanaSR attributes and relevant cluster maintenance states. .br If the roles sub-fields are :::: landscapeHostConfiguration.py has not been able to detect the HANA roles during last recent RA monitor operation. Likely HANA was down or sudo adm failed. 
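+.TP 4
+# SAPHanaSR-showAttr --format script | grep -e srHook
+show only the srHook fields from the parseable script output. This is a minimal
+filtering sketch, the exact attribute paths may vary with the installed version.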
.TP 4 -# SAPHanaSR-showAttr --sort=roles +# SAPHanaSR-showAttr --sort roles show all SAPHanaSR attributes in the cluster and sort host table output by roles. .TP 4 -# SAPHanaSR-showAttr --sid=HA1:10 --cib=./hb_report-17-07-2019/grauenstein01/cib.xml +# SAPHanaSR-showAttr --sid HA1 --cib ./hb_report-17-07-2019/grauenstein01/cib.xml show all SAPHanaSR attributes for SAP System ID HA1 and instance number 10 from given CIB file. .TP 4 # SAPHanaSR-showAttr | grep -e master: -e worker: -e slave: show SAPHanaSR promotion scores on running nodes. .TP 4 -# SAPHanaSR-showAttr --format=script | egrep -v '/(version|op_mode|vhost|remoteHost|node_state|site)=' | SAPHanaSR-showAttr --path2table +# SAPHanaSR-showAttr --format script | egrep -v '/(version|op_mode|vhost|remoteHost|node_state|site)=' | SAPHanaSR-showAttr --path2table reduce output to selected fields. .TP 4 # watch -n9 "crm_mon -1r --include=none,nodes,resources,failures;echo; \\ @@ -584,8 +584,8 @@ display comprehensive overview on Linux cluster and HANA resources, update every /usr/bin/SAPHanaSR-showAttr the program itself. .TP -/usr/lib/SAPHanaSR-angi/SAPHanaSRTools.pm -needed functions. +/usr/lib/SAPHanaSR-angi/ +needed libraries. .TP /usr/sap/hostctrl/exe/saphostctrl the SAP host control command. @@ -626,7 +626,7 @@ A.Briel, F.Herschel, L.Pinne. .br (c) 2015-2017 SUSE Linux GmbH, Germany. .br -(c) 2018-2023 SUSE LLC +(c) 2018-2024 SUSE LLC .br SAPHanaSR-showAttr comes with ABSOLUTELY NO WARRANTY. .br diff --git a/man/SAPHanaSR_upgrade_to_angi.7 b/man/SAPHanaSR_upgrade_to_angi.7 new file mode 100644 index 00000000..6c32d5a9 --- /dev/null +++ b/man/SAPHanaSR_upgrade_to_angi.7 @@ -0,0 +1,516 @@ +.\" Version: 1.001 +.\" +.TH SAPHanaSR_upgrade_to_angi 7 "14 Feb 2024" "" "SAPHanaSR" +.\" +.SH NAME +SAPHanaSR_upgrade_to_angi \- How to upgrade from SAPHanaSR or SAPHanaSR-ScaleOut to SAPHanaSR-angi. +.PP +.\" +.SH DESCRIPTION +.PP +\fB*\fR What is the upgrade about? +.PP +SAPHanaSR-angi can be used to replace SAPHanaSR and SAPHanaSR-ScaleOut. +SAPHanaSR-angi is quite similar to SAPHanaSR and SAPHanaSR-ScaleOut, but not +fully backward compatible. Upgrading existing clusters is possible by following +a defined procedure. The upgrade should lead to the same configuration as an +installation from scratch. +The upgrade procedure depends on an initial setup as decribed in setup guides +and manual pages. See REQUIREMENTS below and in manual pages SAPHanaSR(7) or +SAPHanaSR-ScaleOut(7). +.PP +\fB*\fR What will be changed for SAP HANA scale-up scenarios? +.PP +.RS 2 +a. The SAPHana RA and its multi-state config will be replaced by the new +SAPHanaController and its clone promotable config. +.br +b. The SAPHanaSR.py HADR provider hook script will be replaced by the new +susHanaSR.py. +.br +c. Tools are placed in /usr/bin/. +.br +d. Node attributes will be removed. +.br +hana__vhost +hana__site +hana__remoteHost +lpa__lpt +hana__op_mode +hana__srmode +hana__sync_state +TODO +.br +e. Site and global attributes will be removed from property SAPHanaSR. +.br +TODO +.br +f. Site and global attributes will be added to property SAPHanaSR. +.br +hana__glob_topology +hana__glob_prim +hana__glob_sec +hana__site_lpt_ +hana__site_lss_ +hana__site_mns_ +hana__site_srr_ +hana__site_opMode_ +hana__site_srMode_ +hana__site_srHook_ +hana__site_srPoll_ +TODO +.RE +.PP +\fB*\fR What will be changed for SAP HANA scale-out scenarios? +.PP +.RS 2 +a. 
The SAPHanaController RA and its multi-state config will be replaced by the +new SAPHanaController and its clone promotable config. +.br +b. The SAPHanaSrMultiTarget.py HADR provider hook script will be replaced by +the new susHanaSR.py. +.br +c. Tools are placed in /usr/bin/. +.br +d. Node attributes will be removed. +.br +gra +gsh +.br +e. Site and global attributes will be removed from property SAPHanaSR. +.br +mts +upd +hana__glob_sync_state +hana__glob_srHook (in case of obsolete scale-out SAPHanaSR.py) +TODO +.br +f. Site and global attributes will be added to property SAPHanaSR. +.br +hana__glob_topology +hana__site_lpt_ +hana__site_lss_ +hana__site_mns_ +hana__site_srr_ +hana__site_srMode_ +hana__site_srPoll_ +TODO +.RE +.PP +\fB*\fR How does the procedure look like at a glance? +.PP +.RS 2 +1.1 Check for sane state of cluster, HANA and system replication +.br +1.2 Collect information, needed for upgrade +.br +1.3 Make backup of CIB, sudoers and global.ini +.br +2.1 Set SAPHana or SAPHanaController resource to maintenance +.br +2.2 Remove SAPHanaSR.py or SAPHanaSrMultiTarget.py from global.ini, HANA and sudoers +.br +2.3 Remove SAPHana or SAPHanaController resource config from CIB +.br +2.4 Remove SAPHanaSR property attributes from CIB +.br +2.5 Remove SAPHanaSR node attributes from CIB +.br +2.6 Remove SAPHanaSR or SAPHanaSR-ScaleOut RPM +.br +3.1 Install SAPHanaSR-angi RPM +.br +3.2 Add susHanaSR.py to sudoers, global.ini, HANA +.br +3.3 Add angi SAPHanaController resource config to CIB +.br +3.4 Refresh SAPHanaController resource and set it out of maintenance +.\" TODO set whole cluster maintenance and restart cluster, to cleanup CIB? +.br +3.5 Add SAPHanaFilesystem resource (optional) +.br +4.1 Check for sane state of cluster, HANA and system replication +.br +4.2 Test RA on secondary and trigger susHanaSR.py (optional) +.RE +.PP +\fB*\fR What needs to be prepared upfront? +.PP +First make yourself familiar with concepts, components and configuration of +SAPHanaSR-angi. Refresh your knowledge of SAPHanaSR or SAPHanaSR-ScaleOut. +.PP +Next the following information needs to be collected and documented before +upgrading a cluster: +.RS 2 +1.1 Path to config backup directory at both sites +.br +1.2 Name of both cluster nodes, respectively both HANA master nameservers, see +SAPHanaSR-showAttr(8) +.br +1.3 HANA SID and instance number, name of adm +.br +1.4 HANA virtual hostname, in case it is used +.br +1.5 Name and config of existing SAPHana, or SAPHanaController, resources and +related constraints in CIB, see ocf_suse_SAPHana(7) or +ocf_suse_SAPHanaController(7) +.br +1.6 Path to sudoers permission config file and its content, e.g. /etc/sudoers.d/SAPHanaSR +.br +1.7 Name of existing SAPHanaSR.py, or SAPHanaSrMultiTarget.py, section in +global.ini and its content, see SAPHanaSR.py(7), SAPHanaSrMultiTarget.py(7) and +SAPHanaSR-manageProvider(8) +.br +2.1 Name and config for new SAPHanaController resources and related constraints, path to config template, see ocf_suse_SAPHanaController(7) +.br +2.2 Path to config template for new sudoers permission and its content, see +susHanaSR.py(7) +.br +2.3 Path to config template for new susHanaSR.py section, e.g. /usr/share/SAPHanaSR-angi/global.ini_susHanaSR, see susHanaSR.py(7) +.br +2.4 Name and config for new SAPHanaFilesystem resources, path to config template +, see ocf_suse_SAPHanaFilesystem(7) (optional) +.RE +.PP +Finally prepare the config templates with correct values for the given cluster. 
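+The collected values can also be noted down as shell variables, to be re-used
+in the commands of the following steps. The variable names below are
+illustrative only and not part of SAPHanaSR-angi:
+.PP
+.RS 2
+# export SID=HA1 sid=ha1 sidadm=ha1adm
+.br
+# export SUDOER=/etc/sudoers.d/SAPHanaSR
+.br
+# export OLD_MSL=msl_SAPHana_HA1_HDB00 NEW_MST=mst_SAPHanaController_HA1_HDB00
+.RE
+.PP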
+Ideally, the needed commands are also prepared in detail.
+.PP
+.\"
+.SH EXAMPLES
+.PP
+\fB*\fR Example for checking sane state of cluster, HANA and system replication.
+.PP
+These steps should be performed before doing anything with the cluster, and after
+something has been done. Usually this is done per Linux cluster. See also manual
+pages SAPHanaSR_maintenance_examples(7), cs_show_saphanasr_status(8) and
+section REQUIREMENTS below. For scale-out, SAPHanaSR-manageAttr(8) might be
+helpful as well.
+.PP
+.RS 2
+# cs_clusterstate -i
+.br
+# crm_mon -1r
+.br
+# crm configure show | grep cli-
+.br
+# SAPHanaSR-showAttr
+.br
+# cs_clusterstate -i
+.RE
+.PP
+\fB*\fR Example for showing SID and instance number of SAP HANA.
+.PP
+The installed SAP HANA instance is shown (should be only one) with its SID and
+instance number. For systemd-enabled HANA the same info can be fetched from
+systemd. Needs to be done at least once per Linux cluster. See also manual page
+SAPHanaSR_basic_cluster(7).
+.PP
+.RS 2
+# /usr/sap/hostctrl/exe/saphostctrl -function ListInstances
+.br
+# systemd-cgls -u SAP.slice
+.RE
+.PP
+\fB*\fR Example for collecting information on SAPHana resource config.
+.PP
+The names for SAPHana primitive and multi-state resource are determined, as
+well as for related order and (co-)location constraints. The SAPHana primitive
+configuration is shown. Might be useful to see if there is anything special.
+Needs to be done once per Linux cluster.
+.PP
+.RS 2
+# crm_mon -1r
+.br
+# crm configure show |\\
+.br
+ grep -e "[primitive|master|order|location].*SAPHana_"
+.br
+# crm configure show rsc_SAPHana_HA1_HDB00
+.RE
+.PP
+\fB*\fR Example for removing SAPHana resource config from CIB.
+.PP
+First the CIB is written to file for backup.
+Next the cluster is told to not stop orphaned resources and the SAPHana
+multi-state resource is set into maintenance. Next the order and colocation
+constraints are removed, the SAPHana multi-state resource is removed and the
+orphaned primitive is refreshed. Then the cluster is told to stop orphaned
+resources. Finally the resulting cluster state is shown.
+Of course also the CIB should be checked to see if the removal was successful.
+Needs to be done once per Linux cluster. SID is HA1, Instance Number is 00.
+The resource names have been determined as shown in the example above.
+.PP
+.RS 2
+# crm configure show > cib.SAPHanaSR-backup
+.br
+# echo "property cib-bootstrap-options: stop-orphan-resources=false"|\\
+ crm configure load update -
+.br
+# crm resource maintenance msl_SAPHana_HA1_HDB00 on
+.br
+# cibadmin --delete --xpath \\
+.br
+ "//rsc_order[@id='ord_SAPHana_HA1_HDB00']"
+.br
+# cibadmin --delete --xpath \\
+.br
+ "//rsc_colocation[@id='col_saphana_ip_HA1_HDB00']"
+.br
+# cibadmin --delete --xpath \\
+.br
+ "//master[@id='msl_SAPHana_HA1_HDB00']"
+.br
+# crm resource refresh rsc_SAPHana_HA1_HDB00
+.br
+# echo "property cib-bootstrap-options: stop-orphan-resources=true"|\\
+ crm configure load update -
+.br
+# crm_mon -1r
+.RE
+.PP
+\fB*\fR Example for removing all reboot-safe node attributes from CIB.
+.PP
+All reboot-safe node attributes will be removed. Needed attributes are expected
+to be re-added by the RAs later.
+Of course the CIB should be checked to see if the removal was successful.
+Needs to be done for both nodes, or both master nameservers.
+Node is node1.
+See also crm_attribute(8).
+.PP
+.RS 2
+# crm configure show node1
+.br
+# crm configure show node1 | tr " " "\\n" |\\
+.br
+ tail -n +6 | awk -F "=" '{print $1}' |\\
+.br
+while read; do \\
+.br
+ crm_attribute --node node1 --name $REPLY --delete; done
+.RE
+.PP
+\fB*\fR Example for removing non-reboot-safe node attribute from CIB.
+.PP
+The attribute hana_<sid>_sync_state will be removed.
+Of course the CIB should be checked to see if the removal was successful.
+Needs to be done for both nodes, scale-up only.
+Node is node1, SID is HA1.
+See also crm_attribute(8).
+.PP
+.RS 2
+# crm_attribute --node node1 --name hana_ha1_sync_state \\
+.br
+ --lifetime reboot --query
+.br
+# crm_attribute --node node1 --name hana_ha1_sync_state \\
+.br
+ --lifetime reboot --delete
+.RE
+.PP
+\fB*\fR Example for removing all SAPHanaSR property attributes from CIB.
+.PP
+All attributes of property SAPHanaSR will be removed. Needed attributes are
+expected to be re-added by the RAs later. The attribute for srHook will be
+added by the susHanaSR.py HADR provider script and might be missing until the
+HANA system replication status changes.
+Of course the CIB should be checked to see if the removal was successful.
+Needs to be done once per Linux cluster.
+See also SAPHanaSR-showAttr(8) and SAPHanaSR.py(7) or SAPHanaSrMultiTarget.py(7)
+respectively.
+.PP
+.RS 2
+# crm configure show SAPHanaSR
+.br
+# crm configure show SAPHanaSR |\\
+.br
+ awk -F"=" '$1~/hana_/ {print $1}' |\\
+.br
+while read; do \\
+.br
+ crm_attribute --delete --type crm_config --name $REPLY; done
+.RE
+.PP
+\fB*\fR Example for removing the SAPHanaSR.py hook script from global.ini and HANA.
+.PP
+The global.ini is copied for backup. Next the exact name (upper/lower case) of
+the section is determined from global.ini. Then the current HADR provider
+section is shown. If the section is identical to the shipped template, it can
+be removed easily from the configuration. Finally the HADR provider hook script
+is removed from running HANA. Needs to be done for each HANA site.
+SID is HA1, the case-sensitive HADR provider name is SAPHanaSR. See manual page
+SAPHanaSR.py(7) or SAPHanaSrMultiTarget.py(7) for details on checking the hook
+script integration.
+.PP
+.RS 2
+# su - ha1adm
+.br
+~> cdcoc
+.br
+~> cp global.ini global.ini.SAPHanaSR-backup
+.br
+~> grep -i ha_dr_provider_saphanasr global.ini
+.br
+~> /usr/bin/SAPHanaSR-manageProvider --sid=HA1 --show \\
+.br
+ --provider=SAPHanaSR
+.br
+~> /usr/bin/SAPHanaSR-manageProvider --sid=HA1 --reconfigure \\
+.br
+ --remove /usr/share/SAPHanaSR/samples/global.ini
+.br
+~> hdbnsutil -reloadHADRProviders
+.RE
+.PP
+\fB*\fR Example for removing the SAPHanaSR.py hook script from sudoers.
+.PP
+Needs to be done on each node.
+See manual page SAPHanaSR.py(7) for details on checking the hook script
+integration.
+.PP
+.RS 2
+# cp $SUDOER "$SUDOER".angi-bak
+.br
+# grep -v "$sidadm.*ALL..NOPASSWD.*crm_attribute.*$sid" \\
+.br
+ "$SUDOER".angi-bak >$SUDOER
+.RE
+.PP
+\fB*\fR Example for removing the SAPHanaSR package.
+.PP
+The package SAPHanaSR is removed from all cluster nodes. Related packages
+defined by patterns and dependencies are not touched. Needs to be done once per
+Linux cluster.
+.PP
+.RS 2
+# crm cluster run "rpm -e --nodeps SAPHanaSR"
+.RE
+.PP
+\fB*\fR Example for installing the SAPHanaSR-angi package.
+.PP
+The package SAPHanaSR-angi is installed on all cluster nodes. All nodes are
+checked for the package. Needs to be done once per Linux cluster.
+.PP +.RS 2 +# crm cluster run \\ +.br + "zypper --non-interactive in -l -f -y SAPHanaSR-angi" +.br +# crm cluster run \\ +.br + "hostname; rpm -q SAPHanaSR-angi --queryformat %{NAME}" +.RE +.PP +\fB*\fR Example for adding susHanaSR.py to sudoers. +.PP +Needs to be done on each node. +See manual page susHanaSR.py(7) and SAPHanaSR-hookHelper(8). +.PP +\fB*\fR Example for adding susHanaSR.py to global.ini and HANA. +.PP +Needs to be done for each HANA site. +See manual page susHanaSR.py(7) and SAPHanaSR-manageProvider(8). +.PP +\fB*\fR Example for adding angi SAPHanaController resource config to CIB. +.PP +Needs to be done once per Linux cluster. +See manual page ocf_suse_SAPHanaController(7), SAPHanaSR_basic_cluster(7) and +SUSE setup guides. +.PP +\fB*\fR Example for setting SAPHanaController resource out of maintenance. +.PP +First the SAPHanaController multi-state resource is refreshed, then it is set +out of maintenance. Name of the resource is mst_SAPHanaController_HA1_HDB00. +Of course status of cluster, HANA and system replication needs to be checked +before and after this action, see example above. Needs to be done once per +Linux cluster. See also manual page SAPHanaSR_maintenance_examples(7). +.br +Note: The srHook status for HANA secondary site migh be empty. +.PP +.RS 2 +# crm resource refresh mst_SAPHanaController_HA1_HDB00 +.br +# crm resource maintenance mst_SAPHanaController_HA1_HDB00 off +.RE +.PP +\fB*\fR Example for testing RA on secondary site and trigger susHanaSR.py. +.PP +This step is optional. +The secondary node is determined from SAPHanaSR-showAttr. On that node, the +hdbnameserver is killed. The cluster will recover the secondary HANA and set +the CIB attribute srHook. +Of course status of cluster, HANA and system replication needs to be checked. +.PP +.RS 2 +# SECNOD=$(SAPHanaSR-showAttr --format script |\\ +.br + awk -F"/" '$1=="0 Host"&&$3=="score=\\"100\\"" {print $2}') +.br +# echo $SECNOD +.br +# ssh root@$SECNOD "hostname; killall -9 hdbnameserver" +.RE +.PP +.\" +.SH FILES +.TP +/etc/sudoers.d/SAPHanaSR +recommended place for sudo permissions of HADR provider hook scripts +.TP +/usr/bin/ +path to tools +.TP +/hana/shared/$SID/global/hdb/custom/config/global.ini +on-disk representation of HANA global system configuration +.TP +/usr/share/SAPHanaSR/samples/global.ini +template for classical SAPHanaSR.py entry in global.ini +.TP +/usr/share/SAPHanaSR-angi/samples/global.ini_susHanaSR +template for susHanaSR.py entry in global.ini +.PP +.\" +.SH REQUIREMENTS +.PP +* OS, Linux cluster and HANA are matching requirements for SAPHanaSR, or SAPHanaSR-ScaleOut respectively, and SAPHanaSR-angi. +.br +* Linux cluster, HANA and system replication are in sane state before the upgrade. All cluster nodes are online. +.br +* The whole procedure is tested carefully and documented in detail before being applied on production. +.br +* Linux cluster, HANA and system replication are checked and in sane state before set back into production. +.PP +.\" +.SH BUGS +.br +In case of any problem, please use your favourite SAP support process to open a request for the component BC-OP-LNX-SUSE. Please report any other feedback and suggestions to feedback@suse.com. 
+.PP +.\" +.SH SEE ALSO +.br +\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , +\fBocf_suse_SAPHana\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , +\fBSAPHanaSR.py\fP(7) , \fBSAPHanaSrMultiTarget.py\fP(7) , +\fBsusHanaSR.py\fP(7) , +\fBSAPHanaSR_maintenance_examples\fP(7) , \fBSAPHanaSR-showAttr\fP(8) , +\fBcrm\fP(8) , \fBcrm_mon\fP(8) , \fBcrm_attribute\fP(8) , \fBcibadmin\fP(8) , +.br +https://documentation.suse.com/sbp/sap/ , +.br +https://www.suse.com/c/tag/towardszerodowntime/ +.PP +.\" +.SH AUTHORS +.br +A.Briel, F.Herschel, L.Pinne. +.PP +.\" +.SH COPYRIGHT +.br +(c) 2024 SUSE LLC +.br +This maintenance examples are coming with ABSOLUTELY NO WARRANTY. +.br +For details see the GNU General Public License at +http://www.gnu.org/licenses/gpl.html +.\" diff --git a/man/susHanaSR.py.7 b/man/susHanaSR.py.7 index af1c1475..1b877128 100644 --- a/man/susHanaSR.py.7 +++ b/man/susHanaSR.py.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH susHanaSR.py 7 "04 Jan 2024" "" "SAPHanaSR" +.TH susHanaSR.py 7 "23 Jan 2024" "" "SAPHanaSR" .\" .SH NAME susHanaSR.py \- Provider for SAP HANA srHook method srConnectionChanged(). @@ -267,8 +267,10 @@ the internal cache for srHook status changes while Linux cluster is down, file i .PP .\" .SH REQUIREMENTS -1. SAP HANA 2.0 SPS04 or later provides the HA/DR provider hook method -srConnectionChanged() with multi-target aware parameters. +1. SAP HANA 2.0 SPS05 rev.059 or later provides Python 3 as well as the HA/DR +provider hook method srConnectionChanged() with multi-target aware parameters. +The Python 3 and multi-target aware parameters are needed for the SAPHanaSR-angi +package. .PP 2. No other HADR provider hook script should be configured for the srConnectionChanged() method. Hook scripts for other methods, provided in diff --git a/ra/SAPHanaController b/ra/SAPHanaController index 1a538216..3d0519d3 100755 --- a/ra/SAPHanaController +++ b/ra/SAPHanaController @@ -105,6 +105,7 @@ case "$ACTION" in ;; esac "${raType}"init +super_ocf_log info "RUNTIME finished ${raType}init" if ! 
ocf_is_root then @@ -125,6 +126,7 @@ then exit "$OCF_ERR_ARGS" fi +super_ocf_log info "RUNTIME ready to process action '$ACTION'" ra_rc="$OCF_ERR_UNIMPLEMENTED" case "$ACTION" in start|stop|monitor|promote|demote) # Standard controlling actions @@ -149,6 +151,8 @@ case "$ACTION" in esac timeE="$(date '+%s')" (( timeR = timeE - timeB )) -super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($SAPHanaControllerVersion) (${timeR}s)====" +set_g_times +super_ocf_log info "RUNTIME ==== end action $ACTION$CLACT with rc=${ra_rc} ($SAPHanaControllerVersion) (${timeR}s)====" +super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($SAPHanaControllerVersion) (${timeR}s; times=$g_time_proc $g_time_chld)====" exit "${ra_rc}" # set ts=4 sw=4 sts=4 et diff --git a/ra/SAPHanaTopology b/ra/SAPHanaTopology index a9657615..2b5b265f 100755 --- a/ra/SAPHanaTopology +++ b/ra/SAPHanaTopology @@ -135,6 +135,8 @@ case "$ACTION" in esac timeE="$(date '+%s')" (( timeR = timeE - timeB )) -super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($SAPHanaTopologyVersion) (${timeR}s)====" +set_g_times +super_ocf_log info "RUNTIME ==== end action $ACTION$CLACT with rc=${ra_rc} ($SAPHanaTopologyVersion) (${timeR}s)====" +super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($SAPHanaTopologyVersion) (${timeR}s; times=$g_time_proc $g_time_chld)====" exit "${ra_rc}" # set ts=4 sw=4 sts=4 et diff --git a/ra/saphana-common-lib b/ra/saphana-common-lib index d44b7cf5..1078b9ce 100755 --- a/ra/saphana-common-lib +++ b/ra/saphana-common-lib @@ -33,6 +33,13 @@ saphana_common_lib_version="1.001.1" # global variables: InstanceNr(w) SAPVIRHOST(w) DIR_EXECUTABLE(w) SAPSTARTSRV(w) SAPCONTROL(w) DIR_PROFILE(w) SAPSTARTPROFILE(w) # global variables: SAPHanaFilter(w) HANA_STATE_*(w) HANA_STD_ACTION_TIMEOUT(w) log_attributes(w) +function set_g_times() { + f_times="/run/${raType}.$$.times" + times > "$f_times" + { read -r g_time_proc; read -r g_time_chld; } < "$f_times" + rm "$f_times" +} + function super_ocf_log() { # called by: TODO # function: super_ocf_log - wrapper function for ocf log in order catch usual logging into super log @@ -43,6 +50,8 @@ function super_ocf_log() { local -u shf="${SAPHanaFilter:-ALLBUTFLOW}" # message levels: (dbg)|info|warn|err|error # message types: (ACT|RA|FLOW|DBG|LPA|DEC|DBG2... 
+ mtype=${message%% *} + mtype=${mtype%:} case "$level" in debug | dbg | warn | err | error ) skip=0 ;; @@ -55,18 +64,21 @@ function super_ocf_log() { skip=1 ;; ALLBUTFLOW ) - mtype=${message%% *} - mtype=${mtype%:} [[ "FLOW" == *${mtype}* ]] && skip=1 || skip=0 ;; - * ) mtype=${message%% *} - mtype=${mtype%:} + * ) [[ ${shf} == *${mtype}* ]] && skip=0 || skip=1 ;; esac ;; esac if [[ "$skip" == 0 ]]; then + if [[ "RUNTIME" == *${mtype}* ]]; then + set_g_times + message="##-1-## $message times=$g_time_proc $g_time_chld" + else + message="##-2-## $message" + fi ocf_log "$level" "$message" fi } # end function super_ocf_log @@ -88,6 +100,7 @@ function cluster_commands_init() { fi # TODO PRIO3: NG - check, if NODENAME set here is not a duplicate NODENAME="$("$CRM_NODE" -n)" + super_ocf_log info "RUNTIME ${FUNCNAME[0]} rc=$rc" } # end function cluster_commands_init function core_init() { @@ -432,6 +445,7 @@ function HANA_CALL() { esac shift done + #super_ocf_log info "RUNTIME HANA_CALL START '$cmd'" if [ "$use_su" == "1" ]; then pre_cmd_type="su" @@ -493,6 +507,7 @@ function HANA_CALL() { #fi ;; esac + super_ocf_log info "RUNTIME HANA_CALL '$cmd' stack:${FUNCNAME[*]}rc=$rc" super_ocf_log debug "DBG: HANA_CALL '$cmd' rc=$rc output=$output" echo "$output" return "$rc"; @@ -702,9 +717,15 @@ function check_for_primary() { super_ocf_log info "FLOW ${FUNCNAME[0]} ()" local state="$HANA_STATE_DEFECT" local rc=0 + local mode="" + if [[ "$#" == "1" ]]; then + mode="$1" + fi case "$raType" in saphana* ) # SAPHanaController - get_local_sr_config >/dev/null # sets global variables gSrMode and gSite + if [[ -z "$gSrMode" || "$mode" == "live" ]]; then + get_local_sr_config >/dev/null # sets global variables gSrMode and gSite + fi ;; sht* ) # SAPHanaTopology - did that already in sht_init() - maybe we can align that later ;; @@ -839,7 +860,7 @@ function check_saphostagent() { # # params: virt-name [--timeout timeout] [--set_gTopology] -# globals: - +# globals: g_cache_lss(w) # output: either single value (default); or multi line key=value (--multiValue) # rc: lss or timeout (124) function get_role_by_landscape(){ @@ -857,6 +878,7 @@ function get_role_by_landscape(){ # TODO PRIO2: query lss to be moved into a function # TODO PRIO1: NG - parse for SAPCONTROL-OK: and SAPCONTROL-OK: to validate the completeness of the output hanaAnswer=$(HANA_CALL --timeout "$timeout" --cmd "python landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); lssRc="$?" + g_cache_lss="$lssRc" super_ocf_log info "DEC: lssRc=$lssRc" nodeRole=$(echo "${hanaAnswer//[[:blank:]]/}" | \ awk -F= ' @@ -898,7 +920,7 @@ function get_role_by_cluster_or_landscape() { #super_ocf_log info "DEC: WILLI-BIENE nodeRole=$nodeRole" # TODO PRIO2: Should ":::" be defined as global Variable for SAPHanaController and SAPHanaTopology if [[ -z "$nodeRole" || "$nodeRole" = ":::" ]]; then - nodeRole="$(get_role_by_landscape "${virtName}")" + nodeRole="$(get_role_by_landscape "${virtName}")"; g_cache_lss="$?" 
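+        # g_cache_lss caches the most recent landscapeHostConfiguration.py return
+        # code (lss). get_hana_landscape_status() returns the cached value when
+        # called with "cache", so monitor operations can skip a second HANA call;
+        # calling it with "" (or with an empty cache) runs the query again.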
#super_ocf_log info "DEC: HUGO-WILLI-BIENE nodeRole=$nodeRole" fi echo "$nodeRole" diff --git a/ra/saphana-controller-common-lib b/ra/saphana-controller-common-lib index 286037cc..8ea831a4 100755 --- a/ra/saphana-controller-common-lib +++ b/ra/saphana-controller-common-lib @@ -542,28 +542,39 @@ function cleanup_instance() { # # function: get_hana_landscape_status - figure out hana landscape status -# params: - -# globals: sidadm(r), DIR_EXECUTABLE(r), hana_LSS_Out(w) +# params: optional: cache_mode: empty or "" or "cache" or "live" +# globals: sidadm(r), DIR_EXECUTABLE(r), hana_LSS_Out(w), g_chache_lss(rw) # function get_hana_landscape_status() { # called by: TODO super_ocf_log info "FLOW ${FUNCNAME[0]} ()" local rc=0 - hana_LSS_Out=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); rc=$? - if [[ "$rc" == 124 ]]; then - # TODO: PRIO 1: Check, if we should loop here like 'for i in 1 2 3 ...' ? - # landscape timeout - super_ocf_log warn "RA: landscapeHostConfiguration.py TIMEOUT after $HANA_CALL_TIMEOUT seconds" - sleep 20 - # shellcheck disable=SC2034 + local cache_mode="" + if [[ "$#" == "1" ]]; then + cache_mode="$1" + fi + if [[ "$cache_mode" == "cache" && "$g_cache_lss" != "" ]]; then + super_ocf_log info "RUNTIME use cached value for lss return code" + return "$g_cache_lss" + else + super_ocf_log info "RUNTIME do NOT use cached value for lss return code (cache_mode=$cache_mode, g_cache_lss=$g_cache_lss)" hana_LSS_Out=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); rc=$? - if [ "$rc" == 124 ]; then - super_ocf_log warn "RA: landscapeHostConfiguration.py second TIMEOUT after $HANA_CALL_TIMEOUT seconds" - # TODO PRIO2: How to handle still hanging lss - current solution is to say "FATAL" - Maybe we should return the stored attribute value? - rc=0 + if [[ "$rc" == 124 ]]; then + # TODO: PRIO 1: Check, if we should loop here like 'for i in 1 2 3 ...' ? + # landscape timeout + super_ocf_log warn "RA: landscapeHostConfiguration.py TIMEOUT after $HANA_CALL_TIMEOUT seconds" + sleep 20 + # shellcheck disable=SC2034 + hana_LSS_Out=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); rc=$? + if [ "$rc" == 124 ]; then + super_ocf_log warn "RA: landscapeHostConfiguration.py second TIMEOUT after $HANA_CALL_TIMEOUT seconds" + # TODO PRIO2: How to handle still hanging lss - current solution is to say "FATAL" - Maybe we should return the stored attribute value? 
+ rc=0 + fi fi + g_cache_lss="$rc" + return "$rc"; fi - return "$rc"; } # end function get_hana_landscape_status # diff --git a/ra/saphana-controller-lib b/ra/saphana-controller-lib index 42b52fd1..6f4308be 100755 --- a/ra/saphana-controller-lib +++ b/ra/saphana-controller-lib @@ -397,6 +397,7 @@ function saphana_init() { # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w) # globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w), hdbSrQueryTimeout(w), NODENAME(w), vNAME(w), hdbver(w), # called by: RA + SAPHanaFilter="all" super_ocf_log info "FLOW ${FUNCNAME[0]} ($*)" local rc="$OCF_SUCCESS" clN SYSTEMCTL="/usr/bin/systemctl" @@ -418,6 +419,7 @@ function saphana_init() { # init attribute definitions # saphana_init_attribute_definitions + SAPHanaFilter=$(get_hana_attribute "X" "${ATTR_NAME_HANA_FILTER[@]}") super_ocf_log debug "DBG: SID=$SID, sid=$sid, SIDInstanceName=$SIDInstanceName, InstanceName=$InstanceName, InstanceNr=$InstanceNr, SAPVIRHOST=$SAPVIRHOST" # # init scoring tables @@ -431,6 +433,7 @@ function saphana_init() { preferRecover="local" fi saphana_init_scoring_tables + super_ocf_log info "RUNTIME: set scoring table for scenario ${topology}_${preferRecover}" super_ocf_log info "RA: set scoring table for scenario ${topology}_${preferRecover}" case "${topology}_${preferRecover}" in ScaleUp_remote ) SCORING_TABLE=( "${SCORING_TABLE_PREFERRED_SITE_TAKEOVER_SU[@]}");; @@ -469,12 +472,12 @@ function saphana_init() { #topo=* ) gTopology="${splitResultLine#*=}";; esac done + g_cache_lss="$gLss" gFullRole="${gLss}:${gSrr}:${gRole}" super_ocf_log info "DEC: init(): gFullRole=$gFullRole" gSrPoll=$(get_hana_site_attribute "${gSite}" "${ATTR_NAME_HANA_SITE_SYNC_STATUS[@]}") gSrHook=$(get_SRHOOK "$gSite") super_ocf_log info "DEC: sr_name=$gSite, gRemSite=$gRemSite, sr_mode=$gSrMode" - SAPHanaFilter=$(get_hana_attribute "X" "${ATTR_NAME_HANA_FILTER[@]}") # # get HANA version # @@ -535,7 +538,7 @@ function analyze_hana_sync_statusSRS() { fi elif [ "$srRc" -le 11 ]; then # 11 and 10 # if systemReplicationStatus is ERROR and landscapeHostConfiguration is down then do NOT set SFAIL - get_hana_landscape_status; lss=$? + get_hana_landscape_status ""; lss=$? if [ "$lss" -lt 2 ]; then # keep everything like it was rc=2 @@ -892,7 +895,7 @@ function saphana_start_primary_handle_register_advice() { done if wait_for_primary_master 1; then register_hana_secondary - primary_status="$(check_for_primary)"; + primary_status="$(check_for_primary "live")"; if [[ "$primary_status" == "$HANA_STATE_SECONDARY" ]]; then super_ocf_log info "ACT: Register successful" lpa_push_lpt 10 @@ -938,7 +941,7 @@ function saphana_start_primary_handle_start_advice() { esac super_ocf_log info "DEC: saphana_start_primary_handle_start_advice: scoring_crm_promote($gFullRole,$my_sync)" # after starting SAP HANA we need to get the lss status life and not via SAPHanaTopology and get updaptes roles from SAPHanaTopology - get_hana_landscape_status; lLss="$?" + get_hana_landscape_status ""; lLss="$?" gRole=$(get_hana_attribute "${NODENAME}" "${ATTR_NAME_HANA_ROLES[@]}") gLss="$lLss" gFullRole="${lLss}:${gSrr}:${gRole}" @@ -1174,9 +1177,9 @@ function saphana_start_clone() { super_ocf_log info "FLOW ${FUNCNAME[0]} ()" # TODO PRIO2: scale-up is normally always the_master_nameserver and we do not need to count workers if is_the_master_nameserver; then - primary_status="$(check_for_primary)"; + primary_status="$(check_for_primary "live")"; # gNrSiteNode vs. 
landscape-workers? - get_hana_landscape_status; lss=$? + get_hana_landscape_status ""; lss=$? lss_worker=$(echo "$hana_LSS_Out" | awk '/indexServerConfigRole=worker/ { w++; } END { print w; }') # # Only start HANA, if there are enough nodes in the cluster to fulfill the landscape: @@ -1208,7 +1211,7 @@ function saphana_start_clone() { super_ocf_log info "ACT: Local SAP HANA instance already up and running" else # IF LS>=2 AND ROLE STANDBY "RESTART" INSTANCE - get_hana_landscape_status; lss=$? + get_hana_landscape_status ""; lss=$? if [ "$lss" -gt 2 ]; then # # only restart standby instances @@ -1237,7 +1240,7 @@ function saphana_stop_clone() { local primary_status="x" set_hana_attribute "${NODENAME}" "UNDEFINED" "${ATTR_NAME_HANA_CLONE_STATE[@]}" super_ocf_log debug "DBG: SET UNDEFINED" - primary_status="$(check_for_primary)"; + primary_status="$(check_for_primary "live")"; if [ "$primary_status" == "$HANA_STATE_SECONDARY" ]; then super_ocf_log debug "DBG: 008 * lpa_set_lpt 10 $gSite" lpa_set_lpt 10 "$gSite" @@ -1290,7 +1293,7 @@ function saphana_check_waiting_primary() { # called by: saphana_monitor_primary # TODO PRIO1: NG - check which params / globals are used/set ... local lss lparc LPTloc - get_hana_landscape_status; lss="$?" + get_hana_landscape_status ""; lss="$?" if [ "$lss" -ge 2 ]; then # seems admin already decided that for us? -> we are running - set DEMOTED # TODO PRIO2: set "DEMOTED" attribute as described @@ -1537,7 +1540,7 @@ function saphana_monitor_primary() { promoted=0; ;; esac - get_hana_landscape_status; lss=$? + get_hana_landscape_status "cache"; lss=$? gLss="$lss" gFullRole="${gLss}:${gSrr}:${gRole}" super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss gLss=$gLss" @@ -1695,7 +1698,7 @@ function saphana_monitor_secondary() { esac fi super_ocf_log debug "DBG: saphana_monitor_clone: HANA_STATE_SECONDARY" - get_hana_landscape_status; lss=$? + get_hana_landscape_status "cache"; lss=$? super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss" case "$lss" in 0 ) # FATAL @@ -1726,7 +1729,7 @@ function saphana_monitor_clone_not_msn() { # local instance is not THE master nameserver and is currently stopped # # IF LS>=2 AND ROLE STANDBY "RESTART" INSTANCE - get_hana_landscape_status; lss=$? + get_hana_landscape_status ""; lss=$? if [ "$lss" -gt 2 ]; then super_ocf_log info "DEC: local instance is down landscape is up (lss=$lss)" rc="$OCF_NOT_RUNNING" @@ -1765,7 +1768,7 @@ function saphana_monitor_clone_not_msn() { # missing ALL master nameserver candidates, but local instance still running -> we need to trigger the cluster to take us down # super_ocf_log info "DEC: left-over instance ???" - get_hana_landscape_status; lss=$? + get_hana_landscape_status "cache"; lss=$? 
if [ "$lss" -le 1 ]; then # # landscape already reports down / error but local instance is up and running @@ -1857,7 +1860,7 @@ function saphana_monitor_clone() { # # First check, if we are PRIMARY or SECONDARY # - primary_status="$(check_for_primary)"; + primary_status="$(check_for_primary "cache")"; if [ "$primary_status" == "$HANA_STATE_PRIMARY" ]; then # FIX: bsc#919925 Leaving Node Maintenance stops HANA Resource Agent # TODO PRIO1: NG - Maybe we need a lpa-check here to @@ -1931,7 +1934,7 @@ function saphana_promote_clone() { super_ocf_log info "FLOW ${FUNCNAME[0]} ($*)" # Differ SAP HANA primary and secondary set_hana_attribute "${NODENAME}" "PROMOTED" "${ATTR_NAME_HANA_CLONE_STATE[@]}" - primary_status="$(check_for_primary)"; + primary_status="$(check_for_primary "live")"; if [ "$primary_status" == "$HANA_STATE_PRIMARY" ]; then # SAP HANA is already configured as PRIMARY, only mark the clone as PROMOTED super_ocf_log info "ACT: Promoted $SID-$InstanceName as master (no hdbnsutil action needed)." @@ -1987,7 +1990,7 @@ function saphana_promote_clone() { fi # check: SAP HANA is now PRIMARY? # TODO: PRIO3: check, if we need to differ between HANA_STATE_PRIMARY, HANA_STATE_SECONDARY, HANA_STATE_DEFECT - primary_status="$(check_for_primary)" + primary_status="$(check_for_primary "live")" if [[ "$primary_status" == "P" ]]; then rc="$OCF_SUCCESS"; # DONE: PRIO0: !!! diff --git a/ra/saphana-topology-lib b/ra/saphana-topology-lib index 4cf54f7f..59d6f4e7 100755 --- a/ra/saphana-topology-lib +++ b/ra/saphana-topology-lib @@ -433,7 +433,7 @@ function sht_stop_clone() { # called by: TODO super_ocf_log info "FLOW ${FUNCNAME[0]} ($*)" local rc=0 tout=0 nRole nNsConf - hanaPrim="$(check_for_primary)"; + hanaPrim="$(check_for_primary "live")"; # TODO: should we simplify the calculation of timeout (see saphana-silesystem-lib)? # shellcheck disable=SC2154 local actionTimeOut="$OCF_RESKEY_CRM_meta_timeout" stdTimeOut=300 actTimeOutPercent=50 @@ -501,7 +501,7 @@ function sht_monitor_clone() { # # First check, if we are PRIMARY or SECONDARY # - hanaPrim="$(check_for_primary)"; + hanaPrim="$(check_for_primary "cache")"; sht_monitor; rc="$?" 
# try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 # We rely on the following format: SID is word#4, SYSNR is word#6, vHost is word#8 @@ -517,7 +517,8 @@ function sht_monitor_clone() { retn=* ) hanalrc="${splitResultLine#*=}";; esac done - super_ocf_log info "DEC: gNodeRole=$gNodeRole gTopology=$gTopology hanalrc=$hanalrc" + g_cache_lss="$hanalrc" + super_ocf_log info "DEC: gNodeRole=$gNodeRole gTopology=$gTopology hanalrc=$g_cache_lss" if [[ "$hanalrc" != "124" ]]; then # normal exit, use gNodeRole super_ocf_log info "DEC: gNodeRole=$gNodeRole" diff --git a/test/json/angi-ScaleOut/kill_secn_indexserver.json b/test/json/angi-ScaleOut/kill_secn_indexserver.json index 7f2350ab..cff8cd91 100644 --- a/test/json/angi-ScaleOut/kill_secn_indexserver.json +++ b/test/json/angi-ScaleOut/kill_secn_indexserver.json @@ -84,7 +84,7 @@ "step": "final40", "name": "end recover", "next": "END", - "loop": 120, + "loop": 240, "wait": 2, "post": "cleanup", "remark": "pXXX and sCCC to be the same as at test begin", diff --git a/test/json/angi-ScaleUp/kill_prim_indexserver.json b/test/json/angi-ScaleUp/kill_prim_indexserver.json index 9e192ae5..382f5180 100644 --- a/test/json/angi-ScaleUp/kill_prim_indexserver.json +++ b/test/json/angi-ScaleUp/kill_prim_indexserver.json @@ -49,7 +49,7 @@ "step": "step30", "name": "begin recover", "next": "final40", - "loop": 120, + "loop": 150, "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ diff --git a/test/json/angi-ScaleUp/kill_secn_indexserver.json b/test/json/angi-ScaleUp/kill_secn_indexserver.json index f6fbfa3f..94f21fa9 100644 --- a/test/json/angi-ScaleUp/kill_secn_indexserver.json +++ b/test/json/angi-ScaleUp/kill_secn_indexserver.json @@ -50,7 +50,7 @@ "step": "step30", "name": "begin recover", "next": "final40", - "loop": 120, + "loop": 150, "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ diff --git a/test/json/angi-ScaleUp/kill_secn_node.json b/test/json/angi-ScaleUp/kill_secn_node.json index 18221292..3941f1bd 100644 --- a/test/json/angi-ScaleUp/kill_secn_node.json +++ b/test/json/angi-ScaleUp/kill_secn_node.json @@ -49,7 +49,7 @@ "step": "step30", "name": "begin recover", "next": "final40", - "loop": 120, + "loop": 150, "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [ diff --git a/test/json/angi-ScaleUp/split_brain_prio.json b/test/json/angi-ScaleUp/split_brain_prio.json index 3e9a675d..9bbc1cbe 100644 --- a/test/json/angi-ScaleUp/split_brain_prio.json +++ b/test/json/angi-ScaleUp/split_brain_prio.json @@ -44,7 +44,7 @@ "step": "step30", "name": "begin recover", "next": "final40", - "loop": 120, + "loop": 150, "wait": 2, "todo": "pHost+sHost to check site-name", "pSite": [