From 038b7704db75ce64401e63dcffd233b258b37ea2 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Wed, 11 Dec 2024 16:18:10 +0900 Subject: [PATCH] rpm: improve process timing for safety (#764) Points * FROM-package just leaves tmp files if it supports the features. * TO-package trigger the features if there are those tmp files. * Thus, can ensure that both FROM and TO support the feature. * Make installing plugin and restarting the same condition. * Disable `%systemd_postun_with_restart` completely to align specifications with DEB. Before 1. to-pre(2): Collect plugin-list. 2. Install TO-package 3. to-post(2): Install plugin. 4. from-preun(1): Do nothing. 5. Uninstall FROM-package 6. from-postun(1): Restart if need. After 1. to-pre(2): Collect plugin-list. 2. Install TO-package 3. to-post(2): Do nothing. 4. from-preun(1): * Check auto or not. * Leave plugin-install flag and pid if need. 5. Uninstall FROM-package 6. from-postun(1): Disable `%systemd_postun_with_restart`. 7. to-posttrans: Install plugin and restart if need. Signed-off-by: Daijiro Fukuda --- .github/workflows/yum.yml | 18 ++- .../apt/systemd-test/downgrade-to-v5-lts.sh | 1 + fluent-package/yum/fluent-package.spec.in | 106 ++++++++++-------- .../yum/systemd-test/downgrade-to-v5-lts.sh | 25 ++++- .../update-to-next-major-version.sh | 7 +- 5 files changed, 101 insertions(+), 56 deletions(-) diff --git a/.github/workflows/yum.yml b/.github/workflows/yum.yml index f3a4843e..c7daeb76 100644 --- a/.github/workflows/yum.yml +++ b/.github/workflows/yum.yml @@ -153,9 +153,12 @@ jobs: - "update-to-next-version-service-status.sh disabled active" - "update-to-next-version-service-status.sh disabled inactive" - "update-to-next-version-with-auto-and-manual.sh" - - "update-to-next-major-version.sh auto" - - "update-to-next-major-version.sh manual" - - "update-to-next-major-version.sh etc" + - "update-to-next-major-version.sh auto active" + - "update-to-next-major-version.sh auto inactive" + - "update-to-next-major-version.sh manual active" + - "update-to-next-major-version.sh manual inactive" + - "update-to-next-major-version.sh etc active" + - "update-to-next-major-version.sh etc inactive" - "update-without-data-lost.sh v5 v6" - "update-without-data-lost.sh v6 v5" include: @@ -229,9 +232,12 @@ jobs: - "update-to-next-version-service-status.sh disabled active" - "update-to-next-version-service-status.sh disabled inactive" - "update-to-next-version-with-auto-and-manual.sh" - - "update-to-next-major-version.sh auto" - - "update-to-next-major-version.sh manual" - - "update-to-next-major-version.sh etc" + - "update-to-next-major-version.sh auto active" + - "update-to-next-major-version.sh auto inactive" + - "update-to-next-major-version.sh manual active" + - "update-to-next-major-version.sh manual inactive" + - "update-to-next-major-version.sh etc active" + - "update-to-next-major-version.sh etc inactive" - "update-without-data-lost.sh v5 v6" - "update-without-data-lost.sh v6 v5" include: diff --git a/fluent-package/apt/systemd-test/downgrade-to-v5-lts.sh b/fluent-package/apt/systemd-test/downgrade-to-v5-lts.sh index 50ac15e5..2858daa2 100755 --- a/fluent-package/apt/systemd-test/downgrade-to-v5-lts.sh +++ b/fluent-package/apt/systemd-test/downgrade-to-v5-lts.sh @@ -25,4 +25,5 @@ systemctl status --no-pager fluentd systemctl status --no-pager td-agent # Fluentd should be restarted. +# NOTE: Unlike RPM, the restart behavior depends on TO-side. So, it restarts. test $main_pid -ne $(eval $(systemctl show fluentd --property=MainPID) && echo $MainPID) diff --git a/fluent-package/yum/fluent-package.spec.in b/fluent-package/yum/fluent-package.spec.in index c3b50eb8..ba01eba2 100644 --- a/fluent-package/yum/fluent-package.spec.in +++ b/fluent-package/yum/fluent-package.spec.in @@ -33,6 +33,8 @@ %define v4migration_old_rotate_config_saved /tmp/@PACKAGE_DIR@/.old_rotate_config %define v4migration_enabled_service /tmp/@PACKAGE_DIR@/.v4migration_enabled_service %define local_base_plugins /tmp/@PACKAGE_DIR@/.local_base_plugins +%define install_plugins /tmp/@PACKAGE_DIR@/.install_plugins +%define pid_for_auto_restart /tmp/@PACKAGE_DIR@/.pid_for_auto_restart # Omit the brp-python-bytecompile automagic because post hook for ffi fails on AmazonLinux 2. %if %{_amazon_ver} == 2 @@ -160,6 +162,14 @@ mkdir -p %{buildroot}%{_sysconfdir}/@PACKAGE_DIR@/plugin mkdir -p %{buildroot}/tmp/@PACKAGE_DIR@ %pre +# Make sure the previous tmp files for auto restart does not remain. +# Note: +# %preun (FROM-side) can create these files, but they will be removed in %posttrans (TO-side). +# This means that these files may not be removed depending on the version of TO-side. +# In the future, want to figure out a more secure way to manage tmp files... +rm -f %{install_plugins} +rm -f %{pid_for_auto_restart} + if ! getent group @COMPAT_SERVICE_NAME@ >/dev/null; then if ! getent group @SERVICE_NAME@ >/dev/null; then /usr/sbin/groupadd --system @SERVICE_NAME@ @@ -188,16 +198,35 @@ else fi fi if [ $1 -eq 2 ]; then - . %{_sysconfdir}/sysconfig/@SERVICE_NAME@ - echo "pre FLUENT_PACKAGE_SERVICE_RESTART: $FLUENT_PACKAGE_SERVICE_RESTART" - if [ "$FLUENT_PACKAGE_SERVICE_RESTART" = auto ]; then - # collect installed gems during upgrading + # Collect plugin-list. + # Note: + # This should be done in %preun(1) of FROM-side, but we have no choice but to do this here. + # %preun(1) of FROM-side is executed after TO-side installs the files and replaces embedded Ruby. + # So, this needs to be done before it. + if [ -e /usr/sbin/fluent-gem ]; then /usr/sbin/fluent-gem list '^fluent-plugin-' --no-versions --no-verbose > %{local_base_plugins} fi fi %preun %systemd_preun @SERVICE_NAME@.service +if [ $1 -eq 1 ]; then + # systemctl ... --property=MainPID --value is available since systemd 230 or later. + # thus for amazonlinux:2, it can not be used. + pid="$(systemctl show "@SERVICE_NAME@" --property=MainPID | cut -d'=' -f2)" + if [ "$pid" -eq 0 ]; then + echo "Do not use auto restart because the service is not active" + else + . %{_sysconfdir}/sysconfig/@SERVICE_NAME@ + echo "FLUENT_PACKAGE_SERVICE_RESTART: $FLUENT_PACKAGE_SERVICE_RESTART" + if [ "$FLUENT_PACKAGE_SERVICE_RESTART" = auto ]; then + # Present that FROM-side wants auto installing plugins and restarting. + # Note: Wants to collect plugin-list here, but we need to do it in %pre (see comments in %pre). + touch %{install_plugins} + echo "$pid" > %{pid_for_auto_restart} + fi + fi +fi %post %systemd_post @SERVICE_NAME@.service @@ -291,51 +320,9 @@ if [ -f "%{_sysconfdir}/prelink.conf" ]; then %{__sed} -i"" %{_sysconfdir}/prelink.conf -e "/\/opt\/td-agent\/bin\/ruby/d" fi fi -if [ $1 -eq 2 ]; then - # install missing plugins during upgrading package - if [ -f %{local_base_plugins} ]; then - local_current_plugins=$(/usr/sbin/fluent-gem list '^fluent-plugin-' --no-versions --no-verbose) - if ! grep --fixed-strings --line-regexp --invert-match "$local_current_plugins" %{local_base_plugins}; then - echo "No missing plugins to install" - else - if ! curl --fail --silent -O https://rubygems.org/specs.4.8.gz; then - echo "No network connectivity..." - else - grep --fixed-strings --line-regexp --invert-match "$local_current_plugins" %{local_base_plugins} | while read missing_gem - do - if ! /usr/sbin/fluent-gem install --no-document $missing_gem; then - echo "Can't install missing plugin automatically: please install $missing_gem manually." - fi - done - fi - fi - rm -f %{local_base_plugins} - fi -fi %postun -if [ $1 -eq 1 ]; then - # Control service during upgrading - . %{_sysconfdir}/sysconfig/@SERVICE_NAME@ - echo "postun FLUENT_PACKAGE_SERVICE_RESTART: $FLUENT_PACKAGE_SERVICE_RESTART" - if [ "$FLUENT_PACKAGE_SERVICE_RESTART" = "auto" ]; then - # systemctl ... --property=MainPID --value is available since systemd 230 or later. - # thus for amazonlinux:2, it can not be used. - pid=$(systemctl show fluentd --property=MainPID | cut -d'=' -f2) - if [ $pid -gt 0 ]; then - echo "Kick auto service upgrade mode to MainPID:$pid" - kill -USR2 $pid - else - # no running fluentd service - echo "Suppress auto service upgrade mode to MainPID:$pid" - fi - elif [ "$FLUENT_PACKAGE_SERVICE_RESTART" = "manual" ]; then - echo "No need to restart service in manual mode..." - else - # no support for upgrading without downtime - %systemd_postun_with_restart @SERVICE_NAME@.service - fi -fi +# Disable systemd_postun_with_restart to manage restart on the package side. if [ $1 -eq 0 ]; then # Uninstall # Without this uninstall conditional guard block ($1 -eq 0), symlink @@ -400,6 +387,31 @@ if [ -f %{v4migration} ]; then rm -f %{v4migration_with_restart} fi fi +if [ -f %{install_plugins} ] && [ -f %{local_base_plugins} ]; then + local_current_plugins=$(/usr/sbin/fluent-gem list '^fluent-plugin-' --no-versions --no-verbose) + if ! grep --fixed-strings --line-regexp --invert-match "$local_current_plugins" %{local_base_plugins}; then + echo "No missing plugins to install" + else + if ! curl --fail --silent -O https://rubygems.org/specs.4.8.gz; then + echo "No network connectivity..." + else + grep --fixed-strings --line-regexp --invert-match "$local_current_plugins" %{local_base_plugins} | while read missing_gem + do + if ! /usr/sbin/fluent-gem install --no-document $missing_gem; then + echo "Can't install missing plugin automatically: please install $missing_gem manually." + fi + done + fi + fi +fi +rm -f %{install_plugins} +rm -f %{local_base_plugins} +if [ -f %{pid_for_auto_restart} ]; then + pid=$(cat %{pid_for_auto_restart}) + echo "Kick auto restart to MainPID:$pid" + kill -USR2 $pid + rm -f %{pid_for_auto_restart} +fi %files %doc README.md diff --git a/fluent-package/yum/systemd-test/downgrade-to-v5-lts.sh b/fluent-package/yum/systemd-test/downgrade-to-v5-lts.sh index a0dd8af0..8c1fa7a5 100755 --- a/fluent-package/yum/systemd-test/downgrade-to-v5-lts.sh +++ b/fluent-package/yum/systemd-test/downgrade-to-v5-lts.sh @@ -27,6 +27,10 @@ sudo $DNF remove -y fluent-package sudo $DNF install -y \ /host/${distribution}/${DISTRIBUTION_VERSION}/x86_64/Packages/fluent-package-[0-9]*.rpm +# Customize the env file to prevent replacing by downgrade. +# Need this to test the case where some tmp files is left. +echo "FOO=foo" >> /etc/sysconfig/fluentd + sudo systemctl enable --now fluentd systemctl status --no-pager fluentd systemctl status --no-pager td-agent @@ -42,5 +46,22 @@ systemctl is-enabled fluentd systemctl status --no-pager fluentd systemctl status --no-pager td-agent -# Fluentd should be restarted. -test $main_pid -ne $(eval $(systemctl show fluentd --property=MainPID) && echo $MainPID) +# Fluentd should NOT be restarted. +# NOTE: Unlike DEB, the restart behavior depends on FROM-side. So, it does not restart. +# (it should restarts only when triggering zerodowntime-restart). +test $main_pid -eq $(eval $(systemctl show fluentd --property=MainPID) && echo $MainPID) + +# === Test: Remained tmp files should not affect to next upgrade === +# (This happens when env file was customized but the FLUENT_PACKAGE_SERVICE_RESTART was still `auto`) + +# Some tmp files remains, though it is not happy. +test -e /tmp/fluent/.install_plugins +test -e /tmp/fluent/.pid_for_auto_restart + +sudo $DNF install -y \ + /host/${distribution}/${DISTRIBUTION_VERSION}/x86_64/Packages/fluent-package-[0-9]*.rpm | tee upgrade.log + +# zerodowntime-restart should NOT be triggered. +(! grep "Kick auto restart" upgrade.log) + +# ====== diff --git a/fluent-package/yum/systemd-test/update-to-next-major-version.sh b/fluent-package/yum/systemd-test/update-to-next-major-version.sh index 4aa1131f..26390f41 100755 --- a/fluent-package/yum/systemd-test/update-to-next-major-version.sh +++ b/fluent-package/yum/systemd-test/update-to-next-major-version.sh @@ -5,11 +5,16 @@ set -exu . $(dirname $0)/commonvar.sh service_restart=$1 +status_before_update=$2 # active / inactive # Install the current package="/host/${distribution}/${DISTRIBUTION_VERSION}/x86_64/Packages/fluent-package-*.rpm" sudo $DNF install -y $package +if [ "$status_before_update" = active ]; then + sudo systemctl start fluentd +fi + # Set FLUENT_PACKAGE_SERVICE_RESTART sed -i "s/=auto/=$service_restart/" /etc/sysconfig/fluentd @@ -22,7 +27,7 @@ package="/host/v6-test/${distribution}/${DISTRIBUTION_VERSION}/x86_64/Packages/f sudo $DNF install -y $package # Test: Check whether plugin/gem were installed during upgrading -if [ "$service_restart" = auto ]; then +if [ "$service_restart" = auto ] && [ "$status_before_update" = active ]; then # plugin gem should be installed automatically /opt/fluent/bin/fluent-gem list | grep fluent-plugin-concat # Non fluent-plugin- prefix gem should not be installed automatically