Rolling restart for the Elasticsearch handler.

The stop and start halves of the rolling upgrade move into their own task
files (elasticsearch-rolling-stop.yml, elasticsearch-rolling-start.yml) and
the "Restart Elasticsearch" handler includes them through
handlers/restart_elasticsearch.yml instead of restarting the service directly.

Note: the "index" lines below still carry the blob ids that were recorded
when this patch was first generated.

diff --git a/roles/elasticsearch/handlers/main.yml b/roles/elasticsearch/handlers/main.yml
index 78169736..4dc7208c 100644
--- a/roles/elasticsearch/handlers/main.yml
+++ b/roles/elasticsearch/handlers/main.yml
@@ -1,11 +1,12 @@
 ---
 # handlers file for elasticsearch
 - name: Restart Elasticsearch
-  ansible.builtin.service:
-    name: elasticsearch
-    state: restarted
-    daemon_reload: yes
+  # The include is looped over every host of the Elasticsearch group and the
+  # first condition only matches the host whose name equals the current item.
+  ansible.builtin.include_tasks: restart_elasticsearch.yml
+  with_items: "{{ groups[elasticstack_elasticsearch_group_name] }}"
   when:
+    - item == inventory_hostname
     - elasticsearch_enable | bool
     - not elasticsearch_freshstart.changed | bool
     - not elasticsearch_freshstart_security.changed | bool
diff --git a/roles/elasticsearch/handlers/restart_elasticsearch.yml b/roles/elasticsearch/handlers/restart_elasticsearch.yml
new file mode 100644
index 00000000..c900a0c3
--- /dev/null
+++ b/roles/elasticsearch/handlers/restart_elasticsearch.yml
@@ -0,0 +1,14 @@
+---
+
+# The registered service state is read by elasticsearch-rolling-start.yml
+# (elasticsearch_running.status.ActiveState) before it starts the service.
+- name: Check for running Elasticsearch service
+  ansible.builtin.systemd:
+    name: elasticsearch
+  register: elasticsearch_running
+
+- name: Include rolling stop
+  ansible.builtin.include_tasks: "{{ role_path }}/tasks/elasticsearch-rolling-stop.yml"
+
+- name: Include rolling start
+  ansible.builtin.include_tasks: "{{ role_path }}/tasks/elasticsearch-rolling-start.yml"
diff --git a/roles/elasticsearch/tasks/elasticsearch-rolling-start.yml b/roles/elasticsearch/tasks/elasticsearch-rolling-start.yml
new file mode 100644
index 00000000..8223cb74
--- /dev/null
+++ b/roles/elasticsearch/tasks/elasticsearch-rolling-start.yml
@@ -0,0 +1,85 @@
+# Ansible
+#
+# Rolling Upgrade of Elasticsearch with security on
+# Source from: author: Jeff Steinmetz, @jeffsteinmetz; Bin Li, @holysoros
+# Modifications: author: Daniel Neuberger @netways.de
+# More modifications: NETWAYS Professional Services GmbH
+# latest tested with Ansible 2.9 and later
+
+---
+
+# For now we support upgrade only for clusters with security enabled
+# If you positively need support for safely upgrading clusters without security,
+# feel free to open an issue at https://github.com/NETWAYS/ansible-collection-elasticstack/issues
+
+- name: Start elasticsearch
+  ansible.builtin.service:
+    name: elasticsearch
+    enabled: true
+    state: started
+  when:
+    - elasticsearch_running.status.ActiveState == "active"
+    - not elasticsearch_unsafe_upgrade_restart | bool
+
+- name: Restart elasticsearch (fast, for non-prod)
+  ansible.builtin.service:
+    name: elasticsearch
+    enabled: true
+    state: restarted
+  when:
+    - elasticsearch_running.status.ActiveState == "active"
+    - elasticsearch_unsafe_upgrade_restart | bool
+
+- name: Wait for elasticsearch node to come back up if it was stopped
+  ansible.builtin.wait_for:
+    host: "{{ elasticsearch_api_host }}"
+    port: "{{ elasticstack_elasticsearch_http_port }}"
+    delay: 30
+
+- name: Confirm the node joins the cluster # noqa: risky-shell-pipe
+  ansible.builtin.shell: >
+    if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
+    curl
+    -k
+    -u elastic:{{ elasticstack_password.stdout }}
+    -s
+    -m 2
+    '{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cat/nodes?h=name'
+    | grep
+    -E
+    '^{{ elasticsearch_nodename }}$'
+  register: result
+  until: result.rc == 0
+  retries: 200
+  delay: 3
+  changed_when: false
+  # The elastic password is part of the command line, keep it out of the logs
+  no_log: true
+
+- name: Enable shard allocation for the cluster
+  ansible.builtin.uri:
+    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
+    method: PUT
+    body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
+    body_format: json
+    user: elastic
+    password: "{{ elasticstack_password.stdout }}"
+    validate_certs: false
+  register: response
+  # `acknowledged` is a JSON boolean, so it is compared with the boolean
+  # literal true rather than with the string 'true'
+  until: "response.json.acknowledged == true"
+  retries: 5
+  delay: 30
+
+- name: Wait for cluster health to return to yellow or green
+  ansible.builtin.uri:
+    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
+    method: GET
+    user: elastic
+    password: "{{ elasticstack_password.stdout }}"
+    validate_certs: false
+  register: response
+  until: "response.json.status == 'yellow' or response.json.status == 'green'"
+  retries: 5
+  delay: 30
diff --git a/roles/elasticsearch/tasks/elasticsearch-rolling-stop.yml b/roles/elasticsearch/tasks/elasticsearch-rolling-stop.yml
new file mode 100644
index 00000000..fe7a0948
--- /dev/null
+++ b/roles/elasticsearch/tasks/elasticsearch-rolling-stop.yml
@@ -0,0 +1,76 @@
+# Ansible
+#
+# Rolling Upgrade of Elasticsearch with security on
+# Source from: author: Jeff Steinmetz, @jeffsteinmetz; Bin Li, @holysoros
+# Modifications: author: Daniel Neuberger @netways.de
+# More modifications: NETWAYS Professional Services GmbH
+# latest tested with Ansible 2.9 and later
+
+---
+
+# For now we support upgrade only for clusters with security enabled
+# If you positively need support for safely upgrading clusters without security,
+# feel free to open an issue at https://github.com/NETWAYS/ansible-collection-elasticstack/issues
+- name: Set connection protocol to https
+  ansible.builtin.set_fact:
+    elasticsearch_http_protocol: "https"
+
+# Usually we should not need this step. It's only there to recover from broken upgrade plays
+# Without this step the cluster would never recover and the play would always fail
+- name: Enable shard allocation for the cluster
+  ansible.builtin.uri:
+    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
+    method: PUT
+    body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
+    body_format: json
+    user: elastic
+    password: "{{ elasticstack_password.stdout }}"
+    validate_certs: false
+  register: response
+  # `acknowledged` is a JSON boolean, so it is compared with the boolean
+  # literal true rather than with the string 'true'
+  until: "response.json.acknowledged == true"
+  retries: 5
+  delay: 30
+
+# this step is key!!! Don't restart more nodes
+# until all shards have completed recovery
+- name: Wait for cluster health to return to green
+  ansible.builtin.uri:
+    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
+    method: GET
+    user: elastic
+    password: "{{ elasticstack_password.stdout }}"
+    validate_certs: false
+  register: response
+  until: "response.json.status == 'green'"
+  retries: 50
+  delay: 30
+
+# Disabling shard allocation right after enabling it seems redundant. Please see above for details.
+- name: Disable shard allocation for the cluster
+  ansible.builtin.uri:
+    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
+    method: PUT
+    body: '{ "persistent": { "cluster.routing.allocation.enable": "none" }}'
+    body_format: json
+    user: elastic
+    password: "{{ elasticstack_password.stdout }}"
+    validate_certs: false
+
+- name: Stop non essential indexing to speed up shard recovery
+  ansible.builtin.uri:
+    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_flush"
+    method: POST
+    user: elastic
+    password: "{{ elasticstack_password.stdout }}"
+    validate_certs: false
+  failed_when: false
+
+- name: Shutdown elasticsearch service
+  ansible.builtin.service:
+    name: elasticsearch
+    enabled: true
+    state: stopped
+  when:
+    - not elasticsearch_unsafe_upgrade_restart | bool
diff --git a/roles/elasticsearch/tasks/elasticsearch-rolling-upgrade.yml b/roles/elasticsearch/tasks/elasticsearch-rolling-upgrade.yml
index 19801a76..ab319c07 100644
--- a/roles/elasticsearch/tasks/elasticsearch-rolling-upgrade.yml
+++ b/roles/elasticsearch/tasks/elasticsearch-rolling-upgrade.yml
@@ -71,65 +71,8 @@
     - groups[elasticstack_elasticsearch_group_name] | length > 1
   block:
 
-    # Usually we should not need this step. It's only there to recover from broken upgrade plays
-    # Without this step the cluster would never recover and the play would always fail
-    - name: Enable shard allocation for the cluster
-      ansible.builtin.uri:
-        url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
-        method: PUT
-        body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
-        body_format: json
-        user: elastic
-        password: "{{ elasticstack_password.stdout }}"
-        validate_certs: no
-      register: response
-      # next line is boolean not string, so no quotes around true
-      # use python truthiness
-      until: "response.json.acknowledged == true"
-      retries: 5
-      delay: 30
-
-      # this step is key!!! Don't restart more nodes
-      # until all shards have completed recovery
-    - name: Wait for cluster health to return to green
-      ansible.builtin.uri:
-        url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
-        method: GET
-        user: elastic
-        password: "{{ elasticstack_password.stdout }}"
-        validate_certs: no
-      register: response
-      until: "response.json.status == 'green'"
-      retries: 50
-      delay: 30
-
-    # Disabling shard allocation right after enabling it seems redundant. Please see above for details.
-    - name: Disable shard allocation for the cluster
-      ansible.builtin.uri:
-        url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
-        method: PUT
-        body: '{ "persistent": { "cluster.routing.allocation.enable": "none" }}'
-        body_format: json
-        user: elastic
-        password: "{{ elasticstack_password.stdout }}"
-        validate_certs: no
-
-    - name: Stop non essential indexing to speed up shard recovery
-      ansible.builtin.uri:
-        url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_flush"
-        method: POST
-        user: elastic
-        password: "{{ elasticstack_password.stdout }}"
-        validate_certs: no
-      failed_when: false
-
-    - name: Shutdown elasticsearch service
-      ansible.builtin.service:
-        name: elasticsearch
-        enabled: yes
-        state: stopped
-      when:
-        - not elasticsearch_unsafe_upgrade_restart | bool
+    - name: Include rolling stop
+      ansible.builtin.include_tasks: elasticsearch-rolling-stop.yml
 
     - name: Update Elasticsearch - rpm with managed repositories
       ansible.builtin.package:
@@ -147,72 +90,5 @@
         - ansible_os_family == "Debian" or
           not elasticstack_full_stack | bool
 
-    - name: Start elasticsearch
-      ansible.builtin.service:
-        name: elasticsearch
-        enabled: yes
-        state: started
-      when:
-        - elasticsearch_running.status.ActiveState == "active"
-        - not elasticsearch_unsafe_upgrade_restart | bool
-
-    - name: Restart elasticsearch (fast, for non-prod)
-      ansible.builtin.service:
-        name: elasticsearch
-        enabled: yes
-        state: restarted
-      when:
-        - elasticsearch_running.status.ActiveState == "active"
-        - elasticsearch_unsafe_upgrade_restart | bool
-
-    - name: Wait for elasticsearch node to come back up if it was stopped
-      ansible.builtin.wait_for:
-        host: "{{ elasticsearch_api_host }}"
-        port: "{{ elasticstack_elasticsearch_http_port }}"
-        delay: 30
-
-    - name: Confirm the node joins the cluster # noqa: risky-shell-pipe
-      ansible.builtin.shell: >
-        if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
-        curl
-        -k
-        -u elastic:{{ elasticstack_password.stdout }}
-        -s
-        -m 2
-        '{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cat/nodes?h=name'
-        | grep
-        -E
-        '^{{ elasticsearch_nodename }}$'
-      register: result
-      until: result.rc == 0
-      retries: 200
-      delay: 3
-      changed_when: false
-
-    - name: Enable shard allocation for the cluster
-      ansible.builtin.uri:
-        url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
-        method: PUT
-        body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
-        body_format: json
-        user: elastic
-        password: "{{ elasticstack_password.stdout }}"
-        validate_certs: no
-      register: response
-      # next line is boolean not string, so no quotes around true
-      # use python truthiness
-      until: "response.json.acknowledged == true"
-      retries: 5
-      delay: 30
-
-    - name: Wait for cluster health to return to yellow or green
-      ansible.builtin.uri:
-        url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
-        method: GET
-        user: elastic
-        password: "{{ elasticstack_password.stdout }}"
-        validate_certs: no
-      register: response
-      until: "response.json.status == 'yellow' or response.json.status == 'green'"
-      retries: 5
-      delay: 30
+    - name: Include rolling start
+      ansible.builtin.include_tasks: elasticsearch-rolling-start.yml