diff --git a/.github/workflows/_comps-workflow.yml b/.github/workflows/_comps-workflow.yml
index 86b41239c6..2a82110152 100644
--- a/.github/workflows/_comps-workflow.yml
+++ b/.github/workflows/_comps-workflow.yml
@@ -17,11 +17,19 @@ on:
         default: true
         required: false
         type: boolean
-      test:
+      test_compose:
         default: true
         description: "Test comps with docker compose"
         required: false
         type: boolean
+      test_helmchart:
+        default: true
+        description: "Test comps with helm chart"
+        required: false
+        type: boolean
+      hardware:
+        required: true
+        type: string
       mode:
         default: "CD"
         description: "Whether the test range is CI, CD or CICD"
@@ -33,6 +41,7 @@ jobs:
   # Image Build
   ####################################################################################################
   build-images:
+    if: ${{ !(fromJSON(inputs.test_helmchart)) }}  # image build is skipped when helm chart testing is enabled
     runs-on: "docker-build-gaudi"
     continue-on-error: true
     outputs:
@@ -98,9 +107,22 @@ jobs:
   ####################################################################################################
   test-service-compose:
     needs: [build-images]
-    if: ${{ fromJSON(inputs.test) }}
+    if: ${{ fromJSON(inputs.test_compose) }}
     uses: ./.github/workflows/_run-docker-compose.yml
     with:
       tag: ${{ inputs.tag }}
       service: ${{ inputs.service }}
     secrets: inherit
+
+  ####################################################################################################
+  # Helm Chart Test
+  ####################################################################################################
+  test-service-helmchart:
+    if: ${{ fromJSON(inputs.test_helmchart) }}
+    uses: ./.github/workflows/_run-helm-chart.yml
+    with:
+      tag: ${{ inputs.tag }}
+      mode: ${{ inputs.mode }}
+      service: ${{ inputs.service }}
+      hardware: ${{ inputs.hardware }}  # the declared input is `hardware`; it becomes the helm job's runs-on label
+    secrets: inherit
diff --git a/.github/workflows/_run-helm-chart.yml b/.github/workflows/_run-helm-chart.yml
new file mode 100644
index 0000000000..79990e675d
--- /dev/null
+++ b/.github/workflows/_run-helm-chart.yml
@@ -0,0 +1,215 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+name: Helm Chart Test on GenAIComps For Call
+permissions: read-all
+on:
+  workflow_call:
+    inputs:
+      service:
+        default: "chatqna"
+        required: true
+        type: string
+        description: "service to test, e.g. asr"
+      dockerhub:
+        default: "false"
+        required: false
+        type: string
+        description: "Set to true if you want to use released docker images at dockerhub. By default using internal docker registry."
+      tag:
+        default: "latest"
+        required: false
+        type: string
+      mode:
+        default: "CD"
+        description: "Whether the test range is CI, CD or CICD"
+        required: false
+        type: string
+      hardware:
+        default: "xeon"
+        required: true
+        type: string
+
+jobs:
+  # Collect the helm values files to test for the requested hardware and publish them as a JSON array.
+  get-test-case:
+    runs-on: ubuntu-latest
+    outputs:
+      value_files: ${{ steps.get-test-files.outputs.value_files }}
+      CHECKOUT_REF: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
+    steps:
+      - name: Get checkout ref
+        id: get-checkout-ref
+        run: |
+          if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
+            CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge
+          else
+            CHECKOUT_REF=${{ github.ref }}
+          fi
+          echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT
+          echo "checkout ref ${CHECKOUT_REF}"
+
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
+          fetch-depth: 0
+
+      - name: Get test Services
+        id: get-test-files
+        run: |
+          set -x
+          if [ "${{ inputs.mode }}" = "CI" ]; then
+            base_commit=${{ github.event.pull_request.base.sha }}
+            merged_commit=$(git log -1 --format='%H')
+            values_files=$(git diff --name-only ${base_commit} ${merged_commit} | \
+            grep "kubernetes" | \
+            sort -u )
+            echo $values_files
+          elif [ "${{ inputs.mode }}" = "CD" ]; then
+            values_files=$(ls ${{ github.workspace }}/comps/"${{ inputs.service }}"/deployment/kubernetes/*values.yaml)
+          fi
+          value_files="["
+          for file in ${values_files}; do
+            if [ -f "$file" ]; then
+              filename=$(basename "$file")
+              if [[ "$filename" == *"gaudi"* ]]; then
+                if [[ "${{ inputs.hardware }}" == "gaudi" ]]; then
+                  value_files="${value_files}\"${filename}\","
+                fi
+              elif [[ "$filename" == *"nv"* ]]; then
+                continue
+              else
+                if [[ "${{ inputs.hardware }}" == "xeon" ]]; then
+                  value_files="${value_files}\"${filename}\","
+                fi
+              fi
+            fi
+          done
+          value_files="${value_files%,}]"
+
+          echo "value_files=${value_files}"
+          echo "value_files=${value_files}" >> $GITHUB_OUTPUT
+
+  # Install, validate and uninstall the chart once per values file.
+  helm-test:
+    needs: [get-test-case]
+    # Skip when no values file matched: an empty matrix vector cannot be expanded and fails the workflow.
+    if: ${{ needs.get-test-case.outputs.value_files != '[]' }}
+    strategy:
+      matrix:
+        value_file: ${{ fromJSON(needs.get-test-case.outputs.value_files) }}
+    runs-on: ${{ inputs.hardware }}
+    continue-on-error: true
+    steps:
+      - name: Clean Up Working Directory
+        run: |
+          echo "value_file=${{ matrix.value_file }}"
+          sudo rm -rf ${{github.workspace}}/*
+
+      - name: Get checkout ref
+        id: get-checkout-ref
+        run: |
+          if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
+            CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge
+          else
+            CHECKOUT_REF=${{ github.ref }}
+          fi
+          echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT
+          echo "checkout ref ${CHECKOUT_REF}"
+
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
+          fetch-depth: 0
+
+      - name: Set variables
+        env:
+          service: ${{ inputs.service }}
+        run: |
+          CHART_NAME="${service,,}" # asr
+          echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV
+          echo "RELEASE_NAME=${CHART_NAME}$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
+          echo "NAMESPACE=${CHART_NAME}-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
+          echo "ROLLOUT_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
+          echo "TEST_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
+          echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
+          echo "should_cleanup=false" >> $GITHUB_ENV
+          echo "skip_validate=false" >> $GITHUB_ENV
+          echo "CHART_FOLDER=comps/${service}/deployment/kubernetes" >> $GITHUB_ENV
+
+      - name: Helm install
+        id: install
+        env:
+          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HFTOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          value_file: ${{ matrix.value_file }}
+        run: |
+          set -xe
+          echo "should_cleanup=true" >> $GITHUB_ENV
+          if [[ ! -f ${{ github.workspace }}/${{ env.CHART_FOLDER }}/${value_file} ]]; then
+            echo "No value file found, exiting test!"
+            echo "skip_validate=true" >> $GITHUB_ENV
+            echo "should_cleanup=false" >> $GITHUB_ENV
+            exit 0
+          fi
+
+          if ! helm install --create-namespace --namespace $NAMESPACE $RELEASE_NAME oci://ghcr.io/opea-project/charts/${CHART_NAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=/home/sdp/.cache/huggingface/hub -f comps/${{ inputs.service }}/deployment/kubernetes/${value_file} --version 0-latest --wait; then
+            echo "Failed to install chart ${{ inputs.service }}"
+            echo "skip_validate=true" >> $GITHUB_ENV
+            .github/workflows/scripts/k8s-utils.sh dump_pods_status $NAMESPACE
+            exit 1
+          fi
+          helm list -A
+          kubectl get pods -n $NAMESPACE
+
+      - name: Validate e2e test
+        if: always()
+        run: |
+          set -xe
+          if $skip_validate; then
+            echo "Skip validate"
+          else
+            LOG_PATH=/home/$(whoami)/helm-logs
+            mkdir -p "${LOG_PATH}"  # tee cannot create the log file if the directory is missing
+            chart=${{ env.CHART_NAME }}
+            helm test -n $NAMESPACE $RELEASE_NAME --logs --timeout "$TEST_TIMEOUT_SECONDS" | tee ${LOG_PATH}/charts-${chart}.log
+            # $? would be tee's status here (no pipefail); PIPESTATUS[0] is the exit code of `helm test`.
+            exit_code=${PIPESTATUS[0]}
+            if [ $exit_code -ne 0 ]; then
+              echo "Chart ${chart} test failed, please check the logs in ${LOG_PATH}!"
+              exit 1
+            fi
+
+            echo "Checking response results, make sure the output is reasonable. "
+            teststatus=false
+            if [[ -f $LOG_PATH/charts-${chart}.log ]] && \
+            [[ $(grep -c "^Phase:.*Failed" $LOG_PATH/charts-${chart}.log) != 0 ]]; then
+              teststatus=false
+              ${{ github.workspace }}/.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
+            else
+              teststatus=true
+            fi
+
+            if [ $teststatus == false ]; then
+              echo "Response check failed, please check the logs in artifacts!"
+              exit 1
+            else
+              echo "Response check succeeded!"
+              exit 0
+            fi
+          fi
+
+      - name: Helm uninstall
+        if: always()
+        run: |
+          if $should_cleanup; then
+            helm uninstall $RELEASE_NAME --namespace $NAMESPACE
+            if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
+              kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
+              kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
+            fi
+          fi
diff --git a/.github/workflows/manual-comps-test.yml b/.github/workflows/manual-comps-test.yml
index e15ba1b3b6..8765a601f5 100644
--- a/.github/workflows/manual-comps-test.yml
+++ b/.github/workflows/manual-comps-test.yml
@@ -15,11 +15,21 @@ on:
         description: "Build test required images for Comps"
         required: false
         type: boolean
-      test:
+      test_compose:
         default: true
         description: "Test comps with docker compose"
         required: false
         type: boolean
+      test_helmchart:
+        default: true
+        description: "Test comps with helm chart"
+        required: false
+        type: boolean
+      nodes:
+        default: "gaudi,xeon"
+        description: "Hardware that only runs helm chart test"
+        required: false
+        type: string
       tag:
         default: "rc"
         description: "Tag to apply to images"
@@ -38,6 +48,7 @@ jobs:
     runs-on: ubuntu-latest
     outputs:
       services: ${{ steps.get-matrix.outputs.services }}
+      nodes: ${{ steps.get-matrix.outputs.nodes }}
     steps:
       - name: Create Matrix
         id: get-matrix
@@ -45,17 +56,28 @@ jobs:
           services=($(echo ${{ inputs.services }} | tr ',' ' '))
           services_json=$(printf '%s\n' "${services[@]}" | sort -u | jq -R '.' | jq -sc '.')
           echo "services=$services_json" >> $GITHUB_OUTPUT
+          if [ ${{ inputs.test_helmchart }} == "true" ]; then
+            nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
+            nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
+            echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
+          else
+            # Placeholder entry: an empty matrix vector would stop run-services (and the compose test) from expanding.
+            echo 'nodes=["none"]' >> $GITHUB_OUTPUT
+          fi
 
   run-services:
     needs: [get-test-matrix]
     strategy:
       matrix:
         service: ${{ fromJson(needs.get-test-matrix.outputs.services) }}
+        nodes: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
       fail-fast: false
     uses: ./.github/workflows/_comps-workflow.yml
     with:
       service: ${{ matrix.service }}
+      hardware: ${{ matrix.nodes }}
       tag: ${{ inputs.tag }}
       mode: ${{ inputs.mode }}
-      test: ${{ inputs.test }}
+      test_compose: ${{ inputs.test_compose }}
+      test_helmchart: ${{ inputs.test_helmchart }}
     secrets: inherit
diff --git a/.github/workflows/pr-helm-test.yaml b/.github/workflows/pr-helm-test.yaml
new file mode 100644
index 0000000000..0d47cc49a3
--- /dev/null
+++ b/.github/workflows/pr-helm-test.yaml
@@ -0,0 +1,78 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+name: Microservice Test With Helm Charts
+
+on:
+  # NOTE(review): pull_request_target makes repository secrets available while the jobs check out and run the PR merge ref - confirm this is intended.
+  pull_request_target:
+    branches: [main]
+    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
+    paths:
+      - "**/deployment/kubernetes/**"
+      - "!**.md"  # a negative pattern only excludes when it follows the positive match
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  job1:
+    name: Get-test-matrix
+    runs-on: ubuntu-latest
+    outputs:
+      run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
+    steps:
+      - name: Checkout out Repo
+        uses: actions/checkout@v4
+        with:
+          ref: "refs/pull/${{ github.event.number }}/merge"
+          fetch-depth: 0
+
+      - name: Get test matrix
+        id: get-test-matrix
+        run: |
+          set -x
+          base_commit=${{ github.event.pull_request.base.sha }}
+          merged_commit=$(git log -1 --format='%H')
+          values_files=$(git diff --name-only ${base_commit} ${merged_commit} | \
+          grep "values.yaml" | \
+          sort -u) # comps/agent/deployment/kubernetes/cpu-values.yaml
+
+          run_matrix="{\"include\":["
+          for values_file in ${values_files}; do
+            if [ -f "$values_file" ]; then
+              valuefile=$(basename "$values_file") # cpu-values.yaml
+              service=$(echo "$values_file" | cut -d'/' -f2) # agent
+              if [[ "$valuefile" == *"gaudi"* ]]; then
+                hardware="gaudi"
+              elif [[ "$valuefile" == *"nv"* ]]; then
+                continue
+              else
+                hardware="xeon"
+              fi
+              echo "service=${service}, hardware=${hardware}, valuefile=${valuefile}"
+              if [[ $(echo ${run_matrix} | grep -c "{\"service\":\"${service}\",\"hardware\":\"${hardware}\"},") == 0 ]]; then
+                run_matrix="${run_matrix}{\"service\":\"${service}\",\"hardware\":\"${hardware}\"},"
+                echo "------------------ add one values file ------------------"
+              fi
+            fi
+          done
+          run_matrix="${run_matrix%,}"
+          run_matrix=$run_matrix"]}"
+          echo "run_matrix=${run_matrix}"
+          echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT
+
+  Chart-test:
+    needs: [job1]
+    # Run only when job1 succeeded and produced at least one entry; an empty `include` list cannot be expanded into a matrix.
+    if: ${{ needs.job1.outputs.run_matrix != '{"include":[]}' }}
+    uses: ./.github/workflows/_run-helm-chart.yml
+    strategy:
+      matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
+    with:
+      service: ${{ matrix.service }}
+      hardware: ${{ matrix.hardware }}
+      mode: "CI"
+    secrets: inherit
diff --git a/.github/workflows/scripts/k8s-utils.sh b/.github/workflows/scripts/k8s-utils.sh
new file mode 100755
index 0000000000..ba58e1a152
--- /dev/null
+++ b/.github/workflows/scripts/k8s-utils.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -e
+
+# Print `kubectl describe` and `kubectl logs` output for pod $1 in namespace $2.
+function dump_pod_log() {
+    pod_name=$1
+    namespace=$2
+    echo "-----------Pod: $pod_name---------"
+    echo "#kubectl describe pod $pod_name -n $namespace"
+    kubectl describe pod $pod_name -n $namespace || true  # best effort: one failing pod must not abort the dump under `set -e`
+    echo "-----------------------------------"
+    echo "#kubectl logs $pod_name -n $namespace"
+    kubectl logs $pod_name -n $namespace || true
+    echo "-----------------------------------"
+}
+
+# List the pods in namespace $1, then dump every pod that is not Running or not fully ready.
+function dump_pods_status() {
+    namespace=$1
+    echo "-----DUMP POD STATUS in NS $namespace------"
+    kubectl get pods -n $namespace -o wide
+    echo "-----------------------------------"
+
+    # Get all pods in the namespace and their statuses
+    pods=$(kubectl get pods -n $namespace --no-headers)
+
+    # Loop through each pod
+    echo "$pods" | while read -r line; do
+        pod_name=$(echo $line | awk '{print $1}')
+        ready=$(echo $line | awk '{print $2}')
+        status=$(echo $line | awk '{print $3}')
+
+        # Extract the READY count
+        ready_count=$(echo $ready | cut -d'/' -f1)
+        required_count=$(echo $ready | cut -d'/' -f2)
+
+        # Check if the pod is not in "Running" status or READY count is less than required
+        if [[ "$status" != "Running" || "$ready_count" -lt "$required_count" ]]; then
+            dump_pod_log $pod_name $namespace
+        fi
+    done
+}
+
+# Dump describe/log output for every pod in namespace $1.
+function dump_all_pod_logs() {
+    namespace=$1
+    echo "-----DUMP POD STATUS AND LOG in NS $namespace------"
+
+    pods=$(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}')
+    for pod_name in $pods
+    do
+        dump_pod_log $pod_name $namespace
+    done
+}
+
+if [ $# -eq 0 ]; then
+    echo "Usage: $0 <dump_pods_status|dump_all_pod_logs> <namespace>"
+    exit 1
+fi
+
+case "$1" in
+    dump_pods_status)
+        dump_pods_status $2
+        ;;
+    dump_all_pod_logs)
+        dump_all_pod_logs $2
+        ;;
+    *)
+        echo "Unknown function: $1"
+        exit 1
+        ;;
+esac