# Build the JVM package documentation, upload it to S3, then ask Read the Docs
# to rebuild the documentation for the same branch / pull request.
name: XGBoost-docs

on: [push, pull_request]

env:
  # `PR-<number>` for pull requests, otherwise the name of the pushed ref.
  BRANCH_NAME: >-
    ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}

jobs:
  build-jvm-docs:
    name: Build docs for JVM packages
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=linux-amd64-cpu
      - tag=doc-build-jvm-docs
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Log into Docker registry (AWS ECR)
        run: bash ops/pipeline/login-docker-registry.sh
      - run: bash ops/pipeline/build-jvm-gpu.sh
      - run: bash ops/pipeline/build-jvm-doc.sh
      - name: Upload JVM doc
        run: |
          # xgboost-docs/{branch}/{commit}/{branch}.tar.bz2
          # branch can be the name of the dmlc/xgboost branch, or `PR-{number}`.
          #
          # Read BRANCH_NAME through the shell environment (quoted) instead of
          # a `${{ ... }}` expression: expressions are substituted into the
          # script text before the shell parses it, so a ref name containing
          # shell metacharacters would otherwise be executed.
          python3 ops/pipeline/manage-artifacts.py upload \
            --s3-bucket xgboost-docs \
            --prefix "${BRANCH_NAME}/${GITHUB_SHA}" --make-public \
            "jvm-packages/${BRANCH_NAME}.tar.bz2"

  trigger-rtd-build:
    needs: [build-jvm-docs]
    name: Trigger Read The Docs build.
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=linux-amd64-cpu
      - tag=doc-trigger-rtd-build
    steps:
      # Restart Docker daemon so that it recognizes the ephemeral disks
      - run: sudo systemctl restart docker
      - uses: actions/checkout@v4
        with:
          submodules: "true"
      - name: Trigger RTD
        run: bash ops/pipeline/trigger-rtd.sh
-# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. +"""Sphinx configuration. + +See `doc/contrib/docs.rst `__ +for more info. +""" + import os -import re import shutil import subprocess import sys @@ -26,6 +18,9 @@ TMP_DIR = os.path.join(CURR_PATH, "tmp") DOX_DIR = "doxygen" +# Directly load the source module. +sys.path.append(os.path.join(PROJECT_ROOT, "python-package")) +# Tell xgboost to not load the libxgboost.so os.environ["XGBOOST_BUILD_DOC"] = "1" # Version information. @@ -35,7 +30,11 @@ release = xgboost.__version__ -def run_doxygen(): +# Document is uploaded to here by the CI builder. +S3_BUCKET = "https://xgboost-docs.s3.us-west-2.amazonaws.com" + + +def run_doxygen() -> None: """Run the doxygen make command in the designated folder.""" curdir = os.path.normpath(os.path.abspath(os.path.curdir)) if os.path.exists(TMP_DIR): @@ -67,33 +66,74 @@ def run_doxygen(): os.chdir(curdir) -def build_jvm_docs(): - """Build docs for the JVM packages""" - git_branch = os.getenv("READTHEDOCS_VERSION_NAME", default=None) - print(f"READTHEDOCS_VERSION_NAME = {git_branch}") +def get_branch() -> str: + """Guess the git branch.""" + branch = os.getenv("READTHEDOCS_VERSION_NAME", default=None) + print(f"READTHEDOCS_VERSION_NAME = {branch}") + + def is_id(): + try: + return str(int(branch)) == branch + except ValueError: + return False + + if not branch: # Not in RTD + branch = "master" # use the master branch as the default. + elif branch == "latest": + branch = "master" + elif branch.startswith("release_"): + pass # release branch, like: release_2.1.0 + elif branch == "stable": + branch = f"release_{xgboost.__version__}" + elif is_id(): + # Likely PR branch + branch = f"PR-{branch}" + else: # other dmlc branches. 
+ pass + print(f"branch = {branch}") + return branch + + +def get_sha(branch: str) -> str | None: + sha = os.getenv("READTHEDOCS_GIT_COMMIT_HASH", default=None) + if sha is not None: + return sha + + if branch == "master": + res = subprocess.run(["git", "rev-parse", "master"], stdout=subprocess.PIPE) + else: + res = subprocess.run(["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE) + if res.returncode != 0: + return None + return res.stdout.decode("utf-8").strip() + - if not git_branch: - git_branch = "master" - elif git_branch == "latest": - git_branch = "master" - elif git_branch == "stable": - git_branch = f"release_{xgboost.__version__}" - print(f"git_branch = {git_branch}") +def build_jvm_docs() -> None: + """Fetch docs for the JVM packages""" + branch = get_branch() + commit = get_sha(branch) + if commit is None: + print("Couldn't find commit to build jvm docs.") + return - def try_fetch_jvm_doc(branch): + def try_fetch_jvm_doc(branch: str) -> bool: """ Attempt to fetch JVM docs for a given branch. Returns True if successful """ try: - url = f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{branch}.tar.bz2" - filename, _ = urllib.request.urlretrieve(url) + local_jvm_docs = os.environ.get("XGBOOST_JVM_DOCS", None) + if local_jvm_docs is not None: + filename = os.path.expanduser(local_jvm_docs) + else: + url = f"{S3_BUCKET}/{branch}/{commit}/{branch}.tar.bz2" + filename, _ = urllib.request.urlretrieve(url) + print(f"Finished: {url} -> {filename}") if not os.path.exists(TMP_DIR): print(f"Create directory {TMP_DIR}") os.mkdir(TMP_DIR) jvm_doc_dir = os.path.join(TMP_DIR, "jvm_docs") if os.path.exists(jvm_doc_dir): - print(f"Delete directory {jvm_doc_dir}") shutil.rmtree(jvm_doc_dir) print(f"Create directory {jvm_doc_dir}") os.mkdir(jvm_doc_dir) @@ -105,8 +145,8 @@ def try_fetch_jvm_doc(branch): print(f"JVM doc not found at {url}. 
# Upper bound, in seconds, on how long to wait for the ReadTheDocs API.
# `requests` waits indefinitely when no timeout is given.
_RTD_REQUEST_TIMEOUT_SECONDS = 60

# Length of the auth token accepted by `main`.
_RTD_TOKEN_LENGTH = 40


def trigger_build(token: str) -> None:
    """Trigger RTD build.

    The version to build is read from the GitHub event payload stored at
    ``GITHUB_EVENT_PATH``: the pull request number for pull request events,
    otherwise the last path component of the pushed ref.

    Parameters
    ----------
    token :
        Auth token, sent in the ``Authorization`` header. Never log it.

    Raises
    ------
    KeyError
        If ``GITHUB_EVENT_PATH`` is not set, or the payload has no ``ref``.
    RuntimeError
        If the service answers with anything other than HTTP 202.
    """
    event_path = os.environ["GITHUB_EVENT_PATH"]
    with open(event_path, "r", encoding="utf-8") as fd:
        event: dict = json.load(fd)

    if event.get("pull_request", None) is None:
        # refs/heads/branch-name
        # NOTE(review): a branch name that itself contains "/" is truncated
        # to its last component here -- confirm how RTD names such versions.
        branch = event["ref"].split("/")[-1]
    else:
        branch = event["pull_request"]["number"]

    url = f"https://readthedocs.org/api/v3/projects/xgboost/versions/{branch}/builds/"
    headers = {"Authorization": f"token {token}"}
    response = requests.post(
        url, headers=headers, timeout=_RTD_REQUEST_TIMEOUT_SECONDS
    )
    # 202 means the build is successfully triggered.
    if response.status_code != 202:
        # `.get`: a status code missing from the table (for example a
        # proxy-specific 5xx) must not hide the real failure behind a KeyError.
        status_text = http_responses.get(response.status_code, "Unknown")
        raise RuntimeError(
            "ReadTheDocs returned an unexpected response: "
            f"{response.status_code} {status_text}, reason: {response.reason}"
        )
    pprint.pprint(response.json(), indent=4)


def main() -> None:
    """Validate ``RTD_AUTH_TOKEN`` and trigger the build.

    An empty token is a deliberate no-op: a message is printed and no build
    is triggered.

    Raises
    ------
    RuntimeError
        If ``RTD_AUTH_TOKEN`` is not set at all.
    ValueError
        If the token is non-empty but does not have the expected length.
    """
    token = os.getenv("RTD_AUTH_TOKEN")
    # GA redacts the secret by default, but we should still be really careful to not log
    # (expose) the token in the CI.
    if token is None:
        raise RuntimeError(
            "The RTD_AUTH_TOKEN environment variable must be set to a valid auth token for the "
            "ReadTheDocs service."
        )
    if not token:
        print("Document build is not triggered.")
        return

    if len(token) != _RTD_TOKEN_LENGTH:
        raise ValueError("Invalid token.")

    trigger_build(token)