Skip to content

Check all English links #61

Check all English links

Check all English links #61

name: Check all English links
# **What it does**: This script once a day checks all English links and reports in issues.
# **Why we have it**: We want to know if any links break.
# **Who does it impact**: Docs content.
on:
workflow_dispatch:
schedule:
- cron: '40 19 * * *' # once a day at 19:40 UTC / 11:40 PST
permissions:
contents: read
issues: write
jobs:
check_all_english_links:
name: Check all links
if: github.repository == 'github/docs-internal'
runs-on: ubuntu-20.04-xl
env:
GITHUB_TOKEN: ${{ secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES }}
FIRST_RESPONDER_PROJECT: Docs content first responder
REPORT_AUTHOR: docubot
REPORT_LABEL: broken link report
REPORT_REPOSITORY: github/docs-content
steps:
- name: Check that gh CLI is installed
run: gh --version
- name: Check out repo's default branch
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
- name: Setup Node
uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
with:
node-version: '16.15.0'
cache: npm
- name: Install dependencies
run: npm ci
- name: Cache nextjs build
uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09
with:
path: .next/cache
key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }}
- name: Build server
run: npm run build
- name: Start server in the background
env:
NODE_ENV: production
PORT: 4000
# We don't want or need the changelog entries in this context.
CHANGELOG_DISABLED: true
# The default is 10s. But because this runs overnight, we can
# be a lot more patient.
REQUEST_TIMEOUT: 20000
# Don't care about CDN caching image URLs
DISABLE_REWRITE_ASSET_URLS: true
run: |
node server.js > /tmp/stdout.log 2> /tmp/stderr.log &
sleep 6
curl --retry-connrefused --retry 5 -I http://localhost:4000/
- if: ${{ failure() }}
name: Debug server outputs on errors
run: |
echo "____STDOUT____"
cat /tmp/stdout.log
echo "____STDERR____"
cat /tmp/stderr.log
- name: Run script
timeout-minutes: 120
env:
# The default is 300 which works OK on a fast macbook pro
# but not so well in Actions.
LINKINATOR_CONCURRENCY: 100
LINKINATOR_LOG_FILE_PATH: linkinator.log
run: |
script/check-english-links.js > broken_links.md
# check-english-links.js returns 0 if no links are broken, and 1 if any links
# are broken. When an Actions step's exit code is 1, the action run's job status
# is failure and the run ends. The following steps create an issue for the
# broken link report only if any links are broken, so `if: ${{ failure() }}`
# ensures the steps run despite the previous step's failure of the job.
#
# https://docs.github.com/actions/reference/context-and-expression-syntax-for-github-actions#job-status-check-functions
- uses: actions/upload-artifact@6673cd052c4cd6fcf4b4e6e60ea986c889389535
with:
name: linkinator_log
path: linkinator.log
- uses: actions/upload-artifact@6673cd052c4cd6fcf4b4e6e60ea986c889389535
if: ${{ failure() }}
with:
name: broken_links
path: ./broken_links.md
- if: ${{ failure() }}
name: Get title for issue
id: check
run: echo "::set-output name=title::$(head -1 broken_links.md)"
- if: ${{ failure() }}
name: Create issue from file
id: broken-link-report
uses: peter-evans/create-issue-from-file@b4f9ee0a9d4abbfc6986601d9b1a4f8f8e74c77e
with:
token: ${{ env.GITHUB_TOKEN }}
title: ${{ steps.check.outputs.title }}
content-filepath: ./broken_links.md
repository: ${{ env.REPORT_REPOSITORY }}
labels: ${{ env.REPORT_LABEL }}
- if: ${{ failure() }}
name: Close and/or comment on old issues
env:
NEW_REPORT_URL: 'https://github.com/${{ env.REPORT_REPOSITORY }}/issues/${{ steps.broken-link-report.outputs.issue-number }}'
run: |
gh alias set list-reports "issue list \
--repo ${{ env.REPORT_REPOSITORY }} \
--author ${{ env.REPORT_AUTHOR }} \
--label '${{ env.REPORT_LABEL }}'"
# Link to the previous report from the new report that triggered this
# workflow run.
previous_report_url=$(gh list-reports \
--state all \
--limit 2 \
--json url \
--jq '.[].url' \
| grep -v ${{ env.NEW_REPORT_URL }} | head -1)
gh issue comment ${{ env.NEW_REPORT_URL }} --body "⬅️ [Previous report]($previous_report_url)"
# If an old report is open and assigned to someone, link to the newer
# report without closing the old report.
for issue_url in $(gh list-reports \
--json assignees,url \
--jq '.[] | select (.assignees != []) | .url'); do
if [ "$issue_url" != "${{ env.NEW_REPORT_URL }}" ]; then
gh issue comment $issue_url --body "➡️ [Newer report](${{ env.NEW_REPORT_URL }})"
fi
done
# Link to the newer report from any older report that is still open,
# then close the older report and remove it from the first responder's
# project board.
for issue_url in $(gh list-reports \
--search 'no:assignee' \
--json url \
--jq '.[].url'); do
if [ "$issue_url" != "${{ env.NEW_REPORT_URL }}" ]; then
gh issue comment $issue_url --body "➡️ [Newer report](${{ env.NEW_REPORT_URL }})"
gh issue close $issue_url
gh issue edit $issue_url --remove-project "${{ env.FIRST_RESPONDER_PROJECT }}"
fi
done