Skip to content

Commit

Permalink
Support additional trusted CAs to link verifier
Browse files Browse the repository at this point in the history
CAs installed as part of `certifi` are not enough in some cases
(e.g. wiki.segger.com). This change adds support for additional
CAs for such websites.

Signed-off-by: Gaurav Aggarwal <[email protected]>
  • Loading branch information
aggarg committed Jul 25, 2024
1 parent 27a2563 commit b97adff
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 21 deletions.
90 changes: 90 additions & 0 deletions link-verifier/trusted_certs/ca_bundle.crt
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
-----BEGIN CERTIFICATE-----
MIIG8jCCBdqgAwIBAgIQCPqs+TwfXV3zHQ30fcn1STANBgkqhkiG9w0BAQsFADBZ
MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMTMwMQYDVQQDEypE
aWdpQ2VydCBHbG9iYWwgRzIgVExTIFJTQSBTSEEyNTYgMjAyMCBDQTEwHhcNMjMx
MDIzMDAwMDAwWhcNMjQxMTIyMjM1OTU5WjCBgzELMAkGA1UEBhMCREUxHDAaBgNV
BAgTE05vcmRyaGVpbi1XZXN0ZmFsZW4xGTAXBgNVBAcTEE1vbmhlaW0gYW0gUmhl
aW4xJDAiBgNVBAoTG1NFR0dFUiBNaWNyb2NvbnRyb2xsZXIgR21iSDEVMBMGA1UE
AwwMKi5zZWdnZXIuY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA
nlSFUuIbuJFHO6DpHz/Bh1+dz+qiD4VZ4GpLSK1tkhptYPh+ZYx5pkMgqI34HehN
4wxt5pFIS58z5kkh/zE+fr9CwGCdWxkdPiANxwWQp13Ko27o7uZHo//2rBtQTjms
rYlLHIURVlE89B6SJooEofTV2J/U61b9YGEea1PbwO2g9XoglJH8wDWX0WxqAcbG
3chOr8f0O66wRv0CCpp9B1hXc6DcvhSSD/xGa32hnwHN55LhmivmOQtt2MWYzrpa
5hMN3RSh1jkH2M7dhMgFLLyu4j96yjOOHkfXDKyxTWPQLJ5QKLfFHs4x/syvcMxW
akbZuZHeZQoVN8kb1ZWRCQIDAQABo4IDiTCCA4UwHwYDVR0jBBgwFoAUdIWAwGbH
3zfez70pN6oDHb7tzRcwHQYDVR0OBBYEFMd+XguAicKsxhuJFDNGzIvjYeu7MBcG
A1UdEQQQMA6CDCouc2VnZ2VyLmNvbTA+BgNVHSAENzA1MDMGBmeBDAECAjApMCcG
CCsGAQUFBwIBFhtodHRwOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMwDgYDVR0PAQH/
BAQDAgWgMB0GA1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjCBnwYDVR0fBIGX
MIGUMEigRqBEhkJodHRwOi8vY3JsMy5kaWdpY2VydC5jb20vRGlnaUNlcnRHbG9i
YWxHMlRMU1JTQVNIQTI1NjIwMjBDQTEtMS5jcmwwSKBGoESGQmh0dHA6Ly9jcmw0
LmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEdsb2JhbEcyVExTUlNBU0hBMjU2MjAyMENB
MS0xLmNybDCBhwYIKwYBBQUHAQEEezB5MCQGCCsGAQUFBzABhhhodHRwOi8vb2Nz
cC5kaWdpY2VydC5jb20wUQYIKwYBBQUHMAKGRWh0dHA6Ly9jYWNlcnRzLmRpZ2lj
ZXJ0LmNvbS9EaWdpQ2VydEdsb2JhbEcyVExTUlNBU0hBMjU2MjAyMENBMS0xLmNy
dDAMBgNVHRMBAf8EAjAAMIIBfwYKKwYBBAHWeQIEAgSCAW8EggFrAWkAdwDuzdBk
1dsazsVct520zROiModGfLzs3sNRSFlGcR+1mwAAAYtbLd3lAAAEAwBIMEYCIQCP
c6RveylhvzBaOFg0Lb9SzPlsl2qJRBy1gtPxeT/p0gIhALdEFiUsDmmlkSopD9tA
lcys+MZOC4EAJbSd3Gqjy8jiAHYASLDja9qmRzQP5WoC+p0w6xxSActW3SyB2bu/
qznYhHMAAAGLWy3d+wAABAMARzBFAiEAzz0HR+9Ub6OQY2yeqiCQdpHjZvCKS0r1
DHEgvpSO1sQCIGwuAeCRYd7MRyxrnRpKP4W4KYEk81GBUhY/U/FMMekCAHYA2ra/
az+1tiKfm8K7XGvocJFxbLtRhIU0vaQ9MEjX+6sAAAGLWy3d6gAABAMARzBFAiAF
W8keKo/nJlK6SpPgNsNNHTqeHYGTw2B4GcGo3M4f7AIhAPLPloOcSfnZH8tvcEyV
EuQWifNaRCLt6nHTtbRel+F8MA0GCSqGSIb3DQEBCwUAA4IBAQBbzc037fmC4z8F
KKNYM7jLj/nKvz+8ygnVpxUcQKPQx5hS/i+k2j+oQIv/YR7IrPJMkBKnNLgV8a6y
S4LWmwFa4sw2Ff2K4QSH+evxH166CfphhOlVmB3Bv8aauo9RFB1J1bNuhRGO8ySA
kvL4gXqiDTS2VVHctIYiPX9rfjwyfsuhmGIwsi16ZyJ04zXDDFNwbMTacACJiK/u
0AP4LJC0Bh7n+3ETGH1PoaOkr5cmQaI9tKhYSZdiEMnLxI7KIaNApFnfCqdd+EQT
LCKZbBSQ4GGHexntbu4KenLPwZ2sSjuwDlSNB2yGFLej3gV0leOKZ/zDsKcVplJ+
mDWCvLnn
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIEyDCCA7CgAwIBAgIQDPW9BitWAvR6uFAsI8zwZjANBgkqhkiG9w0BAQsFADBh
MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBH
MjAeFw0yMTAzMzAwMDAwMDBaFw0zMTAzMjkyMzU5NTlaMFkxCzAJBgNVBAYTAlVT
MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxMzAxBgNVBAMTKkRpZ2lDZXJ0IEdsb2Jh
bCBHMiBUTFMgUlNBIFNIQTI1NiAyMDIwIENBMTCCASIwDQYJKoZIhvcNAQEBBQAD
ggEPADCCAQoCggEBAMz3EGJPprtjb+2QUlbFbSd7ehJWivH0+dbn4Y+9lavyYEEV
cNsSAPonCrVXOFt9slGTcZUOakGUWzUb+nv6u8W+JDD+Vu/E832X4xT1FE3LpxDy
FuqrIvAxIhFhaZAmunjZlx/jfWardUSVc8is/+9dCopZQ+GssjoP80j812s3wWPc
3kbW20X+fSP9kOhRBx5Ro1/tSUZUfyyIxfQTnJcVPAPooTncaQwywa8WV0yUR0J8
osicfebUTVSvQpmowQTCd5zWSOTOEeAqgJnwQ3DPP3Zr0UxJqyRewg2C/Uaoq2yT
zGJSQnWS+Jr6Xl6ysGHlHx+5fwmY6D36g39HaaECAwEAAaOCAYIwggF+MBIGA1Ud
EwEB/wQIMAYBAf8CAQAwHQYDVR0OBBYEFHSFgMBmx9833s+9KTeqAx2+7c0XMB8G
A1UdIwQYMBaAFE4iVCAYlebjbuYP+vq5Eu0GF485MA4GA1UdDwEB/wQEAwIBhjAd
BgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwdgYIKwYBBQUHAQEEajBoMCQG
CCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2VydC5jb20wQAYIKwYBBQUHMAKG
NGh0dHA6Ly9jYWNlcnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEdsb2JhbFJvb3RH
Mi5jcnQwQgYDVR0fBDswOTA3oDWgM4YxaHR0cDovL2NybDMuZGlnaWNlcnQuY29t
L0RpZ2lDZXJ0R2xvYmFsUm9vdEcyLmNybDA9BgNVHSAENjA0MAsGCWCGSAGG/WwC
ATAHBgVngQwBATAIBgZngQwBAgEwCAYGZ4EMAQICMAgGBmeBDAECAzANBgkqhkiG
9w0BAQsFAAOCAQEAkPFwyyiXaZd8dP3A+iZ7U6utzWX9upwGnIrXWkOH7U1MVl+t
wcW1BSAuWdH/SvWgKtiwla3JLko716f2b4gp/DA/JIS7w7d7kwcsr4drdjPtAFVS
slme5LnQ89/nD/7d+MS5EHKBCQRfz5eeLjJ1js+aWNJXMX43AYGyZm0pGrFmCW3R
bpD0ufovARTFXFZkAdl9h6g4U5+LXUZtXMYnhIHUfoyMo5tS58aI7Dd8KvvwVVo4
chDYABPPTHPbqjc1qCmBaZx2vN4Ye5DUys/vZwP9BFohFrH/6j/f3IL16/RZkiMN
JCqVJUzKoZHm1Lesh3Sz8W2jmdv51b2EQJ8HmA==
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIDjjCCAnagAwIBAgIQAzrx5qcRqaC7KGSxHQn65TANBgkqhkiG9w0BAQsFADBh
MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBH
MjAeFw0xMzA4MDExMjAwMDBaFw0zODAxMTUxMjAwMDBaMGExCzAJBgNVBAYTAlVT
MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j
b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IEcyMIIBIjANBgkqhkiG
9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuzfNNNx7a8myaJCtSnX/RrohCgiN9RlUyfuI
2/Ou8jqJkTx65qsGGmvPrC3oXgkkRLpimn7Wo6h+4FR1IAWsULecYxpsMNzaHxmx
1x7e/dfgy5SDN67sH0NO3Xss0r0upS/kqbitOtSZpLYl6ZtrAGCSYP9PIUkY92eQ
q2EGnI/yuum06ZIya7XzV+hdG82MHauVBJVJ8zUtluNJbd134/tJS7SsVQepj5Wz
tCO7TG1F8PapspUwtP1MVYwnSlcUfIKdzXOS0xZKBgyMUNGPHgm+F6HmIcr9g+UQ
vIOlCsRnKPZzFBQ9RnbDhxSJITRNrw9FDKZJobq7nMWxM4MphQIDAQABo0IwQDAP
BgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBhjAdBgNVHQ4EFgQUTiJUIBiV
5uNu5g/6+rkS7QYXjzkwDQYJKoZIhvcNAQELBQADggEBAGBnKJRvDkhj6zHd6mcY
1Yl9PMWLSn/pvtsrF9+wX3N3KjITOYFnQoQj8kVnNeyIv/iPsGEMNKSuIEyExtv4
NeF22d+mQrvHRAiGfzZ0JFrabA0UWTW98kndth/Jsw1HKj2ZL7tcu7XUIOGZX1NG
Fdtom/DzMNU+MeKNhJ7jitralj41E6Vf8PlwUHBHQRFXGU7Aj64GxJUTFy8bJZ91
8rGOmaFvE7FBcf6IKshPECBV1/MUReXgRPTqh5Uykw7+U0b6LJ3/iyK5S9kJRaTe
pLiaWN0bfVKfjllDiIGknibVb63dDcY3fe0Dkhvld1927jyNxF1WW6LZZm6zNTfl
MrY=
-----END CERTIFICATE-----
76 changes: 55 additions & 21 deletions link-verifier/verify-links.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
import traceback
from collections import defaultdict

# Directory containing this script; used to locate files shipped alongside it.
THIS_FILE_PATH = os.path.dirname(os.path.abspath(__file__))
# Path to the additional CA bundle (trusted_certs/ca_bundle.crt next to this
# script), passed as `verify=` when the default verification raises an SSLError.
TRUSTED_CA_BUNDLE = os.path.join(THIS_FILE_PATH, 'trusted_certs', 'ca_bundle.crt')

# Regex matching a name that ends in ".md" (presumably used to select Markdown
# files -- confirm against the callers).
MARKDOWN_SEARCH_TERM = r"\.md$"
# Regex to find a URL: an http:// or https:// link running up to the first
# whitespace, ')', ']', backslash, '"', '<' or '>'; the final character must
# additionally not be '.', so a sentence-ending period is not captured.
URL_SEARCH_TERM = r'(\b(https?)://[^\s\)\]\\"<>]+[^\s\)\.\]\\"<>])'
Expand Down Expand Up @@ -189,12 +192,61 @@ def create_html(markdown_file):
)
return process

def _retry_after_seconds(response, default=60):
    """Return how many seconds to wait before retrying a rate-limited request.

    Reads the `Retry-After` header of `response`. The header is optional and
    may also be an HTTP-date rather than a number of seconds; in either case
    `default` is returned instead of raising, so that a rate-limited link is
    retried rather than reported as an error.
    """
    try:
        # Clamp at zero: time.sleep() rejects negative values.
        return max(0, int(response.headers['Retry-After']))
    except (KeyError, TypeError, ValueError):
        return default


def _probe_url(url, verify):
    """Request `url` once and return an `(is_broken, status)` tuple.

    `verify` is forwarded to `requests` unchanged: `True` selects the default
    CA certificates, a path selects a custom CA bundle file. Exceptions raised
    by `requests` propagate to the caller.
    """
    r = requests.head(url, allow_redirects=True, headers=http_headers, verify=verify)
    # Some sites may return 404 for head but not get, e.g.
    # https://tls.mbed.org/kb/development/thread-safety-and-multi-threading
    if r.status_code >= 400:
        # Allow redirects is already enabled by default for GET.
        r = requests.get(url, headers=http_headers, verify=verify)
    # It's likely we will run into GitHub's rate-limiting if there are many links.
    if r.status_code == 429:
        time.sleep(_retry_after_seconds(r))
        # Send the same headers as the first attempt; the retry previously
        # dropped them, making it behave differently from the original request.
        r = requests.head(url, allow_redirects=True, headers=http_headers, verify=verify)
    if r.status_code >= 400:
        return True, r.status_code
    return False, ''


def access_url(url):
    """Check whether `url` is reachable.

    The URL is first probed with the default certificate verification. If that
    fails with an SSL error, the probe is repeated once using the additional
    CA bundle at TRUSTED_CA_BUNDLE, since the default CA set is not enough for
    some sites.

    Returns:
        A tuple `(is_broken, status)`. `status` is the HTTP status code when
        the final response is >= 400, the string 'Error' when the request
        raised an exception, and '' when the link is reachable.
    """
    try:
        return _probe_url(url, verify=True)
    except requests.exceptions.SSLError as e:
        # Not necessarily a broken link: fall through and retry with the
        # additional trusted CA bundle.
        print(str(e))
    except Exception as e:
        # requests.exceptions.ConnectionError if URL does not exist, but we
        # capture all possible exceptions from trying the link to be safe.
        print(str(e))
        return True, 'Error'

    try:
        return _probe_url(url, verify=TRUSTED_CA_BUNDLE)
    except Exception as e:
        print(str(e))
        return True, 'Error'

def test_url(url):
"""Tests a single url"""
global use_gh_cache
global main_repo_list
global link_cache
global http_headers
status = ''
is_broken = False
# Test if link was already tested before.
Expand All @@ -215,26 +267,8 @@ def test_url(url):
if int(issue_match.group(3)) in main_repo_list[repo_key][ISSUE_KEY]:
status = 'Good'
if status != 'Good':
try:
r = requests.head(url, allow_redirects=True, headers=http_headers)
# Some sites may return 404 for head but not get, e.g.
# https://tls.mbed.org/kb/development/thread-safety-and-multi-threading
if r.status_code >= 400:
# Allow redirects is already enabled by default for GET.
r = requests.get(url, headers=http_headers)
# It's likely we will run into GitHub's rate-limiting if there are many links.
if r.status_code == 429:
time.sleep(int(r.headers['Retry-After']))
r = requests.head(url, allow_redirects=True)
if r.status_code >= 400:
is_broken = True
status = r.status_code
# requests.exceptions.ConnectionError if URL does not exist, but we capture
# all possible exceptions from trying the link to be safe.
except Exception as e:
print(str(e))
is_broken = True
status = 'Error'
is_broken, status = access_url(url)

# Add result to cache so it won't be tested again.
link_cache[url] = (is_broken, status)
return is_broken, status
Expand Down

0 comments on commit b97adff

Please sign in to comment.