-
Notifications
You must be signed in to change notification settings - Fork 2
/
master.py
188 lines (157 loc) · 7.43 KB
/
master.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
from datetime import datetime, timedelta
import importlib
from os import environ
from random import shuffle
from subprocess import check_output
from sys import stdout
from traceback import print_exc
from wikitools import wiki
from wikitools.page import Page
import open_pr_comment
# Reports I want:
# Now that I have wikitext caching, many things are faster. Write a report for redirects which link to non-existent subsections
# Quotations which use quote characters
# Using {{lang}} and {{if lang}} on non-template pages -> this is apparently somewhat common now to make copy/paste editing easier
# Pages which link to disambig pages not in hatnote/see also
# Just... a summary of every single external link. Maybe just 'count per domain' and then list the top 10 pages? I'm finding a LOT of sus links, and it's only the ones that are *broken*.
# Templates sorted by usage and protect status
# A 'missing translations' report but for dictionary entries (maybe sorted by usage, too?)
# Templates which have redirects in them
# Reports I want to improve:
# Sort missing categories by # pages
# Threading for navboxes.py?
# lang_quality.py could be smarter, e.g. flag non-ASCII characters in 'en', or check for only quote characters (or other hints that a language is incomplete)
def edit_or_save(page_name, file_name, output, summary):
    """Upload `output` to the wiki page `page_name`, or write it to disk if that fails.

    NOTE: the wiki handle `w` is not a parameter; it is looked up as a
    module-level global when this function is called.

    Returns the truthy value produced by `Page.edit` (used by callers as a
    diff URL), or None when the edit did not succeed and `output` was written
    to `file_name` instead.
    """
    page = Page(w, page_name)
    diff_url = page.edit(output, bot=True, summary=summary)
    if not diff_url:
        # The edit did not go through: keep the report contents in a local file
        # (which will be attached as a build artifact).
        with open(file_name, 'w', encoding='utf-8') as artifact:
            artifact.write(output)
        return None
    return diff_url
def artifact_file_stem(report_name):
    """Return the extension-less fallback file name for a report.

    The report name is lowercased, and both spaces and '/' become '_'. Report
    names may contain '/' (e.g. 'Missing translations/sorted'); left as-is, the
    '/' would turn the fallback file into a path inside a directory that is
    never created, so writing it would fail.
    """
    return 'wiki_' + report_name.lower().replace(' ', '_').replace('/', '_')


def publish_report(w, module, report_name, root, summary):
    """Run one report module and publish its output below `root` on the wiki.

    `module` is imported by name and its `main(w)` is called. If the result is
    a list, it is treated as (language, text) pairs and each one is published
    to '{root}/{report_name}/{lang}'; any other result is published as a single
    page at '{root}/{report_name}' under the key 'en'.

    Returns a dict mapping language -> the value returned by edit_or_save for
    that language (None when the upload fell back to a local file). Any
    exception is printed to stdout rather than raised, and the links gathered
    so far are returned -- an empty dict means nothing was published.

    NOTE: `w` is handed to the report module only; edit_or_save is not passed
    the wiki handle by this function.
    """
    link_map = {}
    file_stem = artifact_file_stem(report_name)
    try:
        report_output = importlib.import_module(module).main(w)
        if isinstance(report_output, list):
            # Shuffle the order so that we don't always upload the same language
            # first, to ensure even coverage of 502s.
            shuffle(report_output)
            for lang, output in report_output:
                link_map[lang] = edit_or_save(
                    f'{root}/{report_name}/{lang}',
                    f'{file_stem}_{lang}.txt',
                    output,
                    summary,
                )
        else:
            link_map['en'] = edit_or_save(
                f'{root}/{report_name}',
                f'{file_stem}.txt',
                report_output,
                summary,
            )
    except Exception:
        # Deliberately best-effort: one broken report must not stop the others.
        print(f'Failed to update {report_name}')
        print_exc(file=stdout)
    return link_map
# Each table maps a report module name (imported by that name when the report
# is run) to the title the report is published under.

# Run on every scheduled build: multi-language reports, which need frequent
# updates since we have many translators.
daily_reports = {
    'active_discussions': 'Active discussions',
    'all_articles': 'All articles',
    'missing_categories': 'Untranslated categories',
    'missing_translations': 'Missing translations',
    'untranslated_templates': 'Untranslated templates',
}

# Run once a week: English-only reports which still change frequently.
weekly_reports = {
    'displaytitles_weekly': 'Duplicate displaytitles',
    'incorrect_redirects': 'Mistranslated redirects',
    'incorrectly_categorized': 'Pages with incorrect categorization',
    'incorrectly_linked': 'Pages with incorrect links',
    'lang_quality': 'Lang errors',
    'mismatched_weekly': 'Mismatched parenthesis',
    'missing_translations_weekly': 'Missing translations/sorted',
    'navboxes': 'Pages which are missing navboxes',
    'overtranslated': 'Pages with no english equivalent',
    'wanted_templates': 'Wanted templates',
}

# Run once a month: everything else (especially reports which require all HTML
# contents).
monthly_reports = {
    'displaytitles': 'Duplicate displaytitles',
    'duplicate_files': 'Duplicate files',
    'edit_stats': 'Users by edit count',
    'external_links2': 'External links',
    'mismatched': 'Mismatched parenthesis',
    'undocumented_templates': 'Undocumented templates',
    'unlicensed_images': 'Unlicensed images',
    'unused_files': 'Unused files',
}

# Every known report, in daily -> weekly -> monthly order.
all_reports = {**daily_reports, **weekly_reports, **monthly_reports}
def select_report_modules(changed_files, known_reports):
    """Map changed file names onto the report modules which should be re-run.

    For each name, surrounding whitespace and a trailing '.py' are dropped. If
    '<name>_weekly' is a known report it is selected, otherwise '<name>' itself
    is selected when known; names matching neither are ignored.

    Returns a list without duplicates, in sorted file-name order, so that
    touching both 'x.py' and 'x_weekly.py' runs 'x_weekly' only once and the
    run order does not depend on set iteration order.
    """
    selected = []
    for row in sorted(changed_files):
        stem = row.strip().removesuffix('.py')
        weekly_stem = stem + '_weekly'
        if weekly_stem in known_reports:
            module = weekly_stem
        elif stem in known_reports:
            module = stem
        else:
            continue
        if module not in selected:
            selected.append(module)
    return selected


def format_report_links(link_map):
    """Render one report's per-language results as a run of markdown links.

    Languages are sorted alphabetically with 'en' first. A language whose
    value is falsy (the upload fell back to a file) is rendered struck through
    with an "Upload failed" tooltip instead of a link.
    """
    parts = []
    for language in sorted(link_map, key=lambda lang: (lang != 'en', lang)):
        link = link_map[language]
        if link:
            parts.append(f' [{language}]({link})')
        else:
            parts.append(f' ~~[{language}](## "Upload failed")~~')
    return ''.join(parts)


if __name__ == '__main__':
    # NOTE: this stays flat at module level on purpose. edit_or_save() reads the
    # wiki handle `w` as a module global, so `w` must not become a local.
    event = environ.get('GITHUB_EVENT_NAME', 'local_run')
    modules_to_run = []

    if event == 'schedule':
        root = 'Team Fortress Wiki:Reports'
        summary = 'Automatic update via https://github.com/jbzdarkid/TFWiki-scripts'

        # Determine which reports to run -- note that the weekly and monthly
        # cadences don't necessarily line up. The clock is sampled once so that
        # both checks see the same day.
        today = datetime.now()
        modules_to_run.extend(daily_reports)
        if today.weekday() == 0:
            modules_to_run.extend(weekly_reports)
        if today.day == 1:
            modules_to_run.extend(monthly_reports)

    elif event == 'pull_request':
        root = 'User:Darkid/Reports'
        summary = 'Test update via https://github.com/jbzdarkid/TFWiki-scripts'

        base_ref = 'origin/' + environ['GITHUB_BASE_REF']
        merge_base = check_output(['git', 'merge-base', 'HEAD', base_ref], text=True).strip()
        modified_output = check_output(['git', 'diff', '--name-only', merge_base, '--diff-filter=M'], text=True)
        added_output = check_output(['git', 'diff', '--name-only', merge_base, '--diff-filter=A'], text=True)
        changed_files = {f for f in modified_output.split('\n') if f}
        added_files = {f for f in added_output.split('\n') if f}
        print('Changed files:', changed_files)
        print('Added files:', added_files)

        if added_files and 'README.md' not in changed_files:
            raise ValueError('When adding a new report, you must update the readme.')

        # Only re-run the reports whose source was touched by this pull request.
        modules_to_run = select_report_modules(changed_files | added_files, all_reports)

    elif event == 'workflow_dispatch':
        root = 'User:Darkid/Reports'
        summary = 'Test update via https://github.com/jbzdarkid/TFWiki-scripts'
        modules_to_run = list(all_reports)  # On manual triggers, run everything

    elif event == 'local_run':
        w = wiki.Wiki('https://wiki.teamfortress.com/w/api.php')
        for report in all_reports:
            # Root and summary don't matter because we can't publish anyway.
            print(report)
            publish_report(w, report, all_reports[report], '', '')
            # Only the first report is run.
            # NOTE(review): this looks like a local-debugging shortcut -- confirm.
            break
        raise SystemExit(0)

    else:
        print(f'Not sure what to run in response to {event}')
        raise SystemExit(1)

    w = wiki.Wiki('https://wiki.teamfortress.com/w/api.php')
    if not w.login(environ['WIKI_USERNAME'], environ['WIKI_PASSWORD']):
        raise SystemExit(1)

    comment_lines = ['Please verify the following diffs:']
    succeeded = True

    for module in modules_to_run:
        report_name = all_reports[module]

        start = datetime.now()
        print(f'Starting {report_name} at {start}')
        link_map = publish_report(w, module, report_name, root, summary)
        duration = datetime.now() - start
        duration -= timedelta(microseconds=duration.microseconds)  # Strip microseconds

        if not link_map:
            # Nothing was published at all: point the reader at the build log.
            action_url = 'https://github.com/' + environ['GITHUB_REPOSITORY'] + '/actions/runs/' + environ['GITHUB_RUN_ID']
            comment_lines.append(f'- [ ] {report_name} failed after {duration}: {action_url}')
            succeeded = False
        else:
            comment_lines.append(f'- [ ] {report_name} succeeded in {duration}:' + format_report_links(link_map))

    comment = '\n'.join(comment_lines) + '\n'

    if event == 'pull_request':
        open_pr_comment.create_pr_comment(comment)
    elif event == 'workflow_dispatch':
        open_pr_comment.create_issue('Workflow dispatch finished', comment)
    elif event == 'schedule':
        print(comment)

    raise SystemExit(0 if succeeded else 1)