Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More stats #408

Merged
merged 10 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ repos:
- ruamel.yaml==0.18.6
- questionary==2.0.1
- types-colorama==0.4.15.20240311
- types-Pygments==2.18.0.20240506
- types-python-dateutil==2.9.0.20241003
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not datetime.strptime (https://stackoverflow.com/a/2265383) to avoid an additional dependency?

Also, this/these additional (optional) dependenc(y/ies) should be mentioned in other places, I think.

- types-PyYAML==6.0.12.20240311
- types-requests==2.32.0.20240712
args:
Expand Down
2 changes: 1 addition & 1 deletion bin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
grep -Ev '^(h|jobs|time|verbose)$' | sed "s/^/'/;s/$/',/" | tr '\n' ' ' | sed 's/^/args_list = [/;s/, $/]\n/'
"""
# fmt: off
args_list = ['1', 'add', 'all', 'answer', 'api', 'author', 'check_deterministic', 'clean', 'colors', 'contest', 'contest_id', 'contestname', 'cp', 'default_solution', 'depth', 'directory', 'error', 'force', 'force_build', 'input', 'interaction', 'interactive', 'invalid', 'kattis', 'language', 'memory', 'move_to', 'no_bar', 'no_generate', 'no_solution', 'no_solutions', 'no_testcase_sanity_checks', 'no_timelimit', 'no_validators', 'no_visualizer', 'open', 'order', 'order_from_ccs', 'overview', 'password', 'post_freeze', 'problem', 'problemname', 'remove', 'reorder', 'samples', 'sanitizer', 'skel', 'skip', 'sort', 'submissions', 'table', 'testcases', 'timelimit', 'timeout', 'token', 'tree', 'username', 'validation', 'watch', 'web', 'write']
args_list = ['1', 'add', 'all', 'answer', 'api', 'author', 'check_deterministic', 'clean', 'colors', 'contest', 'contest_id', 'contestname', 'cp', 'default_solution', 'depth', 'directory', 'error', 'force', 'force_build', 'input', 'interaction', 'interactive', 'invalid', 'kattis', 'language', 'memory', 'more', 'move_to', 'no_bar', 'no_generate', 'no_solution', 'no_solutions', 'no_testcase_sanity_checks', 'no_timelimit', 'no_validators', 'no_visualizer', 'open', 'order', 'order_from_ccs', 'overview', 'password', 'post_freeze', 'problem', 'problemname', 'remove', 'reorder', 'samples', 'sanitizer', 'skel', 'skip', 'sort', 'submissions', 'table', 'testcases', 'timelimit', 'timeout', 'token', 'tree', 'username', 'validation', 'watch', 'web', 'write']
# fmt: on


Expand Down
25 changes: 12 additions & 13 deletions bin/problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,18 @@ def add(s):

programs = [run.Submission(problem, path) for path in paths]

def submissions_key(x):
    """Sort key for submissions.

    - submissions with exactly one expected verdict come first,
      ordered by that verdict and then by name
    - all others are ordered by number of expected verdicts, then subdir,
      then the list of expected verdicts, then name
    """
    verdicts = x.expected_verdicts
    if len(verdicts) != 1:
        return (len(verdicts), x.subdir, verdicts, x.name)
    return (1, verdicts[0], x.name)

programs.sort(key=submissions_key)

bar = ProgressBar('Build submissions', items=programs)

def build_program(p):
Expand All @@ -526,19 +538,6 @@ def build_program(p):
return False

assert isinstance(problem._submissions, list)

# - first all submission with just one verdict (sorted by that verdict)
# - then by subdir
# - then by list of verdicts
# - then by name
def submissions_key(x):
if len(x.expected_verdicts) == 1:
return (1, x.expected_verdicts[0], x.name)
else:
return (len(x.expected_verdicts), x.subdir, x.expected_verdicts, x.name)

problem._submissions.sort(key=submissions_key)

return problem._submissions.copy()

def validators(
Expand Down
268 changes: 256 additions & 12 deletions bin/stats.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
import shutil
import statistics
import sys
from collections.abc import Callable
from datetime import datetime, timedelta, timezone
from dateutil import parser
from pathlib import Path
from typing import Literal, Any

from colorama import Fore, Style
from colorama import ansi, Fore, Style

import config
import generate
from util import glob
import program
from util import error, glob, exec_command

Selector = str | Callable | list[str] | list[Callable]


def stats(problems):
    """Print the problem statistics table and, if `config.args.more` is set, the extended statistics."""
    problem_stats(problems)
    if not config.args.more:
        return
    more_stats(problems)


# This prints the number belonging to the count.
# This can be a red/white colored number, or Y/N
def _get_stat(count, threshold=True, upper_bound=None):
Expand All @@ -28,7 +41,7 @@ def _get_stat(count, threshold=True, upper_bound=None):
return color + str(count) + Style.RESET_ALL


def stats(problems):
def problem_stats(problems):
stats: list[
tuple[str, Selector] | tuple[str, Selector, int] | tuple[str, Selector, int, int]
] = [
Expand Down Expand Up @@ -66,15 +79,20 @@ def stats(problems):
(' WA', 'submissions/wrong_answer/*', 2),
('TLE', 'submissions/time_limit_exceeded/*', 1),
('subs', lambda p: len(glob(p.path, 'submissions/*/*')), 6),
(
' cpp',
['submissions/accepted/*.c', 'submissions/accepted/*.cpp', 'submissions/accepted/*.cc'],
1,
),
('py', ['submissions/accepted/*.py3', 'submissions/accepted/*.py'], 1),
('java', 'submissions/accepted/*.java', 1),
('kt', 'submissions/accepted/*.kt', 1),
]
languages = {
' c(++)': ['C', 'C++'],
'py': ['Python 2', 'Python 3', 'CPython 2', 'CPython 3'],
'java': ['Java'],
'kt': ['Kotlin'],
}
for column, names in languages.items():
paths = []
for config in program.languages().values():
if config['name'] in names:
globs = config['files'].split() or []
paths += [f'submissions/accepted/{glob}' for glob in globs]
stats.append((column, list(set(paths)), 1))

headers = ['problem', *(h[0] for h in stats), ' comment']
cumulative = [0] * (len(stats))
Expand Down Expand Up @@ -177,7 +195,7 @@ def value(x):
)
for i in range(len(stats))
],
comment
comment,
),
file=sys.stderr,
)
Expand All @@ -188,3 +206,229 @@ def value(x):
format_string.format('TOTAL', *(_get_stat(x, False) for x in cumulative), ''),
file=sys.stderr,
)


# pygments is optional: `has_pygments` records whether it could be imported.
# `loc_cache` maps a file to its counted lines of code (None if not countable).
try:
    import pygments
    from pygments import lexers

    loc_cache: dict[Path, int | None] = {}
except Exception:
    has_pygments = False
else:
    has_pygments = True


def _is_code(language, type, text):
    """Return whether a piece of a lexer token counts as code.

    `language` is the lower-cased lexer name, `type` the pygments token type
    and `text` (part of) the token text.

    Not counted as code:
    - comments, except preprocessor statements
    - strings
    - text consisting only of whitespace and the punctuation `(),:;[]{}`
    - a few language specific filler keywords (see below)
    """
    if type in pygments.token.Comment and type not in (
        pygments.token.Comment.Preproc,  # pygments treats preprocessor statements as comments
        pygments.token.Comment.PreprocFile,
    ):
        return False
    if type in pygments.token.String:
        return False
    if text.rstrip(' \f\n\r\t(),:;[]{}') == '':
        return False
    # ignore some language specific keywords
    text = text.strip()
    if language == 'python':
        return text != 'pass'
    if language == 'batchfile':
        return text != '@'
    if language == 'sql':
        # The former extra condition `text == 'pass'` made this check dead code.
        # SQL keywords are case-insensitive, so compare in lower case.
        return text.lower() not in ['begin', 'end']
    return True


def loc(file):
    """Return the number of lines of code in `file`, or None if it cannot be counted.

    A line is counted if at least one token fragment on it is code according
    to `_is_code`. The result (including None) is memoized in `loc_cache`.
    """
    if file not in loc_cache:
        try:
            content = file.read_text()
            lexer = lexers.guess_lexer_for_filename(file, content)
            assert isinstance(lexer, pygments.lexer.Lexer)
            language = lexer.name.lower()

            count = 0
            has_code = False
            for token_type, text in lexer.get_tokens(content):
                # A token may span several lines, so look at each line separately.
                for line in text.splitlines(True):
                    if _is_code(language, token_type, line):
                        has_code = True
                    if line.endswith('\n') and has_code:
                        count += 1
                        has_code = False
            # The last line may not be terminated by a newline.
            if has_code:
                count += 1

            loc_cache[file] = count
        except Exception:
            # Either we could not read the file (for example binaries)
            # or we did not find a lexer.
            # Note: no bare `except`, so KeyboardInterrupt/SystemExit still propagate.
            loc_cache[file] = None
    return loc_cache[file]


def more_stats(problems):
    """Print additional solution and git statistics for `problems` to stderr.

    The first part of the table lists, per problem, the smallest line count
    (see `loc`) among the accepted submissions, overall and per language group,
    followed by sum/min/avg/max columns.

    The second part needs git: it prints rows for the number of testcases, the
    time since `generators`/`data` were last committed and the number of
    commits, plus repository-wide totals.

    Reports an error and returns early if pygments is not available. The git
    part is skipped with an error if git is not found or the current directory
    is not inside a git work tree.
    """
    if not has_pygments:
        error('stats --more needs pygments. Install python[3]-pygments.')
        return

    stat_name_len = 10
    stat_len = 5

    # solution stats
    columns = [p.label for p in problems] + ['sum', 'min', 'avg', 'max']

    def get_stats(values, missing='-'):
        """Return [sum, min, mean, max] of `values`, or four times `missing` if empty."""
        if not values:
            return [missing] * 4
        return [sum(values), min(values), statistics.mean(values), max(values)]

    header_string = f'{{:<{stat_name_len}}}' + f' {{:>{stat_len}}}' * len(columns)
    # Every cell is prefixed with exactly one color escape sequence,
    # which does not take up visible space, so the field widths are enlarged by its length.
    format_string = (
        f'{{:<{stat_name_len + len(Fore.WHITE)}}}{Style.RESET_ALL}'
        + f' {{:>{stat_len + len(Fore.WHITE)}}}{Style.RESET_ALL}' * len(columns)
    )

    print(file=sys.stderr)
    header = header_string.format('', *columns)
    print(Style.BRIGHT + header + Style.RESET_ALL, file=sys.stderr)
    print('-' * len(header), file=sys.stderr)

    def format_row(*values):
        """Format one table row; cells without a color prefix are printed in white."""
        printable = []
        for value in values:
            if isinstance(value, float):
                value = f'{value:.1f}'
            elif isinstance(value, timedelta):
                # Pick the largest unit that still gives a reasonably precise number.
                hours = int(value.total_seconds()) // (60 * 60)
                days = int(value.total_seconds()) // (60 * 60 * 24)
                weeks = int(value.total_seconds()) // (60 * 60 * 24 * 7)
                if hours < 3 * 24:
                    value = f'{hours}h'
                elif days < 4 * 7:
                    value = f'{days}d'
                else:
                    value = f'{weeks}w'
            elif not isinstance(value, str):
                value = str(value)
            if not value.startswith(ansi.CSI):
                value = f'{Fore.WHITE}{value}'
            printable.append(value)
        return format_string.format(*printable)

    languages: dict[str, list[str] | Literal[True]] = {
        'C(++)': ['C', 'C++'],
        'Python': ['Python 2', 'Python 3', 'CPython 2', 'CPython 3'],
        'Java': ['Java'],
        'Kotlin': ['Kotlin'],
    }

    def get_submissions_row(display_name, names):
        """Build the row with the shortest accepted submission per problem.

        `names` is either True (consider all accepted submissions) or a list of
        language names as used in `program.languages()`.
        """
        paths = []
        if names is True:
            paths.append('submissions/accepted/*')
        else:
            assert isinstance(names, list)
            # Do not reuse the names `config`/`glob` here: they are a module and a helper.
            for lang_config in program.languages().values():
                if lang_config['name'] in names:
                    patterns = lang_config['files'].split() or []
                    paths += [f'submissions/accepted/{pattern}' for pattern in patterns]
        paths = list(set(paths))

        lines = [display_name]
        values = []
        for problem in problems:
            files = {file for path in paths for file in glob(problem.path, path)}
            cur_lines = [loc(file) for file in files]
            cur_lines = [x for x in cur_lines if x is not None]
            if cur_lines:
                shortest = min(cur_lines)
                values.append(shortest)
                lines.append(shortest)
            else:
                lines.append(f'{Fore.RED}-')
        lines += get_stats(values)
        return lines

    best = get_submissions_row('Solution', True)
    print(format_row(*best), file=sys.stderr)
    for display_name, names in languages.items():
        values = get_submissions_row(display_name, names)
        for i in range(1, 1 + len(problems)):
            # Only highlight real numbers. The red '-' for a missing value is already
            # colored and a second color prefix would break the column alignment.
            if not isinstance(values[i], str) and values[i] == best[i]:
                values[i] = f'{Fore.CYAN}{values[i]}'
        print(format_row(*values), file=sys.stderr)

    # TODO: analyze team submissions?

    # git stats
    if shutil.which('git') is None:
        error('git not found!')
        return

    if not exec_command(['git', 'rev-parse', '--is-inside-work-tree']).out.startswith('true'):
        error('not inside git')
        return

    def git(*args):
        """Run git with `args` and return its output ('' if the result is falsy)."""
        res = exec_command(
            ['git', *args],
            crop=False,
            preexec_fn=False,
            timeout=None,
        )
        return res.out if res else ''

    print('-' * len(header), file=sys.stderr)
    testcases = [len(generate.testcases(p)) for p in problems]
    testcases += get_stats(testcases)
    print(format_row('Testcases', *testcases), file=sys.stderr)

    now = datetime.now(timezone.utc)
    changed: list[Any] = []
    ages: list[float] = []
    for p in problems:
        stamps = [
            git('log', '--format=%cI', '-1', '--', p.path / path).strip()
            for path in ['generators', 'data']
        ]
        # git prints nothing for a path without commits, which cannot be parsed as a date.
        times = [parser.parse(stamp) for stamp in stamps if stamp]
        if times:
            age = (now - max(times)).total_seconds()
            ages.append(age)
            changed.append(timedelta(seconds=age))
        else:
            changed.append(f'{Fore.RED}-')
    # get_stats returns placeholder strings if there are no values at all.
    changed += [s if isinstance(s, str) else timedelta(seconds=s) for s in get_stats(ages)]
    changed[-4] = '-'  # sum of last changed is meaningless...
    print(format_row('└─changed', *changed), file=sys.stderr)

    # this is hacky and does not handle all renames properly...
    # for example: if A is renamed to C and B is renamed to A this will break
    def count_commits(problem):
        """Count the commits touching any directory that ever contained this problem.yaml."""
        yaml_path = problem.path / 'problem.yaml'
        paths = git(
            'log', '--all', '--follow', '--name-only', '--relative', '--format=', '--', yaml_path
        ).split('\n')
        names = {Path(p).parent for p in paths if p.strip() != ''}
        return int(git('rev-list', '--all', '--count', '--', *names))

    commits = [count_commits(p) for p in problems]
    commits += get_stats(commits, '-')
    commits[-4] = '-'  # one commit can change multiple problems so the sum is meaningless...
    print(format_row('Commits', *commits), file=sys.stderr)
    print(file=sys.stderr)
    print(
        f'{Fore.CYAN}Total Commits{Style.RESET_ALL}:',
        int(git('rev-list', '--all', '--count')),
        file=sys.stderr,
    )
    print(
        f'{Fore.CYAN}Total Authors{Style.RESET_ALL}:',
        git('shortlog', '--group=%ae', '-s').count('\n'),
        file=sys.stderr,
    )
    duration = datetime.now(timezone.utc) - parser.parse(
        git('log', '--reverse', '--format=%cI').partition('\n')[0]
    )
    print(
        f'{Fore.CYAN}Preparation{Style.RESET_ALL}: {duration.days}d, {duration.seconds // 3600}h',
        file=sys.stderr,
    )
3 changes: 2 additions & 1 deletion bin/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,10 @@ def build_parser():
)

# Stats
subparsers.add_parser(
statsparser = subparsers.add_parser(
'stats', parents=[global_parser], help='show statistics for contest/problem'
)
statsparser.add_argument('--more', action='store_true', help='Print more stats.')

# Generate Testcases
genparser = subparsers.add_parser(
Expand Down
Loading