diff --git a/.gitignore b/.gitignore index 7dd3dc60..31339baa 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ **/CMakeFiles/** multipy/runtime/example/generated/ *.egg-info +.lintbin/ diff --git a/.lintrunner.toml b/.lintrunner.toml new file mode 100644 index 00000000..9103bb30 --- /dev/null +++ b/.lintrunner.toml @@ -0,0 +1,477 @@ +[[linter]] +code = 'FLAKE8' +include_patterns = ['**/*.py'] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/flake8_linter.py', + '--', + '@{{PATHSFILE}}' +] +init_command = [ + 'python3', + 'scripts/linters/pip_init.py', + '--dry-run={{DRYRUN}}', + 'flake8==3.8.2', + 'flake8-bugbear==20.1.4', + 'flake8-comprehensions==3.3.0', + 'flake8-executable==2.0.4', + 'flake8-pyi==20.5.0', + 'mccabe==0.6.1', + 'pycodestyle==2.6.0', + 'pyflakes==2.2.0', +] + + +[[linter]] +code = 'CLANGFORMAT' +include_patterns = [ + 'multipy/runtime/**/*.h', + 'multipy/runtime/**/*.cpp', +] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +init_command = [ + 'python3', + 'scripts/linters/s3_init.py', + '--config-json=scripts/linters/s3_init_config.json', + '--linter=clang-format', + '--dry-run={{DRYRUN}}', + '--output-dir=.lintbin', + '--output-name=clang-format', +] +command = [ + 'python3', + 'scripts/linters/clangformat_linter.py', + '--binary=.lintbin/clang-format', + '--', + '@{{PATHSFILE}}' +] +is_formatter = true + +[[linter]] +code = 'MYPY' +include_patterns = ['**/*.py'] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/linters/mypy_linter.py', + '--config=mypy.ini', + '--', + '@{{PATHSFILE}}' +] +init_command = [ + 'python3', + 'scripts/linters/pip_init.py', + '--dry-run={{DRYRUN}}', + 
'numpy==1.21.6', + 'expecttest==0.1.3', + 'mypy==0.960', + 'types-requests==2.27.25', + 'types-six==1.16.15', + 'types-PyYAML==6.0.7', + 'types-tabulate==0.8.8', + 'types-protobuf==3.19.18', + 'types-pkg-resources==0.1.3', + 'types-Jinja2==2.11.9', + 'junitparser==2.1.1', + 'rich==10.9.0', + 'pyyaml==6.0', +] + +[[linter]] +code = 'CLANGTIDY' +include_patterns = [ + 'multipy/runtime/**/*.h', + 'multipy/runtime/**/*.cpp', +] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +init_command = [ + 'python3', + 'scripts/linters/s3_init.py', + '--config-json=scripts/linters/s3_init_config.json', + '--linter=clang-tidy', + '--dry-run={{DRYRUN}}', + '--output-dir=.lintbin', + '--output-name=clang-tidy', +] +command = [ + 'python3', + 'scripts/linters/clangtidy_linter.py', + '--binary=.lintbin/clang-tidy', + '--build_dir=./build', + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'TYPEIGNORE' +include_patterns = ['**/*.py'] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/linters/grep_linter.py', + '--pattern=# type:\s*ignore([^\[]|$)', + '--linter-name=TYPEIGNORE', + '--error-name=unqualified type: ignore', + """--error-description=\ + This line has an unqualified `type: ignore`; \ + please convert it to `type: ignore[xxxx]`\ + """, + '--', + '@{{PATHSFILE}}' +] + + +[[linter]] +code = 'CIRCLECI' +include_patterns=['.circleci/**'] +command = [ + 'python3', + 'scripts/linters/circleci_linter.py', + '--regen-script-working-dir=.circleci', + '--config-yml=.circleci/config.yml', + '--regen-script=generate_config_yml.py', +] + +[[linter]] +code = 'NEWLINE' +include_patterns=['**'] +exclude_patterns=[ + '.git/**', + '**/build/**', + '**/__pycache__/**', +] +command = [ + 'python3', + 'scripts/linters/newlines_linter.py', + '--', + '@{{PATHSFILE}}', +] 
+is_formatter = true + +[[linter]] +code = 'SPACES' +include_patterns = ['**'] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/linters/grep_linter.py', + '--pattern=[[:blank:]]$', + '--linter-name=SPACES', + '--error-name=trailing spaces', + '--replace-pattern=s/[[:blank:]]+$//', + """--error-description=\ + This line has trailing spaces; please remove them.\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'TABS' +include_patterns = ['**'] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/linters/grep_linter.py', + # @lint-ignore TXT2 + '--pattern= ', + '--linter-name=TABS', + '--error-name=saw some tabs', + '--replace-pattern=s/\t/ /', + """--error-description=\ + This line has tabs; please replace them with spaces.\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'INCLUDE' +include_patterns = [ + '**/*.cpp', + '**/*.h', +] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/linters/grep_linter.py', + '--pattern=#include "', + '--linter-name=INCLUDE', + '--error-name=quoted include', + '--replace-pattern=s/#include "(.*)"$/#include <\1>/', + """--error-description=\ + This #include uses quotes; please convert it to #include \ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'PYBIND11_INCLUDE' +include_patterns = [ + '**/*.cpp', + '**/*.h', +] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/linters/grep_linter.py', + '--pattern=#include ', + '--linter-name=PYBIND11_INCLUDE', + '--match-first-only', + '--error-name=direct 
include of pybind11', + # https://stackoverflow.com/a/33416489/23845 + # NB: this won't work if the pybind11 include is on the first line; + # but that's fine because it will just mean the lint will still fail + # after applying the change and you will have to fix it manually + '--replace-pattern=1,/(#include \n\1/', + """--error-description=\ + This #include directly includes pybind11 without also including \ + #include ; this means some important \ + specializations may not be included.\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'PYBIND11_SPECIALIZATION' +include_patterns = [ + '**/*.cpp', + '**/*.h', +] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/linters/grep_linter.py', + '--pattern=PYBIND11_DECLARE_HOLDER_TYPE', + '--linter-name=PYBIND11_SPECIALIZATION', + '--error-name=pybind11 specialization in non-standard location', + """--error-description=\ + This pybind11 specialization (PYBIND11_DECLARE_HOLDER_TYPE) should \ + be placed in torch/csrc/utils/pybind.h so that it is guaranteed to be \ + included at any site that may potentially make use of it via py::cast. \ + If your specialization is in the same header file as the definition \ + of the holder type, you can ignore this lint by adding your header to \ + the exclude_patterns for this lint in .lintrunner.toml. 
For more \ + information see https://github.com/pybind/pybind11/issues/4099 \ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'PYPIDEP' +include_patterns = ['.github/**'] +exclude_patterns = [ + '**/*.rst', + '**/*.py', + '**/*.md', + '**/*.diff', +] +command = [ + 'python3', + 'scripts/linters/grep_linter.py', + """--pattern=\ + (pip|pip3|python -m pip|python3 -m pip|python3 -mpip|python -mpip) \ + install ([a-zA-Z0-9][A-Za-z0-9\\._\\-]+)([^/=<>~!]+)[A-Za-z0-9\\._\\-\\*\\+\\!]*$\ + """, + '--linter-name=PYPIDEP', + '--error-name=unpinned PyPI install', + """--error-description=\ + This line has unpinned PyPi installs; \ + please pin them to a specific version: e.g. 'thepackage==1.2'\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'EXEC' +include_patterns = ['**'] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/linters/exec_linter.py', + '--', + '@{{PATHSFILE}}', +] + +[[linter]] +code = 'CMAKE' +include_patterns = [ + "**/*.cmake", + "**/*.cmake.in", + "**/CMakeLists.txt", +] +exclude_patterns = [ + '.git/**', + '**/build/**', + '**/__pycache__/**', + '**/third_party/**', + 'multipy/runtime/example/generated/**', +] +command = [ + 'python3', + 'scripts/linters/cmake_linter.py', + '--config=.cmakelintrc', + '--', + '@{{PATHSFILE}}', +] +init_command = [ + 'python3', + 'scripts/linters/pip_init.py', + '--dry-run={{DRYRUN}}', + 'cmakelint==1.4.1', +] + +[[linter]] +code = 'SHELLCHECK' +include_patterns = [ + '.jenkins/pytorch/**/*.sh' +] +command = [ + 'python3', + 'scripts/linters/shellcheck_linter.py', + '--', + '@{{PATHSFILE}}', +] +init_command = [ + 'python3', + 'scripts/linters/pip_init.py', + '--dry-run={{DRYRUN}}', + 'shellcheck-py==0.7.2.1', +] + +[[linter]] +code = 'ACTIONLINT' +include_patterns = [ + '.github/workflows/*.yml', + '.github/workflows/*.yaml', + # actionlint does not support composite 
actions yet + # '.github/actions/**/*.yml', + # '.github/actions/**/*.yaml', +] +command = [ + 'python3', + 'scripts/linters/actionlint_linter.py', + '--binary=.lintbin/actionlint', + '--', + '@{{PATHSFILE}}', +] +init_command = [ + 'python3', + 'scripts/linters/s3_init.py', + '--config-json=scripts/linters/s3_init_config.json', + '--linter=actionlint', + '--dry-run={{DRYRUN}}', + '--output-dir=.lintbin', + '--output-name=actionlint', +] + +[[linter]] +code = 'TESTOWNERS' +include_patterns = [ + 'test/**/test_*.py', + 'test/**/*_test.py', +] +exclude_patterns = [ + 'test/run_test.py', +] +command = [ + 'python3', + 'scripts/linters/testowners_linter.py', + '--', + '@{{PATHSFILE}}', +] + +# Black + usort +[[linter]] +code = 'UFMT' +include_patterns = [ + '**/*.py', +] +command = [ + 'python3', + 'scripts/linters/ufmt_linter.py', + '--', + '@{{PATHSFILE}}' +] +init_command = [ + 'python3', + 'scripts/linters/pip_init.py', + '--dry-run={{DRYRUN}}', + '--no-black-binary', + 'black==22.3.0', + 'ufmt==1.3.3', + 'usort==1.0.2', +] +is_formatter = true diff --git a/scripts/linters/README.md b/scripts/linters/README.md new file mode 100644 index 00000000..1475724a --- /dev/null +++ b/scripts/linters/README.md @@ -0,0 +1,10 @@ +# lintrunner adapters + +These files adapt our various linters to work with `lintrunner`. + +## Adding a new linter +1. init and linter +2. {{DRYRUN}} and {{PATHSFILE}} +3. never exit uncleanly +4. Communication protocol +5. 
Self-contained diff --git a/scripts/linters/__init__.py b/scripts/linters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/linters/actionlint_linter.py b/scripts/linters/actionlint_linter.py new file mode 100644 index 00000000..d9131b37 --- /dev/null +++ b/scripts/linters/actionlint_linter.py @@ -0,0 +1,155 @@ +import argparse +import concurrent.futures +import json +import logging +import os +import re +import subprocess +import time +from enum import Enum +from typing import List, NamedTuple, Optional, Pattern + + +LINTER_CODE = "ACTIONLINT" + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +RESULTS_RE: Pattern[str] = re.compile( + r"""(?mx) + ^ + (?P<file>.*?): + (?P<line>\d+): + (?P<char>\d+): + \s(?P<message>.*) + \s(?P<code>\[.*\]) + $ + """ +) + + +def run_command( + args: List[str], +) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run( + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +def check_file( + binary: str, + file: str, +) -> List[LintMessage]: + try: + proc = run_command([binary, file]) + except OSError as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=(f"Failed due to {err.__class__.__name__}:\n{err}"), + ) + ] + stdout = str(proc.stdout, "utf-8").strip() + return [ + LintMessage( + path=match["file"], + name=match["code"], + description=match["message"], + line=int(match["line"]), + 
char=int(match["char"]), + code=LINTER_CODE, + severity=LintSeverity.ERROR, + original=None, + replacement=None, + ) + for match in RESULTS_RE.finditer(stdout) + ] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="actionlint runner", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--binary", + required=True, + help="actionlint binary path", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + + args = parser.parse_args() + + if not os.path.exists(args.binary): + err_msg = LintMessage( + path="", + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Could not find actionlint binary at {args.binary}," + " you may need to run `lintrunner init`." + ), + ) + print(json.dumps(err_msg._asdict()), flush=True) + exit(0) + + with concurrent.futures.ThreadPoolExecutor( + max_workers=os.cpu_count(), + thread_name_prefix="Thread", + ) as executor: + futures = { + executor.submit( + check_file, + args.binary, + filename, + ): filename + for filename in args.filenames + } + for future in concurrent.futures.as_completed(futures): + try: + for lint_message in future.result(): + print(json.dumps(lint_message._asdict()), flush=True) + except Exception: + logging.critical('Failed at "%s".', futures[future]) + raise diff --git a/scripts/linters/black_linter.py b/scripts/linters/black_linter.py new file mode 100644 index 00000000..8459b6a1 --- /dev/null +++ b/scripts/linters/black_linter.py @@ -0,0 +1,228 @@ +import argparse +import concurrent.futures +import json +import logging +import os +import subprocess +import sys +import time +from enum import Enum +from typing import Any, BinaryIO, List, NamedTuple, Optional + + +IS_WINDOWS: bool = os.name == "nt" + + +def eprint(*args: Any, **kwargs: Any) -> None: + print(*args, file=sys.stderr, flush=True, **kwargs) + + +class LintSeverity(str, Enum): + ERROR = 
"error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def as_posix(name: str) -> str: + return name.replace("\\", "/") if IS_WINDOWS else name + + +def _run_command( + args: List[str], + *, + stdin: BinaryIO, + timeout: int, +) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run( + args, + stdin=stdin, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=IS_WINDOWS, # So batch scripts are found. + timeout=timeout, + check=True, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +def run_command( + args: List[str], + *, + stdin: BinaryIO, + retries: int, + timeout: int, +) -> "subprocess.CompletedProcess[bytes]": + remaining_retries = retries + while True: + try: + return _run_command(args, stdin=stdin, timeout=timeout) + except subprocess.TimeoutExpired as err: + if remaining_retries == 0: + raise err + remaining_retries -= 1 + logging.warning( + "(%s/%s) Retrying because command failed with: %r", + retries - remaining_retries, + retries, + err, + ) + time.sleep(1) + + +def check_file( + filename: str, + retries: int, + timeout: int, +) -> List[LintMessage]: + try: + with open(filename, "rb") as f: + original = f.read() + with open(filename, "rb") as f: + proc = run_command( + [sys.executable, "-mblack", "--stdin-filename", filename, "-"], + stdin=f, + retries=retries, + timeout=timeout, + ) + except subprocess.TimeoutExpired: + return [ + LintMessage( + path=filename, + line=None, + char=None, + code="BLACK", + severity=LintSeverity.ERROR, + name="timeout", + original=None, + replacement=None, + description=( + "black timed out while trying to process a 
file. " + "Please report an issue in pytorch/pytorch with the " + "label 'module: lint'" + ), + ) + ] + except (OSError, subprocess.CalledProcessError) as err: + return [ + LintMessage( + path=filename, + line=None, + char=None, + code="BLACK", + severity=LintSeverity.ADVICE, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + ] + + replacement = proc.stdout + if original == replacement: + return [] + + return [ + LintMessage( + path=filename, + line=None, + char=None, + code="BLACK", + severity=LintSeverity.WARNING, + name="format", + original=original.decode("utf-8"), + replacement=replacement.decode("utf-8"), + description="Run `lintrunner -a` to apply this patch.", + ) + ] + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Format files with black.", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--retries", + default=3, + type=int, + help="times to retry timed out black", + ) + parser.add_argument( + "--timeout", + default=90, + type=int, + help="seconds to wait for black", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + with concurrent.futures.ThreadPoolExecutor( + max_workers=os.cpu_count(), 
+ thread_name_prefix="Thread", + ) as executor: + futures = { + executor.submit(check_file, x, args.retries, args.timeout): x + for x in args.filenames + } + for future in concurrent.futures.as_completed(futures): + try: + for lint_message in future.result(): + print(json.dumps(lint_message._asdict()), flush=True) + except Exception: + logging.critical('Failed at "%s".', futures[future]) + raise + + +if __name__ == "__main__": + main() diff --git a/scripts/linters/circleci_linter.py b/scripts/linters/circleci_linter.py new file mode 100644 index 00000000..6200b383 --- /dev/null +++ b/scripts/linters/circleci_linter.py @@ -0,0 +1,159 @@ +""" +Checks that the configuration in .circleci/config.yml has been properly regenerated. +""" + +import argparse +import json +import logging +import os +import subprocess +import sys +import time +from enum import Enum +from typing import List, NamedTuple, Optional + + +CHECKED_IN_FILE = "config.yml" +REGENERATION_SCRIPT = "regenerate.sh" + +PARENT_DIR = os.path.basename(os.path.dirname(os.path.abspath(__file__))) +README_PATH = os.path.join(PARENT_DIR, "README.md") + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +IS_WINDOWS: bool = os.name == "nt" + + +def as_posix(name: str) -> str: + return name.replace("\\", "/") if IS_WINDOWS else name + + +def run_command(args: List[str], cwd: str) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run( + args, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +def 
run_check( + regen_script_working_dir: str, regen_script: str, config_file: str +) -> List[LintMessage]: + try: + proc = run_command(["python3", regen_script], regen_script_working_dir) + except Exception as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code="CIRCLECI", + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + ] + + with open(config_file, mode="rb") as f: + config = f.read() + if proc.stdout == config: + return [] + + return [ + LintMessage( + path=config_file, + line=None, + char=None, + code="CIRCLECI", + severity=LintSeverity.ERROR, + name="config inconsistency", + original=config.decode("utf-8"), + replacement=proc.stdout.decode("utf-8"), + description=( + "The checked-in CircleCI config.yml file does not match what was generated by the scripts. " + "Re-run with '-a' to accept changes." 
+ ), + ) + ] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="circleci consistency linter", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--config-yml", + required=True, + help="location of config.yml", + ) + parser.add_argument( + "--regen-script-working-dir", + required=True, + help="this script will chdir to this argument before running --regen-script", + ) + parser.add_argument( + "--regen-script", + required=True, + help="location of the config generation script, relative to --regen-script-working-dir", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET if args.verbose else logging.DEBUG, + stream=sys.stderr, + ) + + for lint_message in run_check( + args.regen_script_working_dir, args.regen_script, args.config_yml + ): + print(json.dumps(lint_message._asdict()), flush=True) diff --git a/scripts/linters/clangformat_linter.py b/scripts/linters/clangformat_linter.py new file mode 100644 index 00000000..3445dee4 --- /dev/null +++ b/scripts/linters/clangformat_linter.py @@ -0,0 +1,249 @@ +import argparse +import concurrent.futures +import json +import logging +import os +import subprocess +import sys +import time +from enum import Enum +from pathlib import Path +from typing import Any, List, NamedTuple, Optional + + +IS_WINDOWS: bool = os.name == "nt" + + +def eprint(*args: Any, **kwargs: Any) -> None: + print(*args, file=sys.stderr, flush=True, **kwargs) + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def as_posix(name: str) -> str: + 
return name.replace("\\", "/") if IS_WINDOWS else name + + +def _run_command( + args: List[str], + *, + timeout: int, +) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run( + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=IS_WINDOWS, # So batch scripts are found. + timeout=timeout, + check=True, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +def run_command( + args: List[str], + *, + retries: int, + timeout: int, +) -> "subprocess.CompletedProcess[bytes]": + remaining_retries = retries + while True: + try: + return _run_command(args, timeout=timeout) + except subprocess.TimeoutExpired as err: + if remaining_retries == 0: + raise err + remaining_retries -= 1 + logging.warning( + "(%s/%s) Retrying because command failed with: %r", + retries - remaining_retries, + retries, + err, + ) + time.sleep(1) + + +def check_file( + filename: str, + binary: str, + retries: int, + timeout: int, +) -> List[LintMessage]: + try: + with open(filename, "rb") as f: + original = f.read() + proc = run_command( + [binary, filename], + retries=retries, + timeout=timeout, + ) + except subprocess.TimeoutExpired: + return [ + LintMessage( + path=filename, + line=None, + char=None, + code="CLANGFORMAT", + severity=LintSeverity.ERROR, + name="timeout", + original=None, + replacement=None, + description=( + "clang-format timed out while trying to process a file. 
" + "Please report an issue in pytorch/pytorch with the " + "label 'module: lint'" + ), + ) + ] + except (OSError, subprocess.CalledProcessError) as err: + return [ + LintMessage( + path=filename, + line=None, + char=None, + code="CLANGFORMAT", + severity=LintSeverity.ADVICE, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + ] + + replacement = proc.stdout + if original == replacement: + return [] + + return [ + LintMessage( + path=filename, + line=None, + char=None, + code="CLANGFORMAT", + severity=LintSeverity.WARNING, + name="format", + original=original.decode("utf-8"), + replacement=replacement.decode("utf-8"), + description="See https://clang.llvm.org/docs/ClangFormat.html.\nRun `lintrunner -a` to apply this patch.", + ) + ] + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Format files with clang-format.", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--binary", + required=True, + help="clang-format binary path", + ) + parser.add_argument( + "--retries", + default=3, + type=int, + help="times to retry timed out clang-format", + ) + parser.add_argument( + "--timeout", + default=90, + type=int, + help="seconds to wait for clang-format", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else 
logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + binary = os.path.normpath(args.binary) if IS_WINDOWS else args.binary + if not Path(binary).exists(): + lint_message = LintMessage( + path=None, + line=None, + char=None, + code="CLANGFORMAT", + severity=LintSeverity.ERROR, + name="init-error", + original=None, + replacement=None, + description=( + f"Could not find clang-format binary at {binary}, " + "did you forget to run `lintrunner init`?" + ), + ) + print(json.dumps(lint_message._asdict()), flush=True) + sys.exit(0) + + with concurrent.futures.ThreadPoolExecutor( + max_workers=os.cpu_count(), + thread_name_prefix="Thread", + ) as executor: + futures = { + executor.submit(check_file, x, binary, args.retries, args.timeout): x + for x in args.filenames + } + for future in concurrent.futures.as_completed(futures): + try: + for lint_message in future.result(): + print(json.dumps(lint_message._asdict()), flush=True) + except Exception: + logging.critical('Failed at "%s".', futures[future]) + raise + + +if __name__ == "__main__": + main() diff --git a/scripts/linters/clangtidy_linter.py b/scripts/linters/clangtidy_linter.py new file mode 100644 index 00000000..d7e19452 --- /dev/null +++ b/scripts/linters/clangtidy_linter.py @@ -0,0 +1,279 @@ +import argparse +import concurrent.futures +import json +import logging +import os +import re +import shutil +import subprocess +import sys +import time +from enum import Enum +from pathlib import Path +from sysconfig import get_paths as gp +from typing import Any, List, NamedTuple, Optional, Pattern + +# PyTorch directory root +result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + stdout=subprocess.PIPE, + check=True, +) +PYTORCH_ROOT = result.stdout.decode("utf-8").strip() +IS_WINDOWS: bool = os.name == "nt" + +# Returns '/usr/local/include/python' +def get_python_include_dir() -> str: + return gp()["include"] + + +def eprint(*args: Any, **kwargs: Any) -> None: + 
print(*args, file=sys.stderr, flush=True, **kwargs) + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def as_posix(name: str) -> str: + return name.replace("\\", "/") if IS_WINDOWS else name + + +# c10/core/DispatchKey.cpp:281:26: error: 'k' used after it was moved [bugprone-use-after-move] +RESULTS_RE: Pattern[str] = re.compile( + r"""(?mx) + ^ + (?P<file>.*?): + (?P<line>\d+): + (?:(?P<column>-?\d+):)? + \s(?P<severity>\S+?):? + \s(?P<message>.*) + \s(?P<code>\[.*\]) + $ + """ +) + + +def run_command( + args: List[str], +) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run( + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +# Maps the severity word matched by RESULTS_RE to a LintSeverity; check_file +# falls back to LintSeverity.ERROR for any word that is not a key here. +severities = { + "error": LintSeverity.ERROR, + "warning": LintSeverity.WARNING, +} + + +def clang_search_dirs() -> List[str]: + # Compilers are ordered based on fallback preference + # We pick the first one that is available on the system + candidates = ["clang", "gcc", "cpp", "cc"] + compilers = [c for c in candidates if shutil.which(c) is not None] + if not compilers: + raise RuntimeError(f"None of {candidates} were found") + compiler = compilers[0] + + result = subprocess.run( + [compiler, "-E", "-x", "c++", "-", "-v"], + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + ) + stderr = result.stderr.decode().strip().split("\n") + search_start = r"#include.*search starts 
def check_file(
    filename: str,
    binary: str,
    build_dir: Path,
) -> List[LintMessage]:
    """Run clang-tidy on ``filename`` and convert its diagnostics to lint messages.

    Args:
        filename: Path of the source file to lint.
        binary: Path of the clang-tidy executable.
        build_dir: Directory passed to ``clang-tidy -p``; reported file paths
            are resolved against it.

    Returns:
        One ``LintMessage`` per diagnostic matched by ``RESULTS_RE`` on
        clang-tidy's stdout, or a single "command-failed" message when the
        binary could not be launched.
    """
    try:
        proc = run_command(
            [binary, f"-p={build_dir}", *include_args, filename],
        )
    except OSError as err:
        return [
            LintMessage(
                path=filename,
                line=None,
                char=None,
                code="CLANGTIDY",
                severity=LintSeverity.ERROR,
                name="command-failed",
                original=None,
                replacement=None,
                description=(f"Failed due to {err.__class__.__name__}:\n{err}"),
            )
        ]

    lint_messages = []
    for match in RESULTS_RE.finditer(proc.stdout.decode()):
        # clang-tidy reports files relative to the build directory. Anchor the
        # path there explicitly instead of os.chdir()-ing: this function runs
        # on thread-pool workers and the working directory is process-wide, so
        # concurrent chdir/restore pairs can interleave and resolve paths
        # against the wrong directory. Joining an absolute reported path with
        # build_dir yields that absolute path unchanged.
        abs_path = str((Path(build_dir) / match["file"]).resolve())
        column = match["column"]
        lint_messages.append(
            LintMessage(
                path=abs_path,
                name=match["code"],
                description=match["message"],
                line=int(match["line"]),
                # Negative columns carry no usable position, so drop them.
                char=int(column)
                if column is not None and not column.startswith("-")
                else None,
                code="CLANGTIDY",
                severity=severities.get(match["severity"], LintSeverity.ERROR),
                original=None,
                replacement=None,
            )
        )

    return lint_messages
# Matches one cmakelint diagnostic per line, for example:
# CMakeLists.txt:901: Lines should be <= 80 characters long [linelength]
#
# The group names are the keys the cmakelint check_file() reads from each
# match: file, line, message and code (the trailing bracketed category,
# brackets included).
RESULTS_RE: Pattern[str] = re.compile(
    r"""(?mx)
    ^
    (?P<file>.*?):
    (?P<line>\d+):
    \s(?P<message>.*)
    \s(?P<code>\[.*\])
    $
    """
)
def check_file(
    filename: str,
    config: str,
) -> List[LintMessage]:
    """Lint one file with ``cmakelint`` and translate its report.

    Args:
        filename: Path of the file handed to cmakelint.
        config: Value forwarded as ``--config=<config>``.

    Returns:
        One error-severity ``LintMessage`` per line of cmakelint's stdout that
        ``RESULTS_RE`` matches, or a single "command-failed" message if the
        process could not be started.
    """
    command = ["cmakelint", f"--config={config}", filename]
    try:
        completed = run_command(command)
    except OSError as err:
        failure = LintMessage(
            path=None,
            line=None,
            char=None,
            code=LINTER_CODE,
            severity=LintSeverity.ERROR,
            name="command-failed",
            original=None,
            replacement=None,
            description=(f"Failed due to {err.__class__.__name__}:\n{err}"),
        )
        return [failure]

    # Only stdout is parsed; stderr is captured by run_command but unused here.
    report = completed.stdout.decode("utf-8").strip()
    messages: List[LintMessage] = []
    for hit in RESULTS_RE.finditer(report):
        messages.append(
            LintMessage(
                path=hit["file"],
                name=hit["code"],
                description=hit["message"],
                line=int(hit["line"]),
                char=None,
                code=LINTER_CODE,
                severity=LintSeverity.ERROR,
                original=None,
                replacement=None,
            )
        )
    return messages
def check_file(filename: str) -> Optional[LintMessage]:
    """Flag ``filename`` when ``os.access`` reports it as executable.

    Returns:
        An "executable-permissions" error message for the file, or ``None``
        when the executable check fails.
    """
    # Guard clause: nothing to report for files without execute access.
    if not os.access(filename, os.X_OK):
        return None

    return LintMessage(
        path=filename,
        line=None,
        char=None,
        code=LINTER_CODE,
        severity=LintSeverity.ERROR,
        name="executable-permissions",
        original=None,
        replacement=None,
        description="This file has executable permission; please remove it by using `chmod -x`.",
    )
# Rule codes documented at https://pypi.org/project/flake8-comprehensions/#rules
# Each code is listed exactly once (the earlier literal repeated "C413").
DOCUMENTED_IN_FLAKE8COMPREHENSIONS: Set[str] = {
    "C400", "C401", "C402", "C403", "C404", "C405", "C406", "C407", "C408", "C409",
    "C410",
    "C411", "C412", "C413", "C414", "C415", "C416",
}
# Matches one flake8 diagnostic per line, for example:
# stdin:2: W802 undefined name 'foo'
# stdin:3:6: T484 Name 'foo' is not defined
# stdin:3:-100: W605 invalid escape sequence '\/'
# stdin:3:1: E302 expected 2 blank lines, found 1
#
# Groups: file, line, column (optional, may be negative), code (an optional
# trailing colon is dropped) and message.
RESULTS_RE: Pattern[str] = re.compile(
    r"""(?mx)
    ^
    (?P<file>.*?):
    (?P<line>\d+):
    (?:(?P<column>-?\d+):)?
    \s(?P<code>\S+?):?
    \s(?P<message>.*)
    $
    """
)
def _run_command(
    args: List[str],
    *,
    extra_env: Optional[Dict[str, str]],
) -> "subprocess.CompletedProcess[str]":
    """Run ``args`` once, capturing stdout/stderr as UTF-8 text.

    Args:
        args: Command line to execute (no shell is involved).
        extra_env: Environment variables layered on top of the inherited
            environment for the child process, or ``None`` for no overrides.

    Returns:
        The completed process.

    Raises:
        subprocess.CalledProcessError: The command exited non-zero.
        OSError: The command could not be started.
    """
    logging.debug(
        "$ %s",
        " ".join(
            ([f"{k}={v}" for (k, v) in extra_env.items()] if extra_env else []) + args
        ),
    )
    # Previously extra_env was only logged and never reached the child process.
    # env=None makes subprocess inherit the current environment unchanged.
    env = {**os.environ, **extra_env} if extra_env else None
    start_time = time.monotonic()
    try:
        return subprocess.run(
            args,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
            encoding="utf-8",
        )
    finally:
        end_time = time.monotonic()
        logging.debug("took %dms", (end_time - start_time) * 1000)


def run_command(
    args: List[str],
    *,
    extra_env: Optional[Dict[str, str]],
    retries: int,
) -> "subprocess.CompletedProcess[str]":
    """Run ``args``, retrying when the tool reports a lint timeout.

    A failed run is retried (after a one-second pause) only while retries
    remain and the failure's stdout starts with the
    ``ERROR:1:1: X000 linting with ... timed out`` marker; any other failure
    is re-raised immediately.

    Args:
        args: Command line to execute.
        extra_env: Extra environment variables for the child process.
        retries: Maximum number of additional attempts after the first.

    Raises:
        subprocess.CalledProcessError: The command failed and was not retried
            (or retries were exhausted).
    """
    remaining_retries = retries
    while True:
        try:
            return _run_command(args, extra_env=extra_env)
        except subprocess.CalledProcessError as err:
            if remaining_retries == 0 or not re.match(
                r"^ERROR:1:1: X000 linting with .+ timed out after \d+ seconds",
                err.stdout,
            ):
                raise err
            remaining_retries -= 1
            logging.warning(
                "(%s/%s) Retrying because command failed with: %r",
                retries - remaining_retries,
                retries,
                err,
            )
            time.sleep(1)
"T49", + ] + ): + return LintSeverity.ADVICE + + # "F821": Undefined name + # "E999": syntax error + if any(code.startswith(x) for x in ["F821", "E999"]): + return LintSeverity.ERROR + + # "F": PyFlakes Error + # "B": flake8-bugbear Error + # "E": PEP8 "Error" + # "W": PEP8 Warning + # possibly other plugins... + return LintSeverity.WARNING + + +def get_issue_documentation_url(code: str) -> str: + if code in DOCUMENTED_IN_FLAKE8RULES: + return f"https://www.flake8rules.com/rules/{code}.html" + + if code in DOCUMENTED_IN_FLAKE8COMPREHENSIONS: + return "https://pypi.org/project/flake8-comprehensions/#rules" + + if code in DOCUMENTED_IN_BUGBEAR: + return "https://github.com/PyCQA/flake8-bugbear#list-of-warnings" + + return "" + + +def check_files( + filenames: List[str], + flake8_plugins_path: Optional[str], + severities: Dict[str, LintSeverity], + retries: int, +) -> List[LintMessage]: + try: + proc = run_command( + [sys.executable, "-mflake8", "--exit-zero"] + filenames, + extra_env={"FLAKE8_PLUGINS_PATH": flake8_plugins_path} + if flake8_plugins_path + else None, + retries=retries, + ) + except (OSError, subprocess.CalledProcessError) as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code="FLAKE8", + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.strip() or "(empty)", + stdout=err.stdout.strip() or "(empty)", + ) + ), + ) + ] + + return [ + LintMessage( + path=match["file"], + name=match["code"], + description="{}\nSee {}".format( + match["message"], + get_issue_documentation_url(match["code"]), + ), + line=int(match["line"]), + char=int(match["column"]) + 
def main() -> None:
    """Parse the command line, run flake8 over the given files, and print results.

    Each lint message is written to stdout as one JSON object per line.
    Malformed ``--severity`` values are rejected through ``parser.error``
    (usage message, exit status 2) rather than an ``assert``, which would be
    stripped under ``python -O``.
    """
    parser = argparse.ArgumentParser(
        description="Flake8 wrapper linter.",
        fromfile_prefix_chars="@",
    )
    parser.add_argument(
        "--flake8-plugins-path",
        help="FLAKE8_PLUGINS_PATH env value",
    )
    parser.add_argument(
        "--severity",
        action="append",
        help="map code to severity (e.g. `B950:advice`)",
    )
    parser.add_argument(
        "--retries",
        default=3,
        type=int,
        help="times to retry timed out flake8",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="verbose logging",
    )
    parser.add_argument(
        "filenames",
        nargs="+",
        help="paths to lint",
    )
    args = parser.parse_args()

    logging.basicConfig(
        format="<%(threadName)s:%(levelname)s> %(message)s",
        level=logging.NOTSET
        if args.verbose
        else logging.DEBUG
        if len(args.filenames) < 1000
        else logging.INFO,
        stream=sys.stderr,
    )

    flake8_plugins_path = (
        None
        if args.flake8_plugins_path is None
        else os.path.realpath(args.flake8_plugins_path)
    )

    # Per-code overrides of the built-in severity mapping, given as CODE:LEVEL.
    severities: Dict[str, LintSeverity] = {}
    for severity in args.severity or []:
        code, separator, level = severity.partition(":")
        if not separator:
            parser.error(f"invalid severity `{severity}`")
        try:
            severities[code] = LintSeverity(level)
        except ValueError:
            parser.error(f"invalid severity `{severity}`")

    lint_messages = check_files(
        args.filenames, flake8_plugins_path, severities, args.retries
    )
    for lint_message in lint_messages:
        print(json.dumps(lint_message._asdict()), flush=True)
def run_cmd(cmd: List[str]) -> None:
    """Run ``cmd``, echo its captured output, and abort the script on failure.

    The command's stdout and stderr are captured, decoded as UTF-8, stripped
    and printed once the command has finished.

    Args:
        cmd: Command line to execute (no shell is involved).

    Raises:
        SystemExit: With status 1 when the command exits non-zero.
    """
    print(f"Running: {cmd}")
    result = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,  # the return code is inspected explicitly below
    )
    stdout, stderr = (
        result.stdout.decode("utf-8").strip(),
        result.stderr.decode("utf-8").strip(),
    )
    print(stdout)
    print(stderr)
    if result.returncode != 0:
        print(f"Failed to run {cmd}")
        # sys.exit rather than the bare exit(): the latter is injected by the
        # site module for interactive use and is missing under `python -S`.
        sys.exit(1)
def _command_failure(linter_name: str, err: Exception) -> LintMessage:
    """Build the "command-failed" lint message describing ``err``."""
    if isinstance(err, subprocess.CalledProcessError):
        description = (
            "COMMAND (exit code {returncode})\n"
            "{command}\n\n"
            "STDERR\n{stderr}\n\n"
            "STDOUT\n{stdout}"
        ).format(
            returncode=err.returncode,
            command=" ".join(as_posix(x) for x in err.cmd),
            stderr=err.stderr.decode("utf-8").strip() or "(empty)",
            stdout=err.stdout.decode("utf-8").strip() or "(empty)",
        )
    else:
        description = f"Failed due to {err.__class__.__name__}:\n{err}"
    return LintMessage(
        path=None,
        line=None,
        char=None,
        code=linter_name,
        severity=LintSeverity.ERROR,
        name="command-failed",
        original=None,
        replacement=None,
        description=description,
    )


def lint_file(
    matching_line: str,
    allowlist_pattern: str,
    replace_pattern: str,
    linter_name: str,
    error_name: str,
    error_description: str,
) -> Optional[LintMessage]:
    """Turn one line of grep output into a lint message.

    Args:
        matching_line: A grep hit such as
            ``tools/linter/clangtidy_linter.py:13:import foo.bar.baz``; the
            text before the first colon is taken as the file name and the
            next field, when present, as the line number.
        allowlist_pattern: If truthy, the file is grepped for this pattern and
            the hit is dropped when the pattern is found.
        replace_pattern: If truthy, a ``sed -r`` expression whose output over
            the whole file becomes the proposed replacement.
        linter_name: Value for the message's ``code`` field.
        error_name: Value for the message's ``name`` field.
        error_description: Value for the message's ``description`` field.

    Returns:
        The lint message for the hit, a "command-failed" message when a helper
        command or the file read fails, or ``None`` when the allowlist pattern
        is present in the file.
    """
    split = matching_line.split(":")
    filename = split[0]

    if allowlist_pattern:
        try:
            proc = run_command(["grep", "-nEHI", allowlist_pattern, filename])
        except Exception as err:
            return _command_failure(linter_name, err)

        # allowlist pattern was found, abort lint
        if proc.returncode == 0:
            return None

    original = None
    replacement = None
    if replace_pattern:
        try:
            # Read with the same encoding used to decode sed's output below so
            # original and replacement stay comparable whatever the locale is.
            with open(filename, "r", encoding="utf-8") as f:
                original = f.read()
            proc = run_command(["sed", "-r", replace_pattern, filename])
            replacement = proc.stdout.decode("utf-8")
        except Exception as err:
            return _command_failure(linter_name, err)

    return LintMessage(
        path=filename,
        line=int(split[1]) if len(split) > 1 else None,
        char=None,
        code=linter_name,
        severity=LintSeverity.ERROR,
        name=error_name,
        original=original,
        replacement=replacement,
        description=error_description,
    )
match the first hit in the file", + ) + parser.add_argument( + "--error-name", + required=True, + help="human-readable description of what the error is", + ) + parser.add_argument( + "--error-description", + required=True, + help="message to display when the pattern is found", + ) + parser.add_argument( + "--replace-pattern", + help=( + "the form of a pattern passed to `sed -r`. " + "If specified, this will become proposed replacement text." + ), + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + files_with_matches = [] + if args.match_first_only: + files_with_matches = ["--files-with-matches"] + + try: + proc = run_command( + ["grep", "-nEHI", *files_with_matches, args.pattern, *args.filenames] + ) + except Exception as err: + err_msg = LintMessage( + path=None, + line=None, + char=None, + code=args.linter_name, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + print(json.dumps(err_msg._asdict()), flush=True) + exit(0) + + lines = proc.stdout.decode().splitlines() + for line in lines: + lint_message = lint_file( + line, + args.allowlist_pattern, + args.replace_pattern, + args.linter_name, + 
args.error_name, + args.error_description, + ) + if lint_message is not None: + print(json.dumps(lint_message._asdict()), flush=True) + + +if __name__ == "__main__": + main() diff --git a/scripts/linters/mypy_linter.py b/scripts/linters/mypy_linter.py new file mode 100644 index 00000000..65ee8850 --- /dev/null +++ b/scripts/linters/mypy_linter.py @@ -0,0 +1,191 @@ +import argparse +import json +import logging +import os +import re +import subprocess +import sys +import time +from enum import Enum +from pathlib import Path +from typing import Any, Dict, List, NamedTuple, Optional, Pattern + + +IS_WINDOWS: bool = os.name == "nt" + + +def eprint(*args: Any, **kwargs: Any) -> None: + print(*args, file=sys.stderr, flush=True, **kwargs) + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def as_posix(name: str) -> str: + return name.replace("\\", "/") if IS_WINDOWS else name + + +# tools/linter/flake8_linter.py:15:13: error: Incompatibl...int") [assignment] +RESULTS_RE: Pattern[str] = re.compile( + r"""(?mx) + ^ + (?P.*?): + (?P\d+): + (?:(?P-?\d+):)? + \s(?P\S+?):? 
def main() -> None:
    """Parse the command line, run mypy over the given files, and print results.

    For every ``.py`` input whose sibling ``.pyi`` stub exists, the stub is
    checked instead of the source file. Each lint message is written to
    stdout as one JSON object per line.
    """
    parser = argparse.ArgumentParser(
        description="mypy wrapper linter.",
        fromfile_prefix_chars="@",
    )
    parser.add_argument(
        "--retries",
        default=3,
        type=int,
        help="times to retry timed out mypy",
    )
    parser.add_argument(
        "--config",
        required=True,
        help="path to an mypy .ini config file",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="verbose logging",
    )
    parser.add_argument(
        "filenames",
        nargs="+",
        help="paths to lint",
    )
    args = parser.parse_args()

    logging.basicConfig(
        format="<%(threadName)s:%(levelname)s> %(message)s",
        level=logging.NOTSET
        if args.verbose
        else logging.DEBUG
        if len(args.filenames) < 1000
        else logging.INFO,
        stream=sys.stderr,
    )

    # Use a dictionary here to preserve order. mypy cares about order,
    # tragically, e.g. https://github.com/python/mypy/issues/2015
    filenames: Dict[str, bool] = {}

    # If a stub file exists, have mypy check it instead of the original file, in
    # accordance with PEP-484 (see https://www.python.org/dev/peps/pep-0484/#stub-files)
    for filename in args.filenames:
        # Only the trailing ".py" suffix is swapped for ".pyi". A blanket
        # str.replace would also rewrite ".py" inside directory names
        # (e.g. "my.pytools/x.py") and look for a stub at a bogus path.
        stub_filename = filename + "i" if filename.endswith(".py") else None
        if stub_filename is not None and Path(stub_filename).exists():
            filenames[stub_filename] = True
        else:
            filenames[filename] = True

    lint_messages = check_files(list(filenames), args.config, args.retries)
    for lint_message in lint_messages:
        print(json.dumps(lint_message._asdict()), flush=True)
if __name__ == "__main__":
    # Script entry point: load the YAML file named on the command line, dump
    # it back through ruamel, and report a lint message (one JSON object on
    # stdout) when loading fails or the round trip changes the text.
    parser = argparse.ArgumentParser(
        description="native functions linter",
        fromfile_prefix_chars="@",
    )
    parser.add_argument(
        "--native-functions-yml",
        required=True,
        help="location of native_functions.yaml",
    )

    args = parser.parse_args()

    # Explicit encoding so the comparison below does not depend on the locale.
    with open(args.native_functions_yml, encoding="utf-8") as f:
        contents = f.read()

    yaml = ruamel.yaml.YAML()  # type: ignore[attr-defined]
    yaml.preserve_quotes = True  # type: ignore[assignment]
    yaml.width = 1000  # type: ignore[assignment]
    yaml.boolean_representation = ["False", "True"]  # type: ignore[attr-defined]
    try:
        r = yaml.load(contents)
    except Exception as err:
        msg = LintMessage(
            path=None,
            line=None,
            char=None,
            code="NATIVEFUNCTIONS",
            severity=LintSeverity.ERROR,
            name="YAML load failure",
            original=None,
            replacement=None,
            description=f"Failed due to {err.__class__.__name__}:\n{err}",
        )

        print(json.dumps(msg._asdict()), flush=True)
        # The failure has been reported through the JSON message above, so the
        # process itself exits with status 0.
        sys.exit(0)

    # Cuz ruamel's author intentionally didn't include conversion to string
    # https://stackoverflow.com/questions/47614862/best-way-to-use-ruamel-yaml-to-dump-to-string-not-to-stream
    with StringIO() as string_stream:
        yaml.dump(r, string_stream)
        new_contents = string_stream.getvalue()

    if contents != new_contents:
        msg = LintMessage(
            path=args.native_functions_yml,
            line=None,
            char=None,
            code="NATIVEFUNCTIONS",
            severity=LintSeverity.ERROR,
            name="roundtrip inconsistency",
            original=contents,
            replacement=new_contents,
            description=(
                "YAML roundtrip failed; run `lintrunner --take NATIVEFUNCTIONS -a` to apply the suggested changes. "
                "If you think this is in error, please see scripts/linters/nativefunctions_linter.py"
            ),
        )

        print(json.dumps(msg._asdict()), flush=True)
+ return None + + if len(lines) == 1 and len(lines[0]) == 1: + # file is wrong whether or not the only byte is a newline + return LintMessage( + path=filename, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="testestTrailing newline", + original=None, + replacement=None, + description="Trailing newline found. Run `lintrunner --take NEWLINE -a` to apply changes.", + ) + + if len(lines[-1]) == 1 and lines[-1][0] == NEWLINE: + try: + original = b"".join(lines).decode("utf-8") + except Exception as err: + return LintMessage( + path=filename, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="Decoding failure", + original=None, + replacement=None, + description=f"utf-8 decoding failed due to {err.__class__.__name__}:\n{err}", + ) + + return LintMessage( + path=filename, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="Trailing newline", + original=original, + replacement=original.rstrip("\n") + "\n", + description="Trailing newline found. 
Run `lintrunner --take NEWLINE -a` to apply changes.", + ) + has_changes = False + original_lines: Optional[List[bytes]] = None + for idx, line in enumerate(lines): + if len(line) >= 2 and line[-1] == NEWLINE and line[-2] == CARRIAGE_RETURN: + if not has_changes: + original_lines = list(lines) + has_changes = True + lines[idx] = line[:-2] + b"\n" + + if has_changes: + try: + assert original_lines is not None + original = b"".join(original_lines).decode("utf-8") + replacement = b"".join(lines).decode("utf-8") + except Exception as err: + return LintMessage( + path=filename, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="Decoding failure", + original=None, + replacement=None, + description=f"utf-8 decoding failed due to {err.__class__.__name__}:\n{err}", + ) + return LintMessage( + path=filename, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="DOS newline", + original=original, + replacement=replacement, + description="DOS newline found. 
Run `lintrunner --take NEWLINE -a` to apply changes.", + ) + + return None + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="native functions linter", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="location of native_functions.yaml", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + lint_messages = [] + for filename in args.filenames: + lint_message = check_file(filename) + if lint_message is not None: + lint_messages.append(lint_message) + + for lint_message in lint_messages: + print(json.dumps(lint_message._asdict()), flush=True) diff --git a/scripts/linters/pip_init.py b/scripts/linters/pip_init.py new file mode 100644 index 00000000..f177a920 --- /dev/null +++ b/scripts/linters/pip_init.py @@ -0,0 +1,83 @@ +""" +Initializer script that installs stuff to pip. +""" +import argparse +import logging +import os +import subprocess +import sys +import time + +from typing import List + + +def run_command(args: List[str]) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run(args, check=True) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="pip initializer") + parser.add_argument( + "packages", + nargs="+", + help="pip packages to install", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "--dry-run", help="do not install anything, just print what would be done." 
+ ) + parser.add_argument( + "--no-black-binary", + help="do not use pre-compiled binaries from pip for black.", + action="store_true", + ) + + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET if args.verbose else logging.DEBUG, + stream=sys.stderr, + ) + + pip_args = ["pip3", "install"] + + # If we are in a global install, use `--user` to install so that you do not + # need root access in order to initialize linters. + # + # However, `pip install --user` interacts poorly with virtualenvs (see: + # https://bit.ly/3vD4kvl) and conda (see: https://bit.ly/3KG7ZfU). So in + # these cases perform a regular installation. + in_conda = os.environ.get("CONDA_PREFIX") is not None + in_virtualenv = os.environ.get("VIRTUAL_ENV") is not None + if not in_conda and not in_virtualenv: + pip_args.append("--user") + + pip_args.extend(args.packages) + + for package in args.packages: + package_name, _, version = package.partition("=") + if version == "": + raise RuntimeError( + "Package {package_name} did not have a version specified. " + "Please specify a version to produce a consistent linting experience." + ) + if args.no_black_binary and "black" in package_name: + pip_args.append(f"--no-binary={package_name}") + + dry_run = args.dry_run == "1" + if dry_run: + print(f"Would have run: {pip_args}") + sys.exit(0) + + run_command(pip_args) diff --git a/scripts/linters/s3_init.py b/scripts/linters/s3_init.py new file mode 100644 index 00000000..65fcef4b --- /dev/null +++ b/scripts/linters/s3_init.py @@ -0,0 +1,213 @@ +import argparse +import hashlib +import json +import logging +import os +import platform +import stat +import subprocess +import sys +import textwrap +import urllib.error +import urllib.request +from pathlib import Path + +# String representing the host platform (e.g. Linux, Darwin). 
HOST_PLATFORM = platform.system()

# Repository root (the constant keeps its historical PYTORCH_ROOT name).
try:
    result = subprocess.run(
        ["git", "rev-parse", "--show-toplevel"],
        stdout=subprocess.PIPE,
        check=True,
    )
    PYTORCH_ROOT = result.stdout.decode("utf-8").strip()
except (subprocess.CalledProcessError, OSError):
    # CalledProcessError: git ran but failed (e.g. not inside a checkout).
    # OSError (FileNotFoundError): git could not be started at all, e.g. it
    # is not installed -- this case used to escape and crash the script.
    #
    # Fall back to deriving the root from this file's location. The script
    # lives at <root>/scripts/linters/s3_init.py, so three dirname() steps
    # (file -> linters -> scripts -> root) reach the repository root.
    path_ = os.path.abspath(__file__)
    for _ in range(3):
        path_ = os.path.dirname(path_)
    PYTORCH_ROOT = path_

# Module-wide dry-run switch; the script entry point overrides it from
# the --dry-run command line option.
DRY_RUN = False
if __name__ == "__main__":
    # Entry point: look up the requested linter binary for this platform in
    # the JSON config, then download it (or verify an existing copy).
    parser = argparse.ArgumentParser(
        description="downloads and checks binaries from s3",
    )
    parser.add_argument(
        "--config-json",
        required=True,
        help="Path to config json that describes where to find binaries and hashes",
    )
    parser.add_argument(
        "--linter",
        required=True,
        help="Which linter to initialize from the config json",
    )
    parser.add_argument(
        "--output-dir",
        required=True,
        help="place to put the binary",
    )
    parser.add_argument(
        "--output-name",
        required=True,
        help="name of binary",
    )
    parser.add_argument(
        "--dry-run",
        default=False,
        help="do not download, just print what would be done",
    )

    args = parser.parse_args()
    # Dry-run is off when the option is omitted (default False) or given as
    # "0"; any other explicit value turns it on. Previously the omitted case
    # fell through to the "else" branch and silently enabled dry-run, which
    # contradicted the option's default=False.
    DRY_RUN = args.dry_run not in (False, "0")

    logging.basicConfig(
        format="[DRY_RUN] %(levelname)s: %(message)s"
        if DRY_RUN
        else "%(levelname)s: %(message)s",
        level=logging.INFO,
        stream=sys.stderr,
    )

    # Use a context manager so the config file handle is closed promptly.
    with open(args.config_json) as config_file:
        config = json.load(config_file)
    config = config[args.linter]

    # If the host platform has no entry for this linter, it is unsupported.
    if HOST_PLATFORM not in config:
        logging.error(f"Unsupported platform: {HOST_PLATFORM}")
        sys.exit(1)

    url = config[HOST_PLATFORM]["download_url"]
    # Named reference_hash rather than hash to avoid shadowing the builtin.
    reference_hash = config[HOST_PLATFORM]["hash"]

    ok = download(args.output_name, args.output_dir, url, reference_hash)
    if not ok:
        logging.critical(f"Unable to initialize {args.linter}")
        sys.exit(1)
+ ], + "clang-format": { + "Darwin": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/mac/clang-format-mojave", + "hash": "1485a242a96c737ba7cdd9f259114f2201accdb46d87ac7a8650b1a814cd4d4d" + }, + "Linux": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-format-linux64", + "hash": "e1c8b97b919541a99e0a355df5c3f9e8abebc64259dbee6f8c68e1ef90582856" + } + }, + "clang-tidy": { + "Darwin": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos/clang-tidy", + "hash": "541797a7b8fa795e2f3c1adcd8236cc336a40aa927028dc5bc79172e1d9eca36" + }, + "Linux": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy", + "hash": "49343a448fcb75cd1e0fb9d6b1f6c2ef4b008b6f91d6ff899d4ac6060f5e52a5" + } + }, + "actionlint": { + "Darwin": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/actionlint/1.6.15/Darwin_amd64/actionlint", + "hash": "e9a0e0b17e54cfefe7964b6aa1da8921b1f8f2318c31c0eb1a17ea3e8ab10db2" + }, + "Linux": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/actionlint/1.6.15/Linux_arm64/actionlint", + "hash": "d6b45ae67f29a2bf9ddd226071ddd8f158fdf2992e8515a06838e5fef90f3a2d" + } + } +} diff --git a/scripts/linters/shellcheck_linter.py b/scripts/linters/shellcheck_linter.py new file mode 100644 index 00000000..025595d3 --- /dev/null +++ b/scripts/linters/shellcheck_linter.py @@ -0,0 +1,118 @@ +import argparse +import json +import logging +import shutil +import subprocess +import time +from enum import Enum +from typing import List, NamedTuple, Optional + + +LINTER_CODE = "SHELLCHECK" + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + 
def check_files(
    files: List[str],
) -> List[LintMessage]:
    """Run shellcheck over ``files`` and convert its findings to lint messages.

    Args:
        files: paths handed to a single ``shellcheck`` invocation.

    Returns:
        One ``LintMessage`` per shellcheck comment. If shellcheck cannot be
        started, or its output cannot be parsed as the expected JSON
        document, a single ``command-failed`` message is returned instead of
        letting the exception escape.
    """

    def command_failed(description: str) -> List[LintMessage]:
        # A single error message that is not tied to any particular file.
        return [
            LintMessage(
                path=None,
                line=None,
                char=None,
                code=LINTER_CODE,
                severity=LintSeverity.ERROR,
                name="command-failed",
                original=None,
                replacement=None,
                description=description,
            )
        ]

    try:
        proc = run_command(
            ["shellcheck", "--external-sources", "--format=json1"] + files
        )
    except OSError as err:
        return command_failed(f"Failed due to {err.__class__.__name__}:\n{err}")

    try:
        stdout = str(proc.stdout, "utf-8").strip()
        results = json.loads(stdout)["comments"]
    except (ValueError, KeyError, TypeError) as err:
        # ValueError covers both invalid JSON and invalid UTF-8; KeyError and
        # TypeError cover JSON that lacks the "comments" object member.
        # Include stderr, since that is where shellcheck explains itself.
        stderr = proc.stderr.decode("utf-8", errors="replace").strip()
        return command_failed(
            "Could not parse shellcheck output "
            f"({err.__class__.__name__}: {err})\n\nstderr:\n{stderr}"
        )

    return [
        LintMessage(
            path=result["file"],
            name=f"SC{result['code']}",
            description=result["message"],
            line=result["line"],
            char=result["column"],
            code=LINTER_CODE,
            severity=LintSeverity.ERROR,
            original=None,
            replacement=None,
        )
        for result in results
    ]
def get_pytorch_labels() -> Any:
    """Download and parse the published list of PyTorch labels.

    Returns:
        The decoded JSON document served at the labels URL.

    Raises:
        urllib.error.URLError: if the document cannot be fetched.
        ValueError: if the response is not valid UTF-8 encoded JSON.
    """
    # The response is used as a context manager so the underlying connection
    # is closed as soon as the body has been read.
    with urlopen(
        "https://ossci-metrics.s3.amazonaws.com/pytorch_labels.json"
    ) as response:
        labels = response.read().decode("utf-8")
    return json.loads(labels)
def check_file(filename: str) -> List[LintMessage]:
    """Validate the ownership header(s) of one test file.

    Every line starting with ``OWNERS_PREFIX`` is parsed as a JSON list of
    label strings and passed to ``check_labels``.

    Args:
        filename: path of the test file to inspect.

    Returns:
        All lint messages for the file: label problems, a malformed-header
        message for each owner line that is not a JSON list of strings, and
        a ``[no-owner-info]`` message when no owner line exists at all.
    """
    lint_messages = []
    has_ownership_info = False

    with open(filename) as f:
        for line_number, line in enumerate(f, start=1):
            if not line.startswith(OWNERS_PREFIX):
                continue

            has_ownership_info = True

            # A malformed header used to raise out of json.loads and crash
            # the linter; report it against the offending line instead.
            problem = None
            try:
                labels = json.loads(line[len(OWNERS_PREFIX) :])
            except ValueError as err:
                problem = f"{err.__class__.__name__}: {err}"
            else:
                if not isinstance(labels, list) or not all(
                    isinstance(label, str) for label in labels
                ):
                    problem = "expected a JSON list of strings"

            if problem is not None:
                lint_messages.append(
                    LintMessage(
                        path=filename,
                        line=line_number,
                        char=None,
                        code=LINTER_CODE,
                        severity=LintSeverity.ERROR,
                        name="[malformed-owner-header]",
                        original=None,
                        replacement=None,
                        description=(
                            f"Could not parse the owner list ({problem}). "
                            f'The header must look like {OWNERS_PREFIX}["module: foo"]'
                        ),
                    )
                )
                continue

            lint_messages.extend(check_labels(labels, filename, line_number))

    if not has_ownership_info:
        lint_messages.append(
            LintMessage(
                path=filename,
                line=None,
                char=None,
                code=LINTER_CODE,
                severity=LintSeverity.ERROR,
                name="[no-owner-info]",
                original=None,
                replacement=None,
                description="Missing a comment header with ownership information.",
            )
        )

    return lint_messages
def check_file(
    filename: str,
) -> List[LintMessage]:
    """Format one file with ufmt (usort + black) and report any difference.

    Args:
        filename: path of the Python file to check.

    Returns:
        An empty list when the file is already formatted; otherwise a single
        ``format`` warning carrying the original and reformatted text. Any
        failure, including failing to read or decode the file, is returned
        as a ``command-failed`` message rather than raised.
    """
    try:
        # Reading happens inside the try block so that an unreadable or
        # non-UTF-8 file is reported for that file only, instead of aborting
        # the whole run.
        with open(filename, "rb") as f:
            original = f.read().decode("utf-8")

        path = Path(filename)

        usort_config = UsortConfig.find(path)
        black_config = make_black_config(path)

        # Use UFMT API to call both usort and black
        replacement = ufmt_string(
            path=path,
            content=original,
            usort_config=usort_config,
            black_config=black_config,
        )

        if original == replacement:
            return []

        return [
            LintMessage(
                path=filename,
                line=None,
                char=None,
                code="UFMT",
                severity=LintSeverity.WARNING,
                name="format",
                original=original,
                replacement=replacement,
                description="Run `lintrunner -a` to apply this patch.",
            )
        ]
    except Exception as err:
        return [format_error_message(filename, err)]
def compute_file_sha256(path: str) -> str:
    """Compute the SHA256 hash of a file and return it as a hex string.

    Args:
        path: location of the file to hash.

    Returns:
        The hexadecimal SHA256 digest of the file's bytes, or an empty
        string if ``path`` does not exist.
    """
    # If the file doesn't exist, return an empty string.
    if not os.path.exists(path):
        return ""

    digest = hashlib.sha256()

    # Read fixed-size chunks. Iterating a binary file object yields
    # newline-delimited "lines", which for a binary with few newline bytes
    # can pull most of the file into memory at once.
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)

    # Return the hash as a hexadecimal string.
    return digest.hexdigest()
def print_lint_message(path: Path, job: Dict[str, Any], sync_tag: str) -> None:
    """Print a WORKFLOWSYNC lint message for a job that is out of sync.

    Args:
        path: workflow file that defines the job.
        job: single-entry mapping of job id to job definition.
        sync_tag: the sync-tag shared by the jobs that were compared.

    The message is written to stdout as one JSON object. Its ``line`` is the
    last line of ``path`` that contains ``"<job id>:"``, or ``None`` when no
    line does.
    """
    job_id = next(iter(job))

    # Start from None so that a job id that cannot be found in the file text
    # yields a message without a line number; previously the variable was
    # left unbound in that case and building the message raised.
    line_number: Optional[int] = None
    with open(path) as f:
        for i, line in enumerate(f, start=1):
            if f"{job_id}:" in line:
                line_number = i

    lint_message = LintMessage(
        path=str(path),
        line=line_number,
        char=None,
        code="WORKFLOWSYNC",
        severity=LintSeverity.ERROR,
        name="workflow-inconsistency",
        original=None,
        replacement=None,
        description=f"Job doesn't match other jobs with sync-tag: '{sync_tag}'",
    )
    print(json.dumps(lint_message._asdict()), flush=True)