diff --git a/changes.d/6552.fix.md b/changes.d/6552.fix.md new file mode 100644 index 00000000000..a05464a9091 --- /dev/null +++ b/changes.d/6552.fix.md @@ -0,0 +1 @@ +Fixes bugs where `cylc lint` would report problems with multiline strings or jinja2. \ No newline at end of file diff --git a/cylc/flow/lint/state.py b/cylc/flow/lint/state.py new file mode 100644 index 00000000000..a85dd63da52 --- /dev/null +++ b/cylc/flow/lint/state.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +"""Cylc Linter state store object. +""" + +from dataclasses import dataclass +import re + + +@dataclass +class LinterState: + """A place to keep linter state""" + TRIPLE_QUOTES = re.compile(r'\'{3}|\"{3}') + JINJA2_START = re.compile(r'{%') + JINJA2_END = re.compile(r'%}') + NEW_SECTION_START = re.compile(r'^[^\[]*\[[^\[]') + is_metadata_section: bool = False + is_multiline_chunk: bool = False + is_jinja2_block: bool = False + jinja2_shebang: bool = False + line_no: int = 1 + + def skip_line(self, line): + """Is this a line we should skip, according to state we are holding + and the line content? + + TODO: Testme + """ + return any(( + self.skip_metatadata_desc(line), + self.skip_jinja2_block(line) + )) + + def skip_metatadata_desc(self, line): + """Should we skip this line because it's part of a metadata multiline + description section. + + TODO: Testme + """ + if '[meta]' in line: + self.is_metadata_section = True + elif self.is_metadata_section and self.is_end_of_meta_section(line): + self.is_metadata_section = False + + if self.is_metadata_section: + # Start quoted section if there is an odd number of triple quotes + # i.e. don't match """stuff""". + if len(self.TRIPLE_QUOTES.findall(line)) % 2 == 1: + self.is_multiline_chunk = not self.is_multiline_chunk + if self.is_multiline_chunk: + return True + + return False + + def skip_jinja2_block(self, line): + """Is this line part of a jinja2 block? + + TODO: Testme + """ + if self.jinja2_shebang: + if ( + self.JINJA2_START.findall(line) + and not self.JINJA2_END.findall(line) + ): + self.is_jinja2_block = True + elif self.is_jinja2_block and self.JINJA2_END.findall(line): + self.is_jinja2_block = False + return True + + return self.is_jinja2_block + + @staticmethod + def is_end_of_meta_section(line): + """Best tests I can think of for end of metadata section. + + Exists as separate function to improve documentation of what we + look for as the end of the meta section. + + Examples: + >>> this = LinterState.is_end_of_meta_section + >>> this('[scheduler]') # Likely right answer + True + >>> this('[garbage]') # Unreasonable, not worth guarding against + True + >>> this('') + False + >>> this(' ') + False + >>> this('{{NAME}}') + False + >>> this(' [[custom metadata subsection]]') + False + >>> this('[[custom metadata subsection]]') + False + >>> this('arbitrary crud') + False + """ + return bool(line and LinterState.NEW_SECTION_START.findall(line)) diff --git a/cylc/flow/scripts/lint.py b/cylc/flow/scripts/lint.py index 88f9a0ed5b8..2b22be4e5e7 100755 --- a/cylc/flow/scripts/lint.py +++ b/cylc/flow/scripts/lint.py @@ -49,6 +49,7 @@ rulesets = ['style', '728'] # Sets default rulesets to check max-line-length = 130 # Max line length for linting """ + import functools import pkgutil import re @@ -89,6 +90,7 @@ from cylc.flow.exceptions import CylcError from cylc.flow.id_cli import parse_id from cylc.flow.job_runner_mgr import JobRunnerManager +from cylc.flow.lint.state import LinterState from cylc.flow.loggingutil import set_timestamps from cylc.flow.option_parsers import ( WORKFLOW_ID_OR_PATH_ARG_DOC, @@ -1280,13 +1282,21 @@ def lint( The original file with added comments when `modify is True`. """ + state = LinterState() + # get the first line - line_no = 1 line = next(lines) + # A few bits of state # check if it is a jinja2 shebang - jinja_shebang = line.strip().lower() == JINJA2_SHEBANG + state.jinja2_shebang = line.strip().lower() == JINJA2_SHEBANG while True: + # Don't check extended text in metadata section. + if state.skip_line(line): + line = next(lines) + state.line_no += 1 + continue + # run lint checks against the current line for index, check_meta in checks.items(): # Skip commented line unless check says not to. @@ -1306,7 +1316,7 @@ def lint( check_meta['function'], check_meta=check_meta, file=file_rel, - jinja_shebang=jinja_shebang, + jinja_shebang=state.jinja2_shebang, ) else: # Just going to pass the line to the check function: @@ -1335,7 +1345,7 @@ def lint( # write a message to inform the user write(cparse( '' - f'[{index_str}] {file_rel}:{line_no}: {msg}' + f'[{index_str}] {file_rel}:{state.line_no}: {msg}' '' )) if modify: @@ -1347,7 +1357,7 @@ def lint( except StopIteration: # end of interator return - line_no += 1 + state.line_no += 1 def get_cylc_files( diff --git a/tests/unit/lint/test_state.py b/tests/unit/lint/test_state.py new file mode 100644 index 00000000000..64c52660ddd --- /dev/null +++ b/tests/unit/lint/test_state.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +"""Test Cylc Linter state store object. +""" + +import pytest + +from cylc.flow.lint.state import LinterState + + +param = pytest.param + + +@pytest.mark.parametrize( + 'is_metadata_section, is_multiline_chunk, line,' + 'after_is_metadata_section, after_is_multiline_chunk, returns', + ( + param( + False, False, '[meta]', True, False, False, + id='start-meta-section' + ), + param( + True, False, '[garbage]', False, False, False, + id='end-meta-section' + ), + param( + True, False, '"""', True, True, True, + id='start-quoted-section' + ), + param( + True, True, '"""', True, False, False, + id='stop-quoted-section' + ), + param( + True, False, '"""Some Stuff"""', True, False, False, + id='dont-start-quoted-section' + ), + param( + True, True, 'defintly rubish', True, True, True, + id='should-be-ignored' + ), + ) +) +def test_skip_metadata_desc( + is_metadata_section, + is_multiline_chunk, + line, + after_is_metadata_section, + after_is_multiline_chunk, + returns +): + state = LinterState() + state.is_metadata_section = is_metadata_section + state.is_multiline_chunk = is_multiline_chunk + + assert state.skip_metatadata_desc(line) == returns + assert state.is_metadata_section == after_is_metadata_section + assert state.is_multiline_chunk == after_is_multiline_chunk + + +@pytest.mark.parametrize( + 'is_j2_block_before, line, is_j2_block_after, returns', + ( + param( + False, '{%', True, True, + id='block-starts' + ), + param( + False, '{% if something %}', False, False, + id='no-block-starts' + ), + param( + True, 'Anything Goes', True, True, + id='block-content' + ), + param( + True, '%}', False, True, + id='block-end' + ), + ) +) +def test_skip_jinja2_block( + is_j2_block_before, + line, + is_j2_block_after, + returns +): + state = LinterState() + state.jinja2_shebang = True + state.is_jinja2_block = is_j2_block_before + + assert state.skip_jinja2_block(line) == returns + assert state.is_jinja2_block == is_j2_block_after + + +@pytest.mark.parametrize( + 'line, expect', + ( + ('', False), + ('key=value', False), + ('# Comment', False), + ('Garbage', False), + (' indented', False), + ('[section]', True), + (' [section]', True), + ('[[subsection]]', False), + (' [[subsection]]', False), + ) +) +def test_NEW_SECTION_START(line, expect): + """It correctly identifies a new section, and doesn't + identify subsections.""" + exp = LinterState.NEW_SECTION_START + assert bool(exp.findall(line)) == expect diff --git a/tests/unit/scripts/test_lint.py b/tests/unit/scripts/test_lint.py index 42623bf6f3a..1f0f32561c0 100644 --- a/tests/unit/scripts/test_lint.py +++ b/tests/unit/scripts/test_lint.py @@ -268,6 +268,15 @@ def test_check_cylc_file_7to8_has_shebang(): assert not lint.counter +def test_check_ignores_jinja2_middle(): + """Only the badly indented section outside the jinja2 block should + return a lint.""" + lint = lint_text( + '#!jinja2\n{%\n [scheduler]\n%}\n [scheduler]', ['style'] + ) + len(lint.messages) == 1 + + def test_check_cylc_file_line_no(): """It prints the correct line numbers""" lint = lint_text(TEST_FILE, ['728'])