From b3c2ab707ff28f3cdfd1940d6957cbf759ee3d5a Mon Sep 17 00:00:00 2001 From: Tim Pillinger Date: Sat, 11 Jan 2025 11:24:06 +0000 Subject: [PATCH 1/4] Devise a tidy way to hold more state for Cylc lint fix linting --- cylc/flow/scripts/lint.py | 106 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 5 deletions(-) diff --git a/cylc/flow/scripts/lint.py b/cylc/flow/scripts/lint.py index 88f9a0ed5b..04bbd44625 100755 --- a/cylc/flow/scripts/lint.py +++ b/cylc/flow/scripts/lint.py @@ -49,6 +49,8 @@ rulesets = ['style', '728'] # Sets default rulesets to check max-line-length = 130 # Max line length for linting """ + +from dataclasses import dataclass import functools import pkgutil import re @@ -1251,6 +1253,92 @@ def no_qa(line: str, index: str): return False +@dataclass +class LinterState: + """A place to keep linter state""" + TRIPLE_QUOTES = re.compile(r'\'{3}|\"{3}') + JINJA2_START = re.compile(r'{%') + JINJA2_END = re.compile(r'%}') + NEW_SECTION_START = re.compile(r'^[^\[]*\[[^\[]') + is_metadata_section: bool = False + is_multiline_chunk: bool = False + is_jinja2_block: bool = False + jinja2_shebang: bool = False + line_no: int = 1 + + def skip_line(self, line): + """Is this a line we should skip, according to state we are holding + and the line content? + + TODO: Testme + """ + return any(( + self.skip_metatadata_desc(line), + self.skip_jinja2_block(line) + )) + + def skip_metatadata_desc(self, line): + """Should we skip this line because it's part of a metadata multiline + description section. + + TODO: Testme + """ + if '[meta]' in line: + self.is_metadata_section = True + elif self.is_metadata_section and self.is_end_of_meta_section(line): + self.is_metadata_section = False + + if self.is_metadata_section: + if self.TRIPLE_QUOTES.findall(line): + self.is_multiline_chunk = not self.is_multiline_chunk + if self.is_multiline_chunk: + return True + + return False + + def skip_jinja2_block(self, line): + """Is this line part of a jinja2 block? + + TODO: Testme + """ + if self.jinja2_shebang: + if ( + self.JINJA2_START.findall(line) + and not self.JINJA2_END.findall(line) + ): + self.is_jinja2_block = True + elif self.is_jinja2_block and self.JINJA2_END.findall(line): + self.is_jinja2_block = False + return True + + return self.is_jinja2_block + + @staticmethod + def is_end_of_meta_section(line): + """Best tests I can think of for end of metadata section. + + Examples: + >>> this = LinterState.is_end_of_meta_section + >>> this('[scheduler]') # Likely right answer + True + >>> this('[garbage]') # Unreasonable, not worth guarding against + True + >>> this('') + False + >>> this(' ') + False + >>> this('{{NAME}}') + False + >>> this(' [[custom metadata subsection]]') + False + >>> this('[[custom metadata subsection]]') + False + >>> this('arbitrary crud') + False + """ + return line and LinterState.NEW_SECTION_START.findall(line) + + def lint( file_rel: Path, lines: Iterator[str], @@ -1280,13 +1368,21 @@ def lint( The original file with added comments when `modify is True`. """ + state = LinterState() + # get the first line - line_no = 1 line = next(lines) + # A few bits of state # check if it is a jinja2 shebang - jinja_shebang = line.strip().lower() == JINJA2_SHEBANG + state.jinja2_shebang = line.strip().lower() == JINJA2_SHEBANG while True: + # Don't check extended text in metadata section. + if state.skip_line(line): + line = next(lines) + state.line_no += 1 + continue + # run lint checks against the current line for index, check_meta in checks.items(): # Skip commented line unless check says not to. @@ -1306,7 +1402,7 @@ def lint( check_meta['function'], check_meta=check_meta, file=file_rel, - jinja_shebang=jinja_shebang, + jinja_shebang=state.jinja2_shebang, ) else: # Just going to pass the line to the check function: @@ -1335,7 +1431,7 @@ def lint( # write a message to inform the user write(cparse( '' - f'[{index_str}] {file_rel}:{line_no}: {msg}' + f'[{index_str}] {file_rel}:{state.line_no}: {msg}' '' )) if modify: @@ -1347,7 +1443,7 @@ def lint( except StopIteration: # end of interator return - line_no += 1 + state.line_no += 1 def get_cylc_files( From b784135e0f1f847466d7241c90933e2904e58e6a Mon Sep 17 00:00:00 2001 From: Tim Pillinger <26465611+wxtim@users.noreply.github.com> Date: Mon, 13 Jan 2025 09:05:52 +0000 Subject: [PATCH 2/4] move LinterState to it's own directory --- cylc/flow/lint/state.py | 110 ++++++++++++++++++++++++++++++++++++++ cylc/flow/scripts/lint.py | 88 +----------------------------- 2 files changed, 111 insertions(+), 87 deletions(-) create mode 100644 cylc/flow/lint/state.py diff --git a/cylc/flow/lint/state.py b/cylc/flow/lint/state.py new file mode 100644 index 0000000000..fb9d08c240 --- /dev/null +++ b/cylc/flow/lint/state.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +"""Cylc Linter state store object. +""" + +from dataclasses import dataclass +import re + + +@dataclass +class LinterState: + """A place to keep linter state""" + TRIPLE_QUOTES = re.compile(r'\'{3}|\"{3}') + JINJA2_START = re.compile(r'{%') + JINJA2_END = re.compile(r'%}') + NEW_SECTION_START = re.compile(r'^[^\[]*\[[^\[]') + is_metadata_section: bool = False + is_multiline_chunk: bool = False + is_jinja2_block: bool = False + jinja2_shebang: bool = False + line_no: int = 1 + + def skip_line(self, line): + """Is this a line we should skip, according to state we are holding + and the line content? + + TODO: Testme + """ + return any(( + self.skip_metatadata_desc(line), + self.skip_jinja2_block(line) + )) + + def skip_metatadata_desc(self, line): + """Should we skip this line because it's part of a metadata multiline + description section. + + TODO: Testme + """ + if '[meta]' in line: + self.is_metadata_section = True + elif self.is_metadata_section and self.is_end_of_meta_section(line): + self.is_metadata_section = False + + if self.is_metadata_section: + if self.TRIPLE_QUOTES.findall(line): + self.is_multiline_chunk = not self.is_multiline_chunk + if self.is_multiline_chunk: + return True + + return False + + def skip_jinja2_block(self, line): + """Is this line part of a jinja2 block? + + TODO: Testme + """ + if self.jinja2_shebang: + if ( + self.JINJA2_START.findall(line) + and not self.JINJA2_END.findall(line) + ): + self.is_jinja2_block = True + elif self.is_jinja2_block and self.JINJA2_END.findall(line): + self.is_jinja2_block = False + return True + + return self.is_jinja2_block + + @staticmethod + def is_end_of_meta_section(line): + """Best tests I can think of for end of metadata section. + + Exists as separate function to improve documentation of what we + look for as the end of the meta section. + + Examples: + >>> this = LinterState.is_end_of_meta_section + >>> this('[scheduler]') # Likely right answer + True + >>> this('[garbage]') # Unreasonable, not worth guarding against + True + >>> this('') + False + >>> this(' ') + False + >>> this('{{NAME}}') + False + >>> this(' [[custom metadata subsection]]') + False + >>> this('[[custom metadata subsection]]') + False + >>> this('arbitrary crud') + False + """ + return bool(line and LinterState.NEW_SECTION_START.findall(line)) diff --git a/cylc/flow/scripts/lint.py b/cylc/flow/scripts/lint.py index 04bbd44625..2b22be4e5e 100755 --- a/cylc/flow/scripts/lint.py +++ b/cylc/flow/scripts/lint.py @@ -50,7 +50,6 @@ max-line-length = 130 # Max line length for linting """ -from dataclasses import dataclass import functools import pkgutil import re @@ -91,6 +90,7 @@ from cylc.flow.exceptions import CylcError from cylc.flow.id_cli import parse_id from cylc.flow.job_runner_mgr import JobRunnerManager +from cylc.flow.lint.state import LinterState from cylc.flow.loggingutil import set_timestamps from cylc.flow.option_parsers import ( WORKFLOW_ID_OR_PATH_ARG_DOC, @@ -1253,92 +1253,6 @@ def no_qa(line: str, index: str): return False -@dataclass -class LinterState: - """A place to keep linter state""" - TRIPLE_QUOTES = re.compile(r'\'{3}|\"{3}') - JINJA2_START = re.compile(r'{%') - JINJA2_END = re.compile(r'%}') - NEW_SECTION_START = re.compile(r'^[^\[]*\[[^\[]') - is_metadata_section: bool = False - is_multiline_chunk: bool = False - is_jinja2_block: bool = False - jinja2_shebang: bool = False - line_no: int = 1 - - def skip_line(self, line): - """Is this a line we should skip, according to state we are holding - and the line content? - - TODO: Testme - """ - return any(( - self.skip_metatadata_desc(line), - self.skip_jinja2_block(line) - )) - - def skip_metatadata_desc(self, line): - """Should we skip this line because it's part of a metadata multiline - description section. - - TODO: Testme - """ - if '[meta]' in line: - self.is_metadata_section = True - elif self.is_metadata_section and self.is_end_of_meta_section(line): - self.is_metadata_section = False - - if self.is_metadata_section: - if self.TRIPLE_QUOTES.findall(line): - self.is_multiline_chunk = not self.is_multiline_chunk - if self.is_multiline_chunk: - return True - - return False - - def skip_jinja2_block(self, line): - """Is this line part of a jinja2 block? - - TODO: Testme - """ - if self.jinja2_shebang: - if ( - self.JINJA2_START.findall(line) - and not self.JINJA2_END.findall(line) - ): - self.is_jinja2_block = True - elif self.is_jinja2_block and self.JINJA2_END.findall(line): - self.is_jinja2_block = False - return True - - return self.is_jinja2_block - - @staticmethod - def is_end_of_meta_section(line): - """Best tests I can think of for end of metadata section. - - Examples: - >>> this = LinterState.is_end_of_meta_section - >>> this('[scheduler]') # Likely right answer - True - >>> this('[garbage]') # Unreasonable, not worth guarding against - True - >>> this('') - False - >>> this(' ') - False - >>> this('{{NAME}}') - False - >>> this(' [[custom metadata subsection]]') - False - >>> this('[[custom metadata subsection]]') - False - >>> this('arbitrary crud') - False - """ - return line and LinterState.NEW_SECTION_START.findall(line) - - def lint( file_rel: Path, lines: Iterator[str], From 7437cfab4662f255d735eab540f58ca3cf3cbfa8 Mon Sep 17 00:00:00 2001 From: Tim Pillinger <26465611+wxtim@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:58:03 +0000 Subject: [PATCH 3/4] added tests for linterstate object --- changes.d/6552.fix.md | 1 + cylc/flow/lint/state.py | 4 +- tests/unit/lint/test_state.py | 128 ++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 changes.d/6552.fix.md create mode 100644 tests/unit/lint/test_state.py diff --git a/changes.d/6552.fix.md b/changes.d/6552.fix.md new file mode 100644 index 0000000000..a05464a909 --- /dev/null +++ b/changes.d/6552.fix.md @@ -0,0 +1 @@ +Fixes bugs where `cylc lint` would report problems with multiline strings or jinja2. \ No newline at end of file diff --git a/cylc/flow/lint/state.py b/cylc/flow/lint/state.py index fb9d08c240..a85dd63da5 100644 --- a/cylc/flow/lint/state.py +++ b/cylc/flow/lint/state.py @@ -57,7 +57,9 @@ def skip_metatadata_desc(self, line): self.is_metadata_section = False if self.is_metadata_section: - if self.TRIPLE_QUOTES.findall(line): + # Start quoted section if there is an odd number of triple quotes + # i.e. don't match """stuff""". + if len(self.TRIPLE_QUOTES.findall(line)) % 2 == 1: self.is_multiline_chunk = not self.is_multiline_chunk if self.is_multiline_chunk: return True diff --git a/tests/unit/lint/test_state.py b/tests/unit/lint/test_state.py new file mode 100644 index 0000000000..64c52660dd --- /dev/null +++ b/tests/unit/lint/test_state.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +"""Test Cylc Linter state store object. +""" + +import pytest + +from cylc.flow.lint.state import LinterState + + +param = pytest.param + + +@pytest.mark.parametrize( + 'is_metadata_section, is_multiline_chunk, line,' + 'after_is_metadata_section, after_is_multiline_chunk, returns', + ( + param( + False, False, '[meta]', True, False, False, + id='start-meta-section' + ), + param( + True, False, '[garbage]', False, False, False, + id='end-meta-section' + ), + param( + True, False, '"""', True, True, True, + id='start-quoted-section' + ), + param( + True, True, '"""', True, False, False, + id='stop-quoted-section' + ), + param( + True, False, '"""Some Stuff"""', True, False, False, + id='dont-start-quoted-section' + ), + param( + True, True, 'defintly rubish', True, True, True, + id='should-be-ignored' + ), + ) +) +def test_skip_metadata_desc( + is_metadata_section, + is_multiline_chunk, + line, + after_is_metadata_section, + after_is_multiline_chunk, + returns +): + state = LinterState() + state.is_metadata_section = is_metadata_section + state.is_multiline_chunk = is_multiline_chunk + + assert state.skip_metatadata_desc(line) == returns + assert state.is_metadata_section == after_is_metadata_section + assert state.is_multiline_chunk == after_is_multiline_chunk + + +@pytest.mark.parametrize( + 'is_j2_block_before, line, is_j2_block_after, returns', + ( + param( + False, '{%', True, True, + id='block-starts' + ), + param( + False, '{% if something %}', False, False, + id='no-block-starts' + ), + param( + True, 'Anything Goes', True, True, + id='block-content' + ), + param( + True, '%}', False, True, + id='block-end' + ), + ) +) +def test_skip_jinja2_block( + is_j2_block_before, + line, + is_j2_block_after, + returns +): + state = LinterState() + state.jinja2_shebang = True + state.is_jinja2_block = is_j2_block_before + + assert state.skip_jinja2_block(line) == returns + assert state.is_jinja2_block == is_j2_block_after + + +@pytest.mark.parametrize( + 'line, expect', + ( + ('', False), + ('key=value', False), + ('# Comment', False), + ('Garbage', False), + (' indented', False), + ('[section]', True), + (' [section]', True), + ('[[subsection]]', False), + (' [[subsection]]', False), + ) +) +def test_NEW_SECTION_START(line, expect): + """It correctly identifies a new section, and doesn't + identify subsections.""" + exp = LinterState.NEW_SECTION_START + assert bool(exp.findall(line)) == expect From d385f67c712b02b9deb31c898d9b63ea1b1653ee Mon Sep 17 00:00:00 2001 From: Tim Pillinger Date: Tue, 21 Jan 2025 13:47:38 +0000 Subject: [PATCH 4/4] fix coverage --- tests/unit/scripts/test_lint.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/unit/scripts/test_lint.py b/tests/unit/scripts/test_lint.py index 42623bf6f3..1f0f32561c 100644 --- a/tests/unit/scripts/test_lint.py +++ b/tests/unit/scripts/test_lint.py @@ -268,6 +268,15 @@ def test_check_cylc_file_7to8_has_shebang(): assert not lint.counter +def test_check_ignores_jinja2_middle(): + """Only the badly indented section outside the jinja2 block should + return a lint.""" + lint = lint_text( + '#!jinja2\n{%\n [scheduler]\n%}\n [scheduler]', ['style'] + ) + len(lint.messages) == 1 + + def test_check_cylc_file_line_no(): """It prints the correct line numbers""" lint = lint_text(TEST_FILE, ['728'])