From 40f5eb4728e54368f88373ddb937f1f28aa60991 Mon Sep 17 00:00:00 2001 From: joncrall Date: Sat, 12 Oct 2024 11:55:22 -0400 Subject: [PATCH] Update util yaml and git url helper --- requirements/runtime.txt | 7 +- xcookie/builders/github_actions.py | 2 +- xcookie/main.py | 97 ++++++---- xcookie/util_yaml.py | 286 ++++++++++++++++++++++++++--- 4 files changed, 334 insertions(+), 58 deletions(-) diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 1d34236..be2e66c 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -22,8 +22,11 @@ black>=22.1.0 ; python_version >= '3.6.2' # Pyt packaging>=21.3 -PyYAML>=6.0 -ruamel.yaml >= 0.17.21 +# PyYAML>=6.0 +# ruamel.yaml >= 0.17.21 +ruamel.yaml>=0.17.22 +PyYAML>=6.0.1 ; python_version < '4.0' and python_version >= '3.12' # Python 3.12+ +PyYAML>=6.0 ; python_version < '3.12' # Python 3.6 - 3.11 argcomplete>=1.0 diff --git a/xcookie/builders/github_actions.py b/xcookie/builders/github_actions.py index a1e9dbe..643f025 100644 --- a/xcookie/builders/github_actions.py +++ b/xcookie/builders/github_actions.py @@ -101,7 +101,7 @@ def codecov_action(cls, *args, **kwargs): https://github.com/codecov/codecov-action """ return cls.action({ - 'uses': 'codecov/codecov-action@v4.0.1' + 'uses': 'codecov/codecov-action@v4.5.0' }, *args, **kwargs) @classmethod diff --git a/xcookie/main.py b/xcookie/main.py index 977f44c..a542db1 100755 --- a/xcookie/main.py +++ b/xcookie/main.py @@ -1560,38 +1560,30 @@ class GitURL(str): Represents a url to a git repo and can parse info about / modify the protocol + References: + https://git-scm.com/docs/git-clone#_git_urls + TODO: can use git-well as a helper here. + CommandLine: + xdoctest -m /home/joncrall/code/xcookie/xcookie/main.py GitURL + xdoctest -m xcookie.main GitURL + Example: - >>> from git_well.git_remote_protocol import * # NOQA - >>> url1 = GitURL('https://foo.bar/user/repo.git') - >>> url2 = GitURL('git@foo.bar:group/repo.git') - >>> print(url1.to_git()) - >>> print(url1.to_https()) - >>> print(url2.to_git()) - >>> print(url2.to_https()) - git@foo.bar:user/repo.git - https://foo.bar/user/repo.git - git@foo.bar:group/repo.git - https://foo.bar/group/repo.git - >>> print(ub.urepr(url1.info)) - >>> print(ub.urepr(url2.info)) - { - 'host': 'foo.bar', - 'group': 'user', - 'repo_name': 'repo.git', - 'user': None, - 'protocol': 'https', - 'url': 'https://foo.bar/user/repo.git', - } - { - 'host': 'foo.bar', - 'group': 'group', - 'repo_name': 'repo.git', - 'user': 'git', - 'protocol': 'git', - 'url': 'git@foo.bar:group/repo.git', - } + >>> urls = [ + >>> GitURL('https://foo.bar/user/repo.git'), + >>> GitURL('ssh://foo.bar/user/repo.git'), + >>> GitURL('ssh://git@foo.bar/user/repo.git'), + >>> GitURL('git@foo.bar:group/repo.git'), + >>> GitURL('host:path/to/my/repo/.git'), + >>> ] + >>> for url in urls: + >>> print('---') + >>> print(f'url = {url}') + >>> print(ub.urepr(url.info)) + >>> print('As git : ' + url.to_git()) + >>> print('As ssh : ' + url.to_ssh()) + >>> print('As https : ' + url.to_https()) """ @@ -1599,24 +1591,56 @@ def __init__(self, data): # note: inheriting from str so data is handled in __new__ self._info = None + def _parse(self): + import parse + parse.Parser('ssh://{user}') + @property def info(self): if self._info is None: url = self info = {} if url.startswith('https://'): - parts = url.split('https://')[1].split('/') + parts = url.split('https://')[1].split('/', 3) info['host'] = parts[0] info['group'] = parts[1] info['repo_name'] = parts[2] + info['user'] = None info['protocol'] = 'https' elif url.startswith('git@'): - url.split('git@')[1] parts = url.split('git@')[1].split(':') info['host'] = parts[0] info['group'] = parts[1].split('/')[0] info['repo_name'] = parts[1].split('/')[1] + info['user'] = 'git' info['protocol'] = 'git' + elif url.startswith('ssh://'): + parts = url.split('ssh://')[1].split('/', 3) + user = None + if '@' in parts[0]: + user, host = parts[0].split('@') + else: + host = parts[0] + info['host'] = host + info['user'] = user + info['group'] = parts[1] + info['repo_name'] = parts[2] + info['protocol'] = 'ssh' + elif url.endswith('/.git'): + # An ssh protocol to an explicit directory + host, rest = url.split(':', 1) + parts = rest.rsplit('/', 2) + info['host'] = host + info['group'] = parts[0] + # info['group'] = '' + info['repo_name'] = parts[1] + '/.git' + info['protocol'] = 'scp' + elif '//' not in url and '@' not in url: + parts = url.split(':') + info['host'] = parts[0] + info['group'] = parts[1].split('/')[0] + info['repo_name'] = parts[1].split('/')[1] + info['protocol'] = 'ssh' else: raise ValueError(url) info['url'] = url @@ -1628,11 +1652,22 @@ def to_git(self): new_url = 'git@' + info['host'] + ':' + info['group'] + '/' + info['repo_name'] return self.__class__(new_url) + def to_ssh(self): + info = self.info + user = info.get('user', None) + if user is None: + user_part = '' + else: + user_part = user + '@' + new_url = 'ssh://' + user_part + info['host'] + '/' + info['group'] + '/' + info['repo_name'] + return self.__class__(new_url) + def to_https(self): info = self.info new_url = 'https://' + info['host'] + '/' + info['group'] + '/' + info['repo_name'] return self.__class__(new_url) + if __name__ == '__main__': """ CommandLine: diff --git a/xcookie/util_yaml.py b/xcookie/util_yaml.py index eb6461d..39ba9e2 100644 --- a/xcookie/util_yaml.py +++ b/xcookie/util_yaml.py @@ -1,11 +1,35 @@ """ -Vendored from kwutil.util_yaml +Wrappers around :mod:`pyyaml` or :mod:`ruamel.yaml`. + + +The important functions to know are: + +* :func:`Yaml.loads` + +* :func:`Yaml.dumps` + +* :func:`Yaml.coerce` + +Loads and Dumps are strightforward. Loads takes a block of text and passes it +through the ruamel.yaml or pyyaml to parse the string. Dumps takes a data +structure and turns it into a YAML string. Roundtripping is supported with the +ruamel.yaml backend. + +Coerce will accept input as a non-string data structure, and simply return it, +a path to a file, or a string which it assumes is YAML text (note: there is a +small ambiguity introduced here). If coerce encounters a string that looks like +an existing path it reads it. This does not happen by default in longer YAML +text inputs, but the parser does respect a !include constructor, which does let +you make nested configs by pointing to other configs. """ import io import os import ubelt as ub +NEW_RUAMEL = 1 + + class _YamlRepresenter: @staticmethod @@ -21,15 +45,18 @@ def str_presenter(dumper, data): @ub.memoize def _custom_ruaml_loader(): """ + old method + References: https://stackoverflow.com/questions/59635900/ruamel-yaml-custom-commentedmapping-for-custom-tags https://stackoverflow.com/questions/528281/how-can-i-include-a-yaml-file-inside-another + https://stackoverflow.com/questions/76870413/using-a-custom-loader-with-ruamel-yaml-0-15-0 """ import ruamel.yaml Loader = ruamel.yaml.RoundTripLoader def _construct_include_tag(self, node): - print(f'node={node}') + # print(f'node={node}') if isinstance(node.value, list): return [Yaml.coerce(v.value) for v in node.value] else: @@ -70,9 +97,106 @@ class Dumper(yaml.Dumper): return Dumper +# @ub.memoize +def _custom_new_ruaml_yaml_obj(): + """ + new method + + References: + https://stackoverflow.com/questions/59635900/ruamel-yaml-custom-commentedmapping-for-custom-tags + https://stackoverflow.com/questions/528281/how-can-i-include-a-yaml-file-inside-another + https://stackoverflow.com/questions/76870413/using-a-custom-loader-with-ruamel-yaml-0-15-0 + + Example: + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) + >>> # Test new load + >>> import io + >>> file = io.StringIO('[a, b, c]') + >>> yaml_obj = _custom_new_ruaml_yaml_obj() + >>> data = yaml_obj.load(file) + >>> print(data) + >>> # Test round trip tump + >>> file = io.StringIO() + >>> yaml_obj.dump(data, file) + >>> print(file.getvalue()) + >>> # + >>> # Test new dump + >>> data2 = ub.udict(a=1, b=2) + >>> file = io.StringIO() + >>> yaml_obj.dump(data2, file) + >>> print(file.getvalue()) + """ + import ruamel.yaml + from collections import Counter, OrderedDict, defaultdict + + # make a new instance, although you could get the YAML + # instance from the constructor argument + class CustomConstructor(ruamel.yaml.constructor.RoundTripConstructor): + ... + + class CustomRepresenter(ruamel.yaml.representer.RoundTripRepresenter): + ... + + CustomRepresenter.add_representer(str, _YamlRepresenter.str_presenter) + CustomRepresenter.add_representer(ub.udict, CustomRepresenter.represent_dict) + CustomRepresenter.add_representer(Counter, CustomRepresenter.represent_dict) + CustomRepresenter.add_representer(OrderedDict, CustomRepresenter.represent_dict) + CustomRepresenter.add_representer(defaultdict, CustomRepresenter.represent_dict) + + def _construct_include_tag(self, node): + print(f'node={node}') + value = node.value + print(f'value={value}') + if isinstance(value, list): + return [Yaml.coerce(v.value) for v in value] + else: + external_fpath = ub.Path(value) + if not external_fpath.exists(): + raise IOError(f'Included external yaml file {external_fpath} ' + 'does not exist') + # Not sure why we can't recurse here... + # yaml_obj + # print(f'yaml_obj={yaml_obj}') + # import xdev + # xdev.embed() + return Yaml.load(value) + # Loader = ruamel.yaml.RoundTripLoader + # Loader.add_constructor("!include", _construct_include_tag) + + CustomConstructor.add_constructor('!include', _construct_include_tag) + # yaml_obj = ruamel.yaml.YAML(typ='unsafe', pure=True) + yaml_obj = ruamel.yaml.YAML() + yaml_obj.Constructor = CustomConstructor + yaml_obj.Representer = CustomRepresenter + yaml_obj.preserve_quotes = True + yaml_obj.width = float('inf') + return yaml_obj + + class Yaml: """ Namespace for yaml functions + + Example: + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) + >>> import ubelt as ub + >>> data = { + >>> 'a': 'hello world', + >>> 'b': ub.udict({'a': 3}) + >>> } + >>> text1 = Yaml.dumps(data, backend='ruamel') + >>> # Coerce is idempotent and resolves the input to nested Python + >>> # structures. + >>> resolved1 = Yaml.coerce(data) + >>> resolved2 = Yaml.coerce(text1) + >>> resolved3 = Yaml.coerce(resolved2) + >>> assert resolved1 == resolved2 == resolved3 == data + >>> # with ruamel + >>> data2 = Yaml.loads(text1) + >>> assert data2 == data + >>> # with pyyaml + >>> data2 = Yaml.loads(text1, backend='pyyaml') + >>> assert data2 == data """ @staticmethod @@ -89,22 +213,28 @@ def dumps(data, backend='ruamel'): str: yaml text Example: + >>> # xdoctest: +REQUIRES(module:pyyaml) + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) >>> import ubelt as ub >>> data = { >>> 'a': 'hello world', >>> 'b': ub.udict({'a': 3}) >>> } - >>> text1 = Yaml.dumps(data, backend='ruamel') - >>> print(text1) >>> text2 = Yaml.dumps(data, backend='pyyaml') >>> print(text2) + >>> text1 = Yaml.dumps(data, backend='ruamel') + >>> print(text1) >>> assert text1 == text2 """ file = io.StringIO() if backend == 'ruamel': - import ruamel.yaml - Dumper = _custom_ruaml_dumper() - ruamel.yaml.round_trip_dump(data, file, Dumper=Dumper, width=float("inf")) + if NEW_RUAMEL: + yaml_obj = _custom_new_ruaml_yaml_obj() + yaml_obj.dump(data, file) + else: + import ruamel.yaml + Dumper = _custom_ruaml_dumper() + ruamel.yaml.round_trip_dump(data, file, Dumper=Dumper, width=float("inf")) elif backend == 'pyyaml': import yaml Dumper = _custom_pyaml_dumper() @@ -125,16 +255,44 @@ def load(file, backend='ruamel'): Returns: object + + Example: + >>> # xdoctest: +REQUIRES(module:pyyaml) + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) + >>> import ubelt as ub + >>> data = { + >>> 'a': 'hello world', + >>> 'b': ub.udict({'a': 3}) + >>> } + >>> text1 = Yaml.dumps(data, backend='ruamel') + >>> import io + >>> # with ruamel + >>> file = io.StringIO(text1) + >>> data2 = Yaml.load(file) + >>> assert data2 == data + >>> # with pyyaml + >>> file = io.StringIO(text1) + >>> data2 = Yaml.load(file, backend='pyyaml') + >>> assert data2 == data """ if isinstance(file, (str, os.PathLike)): - with open(file, 'r') as fp: + fpath = file + with open(fpath, 'r') as fp: return Yaml.load(fp, backend=backend) else: if backend == 'ruamel': - import ruamel.yaml - Loader = _custom_ruaml_loader() - data = ruamel.yaml.load(file, Loader=Loader, preserve_quotes=True) - # data = ruamel.yaml.load(file, Loader=ruamel.yaml.RoundTripLoader, preserve_quotes=True) + import ruamel.yaml # NOQA + # TODO: seems like there will be a deprecation + # from ruamel.yaml import YAML + if NEW_RUAMEL: + yaml_obj = _custom_new_ruaml_yaml_obj() + data = yaml_obj.load(file) + else: + # yaml = YAML(typ='unsafe', pure=True) + # data = yaml.load(file, Loader=Loader, preserve_quotes=True) + Loader = _custom_ruaml_loader() + data = ruamel.yaml.load(file, Loader=Loader, preserve_quotes=True) + # data = ruamel.yaml.load(file, Loader=ruamel.yaml.RoundTripLoader, preserve_quotes=True) elif backend == 'pyyaml': import yaml # data = yaml.load(file, Loader=yaml.SafeLoader) @@ -156,6 +314,8 @@ def loads(text, backend='ruamel'): object Example: + >>> # xdoctest: +REQUIRES(module:pyyaml) + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) >>> import ubelt as ub >>> data = { >>> 'a': 'hello world', @@ -173,11 +333,38 @@ def loads(text, backend='ruamel'): >>> print('data3 = {}'.format(ub.urepr(data3, nl=1))) >>> assert data == data3 """ + # TODO: add debugging helpers when a loads fails file = io.StringIO(text) - return Yaml.load(file, backend=backend) + if backend == 'ruamel': + import ruamel.yaml # NOQA + try: + data = Yaml.load(file, backend=backend) + except ruamel.yaml.parser.ParserError as ex_: + ex = ex_ + print(f'YAML ERROR: {ex!r}') + try: + from xdoctest.utils import add_line_numbers, highlight_code + lines = text.split('\n') + error_line = ex.context_mark.line + context_before = 3 + context_after = 3 + start_line = error_line - context_before + stop_line = error_line + context_after + show_lines = lines[start_line:stop_line] + show_lines = highlight_code('\n'.join(show_lines), 'YAML').split('\n') + lines = add_line_numbers(show_lines, start=start_line + 1) + print(f'ex.context_mark.line={ex.context_mark.line + 1}') + print(f'ex.context_mark.column={ex.context_mark.column}') + print('\n'.join(lines)) + except Exception: + ... + raise + else: + data = Yaml.load(file, backend=backend) + return data @staticmethod - def coerce(data, backend='ruamel'): + def coerce(data, backend='ruamel', path_policy='existing_file_with_extension'): """ Attempt to convert input into a parsed yaml / json data structure. If the data looks like a path, it tries to load and parse file contents. @@ -187,6 +374,12 @@ def coerce(data, backend='ruamel'): Args: data (str | PathLike | dict | list): backend (str): either ruamel or pyyaml + path_policy (str): + Determines how we determine if something looks like a path. + Pre 0.3.2 behavior is from path_policy='existing_file'. + Default is 'existing_file_with_extension'. + Can also be 'never' to disable the path feature and decrease + ambiguity. Returns: object: parsed yaml data @@ -201,6 +394,25 @@ def coerce(data, backend='ruamel'): https://stackoverflow.com/questions/528281/how-can-i-include-a-yaml-file-inside-another Example: + >>> # xdoctest: +REQUIRES(module:pyyaml) + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) + >>> text = ub.codeblock( + ''' + - !!float nan + - !!float inf + - nan + - inf + # Seems to break older ruamel.yaml 0.17.21 + # - .nan + # - .inf + - null + ''') + >>> Yaml.coerce(text, backend='pyyaml') + >>> Yaml.coerce(text, backend='ruamel') + + Example: + >>> # xdoctest: +REQUIRES(module:pyyaml) + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) >>> Yaml.coerce('"[1, 2, 3]"') [1, 2, 3] >>> fpath = ub.Path.appdir('cmd_queue/tests/util_yaml').ensuredir() / 'file.yaml' @@ -215,9 +427,13 @@ def coerce(data, backend='ruamel'): None Example: + >>> # xdoctest: +REQUIRES(module:pyyaml) + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) >>> assert Yaml.coerce('') is None Example: + >>> # xdoctest: +REQUIRES(module:pyyaml) + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) >>> dpath = ub.Path.appdir('cmd_queue/tests/util_yaml').ensuredir() >>> fpath = dpath / 'external.yaml' >>> fpath.write_text(Yaml.dumps({'foo': 'bar'})) @@ -240,22 +456,42 @@ def coerce(data, backend='ruamel'): >>> print('data = {}'.format(ub.urepr(data, nl=1))) >>> print(Yaml.dumps(data, backend='ruamel')) """ - if isinstance(data, str): + if isinstance(data, os.PathLike): + result = Yaml.load(data, backend=backend) + elif isinstance(data, str): maybe_path = None - if '\n' not in data and len(data.strip()) > 0: - # Ambiguous case: might this be path-like? - maybe_path = ub.Path(data) - try: - if not maybe_path.exists(): + + if path_policy == 'never': + ... + else: + if path_policy == 'existing_file': + path_requires_extension = False + elif path_policy == 'existing_file_with_extension': + path_requires_extension = True + else: + raise KeyError(path_policy) + + if '\n' not in data and len(data.strip()) > 0: + # Ambiguous case: might this be path-like? + maybe_path = ub.Path(data) + try: + if not maybe_path.is_file(): + maybe_path = None + except OSError: maybe_path = None - except OSError: - maybe_path = None + + if maybe_path and path_requires_extension: + # If the input looks like a path, try to load it. This was + # added because I tried to coerce "auto" as a string, but + # for some reason there was a file "auto" in my cwd and + # that was confusing. + if '.' not in maybe_path.name: + maybe_path = None + if maybe_path is not None: result = Yaml.coerce(maybe_path, backend=backend) else: result = Yaml.loads(data, backend=backend) - elif isinstance(data, os.PathLike): - result = Yaml.load(data, backend=backend) elif hasattr(data, 'read'): # assume file result = Yaml.load(data, backend=backend) @@ -281,6 +517,8 @@ def Dict(data): Get a ruamel-enhanced dictionary Example: + >>> # xdoctest: +REQUIRES(module:pyyaml) + >>> # xdoctest: +REQUIRES(module:ruamel.yaml) >>> data = {'a': 'avalue', 'b': 'bvalue'} >>> data = Yaml.Dict(data) >>> data.yaml_set_start_comment('hello')