From 01e05819238282a69787f418855698acdd723f3d Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 27 Jan 2025 17:06:10 +0100 Subject: [PATCH 01/10] install remorph-community-transpiler and morpheus --- src/databricks/labs/remorph/install.py | 117 ++++++++++++++++++++++++- tests/integration/test_install.py | 36 ++++++++ tests/unit/test_install.py | 3 + 3 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_install.py diff --git a/src/databricks/labs/remorph/install.py b/src/databricks/labs/remorph/install.py index 32968c77e7..92d8ff5976 100644 --- a/src/databricks/labs/remorph/install.py +++ b/src/databricks/labs/remorph/install.py @@ -1,7 +1,14 @@ import dataclasses +from json import loads, dumps import logging import os +from shutil import rmtree, move +from subprocess import run +import sys +from urllib import request import webbrowser +from datetime import datetime +from pathlib import Path from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug from databricks.labs.blueprint.installation import Installation @@ -30,7 +37,12 @@ TRANSPILER_WAREHOUSE_PREFIX = "Remorph Transpiler Validation" MODULES = sorted({"transpile", "reconcile", "all"}) - +LABS_PATH = Path.home() / ".databricks" / "labs" +TRANSPILERS_PATH = LABS_PATH / "remorph-transpilers" +OSS_TRANSPILER_NAME = "remorph-community-transpiler" +OSS_TRANSPILER_PYPI_NAME = f"databricks-labs-{OSS_TRANSPILER_NAME}" +MORPHEUS_TRANSPILER_NAME = "morpheus" +MORPHEUS_TRANSPILER_GROUP_NAME = "com.databricks.labs" class WorkspaceInstaller: def __init__( @@ -63,6 +75,8 @@ def run( self, config: RemorphConfigs | None = None, ) -> RemorphConfigs: + self.install_community_transpiler() + self.install_morpheus() logger.info(f"Installing Remorph v{self._product_info.version()}") if not config: config = self.configure() @@ -72,6 +86,107 @@ def run( logger.info("Installation completed successfully! Please refer to the documentation for the next steps.") return config + @classmethod + def install_morpheus(cls): + current_version = cls.get_installed_version(MORPHEUS_TRANSPILER_NAME) + latest_version = cls.get_maven_version(MORPHEUS_TRANSPILER_GROUP_NAME, MORPHEUS_TRANSPILER_NAME) + if current_version == latest_version: + logger.info(f"Databricks Morpheus transpiler v{latest_version} already installed") + return + logger.info(f"Installing Databricks Morpheus transpiler v{latest_version}") + product_path = TRANSPILERS_PATH / MORPHEUS_TRANSPILER_NAME + if current_version is not None: + product_path.rename(f"{MORPHEUS_TRANSPILER_NAME}-saved") + install_path = product_path / "lib" + install_path.mkdir() + return_code = cls.download_from_maven(MORPHEUS_TRANSPILER_GROUP_NAME, MORPHEUS_TRANSPILER_NAME, latest_version) + if return_code == 0: + state_path = product_path / "state" + state_path.mkdir() + version_data = { "version": f"v{latest_version}", "date": str(datetime.now()) } + version_path = state_path / "version.json" + version_path.write_text(dumps(version_data), "utf-8") + logger.info(f"Successfully installed Databricks Morpheus transpiler v{latest_version}") + if current_version is not None: + rmtree(f"{product_path!s}-saved") + else: + logger.info(f"Failed to install Databricks Morpheus transpiler v{latest_version}") + if current_version is not None: + rmtree(str(product_path)) + renamed = Path(f"{product_path!s}-saved") + renamed.rename(product_path.name) + + @classmethod + def download_from_maven(cls, group_id: str, artifact_id: str, version: str, target: Path, extension="jar"): + group_id = group_id.replace(".", "/") + url = f"https://search.maven.org/remotecontent?filepath={group_id}/{artifact_id}/{version}/{artifact_id}-{version}.{extension}" + try: + path, message = request.urlretrieve(url) + if path: + move(path, str(target)) + return 0 + return -2 + except: + return -1 + + + @classmethod + def install_community_transpiler(cls): + current_version = cls.get_installed_version(OSS_TRANSPILER_NAME) + latest_version = cls.get_pypi_version(OSS_TRANSPILER_PYPI_NAME) + if current_version == latest_version: + logger.info(f"Remorph community transpiler v{latest_version} already installed") + return + logger.info(f"Installing Remorph community transpiler v{latest_version}") + product_path = TRANSPILERS_PATH / OSS_TRANSPILER_NAME + if current_version is not None: + product_path.rename(f"{OSS_TRANSPILER_NAME}-saved") + install_path = product_path / "lib" + install_path.mkdir() + args = [ "pip", "install", OSS_TRANSPILER_PYPI_NAME, "-t", str(install_path)] + return_code = run(args, sys.stdin, sys.stdout, sys.stderr) + if return_code == 0: + state_path = product_path / "state" + state_path.mkdir() + version_data = { "version": f"v{latest_version}", "date": str(datetime.now()) } + version_path = state_path / "version.json" + version_path.write_text(dumps(version_data), "utf-8") + logger.info(f"Successfully installed Remorph community transpiler v{latest_version}") + if current_version is not None: + rmtree(f"{product_path!s}-saved") + else: + logger.info(f"Failed to install Remorph community transpiler v{latest_version}") + if current_version is not None: + rmtree(str(product_path)) + renamed = Path(f"{product_path!s}-saved") + renamed.rename(product_path.name) + + @classmethod + def get_maven_version(cls, group_id: str, artifact_id: str) -> str | None: + url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&rows=1&wt=json" + text = request.urlopen(url).read() + data: dict[str, any] = loads(text) + return data.get("response", {}).get('docs', [{}])[0].get("v", None) + + @classmethod + def get_pypi_version(cls, product_name: str) -> str | None: + text = request.urlopen(f"https://pypi.org/pypi/{product_name}/json").read() + data: dict[str, any] = loads(text) + return data.get("info", {}).get('version', None) + + @classmethod + def get_installed_version(cls, product_name: str, is_transpiler = True) -> str | None: + product_path = (TRANSPILERS_PATH if is_transpiler else LABS_PATH) / product_name + current_version_path = product_path / "state" / "version.json" + if not current_version_path.exists(): + return None + text = current_version_path.read_text("utf-8") + data: dict[str, any] = loads(text) + version: str | None = data.get("version", None) + if not version or not version.startswith("v"): + return None + return version[1:] + def configure(self, module: str | None = None) -> RemorphConfigs: selected_module = module or self._prompts.choice("Select a module to configure:", MODULES) match selected_module: diff --git a/tests/integration/test_install.py b/tests/integration/test_install.py new file mode 100644 index 0000000000..a152a87df2 --- /dev/null +++ b/tests/integration/test_install.py @@ -0,0 +1,36 @@ +import os +from pathlib import Path +from tempfile import TemporaryFile + +import pytest + +from databricks.labs.remorph.install import WorkspaceInstaller + + +@pytest.mark.skipif(os.environ.get("CI", "false")=="true", reason="Skipping in CI since we have no installed product") +def test_gets_installed_version(): + version = WorkspaceInstaller.get_installed_version("remorph", False) + check_valid_version(version) + +def test_gets_maven_version(): + version = WorkspaceInstaller.get_maven_version("com.databricks", "databricks-connect") + check_valid_version(version) + +def test_downloads_from_maven(): + path = Path(str(TemporaryFile())) + rc = WorkspaceInstaller.download_from_maven("com.databricks", "databricks-connect", "16.0.0", path, extension="pom") + assert rc==0 + assert path.exists() + assert path.stat().st_size == 5_684 + + + +def check_valid_version(version: str): + parts = version.split(".") + for i in range(0, len(parts)): + part = parts[i] + try: + _ = int(part) + except: + assert False, f"{version} does not look like a valid semver" + diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py index 7f93a96973..008a502df2 100644 --- a/tests/unit/test_install.py +++ b/tests/unit/test_install.py @@ -1,3 +1,6 @@ +import os +from pathlib import Path +from tempfile import TemporaryFile from unittest.mock import create_autospec, patch import pytest From e7a30f74aea5337777aef6724811491ed08ffdc2 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 27 Jan 2025 18:03:57 +0100 Subject: [PATCH 02/10] formatting --- src/databricks/labs/remorph/install.py | 53 ++++++++++++++++---------- tests/integration/test_install.py | 17 +++++---- tests/unit/test_install.py | 3 -- 3 files changed, 41 insertions(+), 32 deletions(-) diff --git a/src/databricks/labs/remorph/install.py b/src/databricks/labs/remorph/install.py index 92d8ff5976..9a78c53bec 100644 --- a/src/databricks/labs/remorph/install.py +++ b/src/databricks/labs/remorph/install.py @@ -3,9 +3,11 @@ import logging import os from shutil import rmtree, move -from subprocess import run +from subprocess import run, CalledProcessError import sys +from typing import Any from urllib import request +from urllib.error import URLError import webbrowser from datetime import datetime from pathlib import Path @@ -44,6 +46,7 @@ MORPHEUS_TRANSPILER_NAME = "morpheus" MORPHEUS_TRANSPILER_GROUP_NAME = "com.databricks.labs" + class WorkspaceInstaller: def __init__( self, @@ -99,11 +102,16 @@ def install_morpheus(cls): product_path.rename(f"{MORPHEUS_TRANSPILER_NAME}-saved") install_path = product_path / "lib" install_path.mkdir() - return_code = cls.download_from_maven(MORPHEUS_TRANSPILER_GROUP_NAME, MORPHEUS_TRANSPILER_NAME, latest_version) + return_code = cls.download_from_maven( + MORPHEUS_TRANSPILER_GROUP_NAME, + MORPHEUS_TRANSPILER_NAME, + latest_version, + install_path / f"{MORPHEUS_TRANSPILER_NAME}.jar", + ) if return_code == 0: state_path = product_path / "state" state_path.mkdir() - version_data = { "version": f"v{latest_version}", "date": str(datetime.now()) } + version_data = {"version": f"v{latest_version}", "date": str(datetime.now())} version_path = state_path / "version.json" version_path.write_text(dumps(version_data), "utf-8") logger.info(f"Successfully installed Databricks Morpheus transpiler v{latest_version}") @@ -125,10 +133,11 @@ def download_from_maven(cls, group_id: str, artifact_id: str, version: str, targ if path: move(path, str(target)) return 0 - return -2 - except: + logger.error(message) + return -1 + except URLError as e: + logger.error("While downloading from maven", exc_info=e) return -1 - @classmethod def install_community_transpiler(cls): @@ -143,19 +152,19 @@ def install_community_transpiler(cls): product_path.rename(f"{OSS_TRANSPILER_NAME}-saved") install_path = product_path / "lib" install_path.mkdir() - args = [ "pip", "install", OSS_TRANSPILER_PYPI_NAME, "-t", str(install_path)] - return_code = run(args, sys.stdin, sys.stdout, sys.stderr) - if return_code == 0: - state_path = product_path / "state" - state_path.mkdir() - version_data = { "version": f"v{latest_version}", "date": str(datetime.now()) } - version_path = state_path / "version.json" + args = ["pip", "install", OSS_TRANSPILER_PYPI_NAME, "-t", str(install_path)] + state_path = product_path / "state" + state_path.mkdir() + version_data = {"version": f"v{latest_version}", "date": str(datetime.now())} + version_path = state_path / "version.json" + try: + run(args, sys.stdin, sys.stdout, sys.stderr, check=True) version_path.write_text(dumps(version_data), "utf-8") logger.info(f"Successfully installed Remorph community transpiler v{latest_version}") if current_version is not None: rmtree(f"{product_path!s}-saved") - else: - logger.info(f"Failed to install Remorph community transpiler v{latest_version}") + except CalledProcessError as e: + logger.info(f"Failed to install Remorph community transpiler v{latest_version}", exc_info=e) if current_version is not None: rmtree(str(product_path)) renamed = Path(f"{product_path!s}-saved") @@ -164,24 +173,26 @@ def install_community_transpiler(cls): @classmethod def get_maven_version(cls, group_id: str, artifact_id: str) -> str | None: url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&rows=1&wt=json" - text = request.urlopen(url).read() - data: dict[str, any] = loads(text) + with request.urlopen(url) as server: + text = server.read() + data: dict[str, Any] = loads(text) return data.get("response", {}).get('docs', [{}])[0].get("v", None) @classmethod def get_pypi_version(cls, product_name: str) -> str | None: - text = request.urlopen(f"https://pypi.org/pypi/{product_name}/json").read() - data: dict[str, any] = loads(text) + with request.urlopen(f"https://pypi.org/pypi/{product_name}/json") as server: + text = server.read() + data: dict[str, Any] = loads(text) return data.get("info", {}).get('version', None) @classmethod - def get_installed_version(cls, product_name: str, is_transpiler = True) -> str | None: + def get_installed_version(cls, product_name: str, is_transpiler=True) -> str | None: product_path = (TRANSPILERS_PATH if is_transpiler else LABS_PATH) / product_name current_version_path = product_path / "state" / "version.json" if not current_version_path.exists(): return None text = current_version_path.read_text("utf-8") - data: dict[str, any] = loads(text) + data: dict[str, Any] = loads(text) version: str | None = data.get("version", None) if not version or not version.startswith("v"): return None diff --git a/tests/integration/test_install.py b/tests/integration/test_install.py index a152a87df2..79b2921405 100644 --- a/tests/integration/test_install.py +++ b/tests/integration/test_install.py @@ -7,30 +7,31 @@ from databricks.labs.remorph.install import WorkspaceInstaller -@pytest.mark.skipif(os.environ.get("CI", "false")=="true", reason="Skipping in CI since we have no installed product") +@pytest.mark.skipif(os.environ.get("CI", "false") == "true", reason="Skipping in CI since we have no installed product") def test_gets_installed_version(): version = WorkspaceInstaller.get_installed_version("remorph", False) check_valid_version(version) + def test_gets_maven_version(): version = WorkspaceInstaller.get_maven_version("com.databricks", "databricks-connect") check_valid_version(version) + def test_downloads_from_maven(): path = Path(str(TemporaryFile())) - rc = WorkspaceInstaller.download_from_maven("com.databricks", "databricks-connect", "16.0.0", path, extension="pom") - assert rc==0 + result = WorkspaceInstaller.download_from_maven( + "com.databricks", "databricks-connect", "16.0.0", path, extension="pom" + ) + assert result == 0 assert path.exists() assert path.stat().st_size == 5_684 - def check_valid_version(version: str): parts = version.split(".") - for i in range(0, len(parts)): - part = parts[i] + for _, part in enumerate(parts): try: _ = int(part) - except: + except ValueError: assert False, f"{version} does not look like a valid semver" - diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py index 008a502df2..7f93a96973 100644 --- a/tests/unit/test_install.py +++ b/tests/unit/test_install.py @@ -1,6 +1,3 @@ -import os -from pathlib import Path -from tempfile import TemporaryFile from unittest.mock import create_autospec, patch import pytest From 8308dab600f991d8a778fb54535e2e333b31b607 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 27 Jan 2025 18:14:53 +0100 Subject: [PATCH 03/10] fix failing tests --- tests/unit/test_install.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py index 7f93a96973..4863cb9d38 100644 --- a/tests/unit/test_install.py +++ b/tests/unit/test_install.py @@ -27,6 +27,19 @@ def ws(): ) return w +@pytest.fixture() +def workspace_installer(): + + class TestWorkspaceInstaller(WorkspaceInstaller): + + def install_morpheus(cls): pass + def install_community_transpiler(cls): pass + + def installer(*args, **kwargs) -> WorkspaceInstaller: + return TestWorkspaceInstaller(*args, **kwargs) + + yield installer + def test_workspace_installer_run_raise_error_in_dbr(ws): ctx = ApplicationContext(ws) @@ -44,7 +57,7 @@ def test_workspace_installer_run_raise_error_in_dbr(ws): ) -def test_workspace_installer_run_install_not_called_in_test(ws): +def test_workspace_installer_run_install_not_called_in_test(workspace_installer, ws): ws_installation = create_autospec(WorkspaceInstallation) ctx = ApplicationContext(ws) ctx.replace( @@ -54,7 +67,7 @@ def test_workspace_installer_run_install_not_called_in_test(ws): ) provided_config = RemorphConfigs() - workspace_installer = WorkspaceInstaller( + workspace_installer = workspace_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -68,7 +81,7 @@ def test_workspace_installer_run_install_not_called_in_test(ws): ws_installation.install.assert_not_called() -def test_workspace_installer_run_install_called_with_provided_config(ws): +def test_workspace_installer_run_install_called_with_provided_config(workspace_installer, ws): ws_installation = create_autospec(WorkspaceInstallation) ctx = ApplicationContext(ws) ctx.replace( @@ -76,7 +89,7 @@ def test_workspace_installer_run_install_called_with_provided_config(ws): workspace_installation=ws_installation, ) provided_config = RemorphConfigs() - workspace_installer = WorkspaceInstaller( + workspace_installer = workspace_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -110,7 +123,7 @@ def test_configure_error_if_invalid_module_selected(ws): workspace_installer.configure(module="invalid_module") -def test_workspace_installer_run_install_called_with_generated_config(ws): +def test_workspace_installer_run_install_called_with_generated_config(workspace_installer, ws): prompts = MockPrompts( { r"Select a module to configure:": MODULES.index("transpile"), @@ -133,7 +146,7 @@ def test_workspace_installer_run_install_called_with_generated_config(ws): workspace_installation=create_autospec(WorkspaceInstallation), ) - workspace_installer = WorkspaceInstaller( + workspace_installer = workspace_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -986,7 +999,7 @@ def test_configure_all_override_installation(ws): ) -def test_runs_upgrades_on_more_recent_version(ws): +def test_runs_upgrades_on_more_recent_version(workspace_installer, ws): installation = MockInstallation( { 'version.json': {'version': '0.3.0', 'wheel': '...', 'date': '...'}, @@ -1038,7 +1051,7 @@ def test_runs_upgrades_on_more_recent_version(ws): wheels=wheels, ) - workspace_installer = WorkspaceInstaller( + workspace_installer = workspace_installer( ctx.workspace_client, ctx.prompts, ctx.installation, From 9927edae689572e4f9f98ab970fcde548f5c1474 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 27 Jan 2025 18:17:21 +0100 Subject: [PATCH 04/10] add TODO comment --- tests/unit/test_install.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py index 4863cb9d38..34b8712607 100644 --- a/tests/unit/test_install.py +++ b/tests/unit/test_install.py @@ -31,7 +31,8 @@ def ws(): def workspace_installer(): class TestWorkspaceInstaller(WorkspaceInstaller): - + # TODO the below methods currently raise a 404 because the artifacts don't exist yet + # TODO remove this once they are available !!! def install_morpheus(cls): pass def install_community_transpiler(cls): pass From d33ddbe6dc8ea3bfd28641bd51095909f933c1d6 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 27 Jan 2025 18:26:09 +0100 Subject: [PATCH 05/10] formatting --- tests/unit/test_install.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py index 34b8712607..e16ac613c5 100644 --- a/tests/unit/test_install.py +++ b/tests/unit/test_install.py @@ -27,14 +27,20 @@ def ws(): ) return w + @pytest.fixture() -def workspace_installer(): +def ws_installer(): class TestWorkspaceInstaller(WorkspaceInstaller): # TODO the below methods currently raise a 404 because the artifacts don't exist yet # TODO remove this once they are available !!! - def install_morpheus(cls): pass - def install_community_transpiler(cls): pass + @classmethod + def install_morpheus(cls): + pass + + @classmethod + def install_community_transpiler(cls): + pass def installer(*args, **kwargs) -> WorkspaceInstaller: return TestWorkspaceInstaller(*args, **kwargs) @@ -58,7 +64,7 @@ def test_workspace_installer_run_raise_error_in_dbr(ws): ) -def test_workspace_installer_run_install_not_called_in_test(workspace_installer, ws): +def test_workspace_installer_run_install_not_called_in_test(ws_installer, ws): ws_installation = create_autospec(WorkspaceInstallation) ctx = ApplicationContext(ws) ctx.replace( @@ -68,7 +74,7 @@ def test_workspace_installer_run_install_not_called_in_test(workspace_installer, ) provided_config = RemorphConfigs() - workspace_installer = workspace_installer( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -82,7 +88,7 @@ def test_workspace_installer_run_install_not_called_in_test(workspace_installer, ws_installation.install.assert_not_called() -def test_workspace_installer_run_install_called_with_provided_config(workspace_installer, ws): +def test_workspace_installer_run_install_called_with_provided_config(ws_installer, ws): ws_installation = create_autospec(WorkspaceInstallation) ctx = ApplicationContext(ws) ctx.replace( @@ -90,7 +96,7 @@ def test_workspace_installer_run_install_called_with_provided_config(workspace_i workspace_installation=ws_installation, ) provided_config = RemorphConfigs() - workspace_installer = workspace_installer( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -124,7 +130,7 @@ def test_configure_error_if_invalid_module_selected(ws): workspace_installer.configure(module="invalid_module") -def test_workspace_installer_run_install_called_with_generated_config(workspace_installer, ws): +def test_workspace_installer_run_install_called_with_generated_config(ws_installer, ws): prompts = MockPrompts( { r"Select a module to configure:": MODULES.index("transpile"), @@ -147,7 +153,7 @@ def test_workspace_installer_run_install_called_with_generated_config(workspace_ workspace_installation=create_autospec(WorkspaceInstallation), ) - workspace_installer = workspace_installer( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -1000,7 +1006,7 @@ def test_configure_all_override_installation(ws): ) -def test_runs_upgrades_on_more_recent_version(workspace_installer, ws): +def test_runs_upgrades_on_more_recent_version(ws_installer, ws): installation = MockInstallation( { 'version.json': {'version': '0.3.0', 'wheel': '...', 'date': '...'}, @@ -1052,7 +1058,7 @@ def test_runs_upgrades_on_more_recent_version(workspace_installer, ws): wheels=wheels, ) - workspace_installer = workspace_installer( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, From ccab068b77e0ecdfe7ee4600853b0a5a8a4f9c90 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 3 Feb 2025 16:12:59 +0100 Subject: [PATCH 06/10] add transpiler configs --- .../resources/transpilers/mct/config.yml | 22 +++++++++++++++++++ .../resources/transpilers/morpheus/config.yml | 9 ++++++++ 2 files changed, 31 insertions(+) create mode 100644 src/databricks/labs/remorph/resources/transpilers/mct/config.yml create mode 100644 src/databricks/labs/remorph/resources/transpilers/morpheus/config.yml diff --git a/src/databricks/labs/remorph/resources/transpilers/mct/config.yml b/src/databricks/labs/remorph/resources/transpilers/mct/config.yml new file mode 100644 index 0000000000..51531ced13 --- /dev/null +++ b/src/databricks/labs/remorph/resources/transpilers/mct/config.yml @@ -0,0 +1,22 @@ +remorph: + version: 1 + dialects: + - athena + - bigquery + - mysql + - netezza + - oracle + - postgresql + - presto + - redshift + - snowflake + - sqlite + - teradata + - trino + - tsql + - vertica + command_line: + - python + - databricks/labs/remorph/transpiler/server.py +custom: + experimental: true diff --git a/src/databricks/labs/remorph/resources/transpilers/morpheus/config.yml b/src/databricks/labs/remorph/resources/transpilers/morpheus/config.yml new file mode 100644 index 0000000000..c5da5f44b5 --- /dev/null +++ b/src/databricks/labs/remorph/resources/transpilers/morpheus/config.yml @@ -0,0 +1,9 @@ +remorph: + version: 1 + dialects: + - snowflake + - tsql + command_line: + - java + - '-jar' + - morpheus-lsp.jar From e4ff475c762ed449e4ee1cfa63169d2a12f7505a Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 3 Feb 2025 20:05:01 +0100 Subject: [PATCH 07/10] refactor LSP config for transpiler installer, and add tests --- src/databricks/labs/remorph/install.py | 284 +++++++++++------- .../resources/transpilers/mct/config.yml | 1 + .../resources/transpilers/morpheus/config.yml | 1 + .../labs/remorph/transpiler/lsp/lsp_engine.py | 62 ++-- tests/integration/test_install.py | 45 ++- tests/resources/lsp_transpiler/lsp_config.yml | 1 + tests/unit/test_install.py | 4 +- tests/unit/transpiler/test_lsp_config.py | 11 +- tests/unit/transpiler/test_snow.py | 2 +- 9 files changed, 266 insertions(+), 145 deletions(-) diff --git a/src/databricks/labs/remorph/install.py b/src/databricks/labs/remorph/install.py index 9a78c53bec..8dd45ea647 100644 --- a/src/databricks/labs/remorph/install.py +++ b/src/databricks/labs/remorph/install.py @@ -1,3 +1,5 @@ +import abc +import configparser import dataclasses from json import loads, dumps import logging @@ -12,6 +14,7 @@ from datetime import datetime from pathlib import Path +import yaml from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug from databricks.labs.blueprint.installation import Installation from databricks.labs.blueprint.installation import SerdeError @@ -33,96 +36,49 @@ from databricks.labs.remorph.deployment.configurator import ResourceConfigurator from databricks.labs.remorph.deployment.installation import WorkspaceInstallation from databricks.labs.remorph.reconcile.constants import ReconReportType, ReconSourceType +from databricks.labs.remorph.transpiler.lsp.lsp_engine import LSPConfig from databricks.labs.remorph.transpiler.sqlglot.dialect_utils import SQLGLOT_DIALECTS logger = logging.getLogger(__name__) TRANSPILER_WAREHOUSE_PREFIX = "Remorph Transpiler Validation" MODULES = sorted({"transpile", "reconcile", "all"}) -LABS_PATH = Path.home() / ".databricks" / "labs" -TRANSPILERS_PATH = LABS_PATH / "remorph-transpilers" -OSS_TRANSPILER_NAME = "remorph-community-transpiler" -OSS_TRANSPILER_PYPI_NAME = f"databricks-labs-{OSS_TRANSPILER_NAME}" -MORPHEUS_TRANSPILER_NAME = "morpheus" -MORPHEUS_TRANSPILER_GROUP_NAME = "com.databricks.labs" -class WorkspaceInstaller: - def __init__( - self, - ws: WorkspaceClient, - prompts: Prompts, - installation: Installation, - install_state: InstallState, - product_info: ProductInfo, - resource_configurator: ResourceConfigurator, - workspace_installation: WorkspaceInstallation, - environ: dict[str, str] | None = None, - ): - self._ws = ws - self._prompts = prompts - self._installation = installation - self._install_state = install_state - self._product_info = product_info - self._resource_configurator = resource_configurator - self._ws_installation = workspace_installation +class TranspilerInstaller(abc.ABC): - if not environ: - environ = dict(os.environ.items()) + @classmethod + def labs_path(cls): + return Path.home() / ".databricks" / "labs" - if "DATABRICKS_RUNTIME_VERSION" in environ: - msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime" - raise SystemExit(msg) + @classmethod + def transpilers_path(cls): + return cls.labs_path() / "remorph-transpilers" - def run( - self, - config: RemorphConfigs | None = None, - ) -> RemorphConfigs: - self.install_community_transpiler() - self.install_morpheus() - logger.info(f"Installing Remorph v{self._product_info.version()}") - if not config: - config = self.configure() - if self._is_testing(): - return config - self._ws_installation.install(config) - logger.info("Installation completed successfully! Please refer to the documentation for the next steps.") - return config + @classmethod + def resources_folder(cls): + return Path(__file__).parent / "resources" / "transpilers" @classmethod - def install_morpheus(cls): - current_version = cls.get_installed_version(MORPHEUS_TRANSPILER_NAME) - latest_version = cls.get_maven_version(MORPHEUS_TRANSPILER_GROUP_NAME, MORPHEUS_TRANSPILER_NAME) - if current_version == latest_version: - logger.info(f"Databricks Morpheus transpiler v{latest_version} already installed") - return - logger.info(f"Installing Databricks Morpheus transpiler v{latest_version}") - product_path = TRANSPILERS_PATH / MORPHEUS_TRANSPILER_NAME - if current_version is not None: - product_path.rename(f"{MORPHEUS_TRANSPILER_NAME}-saved") - install_path = product_path / "lib" - install_path.mkdir() - return_code = cls.download_from_maven( - MORPHEUS_TRANSPILER_GROUP_NAME, - MORPHEUS_TRANSPILER_NAME, - latest_version, - install_path / f"{MORPHEUS_TRANSPILER_NAME}.jar", - ) - if return_code == 0: - state_path = product_path / "state" - state_path.mkdir() - version_data = {"version": f"v{latest_version}", "date": str(datetime.now())} - version_path = state_path / "version.json" - version_path.write_text(dumps(version_data), "utf-8") - logger.info(f"Successfully installed Databricks Morpheus transpiler v{latest_version}") - if current_version is not None: - rmtree(f"{product_path!s}-saved") - else: - logger.info(f"Failed to install Databricks Morpheus transpiler v{latest_version}") - if current_version is not None: - rmtree(str(product_path)) - renamed = Path(f"{product_path!s}-saved") - renamed.rename(product_path.name) + def get_installed_version(cls, product_name: str, is_transpiler=True) -> str | None: + product_path = (cls.transpilers_path() if is_transpiler else cls.labs_path()) / product_name + current_version_path = product_path / "state" / "version.json" + if not current_version_path.exists(): + return None + text = current_version_path.read_text("utf-8") + data: dict[str, Any] = loads(text) + version: str | None = data.get("version", None) + if not version or not version.startswith("v"): + return None + return version[1:] + + @classmethod + def get_maven_version(cls, group_id: str, artifact_id: str) -> str | None: + url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&rows=1&wt=json" + with request.urlopen(url) as server: + text = server.read() + data: dict[str, Any] = loads(text) + return data.get("response", {}).get('docs', [{}])[0].get("v", None) @classmethod def download_from_maven(cls, group_id: str, artifact_id: str, version: str, target: Path, extension="jar"): @@ -140,63 +96,173 @@ def download_from_maven(cls, group_id: str, artifact_id: str, version: str, targ return -1 @classmethod - def install_community_transpiler(cls): - current_version = cls.get_installed_version(OSS_TRANSPILER_NAME) - latest_version = cls.get_pypi_version(OSS_TRANSPILER_PYPI_NAME) + def get_pypi_version(cls, product_name: str) -> str | None: + with request.urlopen(f"https://pypi.org/pypi/{product_name}/json") as server: + text = server.read() + data: dict[str, Any] = loads(text) + return data.get("info", {}).get('version', None) + + @classmethod + def install_from_pypi(cls, product_name: str, pypi_name: str): + current_version = cls.get_installed_version(product_name) + latest_version = cls.get_pypi_version(pypi_name) if current_version == latest_version: - logger.info(f"Remorph community transpiler v{latest_version} already installed") + logger.info(f"{pypi_name} v{latest_version} already installed") return - logger.info(f"Installing Remorph community transpiler v{latest_version}") - product_path = TRANSPILERS_PATH / OSS_TRANSPILER_NAME + logger.info(f"Installing {pypi_name} v{latest_version}") + product_path = cls.transpilers_path() / product_name if current_version is not None: - product_path.rename(f"{OSS_TRANSPILER_NAME}-saved") + product_path.rename(f"{product_name}-saved") install_path = product_path / "lib" install_path.mkdir() - args = ["pip", "install", OSS_TRANSPILER_PYPI_NAME, "-t", str(install_path)] + args = ["pip", "install", pypi_name, "-t", str(install_path)] state_path = product_path / "state" state_path.mkdir() version_data = {"version": f"v{latest_version}", "date": str(datetime.now())} version_path = state_path / "version.json" try: - run(args, sys.stdin, sys.stdout, sys.stderr, check=True) + run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, check=True) version_path.write_text(dumps(version_data), "utf-8") - logger.info(f"Successfully installed Remorph community transpiler v{latest_version}") + logger.info(f"Successfully installed {pypi_name} v{latest_version}") if current_version is not None: rmtree(f"{product_path!s}-saved") except CalledProcessError as e: - logger.info(f"Failed to install Remorph community transpiler v{latest_version}", exc_info=e) + logger.info(f"Failed to install {pypi_name} v{latest_version}", exc_info=e) if current_version is not None: rmtree(str(product_path)) renamed = Path(f"{product_path!s}-saved") renamed.rename(product_path.name) @classmethod - def get_maven_version(cls, group_id: str, artifact_id: str) -> str | None: - url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&rows=1&wt=json" - with request.urlopen(url) as server: - text = server.read() - data: dict[str, Any] = loads(text) - return data.get("response", {}).get('docs', [{}])[0].get("v", None) + def all_transpiler_configs(cls) -> dict[str, LSPConfig]: + all_files = os.listdir(cls.transpilers_path()) + all_configs = [cls._transpiler_config(cls.transpilers_path() / file) for file in all_files] + return { config.name: config for config in filter(lambda _: _ is not None, all_configs)} @classmethod - def get_pypi_version(cls, product_name: str) -> str | None: - with request.urlopen(f"https://pypi.org/pypi/{product_name}/json") as server: - text = server.read() - data: dict[str, Any] = loads(text) - return data.get("info", {}).get('version', None) + def all_transpiler_names(cls) -> set[str]: + all_configs = cls.all_transpiler_configs() + return set(all_configs.keys()) @classmethod - def get_installed_version(cls, product_name: str, is_transpiler=True) -> str | None: - product_path = (TRANSPILERS_PATH if is_transpiler else LABS_PATH) / product_name - current_version_path = product_path / "state" / "version.json" - if not current_version_path.exists(): - return None - text = current_version_path.read_text("utf-8") - data: dict[str, Any] = loads(text) - version: str | None = data.get("version", None) - if not version or not version.startswith("v"): + def _transpiler_config(cls, path: Path) -> LSPConfig | None: + try: + return LSPConfig.load(path / "config.yml") + except: return None - return version[1:] + + @classmethod + def all_dialects(cls): + all_dialects: set[str] = set() + for config in cls.all_transpiler_configs().values(): + all_dialects = all_dialects.union(config.remorph.dialects) + return all_dialects + + @classmethod + def transpilers_with_dialect(cls, dialect: str) -> set[str]: + configs = filter(lambda cfg: dialect in cfg.remorph.dialects, cls.all_transpiler_configs().values()) + return set(config.name for config in configs) + + + +class RCTInstaller(TranspilerInstaller): + RCT_TRANSPILER_NAME = "remorph-community-transpiler" + RCT_TRANSPILER_PYPI_NAME = f"databricks-labs-{RCT_TRANSPILER_NAME}" + + @classmethod + def install(cls): + cls.install_from_pypi(cls.RCT_TRANSPILER_NAME, cls.RCT_TRANSPILER_PYPI_NAME) + + +class MorpheusInstaller(TranspilerInstaller): + MORPHEUS_TRANSPILER_NAME = "morpheus" + MORPHEUS_TRANSPILER_GROUP_NAME = "com.databricks.labs" + + @classmethod + def install(cls): + current_version = cls.get_installed_version(cls.MORPHEUS_TRANSPILER_NAME) + latest_version = cls.get_maven_version(cls.MORPHEUS_TRANSPILER_GROUP_NAME, cls.MORPHEUS_TRANSPILER_NAME) + if current_version == latest_version: + logger.info(f"Databricks Morpheus transpiler v{latest_version} already installed") + return + logger.info(f"Installing Databricks Morpheus transpiler v{latest_version}") + product_path = cls.TRANSPILERS_PATH / cls.MORPHEUS_TRANSPILER_NAME + if current_version is not None: + product_path.rename(f"{cls.MORPHEUS_TRANSPILER_NAME}-saved") + install_path = product_path / "lib" + install_path.mkdir() + return_code = cls.download_from_maven( + cls.MORPHEUS_TRANSPILER_GROUP_NAME, + cls.MORPHEUS_TRANSPILER_NAME, + latest_version, + install_path / f"{cls.MORPHEUS_TRANSPILER_NAME}.jar", + ) + if return_code == 0: + state_path = product_path / "state" + state_path.mkdir() + version_data = {"version": f"v{latest_version}", "date": str(datetime.now())} + version_path = state_path / "version.json" + version_path.write_text(dumps(version_data), "utf-8") + logger.info(f"Successfully installed Databricks Morpheus transpiler v{latest_version}") + if current_version is not None: + rmtree(f"{product_path!s}-saved") + else: + logger.info(f"Failed to install Databricks Morpheus transpiler v{latest_version}") + if current_version is not None: + rmtree(str(product_path)) + renamed = Path(f"{product_path!s}-saved") + renamed.rename(product_path.name) + + +class WorkspaceInstaller: + def __init__( + self, + ws: WorkspaceClient, + prompts: Prompts, + installation: Installation, + install_state: InstallState, + product_info: ProductInfo, + resource_configurator: ResourceConfigurator, + workspace_installation: WorkspaceInstallation, + environ: dict[str, str] | None = None, + ): + self._ws = ws + self._prompts = prompts + self._installation = installation + self._install_state = install_state + self._product_info = product_info + self._resource_configurator = resource_configurator + self._ws_installation = workspace_installation + + if not environ: + environ = dict(os.environ.items()) + + if "DATABRICKS_RUNTIME_VERSION" in environ: + msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime" + raise SystemExit(msg) + + def run( + self, + config: RemorphConfigs | None = None, + ) -> RemorphConfigs: + self.install_rct() + self.install_morpheus() + logger.info(f"Installing Remorph v{self._product_info.version()}") + if not config: + config = self.configure() + if self._is_testing(): + return config + self._ws_installation.install(config) + logger.info("Installation completed successfully! Please refer to the documentation for the next steps.") + return config + + @classmethod + def install_rct(cls): + RCTInstaller.install() + + @classmethod + def install_morpheus(cls): + MorpheusInstaller.install() def configure(self, module: str | None = None) -> RemorphConfigs: selected_module = module or self._prompts.choice("Select a module to configure:", MODULES) diff --git a/src/databricks/labs/remorph/resources/transpilers/mct/config.yml b/src/databricks/labs/remorph/resources/transpilers/mct/config.yml index 51531ced13..c6ea81d747 100644 --- a/src/databricks/labs/remorph/resources/transpilers/mct/config.yml +++ b/src/databricks/labs/remorph/resources/transpilers/mct/config.yml @@ -1,5 +1,6 @@ remorph: version: 1 + name: Remorph Community Transpiler dialects: - athena - bigquery diff --git a/src/databricks/labs/remorph/resources/transpilers/morpheus/config.yml b/src/databricks/labs/remorph/resources/transpilers/morpheus/config.yml index c5da5f44b5..1ed9a77f9d 100644 --- a/src/databricks/labs/remorph/resources/transpilers/morpheus/config.yml +++ b/src/databricks/labs/remorph/resources/transpilers/morpheus/config.yml @@ -1,5 +1,6 @@ remorph: version: 1 + name: Morpheus dialects: - snowflake - tsql diff --git a/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py b/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py index 7dc82e7ad7..ab74855620 100644 --- a/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py +++ b/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py @@ -56,6 +56,7 @@ @dataclass class _LSPRemorphConfigV1: + name: str dialects: list[str] env_vars: dict[str, str] command_line: list[str] @@ -65,17 +66,43 @@ def parse(cls, data: dict[str, Any]) -> _LSPRemorphConfigV1: version = data.get("version", 0) if version != 1: raise ValueError(f"Unsupported transpiler config version: {version}") + name: str | None = data.get("name", None) + if not name: + raise ValueError("Missing 'name' entry") dialects = data.get("dialects", []) if len(dialects) == 0: - raise ValueError("Missing dialects entry") + raise ValueError("Missing 'dialects' entry") env_list = data.get("environment", []) env_vars: dict[str, str] = {} for env_var in env_list: env_vars = env_vars | env_var command_line = data.get("command_line", []) if len(command_line) == 0: - raise ValueError("Missing command_line entry") - return _LSPRemorphConfigV1(dialects, env_vars, command_line) + raise ValueError("Missing 'command_line' entry") + return _LSPRemorphConfigV1(name, dialects, env_vars, command_line) + +@dataclass +class LSPConfig: + remorph: _LSPRemorphConfigV1 + custom: dict[str, Any] + + @property + def name(self): + return self.remorph.name + + @classmethod + def load(cls, path: Path) -> LSPConfig: + yaml_text = path.read_text() + data = yaml.safe_load(yaml_text) + if not isinstance(data, dict): + raise ValueError(f"Invalid transpiler config, expecting a dict, got a {type(data).__name__}") + remorph_data = data.get("remorph", None) + if not isinstance(remorph_data, dict): + raise ValueError(f"Invalid transpiler config, expecting a 'remorph' dict entry, got {remorph_data}") + remorph = _LSPRemorphConfigV1.parse(remorph_data) + custom = data.get("custom", {}) + return LSPConfig(remorph, custom) + def lsp_feature( @@ -304,31 +331,18 @@ class LSPEngine(TranspileEngine): @classmethod def from_config_path(cls, config_path: Path) -> LSPEngine: - config, custom = cls._load_config(config_path) - return LSPEngine(config_path.parent, config, custom) - - @classmethod - def _load_config(cls, config_path: Path) -> tuple[_LSPRemorphConfigV1, dict[str, Any]]: - yaml_text = config_path.read_text() - data = yaml.safe_load(yaml_text) - if not isinstance(data, dict): - raise ValueError(f"Invalid transpiler config, expecting a dict, got a {type(data).__name__}") - remorph = data.get("remorph", None) - if not isinstance(remorph, dict): - raise ValueError(f"Invalid transpiler config, expecting a 'remorph' dict entry, got {remorph}") - config = _LSPRemorphConfigV1.parse(remorph) - return config, data.get("custom", {}) + config = LSPConfig.load(config_path) + return LSPEngine(config_path.parent, config) - def __init__(self, workdir: Path, config: _LSPRemorphConfigV1, custom: dict[str, Any]): + def __init__(self, workdir: Path, config: LSPConfig): self._workdir = workdir self._config = config - self._custom = custom self._client = _LanguageClient() self._init_response: InitializeResult | None = None @property def supported_dialects(self) -> list[str]: - return self._config.dialects + return self._config.remorph.dialects @property def server_has_transpile_capability(self) -> bool: @@ -347,11 +361,11 @@ async def initialize(self, config: TranspileConfig) -> None: os.chdir(cwd) async def _do_initialize(self, config: TranspileConfig) -> None: - executable = self._config.command_line[0] + executable = self._config.remorph.command_line[0] env = deepcopy(os.environ) - for name, value in self._config.env_vars.items(): + for name, value in self._config.remorph.env_vars.items(): env[name] = value - args = self._config.command_line[1:] + args = self._config.remorph.command_line[1:] await self._client.start_io(executable, env=env, *args) input_path = config.input_path root_path = input_path if input_path.is_dir() else input_path.parent @@ -370,7 +384,7 @@ def _initialization_options(self, config: TranspileConfig): "remorph": { "source-dialect": config.source_dialect, }, - "custom": self._custom, + "custom": self._config.custom, } async def shutdown(self): diff --git a/tests/integration/test_install.py b/tests/integration/test_install.py index 79b2921405..e7361ae554 100644 --- a/tests/integration/test_install.py +++ b/tests/integration/test_install.py @@ -1,26 +1,27 @@ import os +import shutil from pathlib import Path -from tempfile import TemporaryFile +from tempfile import TemporaryFile, TemporaryDirectory import pytest -from databricks.labs.remorph.install import WorkspaceInstaller +from databricks.labs.remorph.install import TranspilerInstaller @pytest.mark.skipif(os.environ.get("CI", "false") == "true", reason="Skipping in CI since we have no installed product") def test_gets_installed_version(): - version = WorkspaceInstaller.get_installed_version("remorph", False) + version = TranspilerInstaller.get_installed_version("remorph", False) check_valid_version(version) def test_gets_maven_version(): - version = WorkspaceInstaller.get_maven_version("com.databricks", "databricks-connect") + version = TranspilerInstaller.get_maven_version("com.databricks", "databricks-connect") check_valid_version(version) def test_downloads_from_maven(): path = Path(str(TemporaryFile())) - result = WorkspaceInstaller.download_from_maven( + result = TranspilerInstaller.download_from_maven( "com.databricks", "databricks-connect", "16.0.0", path, extension="pom" ) assert result == 0 @@ -28,6 +29,40 @@ def test_downloads_from_maven(): assert path.stat().st_size == 5_684 +@pytest.fixture() +def mock_transpiler_folder(): + with TemporaryDirectory() as tmpdir: + folder = Path(tmpdir) + folder.mkdir(exist_ok=True) + for transpiler in { "mct", "morpheus" }: + target = folder / transpiler + target.mkdir(exist_ok=True) + target = target / "config.yml" + source = TranspilerInstaller.resources_folder() / transpiler / "config.yml" + shutil.copyfile(str(source), str(target)) + yield folder + + +def test_lists_all_transpiler_names(mock_transpiler_folder): + TranspilerInstaller.transpilers_path = lambda: mock_transpiler_folder + transpiler_names = TranspilerInstaller.all_transpiler_names() + assert transpiler_names == {'Morpheus', 'Remorph Community Transpiler'} + + +def test_lists_all_dialects(mock_transpiler_folder): + TranspilerInstaller.transpilers_path = lambda: mock_transpiler_folder + dialects = TranspilerInstaller.all_dialects() + assert dialects == { "athena", "bigquery", "mysql", "netezza", "oracle", "postgresql", "presto", "redshift", "snowflake", "sqlite", "teradata", "trino", "tsql", "vertica" } + + +def test_lists_dialect_transpilers(mock_transpiler_folder): + TranspilerInstaller.transpilers_path = lambda: mock_transpiler_folder + transpilers = TranspilerInstaller.transpilers_with_dialect("snowflake") + assert transpilers == {'Morpheus', 'Remorph Community Transpiler'} + transpilers = TranspilerInstaller.transpilers_with_dialect("presto") + assert transpilers == {'Remorph Community Transpiler'} + + def check_valid_version(version: str): parts = version.split(".") for _, part in enumerate(parts): diff --git a/tests/resources/lsp_transpiler/lsp_config.yml b/tests/resources/lsp_transpiler/lsp_config.yml index 252951068d..2fc8a2b85b 100644 --- a/tests/resources/lsp_transpiler/lsp_config.yml +++ b/tests/resources/lsp_transpiler/lsp_config.yml @@ -1,5 +1,6 @@ remorph: version: 1 + name: test-transpiler dialects: - snowflake environment: diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py index e16ac613c5..ab46a8ee4c 100644 --- a/tests/unit/test_install.py +++ b/tests/unit/test_install.py @@ -35,11 +35,11 @@ class TestWorkspaceInstaller(WorkspaceInstaller): # TODO the below methods currently raise a 404 because the artifacts don't exist yet # TODO remove this once they are available !!! @classmethod - def install_morpheus(cls): + def install_rct(cls): pass @classmethod - def install_community_transpiler(cls): + def install_morpheus(cls): pass def installer(*args, **kwargs) -> WorkspaceInstaller: diff --git a/tests/unit/transpiler/test_lsp_config.py b/tests/unit/transpiler/test_lsp_config.py index ea14d913fb..6263a18e5d 100644 --- a/tests/unit/transpiler/test_lsp_config.py +++ b/tests/unit/transpiler/test_lsp_config.py @@ -19,6 +19,7 @@ def test_valid_config(): VALID_CONFIG: dict[str, Any] = yaml.safe_load( """remorph: version: 1 + name: test-transpiler dialects: - snowflake - oracle @@ -38,10 +39,12 @@ def test_valid_config(): [ ("version", None, "Unsupported transpiler config version"), ("version", 0, "Unsupported transpiler config version"), - ("dialects", None, "Missing dialects entry"), - ("dialects", [], "Missing dialects entry"), - ("command_line", None, "Missing command_line entry"), - ("command_line", [], "Missing command_line entry"), + ("name", None, "Missing 'name' entry"), + ("name", "", "Missing 'name' entry"), + ("dialects", None, "Missing 'dialects' entry"), + ("dialects", [], "Missing 'dialects' entry"), + ("command_line", None, "Missing 'command_line' entry"), + ("command_line", [], "Missing 'command_line' entry"), ], ) def test_invalid_config_raises_error(key, value, message): diff --git a/tests/unit/transpiler/test_snow.py b/tests/unit/transpiler/test_snow.py index a03c7f93a1..4c21185c75 100644 --- a/tests/unit/transpiler/test_snow.py +++ b/tests/unit/transpiler/test_snow.py @@ -1,5 +1,5 @@ """ - Test Cases to validate source Snowflake dialect +Test Cases to validate source Snowflake dialect """ From c24cab154646d572d5ef7ef04ac5bb92cecf7bc4 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 3 Feb 2025 20:30:59 +0100 Subject: [PATCH 08/10] formatting --- src/databricks/labs/remorph/install.py | 35 +++++++++++-------- .../labs/remorph/transpiler/lsp/lsp_engine.py | 2 +- tests/integration/test_install.py | 19 ++++++++-- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/src/databricks/labs/remorph/install.py b/src/databricks/labs/remorph/install.py index 8dd45ea647..f6da87769d 100644 --- a/src/databricks/labs/remorph/install.py +++ b/src/databricks/labs/remorph/install.py @@ -1,6 +1,6 @@ import abc -import configparser import dataclasses +from collections.abc import Iterable from json import loads, dumps import logging import os @@ -14,7 +14,6 @@ from datetime import datetime from pathlib import Path -import yaml from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug from databricks.labs.blueprint.installation import Installation from databricks.labs.blueprint.installation import SerdeError @@ -134,27 +133,19 @@ def install_from_pypi(cls, product_name: str, pypi_name: str): renamed.rename(product_path.name) @classmethod - def all_transpiler_configs(cls) -> dict[str, LSPConfig]: - all_files = os.listdir(cls.transpilers_path()) - all_configs = [cls._transpiler_config(cls.transpilers_path() / file) for file in all_files] - return { config.name: config for config in filter(lambda _: _ is not None, all_configs)} + def all_transpiler_configs(cls) -> dict[str, LSPConfig]: + all_configs = cls._all_transpiler_configs() + return {config.name: config for config in all_configs} @classmethod def all_transpiler_names(cls) -> set[str]: all_configs = cls.all_transpiler_configs() return set(all_configs.keys()) - @classmethod - def _transpiler_config(cls, path: Path) -> LSPConfig | None: - try: - return LSPConfig.load(path / "config.yml") - except: - return None - @classmethod def all_dialects(cls): all_dialects: set[str] = set() - for config in cls.all_transpiler_configs().values(): + for config in cls._all_transpiler_configs(): all_dialects = all_dialects.union(config.remorph.dialects) return all_dialects @@ -163,6 +154,20 @@ def transpilers_with_dialect(cls, dialect: str) -> set[str]: configs = filter(lambda cfg: dialect in cfg.remorph.dialects, cls.all_transpiler_configs().values()) return set(config.name for config in configs) + @classmethod + def _all_transpiler_configs(cls) -> Iterable[LSPConfig]: + all_files = os.listdir(cls.transpilers_path()) + for file in all_files: + config = cls._transpiler_config(cls.transpilers_path() / file) + if config: + yield config + + @classmethod + def _transpiler_config(cls, path: Path) -> LSPConfig | None: + try: + return LSPConfig.load(path / "config.yml") + except ValueError: + return None class RCTInstaller(TranspilerInstaller): @@ -186,7 +191,7 @@ def install(cls): logger.info(f"Databricks Morpheus transpiler v{latest_version} already installed") return logger.info(f"Installing Databricks Morpheus transpiler v{latest_version}") - product_path = cls.TRANSPILERS_PATH / cls.MORPHEUS_TRANSPILER_NAME + product_path = cls.transpilers_path() / cls.MORPHEUS_TRANSPILER_NAME if current_version is not None: product_path.rename(f"{cls.MORPHEUS_TRANSPILER_NAME}-saved") install_path = product_path / "lib" diff --git a/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py b/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py index ab74855620..f8f3c66b14 100644 --- a/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py +++ b/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py @@ -81,6 +81,7 @@ def parse(cls, data: dict[str, Any]) -> _LSPRemorphConfigV1: raise ValueError("Missing 'command_line' entry") return _LSPRemorphConfigV1(name, dialects, env_vars, command_line) + @dataclass class LSPConfig: remorph: _LSPRemorphConfigV1 @@ -104,7 +105,6 @@ def load(cls, path: Path) -> LSPConfig: return LSPConfig(remorph, custom) - def lsp_feature( name: str, options: Any | None = None, diff --git a/tests/integration/test_install.py b/tests/integration/test_install.py index e7361ae554..16feb511ae 100644 --- a/tests/integration/test_install.py +++ b/tests/integration/test_install.py @@ -34,7 +34,7 @@ def mock_transpiler_folder(): with TemporaryDirectory() as tmpdir: folder = Path(tmpdir) folder.mkdir(exist_ok=True) - for transpiler in { "mct", "morpheus" }: + for transpiler in ("mct", "morpheus"): target = folder / transpiler target.mkdir(exist_ok=True) target = target / "config.yml" @@ -52,7 +52,22 @@ def test_lists_all_transpiler_names(mock_transpiler_folder): def test_lists_all_dialects(mock_transpiler_folder): TranspilerInstaller.transpilers_path = lambda: mock_transpiler_folder dialects = TranspilerInstaller.all_dialects() - assert dialects == { "athena", "bigquery", "mysql", "netezza", "oracle", "postgresql", "presto", "redshift", "snowflake", "sqlite", "teradata", "trino", "tsql", "vertica" } + assert dialects == { + "athena", + "bigquery", + "mysql", + "netezza", + "oracle", + "postgresql", + "presto", + "redshift", + "snowflake", + "sqlite", + "teradata", + "trino", + "tsql", + "vertica", + } def test_lists_dialect_transpilers(mock_transpiler_folder): From 61f655d02f8770e718e9a7dacbc18fcb87231ac0 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Tue, 4 Feb 2025 13:44:11 +0100 Subject: [PATCH 09/10] evolve transpile installer to present lsp dialects and valid transpilers --- src/databricks/labs/remorph/install.py | 29 ++++- .../labs/remorph/transpiler/lsp/lsp_engine.py | 3 +- tests/unit/test_install.py | 121 ++++++++++-------- 3 files changed, 93 insertions(+), 60 deletions(-) diff --git a/src/databricks/labs/remorph/install.py b/src/databricks/labs/remorph/install.py index f6da87769d..4ccfafa86c 100644 --- a/src/databricks/labs/remorph/install.py +++ b/src/databricks/labs/remorph/install.py @@ -36,7 +36,6 @@ from databricks.labs.remorph.deployment.installation import WorkspaceInstallation from databricks.labs.remorph.reconcile.constants import ReconReportType, ReconSourceType from databricks.labs.remorph.transpiler.lsp.lsp_engine import LSPConfig -from databricks.labs.remorph.transpiler.sqlglot.dialect_utils import SQLGLOT_DIALECTS logger = logging.getLogger(__name__) @@ -154,6 +153,11 @@ def transpilers_with_dialect(cls, dialect: str) -> set[str]: configs = filter(lambda cfg: dialect in cfg.remorph.dialects, cls.all_transpiler_configs().values()) return set(config.name for config in configs) + @classmethod + def transpiler_config_path(cls, transpiler_name): + config = cls.all_transpiler_configs()[transpiler_name] + return f"{config.path!s}" + @classmethod def _all_transpiler_configs(cls) -> Iterable[LSPConfig]: all_files = os.listdir(cls.transpilers_path()) @@ -170,6 +174,7 @@ def _transpiler_config(cls, path: Path) -> LSPConfig | None: return None + class RCTInstaller(TranspilerInstaller): RCT_TRANSPILER_NAME = "remorph-community-transpiler" RCT_TRANSPILER_PYPI_NAME = f"databricks-labs-{RCT_TRANSPILER_NAME}" @@ -330,10 +335,26 @@ def _configure_new_transpile_installation(self) -> TranspileConfig: self._save_config(config) return config + def _all_installed_dialects(self): + return sorted(TranspilerInstaller.all_dialects()) + + def _transpilers_with_dialect(self, dialect: str): + return sorted(TranspilerInstaller.transpilers_with_dialect(dialect)) + + def _transpiler_config_path(self, transpiler: str): + return TranspilerInstaller.transpiler_config_path(transpiler) + def _prompt_for_new_transpile_installation(self) -> TranspileConfig: logger.info("Please answer a few questions to configure remorph `transpile`") - transpiler = self._prompts.question("Enter path to the transpiler configuration file", default="sqlglot") - source_dialect = self._prompts.choice("Select the source dialect:", list(SQLGLOT_DIALECTS.keys())) + all_dialects = self._all_installed_dialects() + source_dialect = self._prompts.choice("Select the source dialect:", all_dialects) + transpilers = self._transpilers_with_dialect(source_dialect) + if len(transpilers) > 1: + transpiler_name = self._prompts.choice("Select the transpiler:", transpilers) + else: + transpiler_name = next(t for t in transpilers) + logger.info(f"Remorph will use the {transpiler_name} transpiler") + transpiler_config_path = self._transpiler_config_path(transpiler_name) input_source = self._prompts.question("Enter input SQL path (directory/file)") output_folder = self._prompts.question("Enter output directory", default="transpiled") error_file_path = self._prompts.question("Enter error file path", default="errors.log") @@ -342,7 +363,7 @@ def _prompt_for_new_transpile_installation(self) -> TranspileConfig: ) return TranspileConfig( - transpiler_config_path=transpiler, + transpiler_config_path=transpiler_config_path, source_dialect=source_dialect, skip_validation=(not run_validation), mode="current", # mode will not have a prompt as this is a hidden flag diff --git a/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py b/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py index f8f3c66b14..d1d504ff11 100644 --- a/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py +++ b/src/databricks/labs/remorph/transpiler/lsp/lsp_engine.py @@ -84,6 +84,7 @@ def parse(cls, data: dict[str, Any]) -> _LSPRemorphConfigV1: @dataclass class LSPConfig: + path: Path remorph: _LSPRemorphConfigV1 custom: dict[str, Any] @@ -102,7 +103,7 @@ def load(cls, path: Path) -> LSPConfig: raise ValueError(f"Invalid transpiler config, expecting a 'remorph' dict entry, got {remorph_data}") remorph = _LSPRemorphConfigV1.parse(remorph_data) custom = data.get("custom", {}) - return LSPConfig(remorph, custom) + return LSPConfig(path, remorph, custom) def lsp_feature( diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py index ab46a8ee4c..4c61b27c89 100644 --- a/tests/unit/test_install.py +++ b/tests/unit/test_install.py @@ -13,7 +13,6 @@ from databricks.labs.remorph.config import TranspileConfig from databricks.labs.blueprint.wheels import ProductInfo, WheelsV2 from databricks.labs.remorph.reconcile.constants import ReconSourceType, ReconReportType -from databricks.labs.remorph.transpiler.sqlglot.dialect_utils import SQLGLOT_DIALECTS RECONCILE_DATA_SOURCES = sorted([source_type.value for source_type in ReconSourceType]) RECONCILE_REPORT_TYPES = sorted([report_type.value for report_type in ReconReportType]) @@ -27,6 +26,9 @@ def ws(): ) return w +ALL_INSTALLED_DIALECTS = sorted([ "tsql", "snowflake" ]) +TRANSPILERS_FOR_SNOWFLAKE = sorted([ "Remorph Community Transpiler", "Morpheus" ]) +PATH_TO_TRANSPILER_COMFIG = "/some/path/to/config.yml" @pytest.fixture() def ws_installer(): @@ -42,6 +44,15 @@ def install_rct(cls): def install_morpheus(cls): pass + def _all_installed_dialects(self): + return ALL_INSTALLED_DIALECTS + + def _transpilers_with_dialect(self, dialect): + return TRANSPILERS_FOR_SNOWFLAKE + + def _transpiler_config_path(self, transpiler): + return PATH_TO_TRANSPILER_COMFIG + def installer(*args, **kwargs) -> WorkspaceInstaller: return TestWorkspaceInstaller(*args, **kwargs) @@ -135,8 +146,8 @@ def test_workspace_installer_run_install_called_with_generated_config(ws_install { r"Select a module to configure:": MODULES.index("transpile"), r"Do you want to override the existing installation?": "no", - r"Enter path to the transpiler configuration file": "sqlglot", - r"Select the source dialect": sorted(SQLGLOT_DIALECTS.keys()).index("snowflake"), + r"Select the source dialect": ALL_INSTALLED_DIALECTS.index("snowflake"), + r"Select the transpiler": TRANSPILERS_FOR_SNOWFLAKE.index("Morpheus"), r"Enter input SQL path.*": "/tmp/queries/snow", r"Enter output directory.*": "/tmp/queries/databricks", r"Enter error file path.*": "/tmp/queries/errors.log", @@ -167,26 +178,26 @@ def test_workspace_installer_run_install_called_with_generated_config(ws_install "config.yml", { "catalog_name": "remorph", - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, + "source_dialect": "snowflake", "input_source": "/tmp/queries/snow", "mode": "current", "output_folder": "/tmp/queries/databricks", "error_file_path": "/tmp/queries/errors.log", "schema_name": "transpiler", "skip_validation": True, - "source_dialect": "snowflake", - "version": 2, + "version": 2, }, ) -def test_configure_transpile_no_existing_installation(ws): +def test_configure_transpile_no_existing_installation(ws_installer, ws): prompts = MockPrompts( { r"Select a module to configure:": MODULES.index("transpile"), r"Do you want to override the existing installation?": "no", - r"Enter path to the transpiler configuration file": "sqlglot", - r"Select the source": sorted(SQLGLOT_DIALECTS.keys()).index("snowflake"), + r"Select the source dialect": ALL_INSTALLED_DIALECTS.index("snowflake"), + r"Select the transpiler": TRANSPILERS_FOR_SNOWFLAKE.index("Morpheus"), r"Enter input SQL path.*": "/tmp/queries/snow", r"Enter output directory.*": "/tmp/queries/databricks", r"Enter error file path.*": "/tmp/queries/errors.log", @@ -202,7 +213,7 @@ def test_configure_transpile_no_existing_installation(ws): resource_configurator=create_autospec(ResourceConfigurator), workspace_installation=create_autospec(WorkspaceInstallation), ) - workspace_installer = WorkspaceInstaller( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -213,7 +224,7 @@ def test_configure_transpile_no_existing_installation(ws): ) config = workspace_installer.configure() expected_morph_config = TranspileConfig( - transpiler_config_path="sqlglot", + transpiler_config_path=PATH_TO_TRANSPILER_COMFIG, source_dialect="snowflake", input_source="/tmp/queries/snow", output_folder="/tmp/queries/databricks", @@ -229,7 +240,7 @@ def test_configure_transpile_no_existing_installation(ws): "config.yml", { "catalog_name": "remorph", - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, "input_source": "/tmp/queries/snow", "mode": "current", "output_folder": "/tmp/queries/databricks", @@ -257,7 +268,7 @@ def test_configure_transpile_installation_no_override(ws): installation=MockInstallation( { "config.yml": { - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, "source_dialect": "snowflake", "catalog_name": "transpiler_test", "input_source": "sf_queries", @@ -285,13 +296,13 @@ def test_configure_transpile_installation_no_override(ws): workspace_installer.configure() -def test_configure_transpile_installation_config_error_continue_install(ws): +def test_configure_transpile_installation_config_error_continue_install(ws_installer, ws): prompts = MockPrompts( { r"Select a module to configure:": MODULES.index("transpile"), r"Do you want to override the existing installation?": "no", - r"Enter path to the transpiler configuration file": "sqlglot", - r"Select the source": sorted(SQLGLOT_DIALECTS.keys()).index("snowflake"), + r"Select the source dialect": ALL_INSTALLED_DIALECTS.index("snowflake"), + r"Select the transpiler": TRANSPILERS_FOR_SNOWFLAKE.index("Morpheus"), r"Enter input SQL path.*": "/tmp/queries/snow", r"Enter output directory.*": "/tmp/queries/databricks", r"Enter error file path.*": "/tmp/queries/errors.log", @@ -302,7 +313,7 @@ def test_configure_transpile_installation_config_error_continue_install(ws): installation = MockInstallation( { "config.yml": { - "invalid_transpiler": "sqlglot", # Invalid key + "invalid_transpiler": "some value", # Invalid key "source_dialect": "snowflake", "catalog_name": "transpiler_test", "input_source": "sf_queries", @@ -323,7 +334,7 @@ def test_configure_transpile_installation_config_error_continue_install(ws): resource_configurator=create_autospec(ResourceConfigurator), workspace_installation=create_autospec(WorkspaceInstallation), ) - workspace_installer = WorkspaceInstaller( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -334,7 +345,7 @@ def test_configure_transpile_installation_config_error_continue_install(ws): ) config = workspace_installer.configure() expected_morph_config = TranspileConfig( - transpiler_config_path="sqlglot", + transpiler_config_path=PATH_TO_TRANSPILER_COMFIG, source_dialect="snowflake", input_source="/tmp/queries/snow", output_folder="/tmp/queries/databricks", @@ -349,7 +360,7 @@ def test_configure_transpile_installation_config_error_continue_install(ws): installation.assert_file_written( "config.yml", { - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, "catalog_name": "remorph", "input_source": "/tmp/queries/snow", "mode": "current", @@ -364,12 +375,12 @@ def test_configure_transpile_installation_config_error_continue_install(ws): @patch("webbrowser.open") -def test_configure_transpile_installation_with_no_validation(ws): +def test_configure_transpile_installation_with_no_validation(ws, ws_installer): prompts = MockPrompts( { r"Select a module to configure:": MODULES.index("transpile"), - r"Enter path to the transpiler configuration file": "sqlglot", - r"Select the source dialect": sorted(SQLGLOT_DIALECTS.keys()).index("snowflake"), + r"Select the source dialect": ALL_INSTALLED_DIALECTS.index("snowflake"), + r"Select the transpiler": TRANSPILERS_FOR_SNOWFLAKE.index("Morpheus"), r"Enter input SQL path.*": "/tmp/queries/snow", r"Enter output directory.*": "/tmp/queries/databricks", r"Enter error file path.*": "/tmp/queries/errors.log", @@ -386,7 +397,7 @@ def test_configure_transpile_installation_with_no_validation(ws): workspace_installation=create_autospec(WorkspaceInstallation), ) - workspace_installer = WorkspaceInstaller( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -397,7 +408,7 @@ def test_configure_transpile_installation_with_no_validation(ws): ) config = workspace_installer.configure() expected_morph_config = TranspileConfig( - transpiler_config_path="sqlglot", + transpiler_config_path=PATH_TO_TRANSPILER_COMFIG, source_dialect="snowflake", input_source="/tmp/queries/snow", output_folder="/tmp/queries/databricks", @@ -412,7 +423,7 @@ def test_configure_transpile_installation_with_no_validation(ws): installation.assert_file_written( "config.yml", { - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, "catalog_name": "remorph", "input_source": "/tmp/queries/snow", "mode": "current", @@ -426,12 +437,12 @@ def test_configure_transpile_installation_with_no_validation(ws): ) -def test_configure_transpile_installation_with_validation_and_cluster_id_in_config(ws): +def test_configure_transpile_installation_with_validation_and_cluster_id_in_config(ws_installer, ws): prompts = MockPrompts( { r"Select a module to configure:": MODULES.index("transpile"), - r"Enter path to the transpiler configuration file": "sqlglot", - r"Select the source": sorted(SQLGLOT_DIALECTS.keys()).index("snowflake"), + r"Select the source dialect": ALL_INSTALLED_DIALECTS.index("snowflake"), + r"Select the transpiler": TRANSPILERS_FOR_SNOWFLAKE.index("Morpheus"), r"Enter input SQL path.*": "/tmp/queries/snow", r"Enter output directory.*": "/tmp/queries/databricks", r"Enter error file path.*": "/tmp/queries/errors.log", @@ -455,7 +466,7 @@ def test_configure_transpile_installation_with_validation_and_cluster_id_in_conf workspace_installation=create_autospec(WorkspaceInstallation), ) - workspace_installer = WorkspaceInstaller( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -467,7 +478,7 @@ def test_configure_transpile_installation_with_validation_and_cluster_id_in_conf config = workspace_installer.configure() expected_config = RemorphConfigs( transpile=TranspileConfig( - transpiler_config_path="sqlglot", + transpiler_config_path=PATH_TO_TRANSPILER_COMFIG, source_dialect="snowflake", input_source="/tmp/queries/snow", output_folder="/tmp/queries/databricks", @@ -482,7 +493,7 @@ def test_configure_transpile_installation_with_validation_and_cluster_id_in_conf installation.assert_file_written( "config.yml", { - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, "catalog_name": "remorph_test", "input_source": "/tmp/queries/snow", "mode": "current", @@ -496,12 +507,12 @@ def test_configure_transpile_installation_with_validation_and_cluster_id_in_conf ) -def test_configure_transpile_installation_with_validation_and_cluster_id_from_prompt(ws): +def test_configure_transpile_installation_with_validation_and_cluster_id_from_prompt(ws_installer, ws): prompts = MockPrompts( { r"Select a module to configure:": MODULES.index("transpile"), - r"Enter path to the transpiler configuration file": "sqlglot", - r"Select the source": sorted(SQLGLOT_DIALECTS.keys()).index("snowflake"), + r"Select the source dialect": ALL_INSTALLED_DIALECTS.index("snowflake"), + r"Select the transpiler": TRANSPILERS_FOR_SNOWFLAKE.index("Morpheus"), r"Enter input SQL path.*": "/tmp/queries/snow", r"Enter output directory.*": "/tmp/queries/databricks", r"Enter error file path.*": "/tmp/queries/errors.log", @@ -526,7 +537,7 @@ def test_configure_transpile_installation_with_validation_and_cluster_id_from_pr workspace_installation=create_autospec(WorkspaceInstallation), ) - workspace_installer = WorkspaceInstaller( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -538,7 +549,7 @@ def test_configure_transpile_installation_with_validation_and_cluster_id_from_pr config = workspace_installer.configure() expected_config = RemorphConfigs( transpile=TranspileConfig( - transpiler_config_path="sqlglot", + transpiler_config_path=PATH_TO_TRANSPILER_COMFIG, source_dialect="snowflake", input_source="/tmp/queries/snow", output_folder="/tmp/queries/databricks", @@ -553,7 +564,7 @@ def test_configure_transpile_installation_with_validation_and_cluster_id_from_pr installation.assert_file_written( "config.yml", { - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, "catalog_name": "remorph_test", "input_source": "/tmp/queries/snow", "mode": "current", @@ -567,12 +578,12 @@ def test_configure_transpile_installation_with_validation_and_cluster_id_from_pr ) -def test_configure_transpile_installation_with_validation_and_warehouse_id_from_prompt(ws): +def test_configure_transpile_installation_with_validation_and_warehouse_id_from_prompt(ws_installer, ws): prompts = MockPrompts( { r"Select a module to configure:": MODULES.index("transpile"), - r"Enter path to the transpiler configuration file": "sqlglot", - r"Select the source": sorted(SQLGLOT_DIALECTS.keys()).index("snowflake"), + r"Select the source dialect": ALL_INSTALLED_DIALECTS.index("snowflake"), + r"Select the transpiler": TRANSPILERS_FOR_SNOWFLAKE.index("Morpheus"), r"Enter input SQL path.*": "/tmp/queries/snow", r"Enter output directory.*": "/tmp/queries/databricks", r"Enter error file path.*": "/tmp/queries/errors.log", @@ -595,7 +606,7 @@ def test_configure_transpile_installation_with_validation_and_warehouse_id_from_ workspace_installation=create_autospec(WorkspaceInstallation), ) - workspace_installer = WorkspaceInstaller( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -607,7 +618,7 @@ def test_configure_transpile_installation_with_validation_and_warehouse_id_from_ config = workspace_installer.configure() expected_config = RemorphConfigs( transpile=TranspileConfig( - transpiler_config_path="sqlglot", + transpiler_config_path=PATH_TO_TRANSPILER_COMFIG, source_dialect="snowflake", input_source="/tmp/queries/snow", output_folder="/tmp/queries/databricks", @@ -622,7 +633,7 @@ def test_configure_transpile_installation_with_validation_and_warehouse_id_from_ installation.assert_file_written( "config.yml", { - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, "catalog_name": "remorph_test", "input_source": "/tmp/queries/snow", "mode": "current", @@ -859,13 +870,13 @@ def test_configure_reconcile_no_existing_installation(ws): ) -def test_configure_all_override_installation(ws): +def test_configure_all_override_installation(ws_installer, ws): prompts = MockPrompts( { r"Select a module to configure:": MODULES.index("all"), r"Do you want to override the existing installation?": "yes", - r"Enter path to the transpiler configuration file": "sqlglot", - r"Select the source": sorted(SQLGLOT_DIALECTS.keys()).index("snowflake"), + r"Select the source dialect": ALL_INSTALLED_DIALECTS.index("snowflake"), + r"Select the transpiler": TRANSPILERS_FOR_SNOWFLAKE.index("Morpheus"), r"Enter input SQL path.*": "/tmp/queries/snow", r"Enter output directory.*": "/tmp/queries/databricks", r"Enter error file path.*": "/tmp/queries/errors.log", @@ -883,7 +894,7 @@ def test_configure_all_override_installation(ws): installation = MockInstallation( { "config.yml": { - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, "source_dialect": "snowflake", "catalog_name": "transpiler_test", "input_source": "sf_queries", @@ -928,7 +939,7 @@ def test_configure_all_override_installation(ws): workspace_installation=create_autospec(WorkspaceInstallation), ) - workspace_installer = WorkspaceInstaller( + workspace_installer = ws_installer( ctx.workspace_client, ctx.prompts, ctx.installation, @@ -939,7 +950,7 @@ def test_configure_all_override_installation(ws): ) config = workspace_installer.configure() expected_transpile_config = TranspileConfig( - transpiler_config_path="sqlglot", + transpiler_config_path=PATH_TO_TRANSPILER_COMFIG, source_dialect="snowflake", input_source="/tmp/queries/snow", output_folder="/tmp/queries/databricks", @@ -971,7 +982,7 @@ def test_configure_all_override_installation(ws): installation.assert_file_written( "config.yml", { - "transpiler_config_path": "sqlglot", + "transpiler_config_path": PATH_TO_TRANSPILER_COMFIG, "catalog_name": "remorph", "input_source": "/tmp/queries/snow", "mode": "current", @@ -1017,7 +1028,7 @@ def test_runs_upgrades_on_more_recent_version(ws_installer, ws): } }, 'config.yml': { - "transpiler-config-path": "sqlglot", + "transpiler-config-path": PATH_TO_TRANSPILER_COMFIG, "source_dialect": "snowflake", "catalog_name": "upgrades", "input_source": "queries", @@ -1037,8 +1048,8 @@ def test_runs_upgrades_on_more_recent_version(ws_installer, ws): { r"Select a module to configure:": MODULES.index("transpile"), r"Do you want to override the existing installation?": "yes", - r"Enter path to the transpiler configuration file": "sqlglot", - r"Select the source": sorted(SQLGLOT_DIALECTS.keys()).index("snowflake"), + r"Select the source dialect": ALL_INSTALLED_DIALECTS.index("snowflake"), + r"Select the transpiler": TRANSPILERS_FOR_SNOWFLAKE.index("Morpheus"), r"Enter input SQL path.*": "/tmp/queries/snow", r"Enter output directory.*": "/tmp/queries/databricks", r"Enter error file.*": "/tmp/queries/errors.log", @@ -1073,7 +1084,7 @@ def test_runs_upgrades_on_more_recent_version(ws_installer, ws): mock_workspace_installation.install.assert_called_once_with( RemorphConfigs( transpile=TranspileConfig( - transpiler_config_path="sqlglot", + transpiler_config_path=PATH_TO_TRANSPILER_COMFIG, source_dialect="snowflake", input_source="/tmp/queries/snow", output_folder="/tmp/queries/databricks", From 1c08c6f34799b779d52d7fbaddaf20cf3b848905 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Fri, 7 Feb 2025 15:22:51 +0100 Subject: [PATCH 10/10] fix typo --- tests/integration/test_install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_install.py b/tests/integration/test_install.py index 16feb511ae..5287ce0636 100644 --- a/tests/integration/test_install.py +++ b/tests/integration/test_install.py @@ -34,7 +34,7 @@ def mock_transpiler_folder(): with TemporaryDirectory() as tmpdir: folder = Path(tmpdir) folder.mkdir(exist_ok=True) - for transpiler in ("mct", "morpheus"): + for transpiler in ("rct", "morpheus"): target = folder / transpiler target.mkdir(exist_ok=True) target = target / "config.yml"