Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

install remorph-community-transpiler and morpheus #1432

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions src/databricks/labs/remorph/install.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
import dataclasses
from json import loads, dumps
import logging
import os
from shutil import rmtree, move
from subprocess import run, CalledProcessError
import sys
from typing import Any
from urllib import request
from urllib.error import URLError
import webbrowser
from datetime import datetime
from pathlib import Path

from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug
from databricks.labs.blueprint.installation import Installation
Expand Down Expand Up @@ -30,6 +39,12 @@

TRANSPILER_WAREHOUSE_PREFIX = "Remorph Transpiler Validation"
MODULES = sorted({"transpile", "reconcile", "all"})
LABS_PATH = Path.home() / ".databricks" / "labs"
TRANSPILERS_PATH = LABS_PATH / "remorph-transpilers"
OSS_TRANSPILER_NAME = "remorph-community-transpiler"
OSS_TRANSPILER_PYPI_NAME = f"databricks-labs-{OSS_TRANSPILER_NAME}"
MORPHEUS_TRANSPILER_NAME = "morpheus"
MORPHEUS_TRANSPILER_GROUP_NAME = "com.databricks.labs"


class WorkspaceInstaller:
Expand Down Expand Up @@ -63,6 +78,8 @@ def run(
self,
config: RemorphConfigs | None = None,
) -> RemorphConfigs:
self.install_community_transpiler()
self.install_morpheus()
logger.info(f"Installing Remorph v{self._product_info.version()}")
if not config:
config = self.configure()
Expand All @@ -72,6 +89,115 @@ def run(
logger.info("Installation completed successfully! Please refer to the documentation for the next steps.")
return config

@classmethod
def install_morpheus(cls):
current_version = cls.get_installed_version(MORPHEUS_TRANSPILER_NAME)
latest_version = cls.get_maven_version(MORPHEUS_TRANSPILER_GROUP_NAME, MORPHEUS_TRANSPILER_NAME)
if current_version == latest_version:
logger.info(f"Databricks Morpheus transpiler v{latest_version} already installed")
return
logger.info(f"Installing Databricks Morpheus transpiler v{latest_version}")
product_path = TRANSPILERS_PATH / MORPHEUS_TRANSPILER_NAME
if current_version is not None:
product_path.rename(f"{MORPHEUS_TRANSPILER_NAME}-saved")
install_path = product_path / "lib"
install_path.mkdir()
return_code = cls.download_from_maven(
MORPHEUS_TRANSPILER_GROUP_NAME,
MORPHEUS_TRANSPILER_NAME,
latest_version,
install_path / f"{MORPHEUS_TRANSPILER_NAME}.jar",
)
if return_code == 0:
state_path = product_path / "state"
state_path.mkdir()
version_data = {"version": f"v{latest_version}", "date": str(datetime.now())}
version_path = state_path / "version.json"
version_path.write_text(dumps(version_data), "utf-8")
logger.info(f"Successfully installed Databricks Morpheus transpiler v{latest_version}")
if current_version is not None:
rmtree(f"{product_path!s}-saved")
else:
logger.info(f"Failed to install Databricks Morpheus transpiler v{latest_version}")
if current_version is not None:
rmtree(str(product_path))
renamed = Path(f"{product_path!s}-saved")
renamed.rename(product_path.name)

@classmethod
def download_from_maven(cls, group_id: str, artifact_id: str, version: str, target: Path, extension="jar"):
group_id = group_id.replace(".", "/")
url = f"https://search.maven.org/remotecontent?filepath={group_id}/{artifact_id}/{version}/{artifact_id}-{version}.{extension}"
try:
path, message = request.urlretrieve(url)
if path:
move(path, str(target))
return 0
logger.error(message)
return -1
except URLError as e:
logger.error("While downloading from maven", exc_info=e)
return -1

@classmethod
def install_community_transpiler(cls):
current_version = cls.get_installed_version(OSS_TRANSPILER_NAME)
latest_version = cls.get_pypi_version(OSS_TRANSPILER_PYPI_NAME)
if current_version == latest_version:
logger.info(f"Remorph community transpiler v{latest_version} already installed")
return
logger.info(f"Installing Remorph community transpiler v{latest_version}")
product_path = TRANSPILERS_PATH / OSS_TRANSPILER_NAME
if current_version is not None:
product_path.rename(f"{OSS_TRANSPILER_NAME}-saved")
install_path = product_path / "lib"
install_path.mkdir()
args = ["pip", "install", OSS_TRANSPILER_PYPI_NAME, "-t", str(install_path)]
state_path = product_path / "state"
state_path.mkdir()
version_data = {"version": f"v{latest_version}", "date": str(datetime.now())}
version_path = state_path / "version.json"
try:
run(args, sys.stdin, sys.stdout, sys.stderr, check=True)
version_path.write_text(dumps(version_data), "utf-8")
logger.info(f"Successfully installed Remorph community transpiler v{latest_version}")
if current_version is not None:
rmtree(f"{product_path!s}-saved")
except CalledProcessError as e:
logger.info(f"Failed to install Remorph community transpiler v{latest_version}", exc_info=e)
if current_version is not None:
rmtree(str(product_path))
renamed = Path(f"{product_path!s}-saved")
renamed.rename(product_path.name)

@classmethod
def get_maven_version(cls, group_id: str, artifact_id: str) -> str | None:
url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&rows=1&wt=json"
with request.urlopen(url) as server:
text = server.read()
data: dict[str, Any] = loads(text)
return data.get("response", {}).get('docs', [{}])[0].get("v", None)

@classmethod
def get_pypi_version(cls, product_name: str) -> str | None:
with request.urlopen(f"https://pypi.org/pypi/{product_name}/json") as server:
text = server.read()
data: dict[str, Any] = loads(text)
return data.get("info", {}).get('version', None)

@classmethod
def get_installed_version(cls, product_name: str, is_transpiler=True) -> str | None:
product_path = (TRANSPILERS_PATH if is_transpiler else LABS_PATH) / product_name
current_version_path = product_path / "state" / "version.json"
if not current_version_path.exists():
return None
text = current_version_path.read_text("utf-8")
data: dict[str, Any] = loads(text)
version: str | None = data.get("version", None)
if not version or not version.startswith("v"):
return None
return version[1:]

def configure(self, module: str | None = None) -> RemorphConfigs:
selected_module = module or self._prompts.choice("Select a module to configure:", MODULES)
match selected_module:
Expand Down
37 changes: 37 additions & 0 deletions tests/integration/test_install.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os
from pathlib import Path
from tempfile import TemporaryFile

import pytest

from databricks.labs.remorph.install import WorkspaceInstaller


@pytest.mark.skipif(os.environ.get("CI", "false") == "true", reason="Skipping in CI since we have no installed product")
def test_gets_installed_version():
version = WorkspaceInstaller.get_installed_version("remorph", False)
check_valid_version(version)


def test_gets_maven_version():
version = WorkspaceInstaller.get_maven_version("com.databricks", "databricks-connect")
check_valid_version(version)


def test_downloads_from_maven():
path = Path(str(TemporaryFile()))
result = WorkspaceInstaller.download_from_maven(
"com.databricks", "databricks-connect", "16.0.0", path, extension="pom"
)
assert result == 0
assert path.exists()
assert path.stat().st_size == 5_684


def check_valid_version(version: str):
parts = version.split(".")
for _, part in enumerate(parts):
try:
_ = int(part)
except ValueError:
assert False, f"{version} does not look like a valid semver"
36 changes: 28 additions & 8 deletions tests/unit/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,26 @@ def ws():
return w


@pytest.fixture()
def ws_installer():

class TestWorkspaceInstaller(WorkspaceInstaller):
# TODO the below methods currently raise a 404 because the artifacts don't exist yet
# TODO remove this once they are available !!!
@classmethod
def install_morpheus(cls):
pass

@classmethod
def install_community_transpiler(cls):
pass

def installer(*args, **kwargs) -> WorkspaceInstaller:
return TestWorkspaceInstaller(*args, **kwargs)

yield installer


def test_workspace_installer_run_raise_error_in_dbr(ws):
ctx = ApplicationContext(ws)
environ = {"DATABRICKS_RUNTIME_VERSION": "8.3.x-scala2.12"}
Expand All @@ -44,7 +64,7 @@ def test_workspace_installer_run_raise_error_in_dbr(ws):
)


def test_workspace_installer_run_install_not_called_in_test(ws):
def test_workspace_installer_run_install_not_called_in_test(ws_installer, ws):
ws_installation = create_autospec(WorkspaceInstallation)
ctx = ApplicationContext(ws)
ctx.replace(
Expand All @@ -54,7 +74,7 @@ def test_workspace_installer_run_install_not_called_in_test(ws):
)

provided_config = RemorphConfigs()
workspace_installer = WorkspaceInstaller(
workspace_installer = ws_installer(
ctx.workspace_client,
ctx.prompts,
ctx.installation,
Expand All @@ -68,15 +88,15 @@ def test_workspace_installer_run_install_not_called_in_test(ws):
ws_installation.install.assert_not_called()


def test_workspace_installer_run_install_called_with_provided_config(ws):
def test_workspace_installer_run_install_called_with_provided_config(ws_installer, ws):
ws_installation = create_autospec(WorkspaceInstallation)
ctx = ApplicationContext(ws)
ctx.replace(
resource_configurator=create_autospec(ResourceConfigurator),
workspace_installation=ws_installation,
)
provided_config = RemorphConfigs()
workspace_installer = WorkspaceInstaller(
workspace_installer = ws_installer(
ctx.workspace_client,
ctx.prompts,
ctx.installation,
Expand Down Expand Up @@ -110,7 +130,7 @@ def test_configure_error_if_invalid_module_selected(ws):
workspace_installer.configure(module="invalid_module")


def test_workspace_installer_run_install_called_with_generated_config(ws):
def test_workspace_installer_run_install_called_with_generated_config(ws_installer, ws):
prompts = MockPrompts(
{
r"Select a module to configure:": MODULES.index("transpile"),
Expand All @@ -133,7 +153,7 @@ def test_workspace_installer_run_install_called_with_generated_config(ws):
workspace_installation=create_autospec(WorkspaceInstallation),
)

workspace_installer = WorkspaceInstaller(
workspace_installer = ws_installer(
ctx.workspace_client,
ctx.prompts,
ctx.installation,
Expand Down Expand Up @@ -986,7 +1006,7 @@ def test_configure_all_override_installation(ws):
)


def test_runs_upgrades_on_more_recent_version(ws):
def test_runs_upgrades_on_more_recent_version(ws_installer, ws):
installation = MockInstallation(
{
'version.json': {'version': '0.3.0', 'wheel': '...', 'date': '...'},
Expand Down Expand Up @@ -1038,7 +1058,7 @@ def test_runs_upgrades_on_more_recent_version(ws):
wheels=wheels,
)

workspace_installer = WorkspaceInstaller(
workspace_installer = ws_installer(
ctx.workspace_client,
ctx.prompts,
ctx.installation,
Expand Down
Loading