Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent register_launch_plan from re-registering already registered workflow #3049

Merged
merged 4 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 59 additions & 12 deletions flytekit/remote/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,7 +870,12 @@ async def _serialize_and_register(
loop.run_in_executor(
None,
functools.partial(
self.raw_register, cp_entity, serialization_settings, version, og_entity=task_entity
self.raw_register,
cp_entity,
serialization_settings,
version,
create_default_launchplan=create_default_launchplan,
og_entity=task_entity,
),
)
)
Expand Down Expand Up @@ -1244,6 +1249,27 @@ def register_script(
return self.register_launch_plan(entity, version, project, domain, options, serialization_settings)
raise ValueError(f"Unsupported entity type {type(entity)}")

def _wf_exists(
    self,
    name: str,
    version: str,
    project: str,
    domain: str,
) -> bool:
    """
    Report whether a workflow with the given identifier components can be fetched.

    A workflow ``Identifier`` is assembled from the four components and looked up
    through ``self.client.get_workflow``.

    :param name: Name component of the workflow identifier
    :param version: Version component of the workflow identifier
    :param project: Project component of the workflow identifier
    :param domain: Domain component of the workflow identifier
    :return: True when the lookup succeeds, False when it raises FlyteEntityNotExistException.
        Any other exception raised by the lookup is propagated unchanged.
    """
    wf_ident = Identifier(
        resource_type=ResourceType.WORKFLOW,
        project=project,
        domain=domain,
        name=name,
        version=version,
    )
    try:
        self.client.get_workflow(wf_ident)
    except FlyteEntityNotExistException:
        # Only "not found" maps to False; other failures must surface to the caller.
        return False
    else:
        return True
Comment on lines +1270 to +1271
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider broader exception handling for API call

Consider adding error handling for other potential exceptions besides FlyteEntityNotExistException when calling get_workflow. Network errors or other API issues could occur.

Code suggestion
Check the AI-generated fix before applying
Suggested change
except FlyteEntityNotExistException:
return False
except FlyteEntityNotExistException:
return False
except Exception as e:
logger.error(f"Error checking workflow existence: {e}")
return False

Code Review Run #8550a9


Is this a valid issue, or was it incorrectly flagged by the Agent?

  • it was incorrectly flagged


def register_launch_plan(
self,
entity: LaunchPlan,
Expand All @@ -1254,14 +1280,16 @@ def register_launch_plan(
serialization_settings: typing.Optional[SerializationSettings] = None,
) -> FlyteLaunchPlan:
"""
Register a given launchplan, possibly applying overrides from the provided options.
Register a given launchplan, possibly applying overrides from the provided options. If the underlying workflow
is not already registered, it, along with any underlying entities, will also be registered. If the underlying
workflow does exist (with the given project/domain/version), then only the launchplan will be registered.

:param entity: Launchplan to be registered
:param version:
:param version: Version to be registered for the launch plan, and used to check (and register) underlying wf
:param project: Optionally provide a project, if not already provided in flyteremote constructor or a separate one
:param domain: Optionally provide a domain, if not already provided in FlyteRemote constructor or a separate one
:param serialization_settings: Optionally provide serialization settings, if not provided, will use the default
:param options:
:return:
"""
if serialization_settings is None:
_, _, _, module_file = extract_task_module(entity.workflow)
Expand All @@ -1271,16 +1299,35 @@ def register_launch_plan(
source_root=project_root,
project=project or self.default_project,
domain=domain or self.default_domain,
version=version,
)

if self._wf_exists(
name=entity.workflow.name,
version=version,
project=serialization_settings.project,
domain=serialization_settings.domain,
):
# Underlying workflow exists; only register the launch plan itself
launch_plan_model = get_serializable(
OrderedDict(), settings=serialization_settings, entity=entity, options=options
)
ident = self.raw_register(
launch_plan_model, serialization_settings, version, create_default_launchplan=False
)
if ident is None:
raise ValueError("Failed to register launch plan, identifier returned was empty...")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider more specific exception type

Consider using a more specific exception type like FlyteRegistrationException instead of ValueError when handling registration failures. This would make error handling more targeted.

Code suggestion
Check the AI-generated fix before applying
Suggested change
raise ValueError("Failed to register launch plan, identifier returned was empty...")
raise FlyteRegistrationException("Failed to register launch plan, identifier returned was empty...")

Code Review Run #8550a9


Is this a valid issue, or was it incorrectly flagged by the Agent?

  • it was incorrectly flagged

else:
# Register the launch plan and everything under it
ident = run_sync(
self._serialize_and_register,
entity,
serialization_settings,
version,
options,
False,
)

ident = run_sync(
self._serialize_and_register,
entity,
serialization_settings,
version,
options,
False,
)
flp = self.fetch_launch_plan(ident.project, ident.domain, ident.name, ident.version)
flp.python_interface = entity.python_interface
return flp
Expand Down
40 changes: 40 additions & 0 deletions tests/flytekit/unit/remote/test_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,3 +852,43 @@ def workflow1():
registered_workflow = rr.register_workflow(workflow1, ss)
assert isinstance(registered_workflow, FlyteWorkflow)
assert registered_workflow.id == Identifier(ResourceType.WORKFLOW, "flytesnacks", "development", "tests.flytekit.unit.remote.test_remote.workflow1", "dummy_version")


@mock.patch("flytekit.remote.remote.get_serializable")
@mock.patch("flytekit.remote.remote.FlyteRemote.fetch_launch_plan")
@mock.patch("flytekit.remote.remote.FlyteRemote.raw_register")
@mock.patch("flytekit.remote.remote.FlyteRemote._serialize_and_register")
@mock.patch("flytekit.remote.remote.FlyteRemote.client")
def test_register_launch_plan(
    mock_client,
    mock_serialize_and_register,
    mock_raw_register,
    mock_fetch_launch_plan,
    mock_get_serializable,
):
    """
    When the client reports the underlying workflow as present, registering a launch
    plan must go through ``raw_register`` only and never ``_serialize_and_register``.
    """
    # The patched client returns a value from get_workflow instead of raising,
    # which is what makes the workflow look already registered.
    mock_client.get_workflow.return_value = MagicMock()
    mock_get_serializable.return_value = MagicMock()
    fetched_lp = MagicMock()
    mock_fetch_launch_plan.return_value = fetched_lp

    settings = SerializationSettings(
        image_config=ImageConfig.auto_default_image(),
        version="dummy_version",
    )
    remote = FlyteRemote(
        Config.for_sandbox(),
        default_project="flytesnacks",
        default_domain="development",
    )

    @task
    def say_hello() -> str:
        return "Hello, World!"

    @workflow
    def hello_world_wf() -> str:
        res = say_hello()
        return res

    launch_plan = LaunchPlan.get_or_create(
        workflow=hello_world_wf,
        name="additional_lp_for_hello_world",
        default_inputs={},
    )

    result = remote.register_launch_plan(
        launch_plan,
        version="dummy_version",
        project="flytesnacks",
        domain="development",
        serialization_settings=settings,
    )

    # The value handed back is whatever fetch_launch_plan produced.
    assert result is fetched_lp
    mock_serialize_and_register.assert_not_called()
    mock_raw_register.assert_called()
Loading