Commit 3d643f2

Merge branch 'master' into feature/cus-3571-powerbi-access-assets-by-id
sgomezvillamor authored Jan 22, 2025
2 parents d7a26bc + 96758e2 commit 3d643f2
Showing 18 changed files with 100 additions and 221 deletions.
@@ -3108,16 +3108,6 @@ private void configureDataProcessInstanceResolvers(final RuntimeWiring.Builder b
                       ? dataProcessInstance.getDataPlatformInstance().getUrn()
                       : null;
                 }))
-        .dataFetcher(
-            "platform",
-            new LoadableTypeResolver<>(
-                dataPlatformType,
-                (env) -> {
-                  final DataProcessInstance dataProcessInstance = env.getSource();
-                  return dataProcessInstance.getPlatform() != null
-                      ? dataProcessInstance.getPlatform().getUrn()
-                      : null;
-                }))
         .dataFetcher("parentContainers", new ParentContainersResolver(entityClient))
         .dataFetcher(
             "container",
@@ -8,7 +8,6 @@
 import com.linkedin.data.DataMap;
 import com.linkedin.data.template.RecordTemplate;
 import com.linkedin.datahub.graphql.QueryContext;
-import com.linkedin.datahub.graphql.generated.DataPlatform;
 import com.linkedin.datahub.graphql.generated.DataProcessInstance;
 import com.linkedin.datahub.graphql.generated.EntityType;
 import com.linkedin.datahub.graphql.types.common.mappers.AuditStampMapper;
@@ -80,10 +79,6 @@ public DataProcessInstance apply(
           DataPlatformInstance dataPlatformInstance = new DataPlatformInstance(dataMap);
           dataProcessInstance.setDataPlatformInstance(
               DataPlatformInstanceAspectMapper.map(context, dataPlatformInstance));
-          DataPlatform dataPlatform = new DataPlatform();
-          dataPlatform.setUrn(dataPlatformInstance.getPlatform().toString());
-          dataPlatform.setType(EntityType.DATA_PLATFORM);
-          dataProcessInstance.setPlatform(dataPlatform);
         });
     mappingHelper.mapToResult(
         SUB_TYPES_ASPECT_NAME,
5 changes: 0 additions & 5 deletions datahub-graphql-core/src/main/resources/entity.graphql

@@ -13029,11 +13029,6 @@ extend type DataProcessInstance {
   """
   container: Container
 
-  """
-  Standardized platform urn where the data process instance is defined
-  """
-  platform: DataPlatform!
-
   """
   Recursively get the lineage of containers for this entity
   """
@@ -80,9 +80,10 @@ public void testMapPlatformInstance() throws Exception {
     DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse);
 
     assertNotNull(instance.getDataPlatformInstance());
-    assertNotNull(instance.getPlatform());
-    assertEquals(instance.getPlatform().getUrn(), TEST_PLATFORM_URN);
-    assertEquals(instance.getPlatform().getType(), EntityType.DATA_PLATFORM);
+    assertNotNull(instance.getDataPlatformInstance().getPlatform());
+    assertEquals(instance.getDataPlatformInstance().getPlatform().getUrn(), TEST_PLATFORM_URN);
+    assertEquals(
+        instance.getDataPlatformInstance().getPlatform().getType(), EntityType.DATA_PLATFORM);
   }
 
   @Test
@@ -161,6 +161,7 @@ export class DataProcessInstanceEntity implements Entity<DataProcessInstance> {
         return {
            name,
            externalUrl,
+           platform: processInstance?.dataPlatformInstance?.platform,
        };
    };
 
@@ -174,9 +175,10 @@ export class DataProcessInstanceEntity implements Entity<DataProcessInstance> {
                subType={data.subTypes?.typeNames?.[0]}
                description=""
                platformName={
-                   data?.platform?.properties?.displayName || capitalizeFirstLetterOnly(data?.platform?.name)
+                   data?.dataPlatformInstance?.platform?.properties?.displayName ||
+                   capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name)
                }
-               platformLogo={data.platform.properties?.logoUrl}
+               platformLogo={data?.dataPlatformInstance?.platform?.properties?.logoUrl}
                owners={null}
                globalTags={null}
                // domain={data.domain?.domain}
@@ -201,9 +203,10 @@ export class DataProcessInstanceEntity implements Entity<DataProcessInstance> {
                subType={data.subTypes?.typeNames?.[0]}
                description=""
                platformName={
-                   data?.platform?.properties?.displayName || capitalizeFirstLetterOnly(data?.platform?.name)
+                   data?.dataPlatformInstance?.platform?.properties?.displayName ||
+                   capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name)
                }
-               platformLogo={data.platform.properties?.logoUrl}
+               platformLogo={data.dataPlatformInstance?.platform?.properties?.logoUrl}
                platformInstanceId={data.dataPlatformInstance?.instanceId}
                owners={null}
                globalTags={null}
@@ -231,8 +234,8 @@ export class DataProcessInstanceEntity implements Entity<DataProcessInstance> {
            name: this.displayName(entity),
            type: EntityType.DataProcessInstance,
            subtype: entity?.subTypes?.typeNames?.[0],
-           icon: entity?.platform?.properties?.logoUrl || undefined,
-           platform: entity?.platform,
+           icon: entity?.dataPlatformInstance?.platform?.properties?.logoUrl || undefined,
+           platform: entity?.dataPlatformInstance?.platform,
            container: entity?.container,
            // health: entity?.health || undefined,
        };
6 changes: 0 additions & 6 deletions datahub-web-react/src/graphql/dataProcessInstance.graphql

@@ -67,9 +67,6 @@ fragment processInstanceRelationshipResults on EntityRelationshipsResult {
 fragment dataProcessInstanceFields on DataProcessInstance {
     urn
     type
-    platform {
-        ...platformFields
-    }
     parentContainers {
         ...parentContainersFields
     }
@@ -125,9 +122,6 @@ query getDataProcessInstance($urn: String!) {
     dataProcessInstance(urn: $urn) {
         urn
         type
-        platform {
-            ...platformFields
-        }
         parentContainers {
             ...parentContainersFields
         }
2 changes: 1 addition & 1 deletion docker/README.md

@@ -16,7 +16,7 @@ area.
 ## Quickstart
 
 The easiest way to bring up and test DataHub is using DataHub [Docker](https://www.docker.com) images
-which are continuously deployed to [Docker Hub](https://hub.docker.com/u/linkedin) with every commit to repository.
+which are continuously deployed to [Docker Hub](https://hub.docker.com/u/acryldata) with every commit to repository.
 
 You can easily download and run all these images and their dependencies with our
 [quick start guide](../docs/quickstart.md).
16 changes: 16 additions & 0 deletions docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js

@@ -42,6 +42,22 @@ export default function DocsVersionDropdownNavbarItem({
       type: 'html',
       value: '<div class="dropdown__link"><b>Archived versions</b></div>',
     },
+    {
+      value: `
+        <a class="dropdown__link" href="https://docs-website-eue2qafvn-acryldata.vercel.app//docs/features">0.14.0
+        <svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
+        </a>
+      `,
+      type: "html",
+    },
+    {
+      value: `
+        <a class="dropdown__link" href="https://docs-website-psat3nzgi-acryldata.vercel.app/docs/features">0.13.1
+        <svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg>
+        </a>
+      `,
+      type: "html",
+    },
     {
       value: `
         <a class="dropdown__link" href="https://docs-website-lzxh86531-acryldata.vercel.app/docs/features">0.13.0
4 changes: 2 additions & 2 deletions docs-website/versions.json

@@ -1,4 +1,4 @@
 [
-  "0.14.0",
-  "0.13.1"
+  "0.15.0",
+  "0.14.1"
 ]
2 changes: 1 addition & 1 deletion docs/automations/bigquery-metadata-sync.md

@@ -67,7 +67,7 @@ Ensure your service account has the following permissions:
 | Propagation Type | DataHub Entity | BigQuery Entity | Note |
 | -------- | ------- | ------- | ------- |
 | Table Tags as Labels | [Table Tag](https://datahubproject.io/docs/tags/) | [BigQuery Label](https://cloud.google.com/bigquery/docs/labels-intro) | - |
-| Column Glossary Terms as Policy Tags | [Glossary Term on Table Column](https://datahubproject.io/docs/0.14.0/glossary/business-glossary/) | [Policy Tag](https://cloud.google.com/bigquery/docs/best-practices-policy-tags) | <ul><li>Assigned Policy tags are created under DataHub taxonomy.</li></ul><ul><li>Only the latest assigned glossary term set as policy tag. BigQuery only supports one assigned policy tag.</li></ul> <ul><li>Policy Tags are not synced to DataHub as glossary term from BigQuery.</li></ul>
+| Column Glossary Terms as Policy Tags | [Glossary Term on Table Column](https://datahubproject.io/docs/glossary/business-glossary/) | [Policy Tag](https://cloud.google.com/bigquery/docs/best-practices-policy-tags) | <ul><li>Assigned Policy tags are created under DataHub taxonomy.</li></ul><ul><li>Only the latest assigned glossary term set as policy tag. BigQuery only supports one assigned policy tag.</li></ul> <ul><li>Policy Tags are not synced to DataHub as glossary term from BigQuery.</li></ul>
 | Table Descriptions | [Table Description](https://datahubproject.io/docs/api/tutorials/descriptions/) | Table Description | - |
 | Column Descriptions | [Column Description](https://datahubproject.io/docs/api/tutorials/descriptions/) | Column Description | - |
2 changes: 2 additions & 0 deletions docs/how/updating-datahub.md

@@ -20,6 +20,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
 
 ### Breaking Changes
 
+- #12408: The `platform` field in the `DataProcessInstance` GraphQL type is removed. Clients need to retrieve the platform via the optional `dataPlatformInstance` field.
+
 ### Potential Downtime
 
 ### Deprecations
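As a migration aid for this breaking change, here is a minimal client-side sketch. The endpoint URL, token variable, and helper function below are illustrative assumptions, not part of the commit; only the query shape — the platform nested under the optional `dataPlatformInstance` — comes from the diff.

```python
# A minimal sketch of the client-side migration, assuming a DataHub GraphQL
# endpoint at http://localhost:8080/api/graphql and a personal access token
# in the DATAHUB_TOKEN environment variable (both are assumptions).
import os
from typing import Optional

import requests

# Before #12408: `dataProcessInstance { platform { urn } }`
# After  #12408: the platform hangs off the optional `dataPlatformInstance`.
QUERY = """
query getDataProcessInstance($urn: String!) {
  dataProcessInstance(urn: $urn) {
    urn
    dataPlatformInstance {
      platform {
        urn
        name
      }
    }
  }
}
"""


def fetch_platform_urn(dpi_urn: str) -> Optional[str]:
    resp = requests.post(
        "http://localhost:8080/api/graphql",
        headers={"Authorization": f"Bearer {os.environ['DATAHUB_TOKEN']}"},
        json={"query": QUERY, "variables": {"urn": dpi_urn}},
        timeout=30,
    )
    resp.raise_for_status()
    dpi = resp.json()["data"]["dataProcessInstance"] or {}
    # `dataPlatformInstance` is optional, so a None check replaces the
    # guarantee the old non-null `platform: DataPlatform!` field provided.
    dpi_platform = (dpi.get("dataPlatformInstance") or {}).get("platform")
    return dpi_platform["urn"] if dpi_platform else None
```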
@@ -1,3 +1,4 @@
source:
  type: redshift
  config:
    # Coordinates
25 changes: 1 addition & 24 deletions metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py

@@ -1,11 +1,6 @@
 import logging
 import os
-from collections import defaultdict
-from typing import TYPE_CHECKING, Dict, Iterable, List, Optional
-
-if TYPE_CHECKING:
-    from mypy_boto3_s3.service_resource import ObjectSummary
-
+from typing import Optional
 
 S3_PREFIXES = ["s3://", "s3n://", "s3a://"]
 
@@ -73,21 +68,3 @@ def get_key_prefix(s3_uri: str) -> str:
             f"Not an S3 URI. Must start with one of the following prefixes: {str(S3_PREFIXES)}"
         )
     return strip_s3_prefix(s3_uri).split("/", maxsplit=1)[1]
-
-
-def group_s3_objects_by_dirname(
-    s3_objects: Iterable["ObjectSummary"],
-) -> Dict[str, List["ObjectSummary"]]:
-    """
-    Groups S3 objects by their directory name.
-    If a s3_object in the root directory (i.e., s3://bucket/file.txt), it is grouped under '/'.
-    """
-    grouped_s3_objs = defaultdict(list)
-    for obj in s3_objects:
-        if "/" in obj.key:
-            dirname = obj.key.rsplit("/", 1)[0]
-        else:
-            dirname = "/"
-        grouped_s3_objs[dirname].append(obj)
-    return grouped_s3_objs
8 changes: 6 additions & 2 deletions metadata-ingestion/src/datahub/ingestion/source/s3/source.py

@@ -40,7 +40,6 @@
     get_bucket_name,
     get_bucket_relative_path,
     get_key_prefix,
-    group_s3_objects_by_dirname,
     strip_s3_prefix,
 )
 from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator
@@ -73,6 +72,7 @@
     _Aspect,
 )
 from datahub.telemetry import stats, telemetry
+from datahub.utilities.groupby import groupby_unsorted
 from datahub.utilities.perf_timer import PerfTimer
 
 if TYPE_CHECKING:
@@ -868,7 +868,11 @@ def get_folder_info(
         """
         partitions: List[Folder] = []
         s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
-        for key, group in group_s3_objects_by_dirname(s3_objects).items():
+        grouped_s3_objects_by_dirname = groupby_unsorted(
+            s3_objects,
+            key=lambda obj: obj.key.rsplit("/", 1)[0],
+        )
+        for key, group in grouped_s3_objects_by_dirname:
             file_size = 0
             creation_time = None
             modification_time = None
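The deleted `group_s3_objects_by_dirname` helper (and its unit test file, removed below) is replaced by this inline `groupby_unsorted` call. A rough sketch of the grouping semantics, assuming `groupby_unsorted` buckets equal keys regardless of input order — the stand-in below is illustrative, not the actual `datahub.utilities.groupby` implementation:

```python
# A rough sketch of the grouping behavior, assuming groupby_unsorted buckets
# equal keys even when they are not adjacent in the input. Illustrative only.
from collections import defaultdict
from typing import Callable, Dict, Iterable, List, Tuple, TypeVar

K = TypeVar("K")
V = TypeVar("V")


def groupby_unsorted_sketch(
    items: Iterable[V], key: Callable[[V], K]
) -> Iterable[Tuple[K, List[V]]]:
    # Unlike itertools.groupby, no pre-sorting is required: every item with
    # the same key lands in one group, at the cost of buffering all items.
    groups: Dict[K, List[V]] = defaultdict(list)
    for item in items:
        groups[key(item)].append(item)
    return groups.items()


# With key=lambda obj: obj.key.rsplit("/", 1)[0], keys such as "folder/a.csv"
# and "folder/b.csv" collapse into one "folder" group, mirroring the deleted
# group_s3_objects_by_dirname helper. (The old helper special-cased root-level
# keys under "/"; rsplit leaves a key without "/" unchanged.)
keys = ["folder/a.csv", "folder/b.csv", "other/c.csv"]
for dirname, group in groupby_unsorted_sketch(keys, key=lambda k: k.rsplit("/", 1)[0]):
    print(dirname, group)
```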
33 changes: 23 additions & 10 deletions metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

@@ -1147,23 +1147,36 @@ def fetch_projects():
                 )
                 # Set parent project name
                 for _project_id, project in all_project_map.items():
-                    if (
-                        project.parent_id is not None
-                        and project.parent_id in all_project_map
-                    ):
+                    if project.parent_id is None:
+                        continue
+
+                    if project.parent_id in all_project_map:
                         project.parent_name = all_project_map[project.parent_id].name
+                    else:
+                        self.report.warning(
+                            title="Incomplete project hierarchy",
+                            message="Project details missing. Child projects will be ingested without reference to their parent project. We generally need Site Administrator Explorer permissions to extract the complete project hierarchy.",
+                            context=f"Missing {project.parent_id}, referenced by {project.id} {project.project_name}",
+                        )
+                        project.parent_id = None
+
+                # Post-condition
+                assert all(
+                    [
+                        ((project.parent_id is None) == (project.parent_name is None))
+                        and (
+                            project.parent_id is None
+                            or project.parent_id in all_project_map
+                        )
+                        for project in all_project_map.values()
+                    ]
+                ), "Parent project id and name should be consistent"
 
         def set_project_path():
             def form_path(project_id: str) -> List[str]:
                 cur_proj = all_project_map[project_id]
                 ancestors = [cur_proj.name]
                 while cur_proj.parent_id is not None:
-                    if cur_proj.parent_id not in all_project_map:
-                        self.report.warning(
-                            "project-issue",
-                            f"Parent project {cur_proj.parent_id} not found. We need Site Administrator Explorer permissions.",
-                        )
-                        break
                     cur_proj = all_project_map[cur_proj.parent_id]
                     ancestors = [cur_proj.name, *ancestors]
                 return ancestors
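The restructuring above moves the missing-parent warning out of `form_path` and into the map-building pass, which detaches dangling parent pointers and then asserts the invariant. A self-contained sketch of why `form_path` can now walk parents unchecked — `ProjectInfo` here is a hypothetical stand-in for the real Tableau project class:

```python
# Tiny sketch of the invariant the new post-condition enforces, using a
# hypothetical ProjectInfo stand-in for the real Tableau project class.
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class ProjectInfo:
    id: str
    name: str
    parent_id: Optional[str] = None
    parent_name: Optional[str] = None


def prune_dangling_parents(all_project_map: Dict[str, ProjectInfo]) -> None:
    # After this pass, every non-None parent_id resolves inside the map,
    # so path construction can walk parent pointers without re-checking.
    for project in all_project_map.values():
        if project.parent_id is None:
            continue
        if project.parent_id in all_project_map:
            project.parent_name = all_project_map[project.parent_id].name
        else:
            project.parent_id = None  # dangling reference: detach


def form_path(all_project_map: Dict[str, ProjectInfo], project_id: str) -> List[str]:
    cur = all_project_map[project_id]
    ancestors = [cur.name]
    while cur.parent_id is not None:
        cur = all_project_map[cur.parent_id]  # safe under the invariant above
        ancestors = [cur.name, *ancestors]
    return ancestors


projects = {
    "1": ProjectInfo("1", "Root"),
    "2": ProjectInfo("2", "Child", parent_id="1"),
    "3": ProjectInfo("3", "Orphan", parent_id="missing"),
}
prune_dangling_parents(projects)
assert form_path(projects, "2") == ["Root", "Child"]
assert form_path(projects, "3") == ["Orphan"]
```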
29 changes: 0 additions & 29 deletions metadata-ingestion/tests/unit/s3/test_s3_util.py

This file was deleted.

@@ -210,13 +210,6 @@ def test_search_dpi(auth_session, ingest_cleanup_data):
                     name
                 }
             }
-            platform {
-                urn
-                name
-                properties {
-                    type
-                }
-            }
             subTypes {
                 typeNames
             }