- Reliably provides direction to community members and submitted 5 pull request, including improvements to Athena ingestion (support for nested schemas) and the REST emitter.
+ Submitted 16 pull requests including improvements on graphQL and search API.
- Reliably provided direction to Community Members across all support channels in Slack.
+ Submitted 2 pull requests and 1 issue while reliably providing direction to Community Members across all support channels in Slack.
- Drove DataHub's adaptation and implementation on Coursera.
+ Submitted 4 pull requests and reliably provided direction to Community Members across all support channels in Slack.
- Reliably provided direction to Community Members across all support channels in Slack and shared Zynga's experience adopting and implementing DataHub during the September 2023 Town Hall.
+ Reliably provides direction to Community Members across all support channels in Slack and shared Zynga's experience adopting and implementing DataHub during the September 2023 Town Hall.
- Drove DataHub's adaptation and implementation at Optum.
+ Reliably provides direction to community members and submitted 9 pull requests, including improvements to Athena ingestion (support for nested schemas) and the REST emitter.
>
),
social: {
+ linkedin: "https://www.linkedin.com/in/tim-bossenmaier/",
+ github: "https://github.com/bossenti",
},
- location: "USA"
+ location: "Innsbruck, Austria"
},
{
name: "Raj Tekal",
diff --git a/docs/quick-ingestion-guides/snowflake/setup.md b/docs/quick-ingestion-guides/snowflake/setup.md
index af5f15492376b5..aaa9c67014814f 100644
--- a/docs/quick-ingestion-guides/snowflake/setup.md
+++ b/docs/quick-ingestion-guides/snowflake/setup.md
@@ -43,6 +43,8 @@ In order to configure ingestion from Snowflake, you'll first have to ensure you
grant select on future external tables in database identifier($db_var) to role datahub_role;
grant select on all views in database identifier($db_var) to role datahub_role;
grant select on future views in database identifier($db_var) to role datahub_role;
+ grant select on all dynamic tables in database identifier($db_var) to role datahub_role;
+ grant select on future dynamic tables in database identifier($db_var) to role datahub_role;
-- Grant access to view tables and views
grant references on all tables in database identifier($db_var) to role datahub_role;
@@ -51,6 +53,9 @@ In order to configure ingestion from Snowflake, you'll first have to ensure you
grant references on future external tables in database identifier($db_var) to role datahub_role;
grant references on all views in database identifier($db_var) to role datahub_role;
grant references on future views in database identifier($db_var) to role datahub_role;
+ -- Grant access to dynamic tables
+ grant monitor on all dynamic tables in database identifier($db_var) to role datahub_role;
+ grant monitor on future dynamic tables in database identifier($db_var) to role datahub_role;
-- Assign privileges to extract lineage and usage statistics from Snowflake by executing the below query.
grant imported privileges on database snowflake to role datahub_role;
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java
index e9ee7789550c6c..612f923d5d68b7 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java
@@ -20,6 +20,7 @@
import com.linkedin.structured.PrimitivePropertyValue;
import com.linkedin.structured.StructuredProperties;
import com.linkedin.structured.StructuredPropertyDefinition;
+import com.linkedin.structured.StructuredPropertySettings;
import com.linkedin.structured.StructuredPropertyValueAssignment;
import com.linkedin.structured.StructuredPropertyValueAssignmentArray;
import com.linkedin.util.Pair;
@@ -32,6 +33,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
+import java.util.UUID;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@@ -45,6 +47,11 @@ private StructuredPropertyUtils() {}
static final Date MIN_DATE = Date.valueOf("1000-01-01");
static final Date MAX_DATE = Date.valueOf("9999-12-31");
+ public static final String INVALID_SETTINGS_MESSAGE =
+ "Cannot have property isHidden = true while other display location settings are also true.";
+ public static final String ONLY_ONE_BADGE =
+ "Cannot have more than one property set with show as badge. Property urns currently set: ";
+
public static LogicalValueType getLogicalValueType(
StructuredPropertyDefinition structuredPropertyDefinition) {
return getLogicalValueType(structuredPropertyDefinition.getValueType());
@@ -355,4 +362,48 @@ private static Pair filterValue
true);
}
}
+
+ /*
+ * We accept both ID and qualifiedName as inputs when creating a structured property. However,
+ * these two fields should ALWAYS be the same. If they don't provide either, use a UUID for both.
+ * If they provide both, ensure they are the same otherwise throw. Otherwise, use what is provided.
+ */
+ public static String getPropertyId(
+ @Nullable final String inputId, @Nullable final String inputQualifiedName) {
+ if (inputId != null && inputQualifiedName != null && !inputId.equals(inputQualifiedName)) {
+ throw new IllegalArgumentException(
+ "Qualified name and the ID of a structured property must match");
+ }
+
+ String id = UUID.randomUUID().toString();
+
+ if (inputQualifiedName != null) {
+ id = inputQualifiedName;
+ } else if (inputId != null) {
+ id = inputId;
+ }
+
+ return id;
+ }
+
+ /*
+ * Ensure that a structured property settings aspect is valid by ensuring that if isHidden is true,
+ * the other fields concerning display locations are false.
+ */
+ public static boolean validatePropertySettings(
+ StructuredPropertySettings settings, boolean shouldThrow) {
+ if (settings.isIsHidden()) {
+ if (settings.isShowInSearchFilters()
+ || settings.isShowInAssetSummary()
+ || settings.isShowAsAssetBadge()
+ || settings.isShowInColumnsTable()) {
+ if (shouldThrow) {
+ throw new IllegalArgumentException(INVALID_SETTINGS_MESSAGE);
+ } else {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
}
diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java
index ab402a65a2a7f6..ff6a79108600a3 100644
--- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java
+++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java
@@ -365,6 +365,8 @@ public class Constants {
// Structured Property
public static final String STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME = "propertyDefinition";
public static final String STRUCTURED_PROPERTY_KEY_ASPECT_NAME = "structuredPropertyKey";
+ public static final String STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME =
+ "structuredPropertySettings";
// Form
public static final String FORM_INFO_ASPECT_NAME = "formInfo";
diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md b/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md
index 75bd579417a48f..4cfbc470e8c239 100644
--- a/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md
+++ b/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md
@@ -23,12 +23,16 @@ grant references on all external tables in database "" to role da
grant references on future external tables in database "" to role datahub_role;
grant references on all views in database "" to role datahub_role;
grant references on future views in database "" to role datahub_role;
+grant monitor on all dynamic tables in database "" to role datahub_role;
+grant monitor on future dynamic tables in database "" to role datahub_role;
// If you ARE using Snowflake Profiling or Classification feature: Grant select privileges to your tables
grant select on all tables in database "" to role datahub_role;
grant select on future tables in database "" to role datahub_role;
grant select on all external tables in database "" to role datahub_role;
grant select on future external tables in database "" to role datahub_role;
+grant select on all dynamic tables in database "" to role datahub_role;
+grant select on future dynamic tables in database "" to role datahub_role;
// Create a new DataHub user and assign the DataHub role to it
create user datahub_user display_name = 'DataHub' password='' default_role = datahub_role default_warehouse = '';
diff --git a/metadata-ingestion/sink_docs/metadata-file.md b/metadata-ingestion/sink_docs/metadata-file.md
index 7cac8d55422438..49ca3c75397af4 100644
--- a/metadata-ingestion/sink_docs/metadata-file.md
+++ b/metadata-ingestion/sink_docs/metadata-file.md
@@ -25,7 +25,7 @@ source:
sink:
type: file
config:
- filename: ./path/to/mce/file.json
+ path: ./path/to/mce/file.json
```
## Config details
@@ -34,4 +34,4 @@ Note that a `.` is used to denote nested fields in the YAML recipe.
| Field | Required | Default | Description |
| -------- | -------- | ------- | ------------------------- |
-| filename | ā | | Path to file to write to. |
+| path | ✅ | | Path to file to write to. |
diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
index 181c70adc640a6..013efbdf6a2f6b 100644
--- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
+++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
@@ -118,11 +118,13 @@ def validate_entity_types(cls, v):
@property
def fqn(self) -> str:
assert self.urn is not None
- return (
- self.qualified_name
- or self.id
- or Urn.from_string(self.urn).get_entity_id()[0]
- )
+ id = Urn.create_from_string(self.urn).get_entity_id()[0]
+ if self.qualified_name is not None:
+ # ensure that qualified name and ID match
+ assert (
+ self.qualified_name == id
+ ), "ID in the urn and the qualified_name must match"
+ return id
@validator("urn", pre=True, always=True)
def urn_must_be_present(cls, v, values):
diff --git a/metadata-ingestion/src/datahub/cli/delete_cli.py b/metadata-ingestion/src/datahub/cli/delete_cli.py
index a640f941b75276..1a75459a92c5cf 100644
--- a/metadata-ingestion/src/datahub/cli/delete_cli.py
+++ b/metadata-ingestion/src/datahub/cli/delete_cli.py
@@ -1,4 +1,5 @@
import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from datetime import datetime
from random import choices
@@ -345,6 +346,9 @@ def undo_by_filter(
default=False,
help="Only delete soft-deleted entities, for hard deletion",
)
+@click.option(
+ "--workers", type=int, default=1, help="Num of workers to use for deletion."
+)
@upgrade.check_upgrade
@telemetry.with_telemetry()
def by_filter(
@@ -362,6 +366,7 @@ def by_filter(
batch_size: int,
dry_run: bool,
only_soft_deleted: bool,
+ workers: int = 1,
) -> None:
"""Delete metadata from datahub using a single urn or a combination of filters."""
@@ -382,16 +387,19 @@ def by_filter(
# TODO: add some validation on entity_type
if not force and not soft and not dry_run:
+ message = (
+ "Hard deletion will permanently delete data from DataHub and can be slow. "
+ "We generally recommend using soft deletes instead. "
+ "Do you want to continue?"
+ )
if only_soft_deleted:
click.confirm(
- "This will permanently delete data from DataHub. Do you want to continue?",
+ message,
abort=True,
)
else:
click.confirm(
- "Hard deletion will permanently delete data from DataHub and can be slow. "
- "We generally recommend using soft deletes instead. "
- "Do you want to continue?",
+ message,
abort=True,
)
@@ -462,26 +470,64 @@ def by_filter(
abort=True,
)
- urns_iter = urns
- if not delete_by_urn and not dry_run:
- urns_iter = progressbar.progressbar(urns, redirect_stdout=True)
+ _delete_urns_parallel(
+ graph=graph,
+ urns=urns,
+ aspect_name=aspect,
+ soft=soft,
+ dry_run=dry_run,
+ delete_by_urn=delete_by_urn,
+ start_time=start_time,
+ end_time=end_time,
+ workers=workers,
+ )
+
- # Run the deletion.
+def _delete_urns_parallel(
+ graph: DataHubGraph,
+ urns: List[str],
+ delete_by_urn: bool,
+ start_time: Optional[datetime],
+ end_time: Optional[datetime],
+ aspect_name: Optional[str] = None,
+ soft: bool = True,
+ dry_run: bool = False,
+ workers: int = 1,
+) -> None:
deletion_result = DeletionResult()
- with PerfTimer() as timer:
- for urn in urns_iter:
- one_result = _delete_one_urn(
- graph=graph,
- urn=urn,
- aspect_name=aspect,
- soft=soft,
- dry_run=dry_run,
- start_time=start_time,
- end_time=end_time,
+
+ def process_urn(urn):
+ return _delete_one_urn(
+ graph=graph,
+ urn=urn,
+ aspect_name=aspect_name,
+ soft=soft,
+ dry_run=dry_run,
+ start_time=start_time,
+ end_time=end_time,
+ )
+
+ with PerfTimer() as timer, ThreadPoolExecutor(max_workers=workers) as executor:
+ future_to_urn = {executor.submit(process_urn, urn): urn for urn in urns}
+
+ completed_futures = as_completed(future_to_urn)
+ if not delete_by_urn and not dry_run:
+ futures_iter = progressbar.progressbar(
+ as_completed(future_to_urn),
+ max_value=len(future_to_urn),
+ redirect_stdout=True,
)
- deletion_result.merge(one_result)
+ else:
+ futures_iter = completed_futures
+
+ for future in futures_iter:
+ try:
+ one_result = future.result()
+ deletion_result.merge(one_result)
+ except Exception as e:
+ urn = future_to_urn[future]
+ click.secho(f"Error processing URN {urn}: {e}", fg="red")
- # Report out a summary of the deletion result.
click.echo(
deletion_result.format_message(
dry_run=dry_run, soft=soft, time_sec=timer.elapsed_seconds()
diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py
index 4fdf564162410c..7df007e087979c 100644
--- a/metadata-ingestion/src/datahub/configuration/common.py
+++ b/metadata-ingestion/src/datahub/configuration/common.py
@@ -258,7 +258,7 @@ def allow_all(cls) -> "AllowDenyPattern":
return AllowDenyPattern()
def allowed(self, string: str) -> bool:
- if self._denied(string):
+ if self.denied(string):
return False
return any(
@@ -266,7 +266,7 @@ def allowed(self, string: str) -> bool:
for allow_pattern in self.allow
)
- def _denied(self, string: str) -> bool:
+ def denied(self, string: str) -> bool:
for deny_pattern in self.deny:
if re.match(deny_pattern, string, self.regex_flags):
return True
@@ -290,7 +290,7 @@ def get_allowed_list(self) -> List[str]:
raise ValueError(
"allow list must be fully specified to get list of allowed strings"
)
- return [a for a in self.allow if not self._denied(a)]
+ return [a for a in self.allow if not self.denied(a)]
def __eq__(self, other): # type: ignore
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py
index 586b1c610dc756..c80da04e481a9f 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source.py
@@ -492,11 +492,15 @@ def close(self) -> None:
def _infer_platform(self) -> Optional[str]:
config = self.get_config()
- return (
+ platform = (
getattr(config, "platform_name", None)
or getattr(self, "platform", None)
or getattr(config, "platform", None)
)
+ if platform is None and hasattr(self, "get_platform_id"):
+ platform = type(self).get_platform_id()
+
+ return platform
def _get_browse_path_processor(self, dry_run: bool) -> MetadataWorkUnitProcessor:
config = self.get_config()
diff --git a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
index 5961a553a14943..28def68ccf3f55 100644
--- a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
+++ b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
@@ -148,10 +148,10 @@ def __init__(self, sink: Sink, report_recipe: bool, ctx: PipelineContext) -> Non
def _get_recipe_to_report(self, ctx: PipelineContext) -> str:
assert ctx.pipeline_config
- if not self.report_recipe or not ctx.pipeline_config._raw_dict:
+ if not self.report_recipe or not ctx.pipeline_config.get_raw_dict():
return ""
else:
- return json.dumps(redact_raw_config(ctx.pipeline_config._raw_dict))
+ return json.dumps(redact_raw_config(ctx.pipeline_config.get_raw_dict()))
def _emit_aspect(self, entity_urn: Urn, aspect_value: _Aspect) -> None:
self.sink.write_record_async(
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
index 667129ff83584a..ee1c1608cd48c6 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
@@ -221,7 +221,7 @@ def __init__(
dry_run: bool = False,
preview_mode: bool = False,
preview_workunits: int = 10,
- report_to: Optional[str] = None,
+ report_to: Optional[str] = "datahub",
no_progress: bool = False,
):
self.config = config
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
index 2b2f992249f1e8..7a4e7ec52a8e96 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
@@ -117,3 +117,9 @@ def from_dict(
config = cls.parse_obj(resolved_dict)
config._raw_dict = raw_dict
return config
+
+ def get_raw_dict(self) -> Dict:
+ result = self._raw_dict
+ if result is None:
+ result = self.dict()
+ return result
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
index 80be566cdcd468..103f4175a9ccff 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
@@ -88,8 +88,7 @@ def column_name_in_sql_attribute(self) -> List[str]:
for upstream_field_match in re.finditer(r"\${TABLE}\.[\"]*([\.\w]+)", sql):
matched_field = upstream_field_match.group(1)
# Remove quotes from field names
- matched_field = matched_field.replace('"', "").replace("`", "").lower()
- column_names.append(matched_field)
+ column_names.append(matched_field.replace('"', "").replace("`", "").lower())
return column_names
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
index 8cec6f2607774e..971181e4300d69 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
@@ -25,11 +25,13 @@
LookMLSourceReport,
)
from datahub.ingestion.source.looker.urn_functions import get_qualified_table_name
+from datahub.sql_parsing.schema_resolver import match_columns_to_schema
from datahub.sql_parsing.sqlglot_lineage import (
ColumnLineageInfo,
ColumnRef,
SqlParsingResult,
Urn,
+ create_and_cache_schema_resolver,
create_lineage_sql_parsed_result,
)
@@ -200,7 +202,7 @@ def _generate_fully_qualified_name(
class AbstractViewUpstream(ABC):
"""
Implementation of this interface extracts the view upstream as per the way the view is bound to datasets.
- For detail explanation please refer lookml_concept_context.LookerViewContext documentation.
+ For a detailed explanation, please refer to the lookml_concept_context.LookerViewContext documentation.
"""
view_context: LookerViewContext
@@ -236,6 +238,47 @@ def get_upstream_dataset_urn(self) -> List[Urn]:
def create_fields(self) -> List[ViewField]:
return [] # it is for the special case
+ def create_upstream_column_refs(
+ self, upstream_urn: str, downstream_looker_columns: List[str]
+ ) -> List[ColumnRef]:
+ """
+ - **`upstream_urn`**: The URN of the upstream dataset.
+
+ - **`downstream_looker_columns`**: These are the columns identified by the Looker connector as belonging to the `upstream_urn` dataset. However, there is potential for human error in specifying the columns of the upstream dataset. For example, a user might declare a column in lowercase, while on the actual platform, it may exist in uppercase, or vice versa.
+
+ - This function ensures consistency in column-level lineage by consulting GMS before creating the final `ColumnRef` instance, avoiding discrepancies.
+ """
+ schema_resolver = create_and_cache_schema_resolver(
+ platform=self.view_context.view_connection.platform,
+ platform_instance=self.view_context.view_connection.platform_instance,
+ env=self.view_context.view_connection.platform_env or self.config.env,
+ graph=self.ctx.graph,
+ )
+
+ urn, schema_info = schema_resolver.resolve_urn(urn=upstream_urn)
+
+ if schema_info:
+ actual_columns = match_columns_to_schema(
+ schema_info, downstream_looker_columns
+ )
+ else:
+ logger.info(
+ f"schema_info not found for dataset {urn} in GMS. Using expected_columns to form ColumnRef"
+ )
+ actual_columns = [column.lower() for column in downstream_looker_columns]
+
+ upstream_column_refs: List[ColumnRef] = []
+
+ for column in actual_columns:
+ upstream_column_refs.append(
+ ColumnRef(
+ column=column,
+ table=upstream_urn,
+ )
+ )
+
+ return upstream_column_refs
+
class SqlBasedDerivedViewUpstream(AbstractViewUpstream, ABC):
"""
@@ -372,15 +415,12 @@ def get_upstream_column_ref(
# in-case of "select * from look_ml_view.SQL_TABLE_NAME" or extra field are defined in the looker view which is
# referring to upstream table
if self._get_upstream_dataset_urn() and not upstreams_column_refs:
- upstreams_column_refs = [
- ColumnRef(
- table=self._get_upstream_dataset_urn()[
- 0
- ], # 0th index has table of from clause
- column=column,
- )
- for column in field_context.column_name_in_sql_attribute()
- ]
+ upstreams_column_refs = self.create_upstream_column_refs(
+ upstream_urn=self._get_upstream_dataset_urn()[
+ 0
+ ], # 0th index has table of from clause
+ downstream_looker_columns=field_context.column_name_in_sql_attribute(),
+ )
# fix any derived view reference present in urn
upstreams_column_refs = resolve_derived_view_urn_of_col_ref(
@@ -487,18 +527,18 @@ def get_upstream_column_ref(
return upstream_column_refs
explore_urn: str = self._get_upstream_dataset_urn()[0]
+ expected_columns: List[str] = []
for column in field_context.column_name_in_sql_attribute():
if column in self._get_explore_column_mapping():
explore_column: Dict = self._get_explore_column_mapping()[column]
- upstream_column_refs.append(
- ColumnRef(
- column=explore_column.get("field", explore_column[NAME]),
- table=explore_urn,
- )
+ expected_columns.append(
+ explore_column.get("field", explore_column[NAME])
)
- return upstream_column_refs
+ return self.create_upstream_column_refs(
+ upstream_urn=explore_urn, downstream_looker_columns=expected_columns
+ )
def get_upstream_dataset_urn(self) -> List[Urn]:
return self._get_upstream_dataset_urn()
@@ -548,14 +588,10 @@ def __get_upstream_dataset_urn(self) -> Urn:
def get_upstream_column_ref(
self, field_context: LookerFieldContext
) -> List[ColumnRef]:
- upstream_column_ref: List[ColumnRef] = []
-
- for column_name in field_context.column_name_in_sql_attribute():
- upstream_column_ref.append(
- ColumnRef(table=self._get_upstream_dataset_urn(), column=column_name)
- )
-
- return upstream_column_ref
+ return self.create_upstream_column_refs(
+ upstream_urn=self._get_upstream_dataset_urn(),
+ downstream_looker_columns=field_context.column_name_in_sql_attribute(),
+ )
def get_upstream_dataset_urn(self) -> List[Urn]:
return [self._get_upstream_dataset_urn()]
@@ -609,15 +645,14 @@ def get_upstream_column_ref(
self, field_context: LookerFieldContext
) -> List[ColumnRef]:
upstream_column_ref: List[ColumnRef] = []
+
if not self._get_upstream_dataset_urn():
return upstream_column_ref
- for column_name in field_context.column_name_in_sql_attribute():
- upstream_column_ref.append(
- ColumnRef(table=self._get_upstream_dataset_urn()[0], column=column_name)
- )
-
- return upstream_column_ref
+ return self.create_upstream_column_refs(
+ upstream_urn=self._get_upstream_dataset_urn()[0],
+ downstream_looker_columns=field_context.column_name_in_sql_attribute(),
+ )
def get_upstream_dataset_urn(self) -> List[Urn]:
return self._get_upstream_dataset_urn()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py
index 79ec47a7efb2c2..26a0331e1e5767 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py
@@ -45,6 +45,9 @@ class Owners(ConfigModel):
groups: Optional[List[str]] = None
+OwnersMultipleTypes = Union[List[Owners], Owners]
+
+
class KnowledgeCard(ConfigModel):
url: Optional[str] = None
label: Optional[str] = None
@@ -57,7 +60,7 @@ class GlossaryTermConfig(ConfigModel):
term_source: Optional[str] = None
source_ref: Optional[str] = None
source_url: Optional[str] = None
- owners: Optional[Owners] = None
+ owners: Optional[OwnersMultipleTypes] = None
inherits: Optional[List[str]] = None
contains: Optional[List[str]] = None
values: Optional[List[str]] = None
@@ -74,7 +77,7 @@ class GlossaryNodeConfig(ConfigModel):
id: Optional[str] = None
name: str
description: str
- owners: Optional[Owners] = None
+ owners: Optional[OwnersMultipleTypes] = None
terms: Optional[List["GlossaryTermConfig"]] = None
nodes: Optional[List["GlossaryNodeConfig"]] = None
knowledge_links: Optional[List[KnowledgeCard]] = None
@@ -88,7 +91,7 @@ class DefaultConfig(ConfigModel):
"""Holds defaults for populating fields in glossary terms"""
source: Optional[str] = None
- owners: Owners
+ owners: OwnersMultipleTypes
url: Optional[str] = None
source_type: str = "INTERNAL"
@@ -153,30 +156,44 @@ def make_glossary_term_urn(
return "urn:li:glossaryTerm:" + create_id(path, default_id, enable_auto_id)
-def get_owners(owners: Owners) -> models.OwnershipClass:
- ownership_type, ownership_type_urn = validate_ownership_type(owners.type)
+def get_owners_multiple_types(owners: OwnersMultipleTypes) -> models.OwnershipClass:
+ """Allows owner types to be a list and maintains backward compatibility"""
+ if isinstance(owners, Owners):
+ return models.OwnershipClass(owners=list(get_owners(owners)))
+
+ owners_meta: List[models.OwnerClass] = []
+ for owner in owners:
+ owners_meta.extend(get_owners(owner))
+
+ return models.OwnershipClass(owners=owners_meta)
+
+
+def get_owners(owners: Owners) -> Iterable[models.OwnerClass]:
+ actual_type = owners.type or models.OwnershipTypeClass.DEVELOPER
+
+ if actual_type.startswith("urn:li:ownershipType:"):
+ ownership_type: str = "CUSTOM"
+ ownership_type_urn: Optional[str] = actual_type
+ else:
+ ownership_type, ownership_type_urn = validate_ownership_type(actual_type)
+
if owners.typeUrn is not None:
ownership_type_urn = owners.typeUrn
- owners_meta: List[models.OwnerClass] = []
+
if owners.users is not None:
- owners_meta = owners_meta + [
- models.OwnerClass(
+ for o in owners.users:
+ yield models.OwnerClass(
owner=make_user_urn(o),
type=ownership_type,
typeUrn=ownership_type_urn,
)
- for o in owners.users
- ]
if owners.groups is not None:
- owners_meta = owners_meta + [
- models.OwnerClass(
+ for o in owners.groups:
+ yield models.OwnerClass(
owner=make_group_urn(o),
type=ownership_type,
typeUrn=ownership_type_urn,
)
- for o in owners.groups
- ]
- return models.OwnershipClass(owners=owners_meta)
def get_mces(
@@ -185,7 +202,7 @@ def get_mces(
ingestion_config: BusinessGlossarySourceConfig,
ctx: PipelineContext,
) -> Iterable[Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass]]:
- root_owners = get_owners(glossary.owners)
+ root_owners = get_owners_multiple_types(glossary.owners)
if glossary.nodes:
for node in glossary.nodes:
@@ -270,7 +287,7 @@ def get_mces_from_node(
node_owners = parentOwners
if glossaryNode.owners is not None:
assert glossaryNode.owners is not None
- node_owners = get_owners(glossaryNode.owners)
+ node_owners = get_owners_multiple_types(glossaryNode.owners)
node_snapshot = models.GlossaryNodeSnapshotClass(
urn=node_urn,
@@ -426,7 +443,7 @@ def get_mces_from_term(
ownership: models.OwnershipClass = parentOwnership
if glossaryTerm.owners is not None:
assert glossaryTerm.owners is not None
- ownership = get_owners(glossaryTerm.owners)
+ ownership = get_owners_multiple_types(glossaryTerm.owners)
aspects.append(ownership)
if glossaryTerm.domain is not None:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
index bb5d0636f67123..99790de529ac3a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
@@ -129,7 +129,9 @@ def tables_for_database(db_name: Optional[str]) -> str:
row_count AS "ROW_COUNT",
bytes AS "BYTES",
clustering_key AS "CLUSTERING_KEY",
- auto_clustering_on AS "AUTO_CLUSTERING_ON"
+ auto_clustering_on AS "AUTO_CLUSTERING_ON",
+ is_dynamic AS "IS_DYNAMIC",
+ is_iceberg AS "IS_ICEBERG"
FROM {db_clause}information_schema.tables t
WHERE table_schema != 'INFORMATION_SCHEMA'
and table_type in ( 'BASE TABLE', 'EXTERNAL TABLE', 'HYBRID TABLE')
@@ -149,7 +151,9 @@ def tables_for_schema(schema_name: str, db_name: Optional[str]) -> str:
row_count AS "ROW_COUNT",
bytes AS "BYTES",
clustering_key AS "CLUSTERING_KEY",
- auto_clustering_on AS "AUTO_CLUSTERING_ON"
+ auto_clustering_on AS "AUTO_CLUSTERING_ON",
+ is_dynamic AS "IS_DYNAMIC",
+ is_iceberg AS "IS_ICEBERG"
FROM {db_clause}information_schema.tables t
where table_schema='{schema_name}'
and table_type in ('BASE TABLE', 'EXTERNAL TABLE', 'HYBRID TABLE')
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py
index b5f56f99431f91..030b2d43be81f9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py
@@ -113,6 +113,7 @@ class SnowflakeV2Report(
external_lineage_queries_secs: float = -1
num_tables_with_known_upstreams: int = 0
num_upstream_lineage_edge_parsing_failed: int = 0
+ num_secure_views_missing_definition: int = 0
data_dictionary_cache: Optional["SnowflakeDataDictionary"] = None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
index 600292c2c99429..5a69b4bb779d72 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
@@ -90,6 +90,12 @@ class SnowflakeTable(BaseTable):
foreign_keys: List[SnowflakeFK] = field(default_factory=list)
tags: Optional[List[SnowflakeTag]] = None
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
+ is_dynamic: bool = False
+ is_iceberg: bool = False
+
+ @property
+ def is_hybrid(self) -> bool:
+ return self.type is not None and self.type == "HYBRID TABLE"
@dataclass
@@ -98,6 +104,7 @@ class SnowflakeView(BaseView):
columns: List[SnowflakeColumn] = field(default_factory=list)
tags: Optional[List[SnowflakeTag]] = None
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
+ is_secure: bool = False
@dataclass
@@ -289,6 +296,8 @@ def get_tables_for_database(
rows_count=table["ROW_COUNT"],
comment=table["COMMENT"],
clustering_key=table["CLUSTERING_KEY"],
+ is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
+ is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
)
)
return tables
@@ -313,6 +322,8 @@ def get_tables_for_schema(
rows_count=table["ROW_COUNT"],
comment=table["COMMENT"],
clustering_key=table["CLUSTERING_KEY"],
+ is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
+ is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
)
)
return tables
@@ -356,6 +367,7 @@ def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]]
materialized=(
view.get("is_materialized", "false").lower() == "true"
),
+ is_secure=(view.get("is_secure", "false").lower() == "true"),
)
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
index 2bd8e8017f5492..4ceeb8560c1758 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
@@ -431,6 +431,8 @@ def _process_schema(
default_db=db_name,
default_schema=schema_name,
)
+ elif view.is_secure:
+ self.report.num_secure_views_missing_definition += 1
if self.config.include_technical_schema:
for view in views:
@@ -749,8 +751,21 @@ def get_dataset_properties(
) -> DatasetProperties:
custom_properties = {}
- if isinstance(table, SnowflakeTable) and table.clustering_key:
- custom_properties["CLUSTERING_KEY"] = table.clustering_key
+ if isinstance(table, SnowflakeTable):
+ if table.clustering_key:
+ custom_properties["CLUSTERING_KEY"] = table.clustering_key
+
+ if table.is_hybrid:
+ custom_properties["IS_HYBRID"] = "true"
+
+ if table.is_dynamic:
+ custom_properties["IS_DYNAMIC"] = "true"
+
+ if table.is_iceberg:
+ custom_properties["IS_ICEBERG"] = "true"
+
+ if isinstance(table, SnowflakeView) and table.is_secure:
+ custom_properties["IS_SECURE"] = "true"
return DatasetProperties(
name=table.name,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
index 5e79530d2391b8..d8c3075bd921b9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
@@ -1,6 +1,6 @@
import abc
from functools import cached_property
-from typing import ClassVar, Literal, Optional, Tuple
+from typing import ClassVar, List, Literal, Optional, Tuple
from datahub.configuration.pattern_utils import is_schema_allowed
from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
@@ -184,6 +184,46 @@ def _is_sys_table(table_name: str) -> bool:
return table_name.lower().startswith("sys$")
+def _split_qualified_name(qualified_name: str) -> List[str]:
+ """
+ Split a qualified name into its constituent parts.
+
+ >>> _split_qualified_name("db.my_schema.my_table")
+ ['db', 'my_schema', 'my_table']
+ >>> _split_qualified_name('"db"."my_schema"."my_table"')
+ ['db', 'my_schema', 'my_table']
+ >>> _split_qualified_name('TEST_DB.TEST_SCHEMA."TABLE.WITH.DOTS"')
+ ['TEST_DB', 'TEST_SCHEMA', 'TABLE.WITH.DOTS']
+ >>> _split_qualified_name('TEST_DB."SCHEMA.WITH.DOTS".MY_TABLE')
+ ['TEST_DB', 'SCHEMA.WITH.DOTS', 'MY_TABLE']
+ """
+
+ # Fast path - no quotes.
+ if '"' not in qualified_name:
+ return qualified_name.split(".")
+
+ # First pass - split on dots that are not inside quotes.
+ in_quote = False
+ parts: List[List[str]] = [[]]
+ for char in qualified_name:
+ if char == '"':
+ in_quote = not in_quote
+ elif char == "." and not in_quote:
+ parts.append([])
+ else:
+ parts[-1].append(char)
+
+ # Second pass - remove outer pairs of quotes.
+ result = []
+ for part in parts:
+ if len(part) > 2 and part[0] == '"' and part[-1] == '"':
+ part = part[1:-1]
+
+ result.append("".join(part))
+
+ return result
+
+
# Qualified Object names from snowflake audit logs have quotes for for snowflake quoted identifiers,
# For example "test-database"."test-schema".test_table
# whereas we generate urns without quotes even for quoted identifiers for backward compatibility
@@ -192,7 +232,7 @@ def _is_sys_table(table_name: str) -> bool:
def _cleanup_qualified_name(
qualified_name: str, structured_reporter: SourceReport
) -> str:
- name_parts = qualified_name.split(".")
+ name_parts = _split_qualified_name(qualified_name)
if len(name_parts) != 3:
if not _is_sys_table(qualified_name):
structured_reporter.info(
@@ -203,9 +243,9 @@ def _cleanup_qualified_name(
)
return qualified_name.replace('"', "")
return _combine_identifier_parts(
- db_name=name_parts[0].strip('"'),
- schema_name=name_parts[1].strip('"'),
- table_name=name_parts[2].strip('"'),
+ db_name=name_parts[0],
+ schema_name=name_parts[1],
+ table_name=name_parts[2],
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py
index a2e078f233f1d4..8630a959d3f6a3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py
@@ -69,7 +69,7 @@ def _init_job_id(self) -> JobId:
platform: Optional[str] = None
source_class = type(self.source)
if hasattr(source_class, "get_platform_name"):
- platform = source_class.get_platform_name() # type: ignore
+ platform = source_class.get_platform_name()
# Default name for everything else
job_name_suffix = self.get_job_name_suffix()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
index 68c38d4d064612..6844b8a425a7b6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
@@ -353,7 +353,7 @@ class TableauConfig(
project_path_separator: str = Field(
default="/",
- description="The separator used for the project_pattern field between project names. By default, we use a slash. "
+ description="The separator used for the project_path_pattern field between project names. By default, we use a slash. "
"You can change this if your Tableau projects contain slashes in their names, and you'd like to filter by project.",
)
@@ -959,19 +959,36 @@ def _is_allowed_project(self, project: TableauProject) -> bool:
return is_allowed
def _is_denied_project(self, project: TableauProject) -> bool:
- # Either project name or project path should exist in deny
- for deny_pattern in self.config.project_pattern.deny:
- # Either name or project path is denied
- if re.match(
- deny_pattern, project.name, self.config.project_pattern.regex_flags
- ) or re.match(
- deny_pattern,
- self._get_project_path(project),
- self.config.project_pattern.regex_flags,
- ):
- return True
- logger.info(f"project({project.name}) is not denied as per project_pattern")
- return False
+ """
+ Why use an explicit denial check instead of the `AllowDenyPattern.allowed` method?
+
+ Consider a scenario where a Tableau site contains four projects: A, B, C, and D, with the following hierarchical relationship:
+
+ - **A**
+ - **B** (Child of A)
+ - **C** (Child of A)
+ - **D**
+
+ In this setup:
+
+ - `project_pattern` is configured with `allow: ["A"]` and `deny: ["B"]`.
+ - `extract_project_hierarchy` is set to `True`.
+
+ The goal is to extract assets from project A and its children while explicitly denying the child project B.
+
+ If we rely solely on the `project_pattern.allowed()` method, project C's assets will not be ingested.
+ This happens because project C is not explicitly included in the `allow` list, nor is it part of the `deny` list.
+ However, since `extract_project_hierarchy` is enabled, project C should ideally be included in the ingestion process unless explicitly denied.
+
+    To address this, the function explicitly checks the deny regex to ensure that project C's assets are ingested if it is not specifically denied in the deny list. This approach ensures that the hierarchy is respected while adhering to the configured allow/deny rules.
+ """
+
+ # Either project_pattern or project_path_pattern is set in a recipe
+ # TableauConfig.projects_backward_compatibility ensures that at least one of these properties is configured.
+
+ return self.config.project_pattern.denied(
+ project.name
+ ) or self.config.project_path_pattern.denied(self._get_project_path(project))
def _init_tableau_project_registry(self, all_project_map: dict) -> None:
list_of_skip_projects: List[TableauProject] = []
@@ -999,9 +1016,11 @@ def _init_tableau_project_registry(self, all_project_map: dict) -> None:
for project in list_of_skip_projects:
if (
project.parent_id in projects_to_ingest
- and self._is_denied_project(project) is False
+ and not self._is_denied_project(project)
):
- logger.debug(f"Project {project.name} is added in project registry")
+ logger.debug(
+ f"Project {project.name} is added in project registry as it's a child project and not explicitly denied in `deny` list"
+ )
projects_to_ingest[project.id] = project
# We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here.
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py
index ac917c5f128ed2..c5d14e0afe15a5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py
@@ -979,7 +979,6 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
len(query_filter.keys()) == 1
and query_filter.get(c.ID_WITH_IN)
and isinstance(query_filter[c.ID_WITH_IN], list)
- and len(query_filter[c.ID_WITH_IN]) > 100 * page_size
):
ids = query_filter[c.ID_WITH_IN]
filter_pages = [
diff --git a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
index e3f2fbc786b437..6aa10381a883ef 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
@@ -123,6 +123,13 @@ def get_urn_for_table(
)
return urn
+ def resolve_urn(self, urn: str) -> Tuple[str, Optional[SchemaInfo]]:
+ schema_info = self._resolve_schema_info(urn)
+ if schema_info:
+ return urn, schema_info
+
+ return urn, None
+
def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]:
urn = self.get_urn_for_table(table)
@@ -293,3 +300,19 @@ def _convert_schema_field_list_to_info(
def _convert_schema_aspect_to_info(schema_metadata: SchemaMetadataClass) -> SchemaInfo:
return _convert_schema_field_list_to_info(schema_metadata.fields)
+
+
+def match_columns_to_schema(
+ schema_info: SchemaInfo, input_columns: List[str]
+) -> List[str]:
+    column_from_gms: List[str] = list(schema_info.keys())  # list() to silence lint
+
+ gms_column_map: Dict[str, str] = {
+ column.lower(): column for column in column_from_gms
+ }
+
+ output_columns: List[str] = [
+ gms_column_map.get(column.lower(), column) for column in input_columns
+ ]
+
+ return output_columns
diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
index 4ff68574bf20e6..f387618bfaec12 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
@@ -1181,6 +1181,45 @@ def sqlglot_lineage(
)
+@functools.lru_cache(maxsize=128)
+def create_and_cache_schema_resolver(
+ platform: str,
+ env: str,
+ graph: Optional[DataHubGraph] = None,
+ platform_instance: Optional[str] = None,
+ schema_aware: bool = True,
+) -> SchemaResolver:
+ return create_schema_resolver(
+ platform=platform,
+ env=env,
+ graph=graph,
+ platform_instance=platform_instance,
+ schema_aware=schema_aware,
+ )
+
+
+def create_schema_resolver(
+ platform: str,
+ env: str,
+ graph: Optional[DataHubGraph] = None,
+ platform_instance: Optional[str] = None,
+ schema_aware: bool = True,
+) -> SchemaResolver:
+ if graph and schema_aware:
+ return graph._make_schema_resolver(
+ platform=platform,
+ platform_instance=platform_instance,
+ env=env,
+ )
+
+ return SchemaResolver(
+ platform=platform,
+ platform_instance=platform_instance,
+ env=env,
+ graph=None,
+ )
+
+
def create_lineage_sql_parsed_result(
query: str,
default_db: Optional[str],
@@ -1191,21 +1230,17 @@ def create_lineage_sql_parsed_result(
graph: Optional[DataHubGraph] = None,
schema_aware: bool = True,
) -> SqlParsingResult:
+ schema_resolver = create_schema_resolver(
+ platform=platform,
+ platform_instance=platform_instance,
+ env=env,
+ schema_aware=schema_aware,
+ graph=graph,
+ )
+
+ needs_close: bool = True
if graph and schema_aware:
needs_close = False
- schema_resolver = graph._make_schema_resolver(
- platform=platform,
- platform_instance=platform_instance,
- env=env,
- )
- else:
- needs_close = True
- schema_resolver = SchemaResolver(
- platform=platform,
- platform_instance=platform_instance,
- env=env,
- graph=None,
- )
try:
return sqlglot_lineage(
diff --git a/metadata-ingestion/src/datahub/testing/doctest.py b/metadata-ingestion/src/datahub/testing/doctest.py
new file mode 100644
index 00000000000000..b89df5c65c7e1b
--- /dev/null
+++ b/metadata-ingestion/src/datahub/testing/doctest.py
@@ -0,0 +1,12 @@
+import doctest
+from types import ModuleType
+
+
+def assert_doctest(module: ModuleType) -> None:
+ result = doctest.testmod(
+ module,
+ raise_on_error=True,
+ verbose=True,
+ )
+ if result.attempted == 0:
+ raise ValueError(f"No doctests found in {module.__name__}")
diff --git a/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns.yml b/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns.yml
new file mode 100644
index 00000000000000..94aae6999a3f50
--- /dev/null
+++ b/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns.yml
@@ -0,0 +1,38 @@
+version: "1"
+source: DataHub
+owners:
+ users:
+ - mjames
+url: "https://github.com/datahub-project/datahub/"
+
+nodes:
+ - name: Custom URN Types
+ description: Testing custom ownership URN types
+ owners:
+ - type: urn:li:ownershipType:custom_type_1
+ users:
+ - user1
+ groups:
+ - group1
+ - type: urn:li:ownershipType:custom_type_2
+ users:
+ - user2
+ terms:
+ - name: Mixed URN Types
+ description: Term with custom URN types
+ owners:
+ - type: urn:li:ownershipType:custom_type_3
+ users:
+ - user3
+ - type: urn:li:ownershipType:custom_type_4
+ groups:
+ - group2
+ - name: Mixed Standard and URN
+ description: Term with both standard and URN types
+ owners:
+ - type: DEVELOPER
+ users:
+ - dev1
+ - type: urn:li:ownershipType:custom_type_5
+ groups:
+ - group3
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns_golden.json b/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns_golden.json
new file mode 100644
index 00000000000000..2fc3de77efd8eb
--- /dev/null
+++ b/metadata-ingestion/tests/integration/business-glossary/custom_ownership_urns_golden.json
@@ -0,0 +1,188 @@
+[
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": {
+ "urn": "urn:li:glossaryNode:Custom URN Types",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": {
+ "customProperties": {},
+ "definition": "Testing custom ownership URN types",
+ "name": "Custom URN Types"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:user1",
+ "type": "CUSTOM",
+ "typeUrn": "urn:li:ownershipType:custom_type_1"
+ },
+ {
+ "owner": "urn:li:corpGroup:group1",
+ "type": "CUSTOM",
+ "typeUrn": "urn:li:ownershipType:custom_type_1"
+ },
+ {
+ "owner": "urn:li:corpuser:user2",
+ "type": "CUSTOM",
+ "typeUrn": "urn:li:ownershipType:custom_type_2"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
+ "urn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
+ "customProperties": {},
+ "name": "Mixed URN Types",
+ "definition": "Term with custom URN types",
+ "parentNode": "urn:li:glossaryNode:Custom URN Types",
+ "termSource": "INTERNAL",
+ "sourceRef": "DataHub",
+ "sourceUrl": "https://github.com/datahub-project/datahub/"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:user3",
+ "type": "CUSTOM",
+ "typeUrn": "urn:li:ownershipType:custom_type_3"
+ },
+ {
+ "owner": "urn:li:corpGroup:group2",
+ "type": "CUSTOM",
+ "typeUrn": "urn:li:ownershipType:custom_type_4"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
+ "urn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
+ "customProperties": {},
+ "name": "Mixed Standard and URN",
+ "definition": "Term with both standard and URN types",
+ "parentNode": "urn:li:glossaryNode:Custom URN Types",
+ "termSource": "INTERNAL",
+ "sourceRef": "DataHub",
+ "sourceUrl": "https://github.com/datahub-project/datahub/"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:dev1",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpGroup:group3",
+ "type": "CUSTOM",
+ "typeUrn": "urn:li:ownershipType:custom_type_5"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryNode",
+ "entityUrn": "urn:li:glossaryNode:Custom URN Types",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryTerm",
+ "entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryTerm",
+ "entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
+ "lastRunId": "no-run-id-provided"
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types.yml b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types.yml
new file mode 100644
index 00000000000000..efcc594f758fa8
--- /dev/null
+++ b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types.yml
@@ -0,0 +1,39 @@
+version: "1"
+source: DataHub
+owners:
+ users:
+ - mjames
+url: "https://github.com/datahub-project/datahub/"
+
+nodes:
+ - name: Different Owner Types
+ description: Testing multiple owners with different types
+ owners:
+ - type: DEVELOPER
+ users:
+ - dev1
+ groups:
+ - engineering
+ - type: DATAOWNER
+ users:
+ - owner1
+ groups:
+ - data_stewards
+ - type: PRODUCER
+ users:
+ - producer1
+ terms:
+ - name: Mixed Ownership
+ description: Term with different owner types
+ owners:
+ - type: STAKEHOLDER
+ users:
+ - stakeholder1
+ groups:
+ - business
+ - type: DEVELOPER
+ users:
+ - dev2
+ - type: DATAOWNER
+ groups:
+ - compliance
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types_golden.json b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types_golden.json
new file mode 100644
index 00000000000000..4cec348708291a
--- /dev/null
+++ b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_different_types_golden.json
@@ -0,0 +1,138 @@
+[
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": {
+ "urn": "urn:li:glossaryNode:Different Owner Types",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": {
+ "customProperties": {},
+ "definition": "Testing multiple owners with different types",
+ "name": "Different Owner Types"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:dev1",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpGroup:engineering",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpuser:owner1",
+ "type": "DATAOWNER"
+ },
+ {
+ "owner": "urn:li:corpGroup:data_stewards",
+ "type": "DATAOWNER"
+ },
+ {
+ "owner": "urn:li:corpuser:producer1",
+ "type": "PRODUCER"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-2te9j9",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
+ "urn": "urn:li:glossaryTerm:Different Owner Types.Mixed Ownership",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
+ "customProperties": {},
+ "name": "Mixed Ownership",
+ "definition": "Term with different owner types",
+ "parentNode": "urn:li:glossaryNode:Different Owner Types",
+ "termSource": "INTERNAL",
+ "sourceRef": "DataHub",
+ "sourceUrl": "https://github.com/datahub-project/datahub/"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:stakeholder1",
+ "type": "STAKEHOLDER"
+ },
+ {
+ "owner": "urn:li:corpGroup:business",
+ "type": "STAKEHOLDER"
+ },
+ {
+ "owner": "urn:li:corpuser:dev2",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpGroup:compliance",
+ "type": "DATAOWNER"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-2te9j9",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryNode",
+ "entityUrn": "urn:li:glossaryNode:Different Owner Types",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-2te9j9",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryTerm",
+ "entityUrn": "urn:li:glossaryTerm:Different Owner Types.Mixed Ownership",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-2te9j9",
+ "lastRunId": "no-run-id-provided"
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type.yml b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type.yml
new file mode 100644
index 00000000000000..8fb093b8b58993
--- /dev/null
+++ b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type.yml
@@ -0,0 +1,37 @@
+version: "1"
+source: DataHub
+owners:
+ users:
+ - mjames
+url: "https://github.com/datahub-project/datahub/"
+
+nodes:
+ - name: Multiple Owners
+ description: Testing multiple owners with same type
+ owners:
+ - type: DEVELOPER
+ users:
+ - dev1
+ - dev2
+ groups:
+ - engineering
+ - type: DEVELOPER
+ users:
+ - dev3
+ groups:
+ - qa
+ terms:
+ - name: Multiple Dev Owners
+ description: Term owned by multiple developers
+ owners:
+ - type: DEVELOPER
+ users:
+ - dev4
+ - dev5
+ groups:
+ - platform
+ - type: DEVELOPER
+ users:
+ - dev6
+ groups:
+ - infra
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type_golden.json b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type_golden.json
new file mode 100644
index 00000000000000..9342682510d84b
--- /dev/null
+++ b/metadata-ingestion/tests/integration/business-glossary/multiple_owners_same_type_golden.json
@@ -0,0 +1,142 @@
+[
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": {
+ "urn": "urn:li:glossaryNode:Multiple Owners",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": {
+ "customProperties": {},
+ "definition": "Testing multiple owners with same type",
+ "name": "Multiple Owners"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:dev1",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpuser:dev2",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpGroup:engineering",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpuser:dev3",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpGroup:qa",
+ "type": "DEVELOPER"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-0l66l7",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
+ "urn": "urn:li:glossaryTerm:Multiple Owners.Multiple Dev Owners",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
+ "customProperties": {},
+ "name": "Multiple Dev Owners",
+ "definition": "Term owned by multiple developers",
+ "parentNode": "urn:li:glossaryNode:Multiple Owners",
+ "termSource": "INTERNAL",
+ "sourceRef": "DataHub",
+ "sourceUrl": "https://github.com/datahub-project/datahub/"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:dev4",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpuser:dev5",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpGroup:platform",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpuser:dev6",
+ "type": "DEVELOPER"
+ },
+ {
+ "owner": "urn:li:corpGroup:infra",
+ "type": "DEVELOPER"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-0l66l7",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryNode",
+ "entityUrn": "urn:li:glossaryNode:Multiple Owners",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-0l66l7",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryTerm",
+ "entityUrn": "urn:li:glossaryTerm:Multiple Owners.Multiple Dev Owners",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-0l66l7",
+ "lastRunId": "no-run-id-provided"
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/business-glossary/single_owner_types.yml b/metadata-ingestion/tests/integration/business-glossary/single_owner_types.yml
new file mode 100644
index 00000000000000..22fc24e6695bc2
--- /dev/null
+++ b/metadata-ingestion/tests/integration/business-glossary/single_owner_types.yml
@@ -0,0 +1,39 @@
+version: "1"
+source: DataHub
+url: "https://github.com/datahub-project/datahub/"
+owners:
+ users:
+ - mjames
+
+nodes:
+ - name: Single Owner Types
+ description: Testing different single owner types
+ owners:
+ type: DEVELOPER
+ users:
+ - dev1
+ terms:
+ - name: Developer Owned
+ description: Term owned by developer
+ owners:
+ type: DEVELOPER
+ users:
+ - dev2
+ - name: Data Owner Owned
+ description: Term owned by data owner
+ owners:
+ type: DATAOWNER
+ users:
+ - dataowner1
+ - name: Producer Owned
+ description: Term owned by producer
+ owners:
+ type: PRODUCER
+ users:
+ - producer1
+ - name: Stakeholder Owned
+ description: Term owned by stakeholder
+ owners:
+ type: STAKEHOLDER
+ groups:
+ - stakeholders
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/business-glossary/single_owner_types_golden.json b/metadata-ingestion/tests/integration/business-glossary/single_owner_types_golden.json
new file mode 100644
index 00000000000000..006e77f523a10b
--- /dev/null
+++ b/metadata-ingestion/tests/integration/business-glossary/single_owner_types_golden.json
@@ -0,0 +1,278 @@
+[
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": {
+ "urn": "urn:li:glossaryNode:Single Owner Types",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": {
+ "customProperties": {},
+ "definition": "Testing different single owner types",
+ "name": "Single Owner Types"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:dev1",
+ "type": "DEVELOPER"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
+ "urn": "urn:li:glossaryTerm:Single Owner Types.Developer Owned",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
+ "customProperties": {},
+ "name": "Developer Owned",
+ "definition": "Term owned by developer",
+ "parentNode": "urn:li:glossaryNode:Single Owner Types",
+ "termSource": "INTERNAL",
+ "sourceRef": "DataHub",
+ "sourceUrl": "https://github.com/datahub-project/datahub/"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:dev2",
+ "type": "DEVELOPER"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
+ "urn": "urn:li:glossaryTerm:Single Owner Types.Data Owner Owned",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
+ "customProperties": {},
+ "name": "Data Owner Owned",
+ "definition": "Term owned by data owner",
+ "parentNode": "urn:li:glossaryNode:Single Owner Types",
+ "termSource": "INTERNAL",
+ "sourceRef": "DataHub",
+ "sourceUrl": "https://github.com/datahub-project/datahub/"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:dataowner1",
+ "type": "DATAOWNER"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
+ "urn": "urn:li:glossaryTerm:Single Owner Types.Producer Owned",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
+ "customProperties": {},
+ "name": "Producer Owned",
+ "definition": "Term owned by producer",
+ "parentNode": "urn:li:glossaryNode:Single Owner Types",
+ "termSource": "INTERNAL",
+ "sourceRef": "DataHub",
+ "sourceUrl": "https://github.com/datahub-project/datahub/"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:producer1",
+ "type": "PRODUCER"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
+ "urn": "urn:li:glossaryTerm:Single Owner Types.Stakeholder Owned",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
+ "customProperties": {},
+ "name": "Stakeholder Owned",
+ "definition": "Term owned by stakeholder",
+ "parentNode": "urn:li:glossaryNode:Single Owner Types",
+ "termSource": "INTERNAL",
+ "sourceRef": "DataHub",
+ "sourceUrl": "https://github.com/datahub-project/datahub/"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpGroup:stakeholders",
+ "type": "STAKEHOLDER"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryNode",
+ "entityUrn": "urn:li:glossaryNode:Single Owner Types",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryTerm",
+ "entityUrn": "urn:li:glossaryTerm:Single Owner Types.Data Owner Owned",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryTerm",
+ "entityUrn": "urn:li:glossaryTerm:Single Owner Types.Developer Owned",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryTerm",
+ "entityUrn": "urn:li:glossaryTerm:Single Owner Types.Producer Owned",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "glossaryTerm",
+ "entityUrn": "urn:li:glossaryTerm:Single Owner Types.Stakeholder Owned",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "datahub-business-glossary-2020_04_14-07_00_00-ruwyic",
+ "lastRunId": "no-run-id-provided"
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py
index 73b90df65c04fe..74cf9aa3b528f2 100644
--- a/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py
+++ b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py
@@ -71,6 +71,134 @@ def test_glossary_ingest(
)
+@freeze_time(FROZEN_TIME)
+@pytest.mark.integration
+def test_single_owner_types(
+ mock_datahub_graph_instance,
+ pytestconfig,
+ tmp_path,
+ mock_time,
+):
+ """Test basic single owner cases with different ownership types"""
+ test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary"
+ output_mces_path: str = f"{tmp_path}/single_owner_types.json"
+ golden_mces_path: str = f"{test_resources_dir}/single_owner_types_golden.json"
+
+ pipeline = Pipeline.create(
+ get_default_recipe(
+ glossary_yml_file_path=f"{test_resources_dir}/single_owner_types.yml",
+ event_output_file_path=output_mces_path,
+ enable_auto_id=False,
+ )
+ )
+ pipeline.ctx.graph = mock_datahub_graph_instance
+ pipeline.run()
+ pipeline.raise_from_status()
+
+ mce_helpers.check_golden_file(
+ pytestconfig,
+ output_path=output_mces_path,
+ golden_path=golden_mces_path,
+ )
+
+
+@freeze_time(FROZEN_TIME)
+@pytest.mark.integration
+def test_multiple_owners_same_type(
+ mock_datahub_graph_instance,
+ pytestconfig,
+ tmp_path,
+ mock_time,
+):
+ """Test multiple owners all having the same type"""
+ test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary"
+ output_mces_path: str = f"{tmp_path}/multiple_owners_same_type.json"
+ golden_mces_path: str = (
+ f"{test_resources_dir}/multiple_owners_same_type_golden.json"
+ )
+
+ pipeline = Pipeline.create(
+ get_default_recipe(
+ glossary_yml_file_path=f"{test_resources_dir}/multiple_owners_same_type.yml",
+ event_output_file_path=output_mces_path,
+ enable_auto_id=False,
+ )
+ )
+ pipeline.ctx.graph = mock_datahub_graph_instance
+ pipeline.run()
+ pipeline.raise_from_status()
+
+ mce_helpers.check_golden_file(
+ pytestconfig,
+ output_path=output_mces_path,
+ golden_path=golden_mces_path,
+ )
+
+
+@freeze_time(FROZEN_TIME)
+@pytest.mark.integration
+def test_multiple_owners_different_types(
+ mock_datahub_graph_instance,
+ pytestconfig,
+ tmp_path,
+ mock_time,
+):
+ """Test multiple owners with different types"""
+ test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary"
+ output_mces_path: str = f"{tmp_path}/multiple_owners_different_types.json"
+ golden_mces_path: str = (
+ f"{test_resources_dir}/multiple_owners_different_types_golden.json"
+ )
+
+ pipeline = Pipeline.create(
+ get_default_recipe(
+ glossary_yml_file_path=f"{test_resources_dir}/multiple_owners_different_types.yml",
+ event_output_file_path=output_mces_path,
+ enable_auto_id=False,
+ )
+ )
+ pipeline.ctx.graph = mock_datahub_graph_instance
+ pipeline.run()
+ pipeline.raise_from_status()
+
+ mce_helpers.check_golden_file(
+ pytestconfig,
+ output_path=output_mces_path,
+ golden_path=golden_mces_path,
+ )
+
+
+@freeze_time(FROZEN_TIME)
+@pytest.mark.integration
+def test_custom_ownership_urns(
+ mock_datahub_graph_instance,
+ pytestconfig,
+ tmp_path,
+ mock_time,
+):
+ """Test custom ownership URNs"""
+ test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary"
+ output_mces_path: str = f"{tmp_path}/custom_ownership_urns.json"
+ golden_mces_path: str = f"{test_resources_dir}/custom_ownership_urns_golden.json"
+
+ pipeline = Pipeline.create(
+ get_default_recipe(
+ glossary_yml_file_path=f"{test_resources_dir}/custom_ownership_urns.yml",
+ event_output_file_path=output_mces_path,
+ enable_auto_id=False,
+ )
+ )
+ pipeline.ctx.graph = mock_datahub_graph_instance
+ pipeline.run()
+ pipeline.raise_from_status()
+
+ mce_helpers.check_golden_file(
+ pytestconfig,
+ output_path=output_mces_path,
+ golden_path=golden_mces_path,
+ )
+
+
@freeze_time(FROZEN_TIME)
def test_auto_id_creation_on_reserved_char():
id_: str = business_glossary.create_id(["pii", "secure % password"], None, False)
diff --git a/metadata-ingestion/tests/integration/git/test_git_clone.py b/metadata-ingestion/tests/integration/git/test_git_clone.py
index 773e84cbf7488b..60cf20fefcbdd1 100644
--- a/metadata-ingestion/tests/integration/git/test_git_clone.py
+++ b/metadata-ingestion/tests/integration/git/test_git_clone.py
@@ -1,12 +1,13 @@
-import doctest
import os
import pytest
from pydantic import SecretStr
+import datahub.ingestion.source.git.git_import
from datahub.configuration.common import ConfigurationWarning
from datahub.configuration.git import GitInfo, GitReference
from datahub.ingestion.source.git.git_import import GitClone
+from datahub.testing.doctest import assert_doctest
LOOKML_TEST_SSH_KEY = os.environ.get("DATAHUB_LOOKML_GIT_TEST_SSH_KEY")
@@ -82,15 +83,8 @@ def test_github_branch():
assert config.branch_for_clone == "main"
-def test_sanitize_repo_url():
- import datahub.ingestion.source.git.git_import
-
- assert (
- doctest.testmod(
- datahub.ingestion.source.git.git_import, raise_on_error=True
- ).attempted
- == 3
- )
+def test_sanitize_repo_url() -> None:
+ assert_doctest(datahub.ingestion.source.git.git_import)
def test_git_clone_public(tmp_path):
diff --git a/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/data.model.lkml b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/data.model.lkml
new file mode 100644
index 00000000000000..95391f6a73e635
--- /dev/null
+++ b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/data.model.lkml
@@ -0,0 +1,6 @@
+connection: "my_connection"
+
+include: "top_10_employee_income_source.view.lkml"
+
+explore: top_10_employee_income_source {
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/top_10_employee_income_source.view.lkml b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/top_10_employee_income_source.view.lkml
new file mode 100644
index 00000000000000..6037bab33c44f3
--- /dev/null
+++ b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution/top_10_employee_income_source.view.lkml
@@ -0,0 +1,18 @@
+view: top_10_employee_income_source {
+ sql_table_name: "db.public.employee"
+ ;;
+ dimension: id {
+ type: number
+ sql: ${TABLE}.id ;;
+ }
+
+ dimension: name {
+ type: string
+ sql: ${TABLE}.name ;;
+ }
+
+ dimension: source {
+ type: string
+ sql: ${TABLE}.source ;;
+ }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/lookml/gms_schema_resolution_golden.json b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution_golden.json
new file mode 100644
index 00000000000000..9b0dd78ca1e8e0
--- /dev/null
+++ b/metadata-ingestion/tests/integration/lookml/gms_schema_resolution_golden.json
@@ -0,0 +1,358 @@
+[
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "containerProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {
+ "platform": "looker",
+ "env": "PROD",
+ "project_name": "lkml_samples"
+ },
+ "name": "lkml_samples",
+ "env": "PROD"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:looker"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "LookML Project"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "Folders"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "View"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "view: top_10_employee_income_source {\n sql_table_name: \"db.public.employee\"\n ;;\n dimension: id {\n type: number\n sql: ${TABLE}.id ;;\n }\n\n dimension: name {\n type: string\n sql: ${TABLE}.name ;;\n }\n\n dimension: source {\n type: string\n sql: ${TABLE}.source ;;\n }\n}",
+ "viewLanguage": "lookml"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.BrowsePaths": {
+ "paths": [
+ "/Develop/lkml_samples/"
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Status": {
+ "removed": false
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 1586847600000,
+ "actor": "urn:li:corpuser:datahub"
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,public.employee,PROD)",
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,public.employee,PROD),Id)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),id)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,public.employee,PROD),Name)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),name)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,public.employee,PROD),source)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),source)"
+ ],
+ "confidenceScore": 1.0
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "top_10_employee_income_source",
+ "platform": "urn:li:dataPlatform:looker",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.OtherSchema": {
+ "rawSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "id",
+ "nullable": false,
+ "description": "",
+ "label": "",
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "number",
+ "recursive": false,
+ "globalTags": {
+ "tags": [
+ {
+ "tag": "urn:li:tag:Dimension"
+ }
+ ]
+ },
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "name",
+ "nullable": false,
+ "description": "",
+ "label": "",
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "globalTags": {
+ "tags": [
+ {
+ "tag": "urn:li:tag:Dimension"
+ }
+ ]
+ },
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "source",
+ "nullable": false,
+ "description": "",
+ "label": "",
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "globalTags": {
+ "tags": [
+ {
+ "tag": "urn:li:tag:Dimension"
+ }
+ ]
+ },
+ "isPartOfKey": false
+ }
+ ],
+ "primaryKeys": []
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "customProperties": {
+ "looker.file.path": "top_10_employee_income_source.view.lkml",
+ "looker.model": "data"
+ },
+ "name": "top_10_employee_income_source",
+ "tags": []
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "Develop"
+ },
+ {
+ "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "tag",
+ "entityUrn": "urn:li:tag:Dimension",
+ "changeType": "UPSERT",
+ "aspectName": "tagKey",
+ "aspect": {
+ "json": {
+ "name": "Dimension"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py
index 4cd2777dc7dcad..940e7f36675f79 100644
--- a/metadata-ingestion/tests/integration/lookml/test_lookml.py
+++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py
@@ -1,8 +1,8 @@
import logging
import pathlib
-from typing import Any, List
+from typing import Any, List, Optional, Tuple
from unittest import mock
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
import pydantic
import pytest
@@ -25,6 +25,7 @@
MetadataChangeEventClass,
UpstreamLineageClass,
)
+from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
from tests.test_helpers import mce_helpers
from tests.test_helpers.state_helpers import get_current_checkpoint_from_pipeline
@@ -62,7 +63,7 @@ def get_default_recipe(output_file_path, base_folder_path):
@freeze_time(FROZEN_TIME)
def test_lookml_ingest(pytestconfig, tmp_path, mock_time):
- """Test backwards compatibility with previous form of config with new flags turned off"""
+ """Test backwards compatibility with a previous form of config with new flags turned off"""
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
mce_out_file = "expected_output.json"
@@ -1013,3 +1014,40 @@ def test_drop_hive(pytestconfig, tmp_path, mock_time):
output_path=tmp_path / mce_out_file,
golden_path=golden_path,
)
+
+
+@freeze_time(FROZEN_TIME)
+def test_gms_schema_resolution(pytestconfig, tmp_path, mock_time):
+ test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
+    mce_out_file = "gms_schema_resolution.json"
+
+ new_recipe = get_default_recipe(
+ f"{tmp_path}/{mce_out_file}",
+ f"{test_resources_dir}/gms_schema_resolution",
+ )
+
+ new_recipe["source"]["config"]["connection_to_platform_map"] = {
+ "my_connection": "hive"
+ }
+
+ return_value: Tuple[str, Optional[SchemaInfo]] = (
+ "fake_dataset_urn",
+ {
+ "Id": "String",
+ "Name": "String",
+ "source": "String",
+ },
+ )
+
+ with patch.object(SchemaResolver, "resolve_urn", return_value=return_value):
+ pipeline = Pipeline.create(new_recipe)
+ pipeline.run()
+ pipeline.pretty_print_summary()
+ pipeline.raise_from_status(raise_warnings=True)
+
+ golden_path = test_resources_dir / "gms_schema_resolution_golden.json"
+ mce_helpers.check_golden_file(
+ pytestconfig,
+ output_path=tmp_path / mce_out_file,
+ golden_path=golden_path,
+ )
diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
index 1665b1401a636a..5b557efdab0bb0 100644
--- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
+++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
@@ -60,7 +60,7 @@
"site": "acryl",
"projects": ["default", "Project 2", "Samples"],
"extract_project_hierarchy": False,
- "page_size": 10,
+ "page_size": 1000,
"ingest_tags": True,
"ingest_owner": True,
"ingest_tables_external": True,
@@ -673,7 +673,7 @@ def test_tableau_ingest_with_platform_instance(
"site": "acryl",
"platform_instance": "acryl_site1",
"projects": ["default", "Project 2"],
- "page_size": 10,
+ "page_size": 1000,
"ingest_tags": True,
"ingest_owner": True,
"ingest_tables_external": True,
diff --git a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py
index 138319feb3db67..c7a1fab068a838 100644
--- a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py
+++ b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py
@@ -3,6 +3,7 @@
from botocore.stub import Stubber
from freezegun import freeze_time
+import datahub.ingestion.source.aws.sagemaker_processors.models
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.sink.file import write_metadata_file
from datahub.ingestion.source.aws.sagemaker import (
@@ -13,6 +14,7 @@
job_type_to_info,
job_types,
)
+from datahub.testing.doctest import assert_doctest
from tests.test_helpers import mce_helpers
from tests.unit.sagemaker.test_sagemaker_source_stubs import (
describe_endpoint_response_1,
@@ -243,16 +245,5 @@ def test_sagemaker_ingest(tmp_path, pytestconfig):
)
-def test_doc_test_run():
- import doctest
-
- import datahub.ingestion.source.aws.sagemaker_processors.models
-
- assert (
- doctest.testmod(
- datahub.ingestion.source.aws.sagemaker_processors.models,
- raise_on_error=True,
- verbose=True,
- ).attempted
- == 1
- )
+def test_doc_test_run() -> None:
+ assert_doctest(datahub.ingestion.source.aws.sagemaker_processors.models)
diff --git a/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py b/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py
index 3284baf103e5af..c735feb5396086 100644
--- a/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py
+++ b/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py
@@ -4,6 +4,7 @@
import pytest
from pydantic import ValidationError
+import datahub.ingestion.source.snowflake.snowflake_utils
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.pattern_utils import UUID_REGEX
from datahub.ingestion.api.source import SourceCapability
@@ -26,6 +27,7 @@
)
from datahub.ingestion.source.snowflake.snowflake_utils import SnowsightUrlBuilder
from datahub.ingestion.source.snowflake.snowflake_v2 import SnowflakeV2Source
+from datahub.testing.doctest import assert_doctest
from tests.test_helpers import test_connection_helpers
default_oauth_dict: Dict[str, Any] = {
@@ -658,3 +660,7 @@ def test_create_snowsight_base_url_ap_northeast_1():
).snowsight_base_url
assert result == "https://app.snowflake.com/ap-northeast-1.aws/account_locator/"
+
+
+def test_snowflake_utils() -> None:
+ assert_doctest(datahub.ingestion.source.snowflake.snowflake_utils)
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_schemaresolver.py b/metadata-ingestion/tests/unit/sql_parsing/test_schemaresolver.py
index e5fa980bec4522..67222531d3bc15 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_schemaresolver.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_schemaresolver.py
@@ -1,7 +1,12 @@
-from datahub.sql_parsing.schema_resolver import SchemaResolver, _TableName
+from datahub.sql_parsing.schema_resolver import (
+ SchemaInfo,
+ SchemaResolver,
+ _TableName,
+ match_columns_to_schema,
+)
-def test_basic_schema_resolver():
+def create_default_schema_resolver(urn: str) -> SchemaResolver:
schema_resolver = SchemaResolver(
platform="redshift",
env="PROD",
@@ -9,18 +14,51 @@ def test_basic_schema_resolver():
)
schema_resolver.add_raw_schema_info(
- urn="urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.test_table,PROD)",
+ urn=urn,
schema_info={"name": "STRING"},
)
+ return schema_resolver
+
+
+def test_basic_schema_resolver():
+ input_urn = (
+ "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.test_table,PROD)"
+ )
+
+ schema_resolver = create_default_schema_resolver(urn=input_urn)
+
urn, schema = schema_resolver.resolve_table(
_TableName(database="my_db", db_schema="public", table="test_table")
)
- assert (
- urn
- == "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.test_table,PROD)"
+
+ assert urn == input_urn
+
+ assert schema
+
+ assert schema["name"]
+
+ assert schema_resolver.schema_count() == 1
+
+
+def test_resolve_urn():
+ input_urn: str = (
+ "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.public.test_table,PROD)"
+ )
+
+ schema_resolver = create_default_schema_resolver(urn=input_urn)
+
+ schema_resolver.add_raw_schema_info(
+ urn=input_urn,
+ schema_info={"name": "STRING"},
)
+
+ urn, schema = schema_resolver.resolve_urn(urn=input_urn)
+
+ assert urn == input_urn
+
assert schema
+
assert schema["name"]
assert schema_resolver.schema_count() == 1
@@ -62,3 +100,13 @@ def test_get_urn_for_table_not_lower_should_keep_capital_letters():
== "urn:li:dataset:(urn:li:dataPlatform:mssql,Uppercased-Instance.Database.DataSet.Table,PROD)"
)
assert schema_resolver.schema_count() == 0
+
+
+def test_match_columns_to_schema():
+ schema_info: SchemaInfo = {"id": "string", "Name": "string", "Address": "string"}
+
+ output_columns = match_columns_to_schema(
+ schema_info, input_columns=["Id", "name", "address", "weight"]
+ )
+
+ assert output_columns == ["id", "Name", "Address", "weight"]
diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py
index f0d4c3408271f7..0a869297837014 100644
--- a/metadata-ingestion/tests/unit/test_dbt_source.py
+++ b/metadata-ingestion/tests/unit/test_dbt_source.py
@@ -1,4 +1,3 @@
-import doctest
from datetime import timedelta
from typing import Dict, List, Union
from unittest import mock
@@ -22,6 +21,7 @@
OwnershipSourceTypeClass,
OwnershipTypeClass,
)
+from datahub.testing.doctest import assert_doctest
def create_owners_list_from_urn_list(
@@ -442,7 +442,7 @@ def test_dbt_cloud_config_with_defined_metadata_endpoint():
def test_infer_metadata_endpoint() -> None:
- assert doctest.testmod(dbt_cloud, raise_on_error=True).attempted > 0
+ assert_doctest(dbt_cloud)
def test_dbt_time_parsing() -> None:
diff --git a/metadata-ingestion/tests/unit/utilities/test_utilities.py b/metadata-ingestion/tests/unit/utilities/test_utilities.py
index 91819bff41e629..c333ceb136fffc 100644
--- a/metadata-ingestion/tests/unit/utilities/test_utilities.py
+++ b/metadata-ingestion/tests/unit/utilities/test_utilities.py
@@ -1,9 +1,10 @@
-import doctest
import re
from typing import List
+import datahub.utilities.logging_manager
from datahub.sql_parsing.schema_resolver import SchemaResolver
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
+from datahub.testing.doctest import assert_doctest
from datahub.utilities.delayed_iter import delayed_iter
from datahub.utilities.is_pytest import is_pytest_running
from datahub.utilities.urns.dataset_urn import DatasetUrn
@@ -328,15 +329,8 @@ def test_sqllineage_sql_parser_tables_with_special_names():
assert sorted(SqlLineageSQLParser(sql_query).get_columns()) == expected_columns
-def test_logging_name_extraction():
- import datahub.utilities.logging_manager
-
- assert (
- doctest.testmod(
- datahub.utilities.logging_manager, raise_on_error=True
- ).attempted
- > 0
- )
+def test_logging_name_extraction() -> None:
+ assert_doctest(datahub.utilities.logging_manager)
def test_is_pytest_running() -> None:
diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle
index 2535d091f6ce52..7dad21a6417fc4 100644
--- a/metadata-integration/java/datahub-client/build.gradle
+++ b/metadata-integration/java/datahub-client/build.gradle
@@ -13,6 +13,9 @@ import org.apache.tools.ant.filters.ReplaceTokens
jar {
+ if (project.hasProperty('archiveAppendix')) {
+ archiveAppendix.set(project.archiveAppendix)
+ }
archiveClassifier = "lib"
}
@@ -98,6 +101,9 @@ task checkShadowJar(type: Exec) {
shadowJar {
zip64 = true
+ if (project.hasProperty('archiveAppendix')) {
+ archiveAppendix.set(project.archiveAppendix)
+ }
archiveClassifier = ''
// preventing java multi-release JAR leakage
// https://github.com/johnrengelman/shadow/issues/729
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java
index 60ca7649331a00..8b83439a3008c1 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java
@@ -5,6 +5,7 @@
import static com.linkedin.metadata.search.utils.ESUtils.toParentField;
import static com.linkedin.metadata.utils.SearchUtil.*;
+import com.linkedin.common.urn.UrnUtils;
import com.linkedin.data.template.LongMap;
import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.config.search.SearchConfiguration;
@@ -22,10 +23,13 @@
import com.linkedin.util.Pair;
import io.datahubproject.metadata.context.OperationContext;
import io.opentelemetry.extension.annotations.WithSpan;
+import java.time.OffsetDateTime;
+import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -48,6 +52,7 @@
@Slf4j
public class AggregationQueryBuilder {
private static final String URN_FILTER = "urn";
+ private static final String STRUCTURED_PROPERTIES_PREFIX = "structuredProperties.";
private final SearchConfiguration configs;
private final Set defaultFacetFields;
private final Set allFacetFields;
@@ -80,12 +85,13 @@ public List getAggregations(@Nonnull OperationContext opCont
*/
public List getAggregations(
@Nonnull OperationContext opContext, @Nullable List facets) {
- final Set facetsToAggregate;
+ final Set facetsToAggregate = new HashSet<>();
+ if (Boolean.TRUE.equals(
+ opContext.getSearchContext().getSearchFlags().isIncludeDefaultFacets())) {
+ facetsToAggregate.addAll(defaultFacetFields);
+ }
if (facets != null) {
- facetsToAggregate =
- facets.stream().filter(this::isValidAggregate).collect(Collectors.toSet());
- } else {
- facetsToAggregate = defaultFacetFields;
+ facets.stream().filter(this::isValidAggregate).forEach(facetsToAggregate::add);
}
return facetsToAggregate.stream()
.map(f -> facetToAggregationBuilder(opContext, f))
@@ -247,7 +253,7 @@ List extractAggregationMetadata(
return addFiltersToAggregationMetadata(aggregationMetadataList, filter, aspectRetriever);
}
- private void processTermAggregations(
+ public void processTermAggregations(
final Map.Entry entry,
final List aggregationMetadataList) {
final Map oneTermAggResult =
@@ -264,6 +270,7 @@ private void processTermAggregations(
.setFilterValues(
new FilterValueArray(
SearchUtil.convertToFilters(oneTermAggResult, Collections.emptySet())));
+ updateAggregationEntity(aggregationMetadata);
aggregationMetadataList.add(aggregationMetadata);
}
@@ -300,7 +307,15 @@ private static void recurseTermsAgg(
private static void processTermBucket(
Terms.Bucket bucket, Map aggResult, boolean includeZeroes) {
- String key = bucket.getKeyAsString();
+ final String key = bucket.getKeyAsString();
+ String finalKey = key;
+ try {
+ // if the value is a date string, convert to milliseconds since epoch
+ OffsetDateTime time = OffsetDateTime.parse(key);
+ finalKey = String.valueOf(time.toEpochSecond() * 1000);
+ } catch (DateTimeParseException e) {
+ // do nothing, this is expected if the value is not a date
+ }
// Gets filtered sub aggregation doc count if exist
Map subAggs = recursivelyAddNestedSubAggs(bucket.getAggregations());
subAggs.forEach(
@@ -309,7 +324,7 @@ private static void processTermBucket(
String.format("%s%s%s", key, AGGREGATION_SEPARATOR_CHAR, entryKey), entryValue));
long docCount = bucket.getDocCount();
if (includeZeroes || docCount > 0) {
- aggResult.put(key, docCount);
+ aggResult.put(finalKey, docCount);
}
}
@@ -474,11 +489,24 @@ private AggregationMetadata buildAggregationMetadata(
@Nonnull final String displayName,
@Nonnull final LongMap aggValues,
@Nonnull final FilterValueArray filterValues) {
- return new AggregationMetadata()
- .setName(facetField)
- .setDisplayName(displayName)
- .setAggregations(aggValues)
- .setFilterValues(filterValues);
+ AggregationMetadata aggregationMetadata =
+ new AggregationMetadata()
+ .setName(facetField)
+ .setDisplayName(displayName)
+ .setAggregations(aggValues)
+ .setFilterValues(filterValues);
+ updateAggregationEntity(aggregationMetadata);
+ return aggregationMetadata;
+ }
+
+ public void updateAggregationEntity(@Nonnull final AggregationMetadata aggregationMetadata) {
+ if (aggregationMetadata.getName().startsWith(STRUCTURED_PROPERTIES_PREFIX)) {
+ aggregationMetadata.setEntity(
+ UrnUtils.getUrn(
+ String.format(
+ "urn:li:structuredProperty:%s",
+ aggregationMetadata.getName().replaceFirst(STRUCTURED_PROPERTIES_PREFIX, ""))));
+ }
}
private List>> getFacetFieldDisplayNameFromAnnotation(
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java
index fd663de40e0050..476f0114817be1 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java
@@ -1124,9 +1124,10 @@ public void testFacets() {
@Test
public void testNestedAggregation() {
Set expectedFacets = Set.of("platform");
+ OperationContext context =
+ getOperationContext().withSearchFlags(flags -> flags.setIncludeDefaultFacets(false));
SearchResult testResult =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResult.getMetadata().getAggregations().size(), 1);
expectedFacets.forEach(
facet -> {
@@ -1143,8 +1144,7 @@ public void testNestedAggregation() {
expectedFacets = Set.of("platform", "typeNames", "_entityType", "entity");
SearchResult testResult2 =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResult2.getMetadata().getAggregations().size(), 4);
expectedFacets.forEach(
facet -> {
@@ -1191,8 +1191,7 @@ public void testNestedAggregation() {
expectedFacets = Set.of("platform", "typeNames", "entity");
SearchResult testResult3 =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResult3.getMetadata().getAggregations().size(), 4);
expectedFacets.forEach(
facet -> {
@@ -1222,8 +1221,7 @@ public void testNestedAggregation() {
String singleNestedFacet = String.format("_entityType%sowners", AGGREGATION_SEPARATOR_CHAR);
expectedFacets = Set.of(singleNestedFacet);
SearchResult testResultSingleNested =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResultSingleNested.getMetadata().getAggregations().size(), 1);
Map expectedNestedFacetCounts = new HashMap<>();
expectedNestedFacetCounts.put("datajobāurn:li:corpuser:datahub", 2L);
@@ -1245,8 +1243,7 @@ public void testNestedAggregation() {
expectedFacets = Set.of("platform", singleNestedFacet, "typeNames", "origin");
SearchResult testResultNested =
- facetAcrossEntities(
- getOperationContext(), getSearchService(), "cypress", List.copyOf(expectedFacets));
+ facetAcrossEntities(context, getSearchService(), "cypress", List.copyOf(expectedFacets));
assertEquals(testResultNested.getMetadata().getAggregations().size(), 4);
expectedFacets.forEach(
facet -> {
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
index 3969223981ec3f..c68997e25bcff7 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java
@@ -3,9 +3,7 @@
import static com.linkedin.metadata.Constants.DATA_TYPE_URN_PREFIX;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME;
import static com.linkedin.metadata.utils.SearchUtil.*;
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.ArgumentMatchers.anySet;
-import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -53,6 +51,7 @@ public class AggregationQueryBuilderTest {
private static AspectRetriever aspectRetriever;
private static AspectRetriever aspectRetrieverV1;
+ private static String DEFAULT_FILTER = "_index";
@BeforeClass
public void setup() throws RemoteInvocationException, URISyntaxException {
@@ -267,16 +266,17 @@ public void testGetSpecificAggregationsHasFields() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(),
ImmutableList.of("test1", "test2", "hasTest1"));
- Assert.assertEquals(aggs.size(), 3);
+ Assert.assertEquals(aggs.size(), 4);
Set facets = aggs.stream().map(AggregationBuilder::getName).collect(Collectors.toSet());
- Assert.assertEquals(ImmutableSet.of("test1", "test2", "hasTest1"), facets);
+ Assert.assertEquals(ImmutableSet.of("test1", "test2", "hasTest1", "_entityType"), facets);
// Case 2: Ask for fields that should NOT exist.
aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(),
ImmutableList.of("hasTest2"));
- Assert.assertEquals(aggs.size(), 0);
+ Assert.assertEquals(
+ aggs.size(), 1); // default has one field already, hasTest2 will not be in there
}
@Test
@@ -292,7 +292,7 @@ public void testAggregateOverStructuredProperty() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of("structuredProperties.ab.fgh.ten"));
- Assert.assertEquals(aggs.size(), 1);
+ Assert.assertEquals(aggs.size(), 2);
AggregationBuilder aggBuilder = aggs.get(0);
Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
@@ -307,12 +307,16 @@ public void testAggregateOverStructuredProperty() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of("structuredProperties.ab.fgh.ten", "structuredProperties.hello"));
- Assert.assertEquals(aggs.size(), 2);
+ Assert.assertEquals(
+ aggs.size(), 3); // has one default filter (_entityType) both get mapped to _index
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
- Set.of("structuredProperties.ab_fgh_ten.keyword", "structuredProperties.hello.keyword"));
+ Set.of(
+ "structuredProperties.ab_fgh_ten.keyword",
+ "structuredProperties.hello.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -328,16 +332,12 @@ public void testAggregateOverStructuredPropertyNamespaced() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of("structuredProperties.under.scores.and.dots_make_a_mess"));
- Assert.assertEquals(aggs.size(), 1);
- AggregationBuilder aggBuilder = aggs.get(0);
- Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
- TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
- // Check that field name is sanitized to correct field name
+ Assert.assertEquals(aggs.size(), 2);
Assert.assertEquals(
- agg.field(),
- "structuredProperties.under_scores_and_dots_make_a_mess.keyword",
- "Terms aggregate must be on a keyword or subfield keyword");
-
+ aggs.stream()
+ .map(aggr -> ((TermsAggregationBuilder) aggr).field())
+ .collect(Collectors.toSet()),
+ Set.of("structuredProperties.under_scores_and_dots_make_a_mess.keyword", DEFAULT_FILTER));
// Two structured properties
aggs =
builder.getAggregations(
@@ -345,14 +345,15 @@ public void testAggregateOverStructuredPropertyNamespaced() {
List.of(
"structuredProperties.under.scores.and.dots_make_a_mess",
"structuredProperties.hello"));
- Assert.assertEquals(aggs.size(), 2);
+ Assert.assertEquals(aggs.size(), 3);
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties.under_scores_and_dots_make_a_mess.keyword",
- "structuredProperties.hello.keyword"));
+ "structuredProperties.hello.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -368,7 +369,7 @@ public void testAggregateOverStructuredPropertyV1() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1),
List.of("structuredProperties.ab.fgh.ten"));
- Assert.assertEquals(aggs.size(), 1);
+ Assert.assertEquals(aggs.size(), 2);
AggregationBuilder aggBuilder = aggs.get(0);
Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
@@ -385,14 +386,16 @@ public void testAggregateOverStructuredPropertyV1() {
List.of(
"structuredProperties.ab.fgh.ten",
"structuredProperties._versioned.hello.00000000000001.string"));
- Assert.assertEquals(aggs.size(), 2);
+      Assert.assertEquals(
+          aggs.size(), 3); // has one default filter (_entityType) both get mapped to _index
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword",
- "structuredProperties._versioned.hello.00000000000001.string.keyword"));
+ "structuredProperties._versioned.hello.00000000000001.string.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -408,15 +411,14 @@ public void testAggregateOverStructuredPropertyNamespacedV1() {
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1),
List.of("structuredProperties.under.scores.and.dots_make_a_mess"));
- Assert.assertEquals(aggs.size(), 1);
- AggregationBuilder aggBuilder = aggs.get(0);
- Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
- TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
- // Check that field name is sanitized to correct field name
+ Assert.assertEquals(aggs.size(), 2);
Assert.assertEquals(
- agg.field(),
- "structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
- "Terms aggregation must be on a keyword field or subfield.");
+ aggs.stream()
+ .map(aggr -> ((TermsAggregationBuilder) aggr).field())
+ .collect(Collectors.toSet()),
+ Set.of(
+ "structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
+ DEFAULT_FILTER));
// Two structured properties
aggs =
@@ -425,14 +427,15 @@ public void testAggregateOverStructuredPropertyNamespacedV1() {
List.of(
"structuredProperties.under.scores.and.dots_make_a_mess",
"structuredProperties._versioned.hello.00000000000001.string"));
- Assert.assertEquals(aggs.size(), 2);
+ Assert.assertEquals(aggs.size(), 3);
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
- "structuredProperties._versioned.hello.00000000000001.string.keyword"));
+ "structuredProperties._versioned.hello.00000000000001.string.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -489,7 +492,7 @@ public void testAggregateOverFieldsAndStructProp() {
"hasTest1",
"structuredProperties.ab.fgh.ten",
"structuredProperties.hello"));
- Assert.assertEquals(aggs.size(), 5);
+ Assert.assertEquals(aggs.size(), 6);
Set facets =
aggs.stream()
.map(aggB -> ((TermsAggregationBuilder) aggB).field())
@@ -501,7 +504,8 @@ public void testAggregateOverFieldsAndStructProp() {
"test2.keyword",
"hasTest1",
"structuredProperties.ab_fgh_ten.keyword",
- "structuredProperties.hello.keyword"));
+ "structuredProperties.hello.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -558,7 +562,8 @@ public void testAggregateOverFieldsAndStructPropV1() {
"hasTest1",
"structuredProperties.ab.fgh.ten",
"structuredProperties.hello"));
- Assert.assertEquals(aggs.size(), 5);
+ Assert.assertEquals(
+ aggs.size(), 6); // has one default filter (_entityType) both get mapped to _index
Set facets =
aggs.stream()
.map(aggB -> ((TermsAggregationBuilder) aggB).field())
@@ -570,7 +575,8 @@ public void testAggregateOverFieldsAndStructPropV1() {
"test2.keyword",
"hasTest1",
"structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword",
- "structuredProperties._versioned.hello.00000000000001.string.keyword"));
+ "structuredProperties._versioned.hello.00000000000001.string.keyword",
+ DEFAULT_FILTER));
}
@Test
@@ -613,6 +619,39 @@ public void testMissingAggregation() {
MISSING_SPECIAL_TYPE + AGGREGATION_SPECIAL_TYPE_DELIMITER + "test")));
}
+ @Test
+ public void testUpdateAggregationEntityWithStructuredProp() {
+ final AggregationMetadata aggregationMetadata = new AggregationMetadata();
+ aggregationMetadata.setName("structuredProperties.test_me.one");
+
+ SearchConfiguration config = new SearchConfiguration();
+ config.setMaxTermBucketSize(25);
+
+ AggregationQueryBuilder builder =
+ new AggregationQueryBuilder(
+ config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));
+
+ builder.updateAggregationEntity(aggregationMetadata);
+ Assert.assertEquals(
+ aggregationMetadata.getEntity(), UrnUtils.getUrn("urn:li:structuredProperty:test_me.one"));
+ }
+
+ @Test
+ public void testUpdateAggregationEntityWithRegularFilter() {
+ final AggregationMetadata aggregationMetadata = new AggregationMetadata();
+ aggregationMetadata.setName("domains");
+
+ SearchConfiguration config = new SearchConfiguration();
+ config.setMaxTermBucketSize(25);
+
+ AggregationQueryBuilder builder =
+ new AggregationQueryBuilder(
+ config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));
+
+ builder.updateAggregationEntity(aggregationMetadata);
+ Assert.assertNull(aggregationMetadata.getEntity());
+ }
+
@Test
public void testAddFiltersToMetadataWithStructuredPropsNoResults() {
final Urn propertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:test_me.one");
@@ -638,7 +677,7 @@ public void testAddFiltersToMetadataWithStructuredPropsNoResults() {
// ensure we add the correct structured prop aggregation here
Assert.assertEquals(aggregationMetadataList.size(), 1);
- // Assert.assertEquals(aggregationMetadataList.get(0).getEntity(), propertyUrn);
+ Assert.assertEquals(aggregationMetadataList.get(0).getEntity(), propertyUrn);
Assert.assertEquals(
aggregationMetadataList.get(0).getName(), "structuredProperties.test_me.one");
Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().size(), 1);
@@ -651,6 +690,7 @@ public void testAddFiltersToMetadataWithStructuredPropsWithAggregations() {
final AggregationMetadata aggregationMetadata = new AggregationMetadata();
aggregationMetadata.setName("structuredProperties.test_me.one");
+ aggregationMetadata.setEntity(propertyUrn);
FilterValue filterValue =
new FilterValue().setValue("test123").setFiltered(false).setFacetCount(1);
aggregationMetadata.setFilterValues(new FilterValueArray(filterValue));
@@ -679,6 +719,7 @@ public void testAddFiltersToMetadataWithStructuredPropsWithAggregations() {
criterion, aggregationMetadataList, mockAspectRetriever);
Assert.assertEquals(aggregationMetadataList.size(), 1);
+ Assert.assertEquals(aggregationMetadataList.get(0).getEntity(), propertyUrn);
Assert.assertEquals(
aggregationMetadataList.get(0).getName(), "structuredProperties.test_me.one");
Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().size(), 1);
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
index 718a00d067ce5a..393ca3ca5d4a64 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java
@@ -298,7 +298,8 @@ public void testAggregationsInSearch() {
String.format("_entityType%stextFieldOverride", AGGREGATION_SEPARATOR_CHAR);
SearchRequest searchRequest =
requestHandler.getSearchRequest(
- operationContext.withSearchFlags(flags -> flags.setFulltext(true)),
+ operationContext.withSearchFlags(
+ flags -> flags.setFulltext(true).setIncludeDefaultFacets(false)),
"*",
null,
null,
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl
index a3d2067ae5db25..a3a7a8cda58a8d 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl
@@ -58,4 +58,10 @@ record SearchFlags {
* invoke query rewrite chain for filters based on configured rewriters
*/
rewriteQuery: optional boolean = true
+
+ /**
+ * Include default facets when getting facets to aggregate on in search requests.
+ * By default we include these, but custom aggregation requests don't need them.
+ */
+ includeDefaultFacets: optional boolean = true
}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/AggregationMetadata.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/AggregationMetadata.pdl
index 0d3fa1e2a4ecf0..d3d25083ff4677 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/AggregationMetadata.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/AggregationMetadata.pdl
@@ -13,6 +13,11 @@ record AggregationMetadata {
*/
displayName: optional string
+ /**
+ * Entity associated with this facet
+ */
+ entity: optional Urn
+
/**
* List of aggregations showing the number of documents falling into each bucket. e.g, for platform aggregation, the bucket can be hive, kafka, etc
*/
diff --git a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl
index 3ddb2d2e571da3..416e2c5c11e228 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl
@@ -89,25 +89,25 @@ record StructuredPropertyDefinition {
version: optional string
/**
- * Created Audit stamp
- */
- @Searchable = {
- "/time": {
- "fieldName": "createdTime",
- "fieldType": "DATETIME"
- }
- }
- created: optional AuditStamp
+ * Created Audit stamp
+ */
+ @Searchable = {
+ "/time": {
+ "fieldName": "createdTime",
+ "fieldType": "DATETIME"
+ }
+ }
+ created: optional AuditStamp
- /**
- * Created Audit stamp
- */
- @Searchable = {
- "/time": {
- "fieldName": "lastModified",
- "fieldType": "DATETIME"
- }
- }
- lastModified: optional AuditStamp
+ /**
+ * Last Modified Audit stamp
+ */
+ @Searchable = {
+ "/time": {
+ "fieldName": "lastModified",
+ "fieldType": "DATETIME"
+ }
+ }
+ lastModified: optional AuditStamp
}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertySettings.pdl b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertySettings.pdl
new file mode 100644
index 00000000000000..fadcdfa5204e14
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertySettings.pdl
@@ -0,0 +1,64 @@
+namespace com.linkedin.structured
+
+import com.linkedin.common.AuditStamp
+
+/**
+ * Settings specific to a structured property entity
+ */
+@Aspect = {
+ "name": "structuredPropertySettings"
+}
+record StructuredPropertySettings {
+ /**
+ * Whether or not this asset should be hidden in the main application
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ isHidden: boolean = false
+
+ /**
+ * Whether or not this asset should be displayed as a search filter
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ showInSearchFilters: boolean = false
+
+ /**
+ * Whether or not this asset should be displayed in the asset sidebar
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ showInAssetSummary: boolean = false
+
+ /**
+ * Whether or not this asset should be displayed as an asset badge on other
+ * asset's headers
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ showAsAssetBadge: boolean = false
+
+ /**
+ * Whether or not this asset should be displayed as a column in the schema field table
+ * in a Dataset's "Columns" tab.
+ */
+ @Searchable = {
+ "fieldType": "BOOLEAN"
+ }
+ showInColumnsTable: boolean = false
+
+ /**
+ * Last Modified Audit stamp
+ */
+ @Searchable = {
+ "/time": {
+ "fieldName": "lastModifiedSettings",
+ "fieldType": "DATETIME"
+ }
+ }
+ lastModified: optional AuditStamp
+}
diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml
index ee1481f29f7e9f..1c3eb5b574e204 100644
--- a/metadata-models/src/main/resources/entity-registry.yml
+++ b/metadata-models/src/main/resources/entity-registry.yml
@@ -602,6 +602,7 @@ entities:
keyAspect: structuredPropertyKey
aspects:
- propertyDefinition
+ - structuredPropertySettings
- institutionalMemory
- status
- name: form
diff --git a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
index 0c62bdc1963261..28abb26be1f524 100644
--- a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
+++ b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
@@ -24,4 +24,5 @@ public class FeatureFlags {
private boolean editableDatasetNameEnabled = false;
private boolean showSeparateSiblings = false;
private boolean alternateMCPValidation = false;
+ private boolean showManageStructuredProperties = false;
}
diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml
index a2eaa6853c7f85..9348416606d0a9 100644
--- a/metadata-service/configuration/src/main/resources/application.yaml
+++ b/metadata-service/configuration/src/main/resources/application.yaml
@@ -461,6 +461,7 @@ featureFlags:
alternateMCPValidation: ${ALTERNATE_MCP_VALIDATION:false} # Enables alternate MCP validation flow
showSeparateSiblings: ${SHOW_SEPARATE_SIBLINGS:false} # If turned on, all siblings will be separated with no way to get to a "combined" sibling view
editableDatasetNameEnabled: ${EDITABLE_DATASET_NAME_ENABLED:false} # Enables the ability to edit the dataset name in the UI
+ showManageStructuredProperties: ${SHOW_MANAGE_STRUCTURED_PROPERTIES:true} # If turned on, show the manage structured properties button on the govern dropdown
entityChangeEvents:
enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true}
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json
index 061feafac1b9b9..92abc50abbc4df 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json
@@ -63,10 +63,10 @@
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
- "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
+ "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"IN" : "Represent the relation: String field is one of the array values to, e.g. name in [\"Profile\", \"Event\"]",
"IS_NULL" : "Represent the relation: field is null, e.g. platform is null",
"LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3",
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
index e9e2778a479d32..827789130d8bbb 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
@@ -169,10 +169,10 @@
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
- "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
+ "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"IN" : "Represent the relation: String field is one of the array values to, e.g. name in [\"Profile\", \"Event\"]",
"IS_NULL" : "Represent the relation: field is null, e.g. platform is null",
"LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3",
@@ -941,15 +941,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -1068,15 +1071,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2570,7 +2576,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -2977,7 +2983,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -3151,15 +3157,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -3181,15 +3190,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3362,11 +3374,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3392,11 +3406,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
index 959cb5381fd9b8..b549cef0af84b2 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
@@ -932,15 +932,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -1059,15 +1062,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2783,7 +2789,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -3365,7 +3371,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -3539,15 +3545,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -3569,15 +3578,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3750,11 +3762,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3780,11 +3794,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
@@ -5204,9 +5220,9 @@
},
"Searchable" : {
"/*" : {
- "boostScore" : 2.0,
"fieldName" : "isRelatedTerms",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
}
}
}, {
@@ -5225,9 +5241,9 @@
},
"Searchable" : {
"/*" : {
- "boostScore" : 2.0,
"fieldName" : "hasRelatedTerms",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
}
}
}, {
@@ -6068,23 +6084,29 @@
"doc" : "invoke query rewrite chain for filters based on configured rewriters",
"default" : true,
"optional" : true
+ }, {
+ "name" : "includeDefaultFacets",
+ "type" : "boolean",
+ "doc" : "Include default facets when getting facets to aggregate on in search requests.\nBy default we include these, but custom aggregation requests don't need them.",
+ "default" : true,
+ "optional" : true
} ]
}, {
"type" : "enum",
"name" : "Condition",
"namespace" : "com.linkedin.metadata.query.filter",
"doc" : "The matching condition in a filter criterion",
- "symbols" : [ "CONTAIN", "END_WITH", "EQUAL","IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
+ "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
- "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
+ "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"IN" : "Represent the relation: String field is one of the array values to, e.g. name in [\"Profile\", \"Event\"]",
"IS_NULL" : "Represent the relation: field is null, e.g. platform is null",
"LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3",
@@ -6314,6 +6336,11 @@
"type" : "string",
"doc" : "Name of the filter to be displayed in the UI",
"optional" : true
+ }, {
+ "name" : "entity",
+ "type" : "com.linkedin.common.Urn",
+ "doc" : "Entity associated with this facet",
+ "optional" : true
}, {
"name" : "aggregations",
"type" : {
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
index 3e0cd46aba0c05..c8be9d063eaea9 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
@@ -674,15 +674,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -801,15 +804,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2294,7 +2300,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -2701,7 +2707,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -2875,15 +2881,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -2905,15 +2914,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3086,11 +3098,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3116,11 +3130,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
index 7f651a10139e2e..8c7595c5e505d8 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
@@ -674,15 +674,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -801,15 +804,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2288,7 +2294,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -2695,7 +2701,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -2869,15 +2875,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -2899,15 +2908,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3080,11 +3092,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3110,11 +3124,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json
index c3e04add825c94..75e5c9a559076b 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json
@@ -932,15 +932,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "tagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "tagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "tagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -1059,15 +1062,18 @@
"Searchable" : {
"/actor" : {
"fieldName" : "termAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/source" : {
"fieldName" : "termAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/time" : {
"fieldName" : "termAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
}
}
} ]
@@ -2777,7 +2783,7 @@
"Searchable" : {
"fieldName" : "managerLdap",
"fieldType" : "URN",
- "queryByDefault" : true
+ "queryByDefault" : false
}
}, {
"name" : "departmentId",
@@ -3359,7 +3365,7 @@
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
- "boostScore" : 5.0,
+ "boostScore" : 1.0,
"fieldName" : "fieldPaths",
"fieldType" : "TEXT",
"queryByDefault" : "true"
@@ -3533,15 +3539,18 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "fieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "fieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "fieldTagAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/tags/*/tag" : {
"boostScore" : 0.5,
@@ -3563,15 +3572,18 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "fieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "fieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "fieldTermAttributionDates",
- "fieldType" : "DATETIME"
+ "fieldType" : "DATETIME",
+ "queryByDefault" : false
},
"/terms/*/urn" : {
"boostScore" : 0.5,
@@ -3744,11 +3756,13 @@
"Searchable" : {
"/tags/*/attribution/actor" : {
"fieldName" : "editedFieldTagAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/source" : {
"fieldName" : "editedFieldTagAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/tags/*/attribution/time" : {
"fieldName" : "editedFieldTagAttributionDates",
@@ -3774,11 +3788,13 @@
"Searchable" : {
"/terms/*/attribution/actor" : {
"fieldName" : "editedFieldTermAttributionActors",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/source" : {
"fieldName" : "editedFieldTermAttributionSources",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
},
"/terms/*/attribution/time" : {
"fieldName" : "editedFieldTermAttributionDates",
@@ -5198,9 +5214,9 @@
},
"Searchable" : {
"/*" : {
- "boostScore" : 2.0,
"fieldName" : "isRelatedTerms",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
}
}
}, {
@@ -5219,9 +5235,9 @@
},
"Searchable" : {
"/*" : {
- "boostScore" : 2.0,
"fieldName" : "hasRelatedTerms",
- "fieldType" : "URN"
+ "fieldType" : "URN",
+ "queryByDefault" : false
}
}
}, {
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java b/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java
index dbe1f45fae29e5..43d5e61c47bacb 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java
@@ -78,7 +78,15 @@ public enum DataHubUsageEventType {
EMBED_PROFILE_VIEW_EVENT("EmbedProfileViewEvent"),
EMBED_PROFILE_VIEW_IN_DATAHUB_EVENT("EmbedProfileViewInDataHubEvent"),
EMBED_LOOKUP_NOT_FOUND_EVENT("EmbedLookupNotFoundEvent"),
- CREATE_BUSINESS_ATTRIBUTE("CreateBusinessAttributeEvent");
+ CREATE_BUSINESS_ATTRIBUTE("CreateBusinessAttributeEvent"),
+ CREATE_STRUCTURED_PROPERTY_CLICK_EVENT("CreateStructuredPropertyClickEvent"),
+ CREATE_STRUCTURED_PROPERTY_EVENT("CreateStructuredPropertyEvent"),
+ EDIT_STRUCTURED_PROPERTY_EVENT("EditStructuredPropertyEvent"),
+ DELETE_STRUCTURED_PROPERTY_EVENT("DeleteStructuredPropertyEvent"),
+ VIEW_STRUCTURED_PROPERTY_EVENT("ViewStructuredPropertyEvent"),
+ APPLY_STRUCTURED_PROPERTY_EVENT("ApplyStructuredPropertyEvent"),
+ UPDATE_STRUCTURED_PROPERTY_ON_ASSET_EVENT("UpdateStructuredPropertyOnAssetEvent"),
+ REMOVE_STRUCTURED_PROPERTY_EVENT("RemoveStructuredPropertyEvent");
private final String type;
diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json
index e0f26b908c4991..e01b4f6f47eaaf 100644
--- a/metadata-service/war/src/main/resources/boot/policies.json
+++ b/metadata-service/war/src/main/resources/boot/policies.json
@@ -36,6 +36,7 @@
"CREATE_BUSINESS_ATTRIBUTE",
"MANAGE_BUSINESS_ATTRIBUTE",
"MANAGE_STRUCTURED_PROPERTIES",
+ "VIEW_STRUCTURED_PROPERTIES_PAGE",
"MANAGE_DOCUMENTATION_FORMS",
"MANAGE_FEATURES",
"MANAGE_SYSTEM_OPERATIONS"
@@ -185,6 +186,7 @@
"CREATE_BUSINESS_ATTRIBUTE",
"MANAGE_BUSINESS_ATTRIBUTE",
"MANAGE_STRUCTURED_PROPERTIES",
+ "VIEW_STRUCTURED_PROPERTIES_PAGE",
"MANAGE_DOCUMENTATION_FORMS",
"MANAGE_FEATURES"
],
@@ -274,6 +276,7 @@
"MANAGE_TAGS",
"MANAGE_BUSINESS_ATTRIBUTE",
"MANAGE_STRUCTURED_PROPERTIES",
+ "VIEW_STRUCTURED_PROPERTIES_PAGE",
"MANAGE_DOCUMENTATION_FORMS",
"MANAGE_FEATURES"
],
@@ -427,7 +430,8 @@
"GET_TIMESERIES_ASPECT_PRIVILEGE",
"GET_ENTITY_PRIVILEGE",
"GET_TIMELINE_PRIVILEGE",
- "ES_EXPLAIN_QUERY_PRIVILEGE"
+ "ES_EXPLAIN_QUERY_PRIVILEGE",
+ "VIEW_STRUCTURED_PROPERTIES_PAGE"
],
"displayName": "Readers - Metadata Policy",
"description": "Readers can view all assets.",
diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java
index bc676e94ecd4fa..d701c8fc8be035 100644
--- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java
+++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java
@@ -169,6 +169,12 @@ public class PoliciesConfig {
"Manage Structured Properties",
"Manage structured properties in your instance.");
+ public static final Privilege VIEW_STRUCTURED_PROPERTIES_PAGE_PRIVILEGE =
+ Privilege.of(
+ "VIEW_STRUCTURED_PROPERTIES_PAGE",
+ "View Structured Properties",
+ "View structured properties in your instance.");
+
public static final Privilege MANAGE_DOCUMENTATION_FORMS_PRIVILEGE =
Privilege.of(
"MANAGE_DOCUMENTATION_FORMS",