Skip to content

Commit

Permalink
Merge pull request #267 from DataRecce/feature/drc-321-show-warning-m…
Browse files Browse the repository at this point in the history
…essage-to-diff-actions-require-column

[Feature] Query the model when the run form lacks column information
  • Loading branch information
wcchang1115 authored Apr 9, 2024
2 parents 6da2a44 + f48255a commit ad8c0f4
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 81 deletions.
34 changes: 22 additions & 12 deletions js/src/components/histogram/HistogramDiffForm.tsx
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import { HistogramDiffParams } from "@/lib/api/profile";
import { RunFormProps } from "../run/types";
import { useLineageGraphContext } from "@/lib/hooks/LineageGraphContext";
import _ from "lodash";
import { extractColumns } from "../valuediff/ValueDiffForm";
import { Box, FormControl, FormLabel, Select } from "@chakra-ui/react";
import useModelColumns from "@/lib/hooks/useModelColumns";

function isStringDataType(columnType: string) {
const stringDataTypes = [
Expand Down Expand Up @@ -60,15 +58,27 @@ export function HistogramDiffForm({
onParamsChanged,
setIsReadyToExecute,
}: HistogramDiffEditProps) {
const { lineageGraph } = useLineageGraphContext();
const node = _.find(lineageGraph?.nodes, {
name: params?.model,
});
const columns = node
? extractColumns(node).filter(
(c) => !isStringDataType(c.type) && !isDateTimeType(c.type)
)
: [];
const {
columns: allColumns,
isLoading,
error,
} = useModelColumns(params.model);
const columns = allColumns.filter(
(c) => !isStringDataType(c.type) && !isDateTimeType(c.type)
);

if (isLoading) {
return <Box>Loading...</Box>;
}

if (columns.length === 0 || error) {
return (
<Box>
Error: Please provide the &apos;catalog.json&apos; to list column
candidates
</Box>
);
}

return (
<Box m="16px">
Expand Down
26 changes: 17 additions & 9 deletions js/src/components/top-k/TopKDiffForm.tsx
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import { TopKDiffParams } from "@/lib/api/profile";
import { RunFormProps } from "../run/types";
import { useLineageGraphContext } from "@/lib/hooks/LineageGraphContext";
import _ from "lodash";
import { Box, FormControl, FormLabel, Select } from "@chakra-ui/react";
import { extractColumnNames } from "../valuediff/ValueDiffForm";
import { useEffect } from "react";
import useModelColumns from "@/lib/hooks/useModelColumns";

interface TopKDiffFormProps extends RunFormProps<TopKDiffParams> {}

Expand All @@ -13,16 +11,26 @@ export function TopKDiffForm({
onParamsChanged,
setIsReadyToExecute,
}: TopKDiffFormProps) {
const { lineageGraph } = useLineageGraphContext();
const node = _.find(lineageGraph?.nodes, {
name: params?.model,
});
const columns = node ? extractColumnNames(node) : [];
const { columns, isLoading, error } = useModelColumns(params.model);
const columnNames = columns.map((c) => c.name);

useEffect(() => {
setIsReadyToExecute(!!params.column_name);
}, [params, setIsReadyToExecute]);

if (isLoading) {
return <Box>Loading...</Box>;
}

if (columnNames.length === 0 || error) {
return (
<Box>
Error: Please provide the &apos;catalog.json&apos; to list column
candidates
</Box>
);
}

return (
<Box m="16px">
<FormControl>
Expand All @@ -35,7 +43,7 @@ export function TopKDiffForm({
onParamsChanged({ ...params, column_name: column });
}}
>
{columns.map((c) => (
{columnNames.map((c) => (
<option key={c} value={c}>
{c}
</option>
Expand Down
83 changes: 23 additions & 60 deletions js/src/components/valuediff/ValueDiffForm.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { ValueDiffParams } from "@/lib/api/valuediff";
import { RunFormProps } from "../run/types";
import {
Box,
Checkbox,
FormControl,
FormLabel,
Expand All @@ -9,11 +9,8 @@ import {
} from "@chakra-ui/react";

import { Select } from "chakra-react-select";
import { useLineageGraphContext } from "@/lib/hooks/LineageGraphContext";
import { LineageGraphNode } from "../lineage/lineage";
import _ from "lodash";
import { useEffect, useState } from "react";
import { NodeColumnData, NodeData } from "@/lib/api/info";
import useModelColumns from "@/lib/hooks/useModelColumns";

interface ValueDiffFormParams {
model: string;
Expand All @@ -23,71 +20,24 @@ interface ValueDiffFormParams {

interface ValueDiffFormProp extends RunFormProps<ValueDiffFormParams> {}

/**
 * Collects the columns of a lineage node from both its base and current
 * environments, de-duplicated by column name.
 *
 * Order is preserved: base columns come first, followed by columns that only
 * exist in current. When a name occurs more than once, the first occurrence
 * is the one returned.
 *
 * @param node - lineage graph node whose `data.base` / `data.current` are read
 * @returns the union of column descriptors, or an empty array when neither
 *          side has columns
 */
export function extractColumns(node: LineageGraphNode) {
  // The helper tolerates a missing environment, so callers do not need to
  // force the value with non-null assertions.
  function getColumns(nodeData: NodeData | undefined): NodeColumnData[] {
    return nodeData && nodeData.columns ? Object.values(nodeData.columns) : [];
  }

  const seenNames = new Set<string>();
  const union: NodeColumnData[] = [];
  const candidates = [
    ...getColumns(node.data.base),
    ...getColumns(node.data.current),
  ];

  for (const column of candidates) {
    if (seenNames.has(column.name)) {
      continue;
    }
    seenNames.add(column.name);
    union.push(column);
  }

  return union;
}

/**
 * Lists the column names of a lineage node across its base and current
 * environments, without duplicates.
 *
 * Names keep their original order: every base name first, then the names that
 * only appear in current.
 *
 * @param node - lineage graph node whose `data.base` / `data.current` are read
 * @returns ordered, de-duplicated column names (empty when no columns exist)
 */
export function extractColumnNames(node: LineageGraphNode) {
  // Accepts a missing environment so no non-null assertion is needed.
  function getNames(nodeData: NodeData | undefined): string[] {
    return nodeData && nodeData.columns
      ? Object.values(nodeData.columns).map((column) => column.name)
      : [];
  }

  // A Set iterates in insertion order, which keeps the column order stable
  // while dropping repeated names.
  const uniqueNames = new Set<string>([
    ...getNames(node.data.base),
    ...getNames(node.data.current),
  ]);

  return [...uniqueNames];
}

export function ValueDiffForm({
params,
onParamsChanged,
setIsReadyToExecute,
}: ValueDiffFormProp) {
const { lineageGraph } = useLineageGraphContext();
const [allColumns, setAllColumns] = useState<boolean>(
!params.columns || params.columns.length === 0
);

const model = params?.model;
const primaryKey = params?.primary_key;
const node = _.find(lineageGraph?.nodes, {
name: params?.model,
});
const nodePrimaryKey = node?.data.current?.primary_key;

const {
columns,
primaryKey: nodePrimaryKey,
isLoading,
error,
} = useModelColumns(params.model);

useEffect(() => {
if (!primaryKey && nodePrimaryKey) {
Expand All @@ -102,7 +52,7 @@ export function ValueDiffForm({
setIsReadyToExecute(primaryKey && model ? true : false);
}, [primaryKey, model, setIsReadyToExecute]);

const columnNames = node ? extractColumnNames(node) : [];
const columnNames = columns.map((c) => c.name);

// primaryKey can be array or string, map to array
const primaryKeys = Array.isArray(primaryKey)
Expand All @@ -111,6 +61,19 @@ export function ValueDiffForm({
? [primaryKey]
: undefined;

if (isLoading) {
return <Box>Loading...</Box>;
}

if (columnNames.length === 0 || error) {
return (
<Box>
Error: Please provide the &apos;catalog.json&apos; to list column
candidates
</Box>
);
}

return (
<VStack gap={5} m="8px 24px" paddingBottom="200px">
<FormControl>
Expand Down
18 changes: 18 additions & 0 deletions js/src/lib/api/info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,21 @@ export async function getServerInfo(): Promise<ServerInfoResult> {
const response = await axiosClient.get(`/api/info`);
return response.data;
}

/**
 * Shape of the payload resolved by `getModelInfo`.
 *
 * `model.base` and `model.current` share the same structure; both `columns`
 * (keyed by an arbitrary string) and `primary_key` are optional on each side.
 */
export interface ModelInfoResult {
  model: Record<
    "base" | "current",
    {
      columns?: Record<string, NodeColumnData>;
      primary_key?: string;
    }
  >;
}

/**
 * Fetches column information for a single model from `/api/model/{model}`.
 *
 * @param model - identifier of the model; it is percent-encoded before being
 *                placed in the request path so reserved characters (`/`, `?`,
 *                `#`, spaces, ...) cannot alter the URL that is requested
 * @returns the response body as sent by the server
 */
export async function getModelInfo(model: string): Promise<ModelInfoResult> {
  const response = await axiosClient.get(
    `/api/model/${encodeURIComponent(model)}`
  );
  return response.data;
}
92 changes: 92 additions & 0 deletions js/src/lib/hooks/useModelColumns.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import { useState, useEffect, useMemo } from "react";
import { NodeColumnData, NodeData, getModelInfo } from "../api/info";
import { useLineageGraphContext } from "./LineageGraphContext";
import _ from "lodash";
import { LineageGraphNode } from "@/components/lineage/lineage";

/**
 * Collects the columns of a lineage node from both its base and current
 * environments and merges them with `unionColumns`.
 *
 * @param node - lineage graph node whose `data.base` / `data.current` are read
 * @returns the merged column descriptors; empty when neither side has columns
 */
export function extractColumns(node: LineageGraphNode) {
  // The helper already copes with a missing environment, so the values are
  // passed through as-is instead of being forced with non-null assertions.
  function getColumns(nodeData: NodeData | undefined): NodeColumnData[] {
    return nodeData && nodeData.columns ? Object.values(nodeData.columns) : [];
  }

  return unionColumns(
    getColumns(node.data.base),
    getColumns(node.data.current)
  );
}

/**
 * Merges two column lists into one, de-duplicated by column name.
 *
 * Base columns are visited first, then current columns; the first column seen
 * for a given name is kept and later ones with the same name are dropped, so
 * the relative order of the inputs is preserved.
 *
 * @param baseColumns - columns from the base environment
 * @param currentColumns - columns from the current environment
 * @returns a new array; the inputs are not modified
 */
export function unionColumns(
  baseColumns: NodeColumnData[],
  currentColumns: NodeColumnData[]
) {
  // Tracking seen names in a Set keeps the merge linear instead of rescanning
  // the result array for every candidate.
  const seenNames = new Set<string>();
  const union: NodeColumnData[] = [];

  for (const column of [...baseColumns, ...currentColumns]) {
    if (seenNames.has(column.name)) {
      continue;
    }
    seenNames.add(column.name);
    union.push(column);
  }

  return union;
}

/**
 * React hook that resolves the columns (and primary key) of a model.
 *
 * Resolution order:
 * 1. If the lineage graph node for `model` already carries columns, those are
 *    used directly.
 * 2. If no node with that name exists, the result is an empty column list.
 * 3. Otherwise the columns are requested from the server via `getModelInfo`.
 *
 * @param model - model name to look up in the lineage graph
 * @returns `columns` and `primaryKey` for the model, `isLoading` while a
 *          server request is in flight (and before the first resolution), and
 *          `error` when the server request failed
 */
const useModelColumns = (model: string | undefined) => {
  const { lineageGraph } = useLineageGraphContext();
  const [columns, setColumns] = useState<NodeColumnData[]>([]);
  const [primaryKey, setPrimaryKey] = useState<string>();
  const [isLoading, setIsLoading] = useState<boolean>(true);
  const [error, setError] = useState<Error | null>(null);

  const node = _.find(lineageGraph?.nodes, {
    name: model,
  });
  const nodeId = node?.id;

  const nodeColumns = useMemo(() => {
    return node ? extractColumns(node) : [];
  }, [node]);

  const nodePrimaryKey = node ? node.data.current?.primary_key : undefined;

  useEffect(() => {
    // Columns are already available from the lineage graph.
    if (nodeColumns.length > 0) {
      setColumns(nodeColumns);
      setPrimaryKey(nodePrimaryKey);
      setError(null);
      setIsLoading(false);
      return;
    }

    // Unknown model: nothing to fetch; clear anything left from a prior model.
    if (nodeId === undefined) {
      setColumns([]);
      setPrimaryKey(undefined);
      setError(null);
      setIsLoading(false);
      return;
    }

    // Set by the cleanup function so a response that arrives after the model
    // changed (or the component unmounted) does not overwrite newer state.
    let cancelled = false;

    const fetchData = async () => {
      setIsLoading(true);
      setError(null);
      try {
        const data = await getModelInfo(nodeId);
        if (cancelled) {
          return;
        }

        const modelInfo = data.model;
        const baseColumnMap = modelInfo?.base?.columns;
        const currentColumnMap = modelInfo?.current?.columns;
        if (!baseColumnMap || !currentColumnMap) {
          setColumns([]);
          setPrimaryKey(undefined);
          return;
        }

        setPrimaryKey(modelInfo.current.primary_key);
        setColumns(
          unionColumns(
            Object.values(baseColumnMap),
            Object.values(currentColumnMap)
          )
        );
      } catch (err) {
        if (!cancelled) {
          setError(err instanceof Error ? err : new Error(String(err)));
        }
      } finally {
        // Only leave the loading state once the request has actually settled.
        if (!cancelled) {
          setIsLoading(false);
        }
      }
    };

    void fetchData();

    return () => {
      cancelled = true;
    };
  }, [nodeId, nodeColumns, nodePrimaryKey]);

  return { columns, primaryKey, isLoading, error };
};

export default useModelColumns;
47 changes: 47 additions & 0 deletions recce/dbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,53 @@ def get_columns(self, model: str, base=False) -> List[Column]:
kwargs={"relation": relation},
manifest=self.manifest)

def get_model(self, model_id: str, base=False):
    """Describe the columns of a manifest node, with test-derived flags.

    The node is looked up by ``model_id`` in the current manifest, or in the
    base manifest when ``base`` is not ``False``. Its columns are obtained
    from ``self.get_columns`` inside an adapter connection named ``'model'``.
    Columns are flagged ``not_null`` / ``unique`` when a child of the node in
    the manifest's ``child_map`` is a test whose name starts with
    ``not_null_<node name>_`` / ``unique_<node name>_``; the remainder of the
    test name is taken as the column name.

    :param model_id: key of the node in the manifest's ``nodes`` mapping
    :param base: select the base manifest instead of the current one
    :return: ``{}`` when the node is not in the manifest; otherwise a dict
        with ``columns`` (column name -> ``dict(name, type[, not_null][, unique])``)
        and, if any column is flagged unique, ``primary_key`` holding the
        first such column
    """
    manifest = self.curr_manifest if base is False else self.base_manifest
    manifest_dict = manifest.to_dict()

    node = manifest_dict['nodes'].get(model_id)
    if node is None:
        return {}

    node_name = node['name']
    with self.adapter.connection_named('model'):
        # Use this instance rather than a module-level context object so the
        # method works on whichever context it is called on.
        columns = list(self.get_columns(node_name, base=base))

    not_null_prefix = f'not_null_{node_name}_'
    unique_prefix = f'unique_{node_name}_'
    cols_not_null = set()
    cols_unique = set()

    # A node without children may have no child_map entry at all.
    children = (manifest_dict.get('child_map') or {}).get(model_id) or []
    for child in children:
        comps = child.split('.')
        # Only ids shaped like '<type>.<package>.<name>...' can be inspected;
        # anything shorter, or not a test, carries no column constraint.
        if len(comps) < 3 or comps[0] != 'test':
            continue
        child_name = comps[2]

        if child_name.startswith(not_null_prefix):
            cols_not_null.add(child_name[len(not_null_prefix):])
        if child_name.startswith(unique_prefix):
            cols_unique.add(child_name[len(unique_prefix):])

    columns_info = {}
    primary_key = None
    for c in columns:
        col_name = c.column
        col = dict(name=col_name, type=c.dtype)
        if col_name in cols_not_null:
            col['not_null'] = True
        if col_name in cols_unique:
            col['unique'] = True
            # The first unique column encountered is reported as primary key.
            if not primary_key:
                primary_key = col_name
        columns_info[col_name] = col

    result = dict(columns=columns_info)
    if primary_key:
        result['primary_key'] = primary_key

    return result

def load_artifacts_from_state(self, state_file: str = None):
if state_file is None:
raise Exception('The recce state file is not provided')
Expand Down
Loading

0 comments on commit ad8c0f4

Please sign in to comment.