diff --git a/src/main/metabase/api.ts b/src/main/metabase/api.ts
index 7aff9c1..da1e5ff 100644
--- a/src/main/metabase/api.ts
+++ b/src/main/metabase/api.ts
@@ -1,5 +1,5 @@
 import { ClientRequestConstructorOptions, net } from 'electron';
-import { isPostgresIdentifier } from '../../shared';
+import { postgresQuote } from '../../shared';
 import { InitialQueryPayloads } from '../../types';
 import { metabaseConfig, postgresConfig } from '../config';
 import { getAppLanguage } from '../language';
@@ -29,10 +29,6 @@ function findAnonymizedAccessDbId(databases: Database[]) {
   }
 }
 
-function postgresQuote(name: string) {
-  return isPostgresIdentifier(name) ? name : `"${name}"`;
-}
-
 const sqlHint = `
 -- HINTS
 -- Change, add, or remove columns as desired.
diff --git a/src/main/metabase/examples.ts b/src/main/metabase/examples.ts
index 90658b5..1f5a2c6 100644
--- a/src/main/metabase/examples.ts
+++ b/src/main/metabase/examples.ts
@@ -1,4 +1,7 @@
-import { Table } from './types';
+import { postgresQuote } from '../../shared';
+import { Field, Table } from './types';
+
+type Display = 'table' | 'bar' | 'row' | 'scalar' | 'map'; // Other types TBD.
 
 /** An example query card. */
 type ExampleQuery = {
@@ -6,7 +9,7 @@ type ExampleQuery = {
   sql: string; // SQL query.
   sizeX: number; // Grid of 18 units wide.
   sizeY: number; // Height of card in units.
-  display: 'table' | 'bar' | 'row' | 'scalar' | 'map'; // Other types TBD.
+  display: Display;
   visualizationSettings: Record; // To be typed later.
 
   // There's also row/col properties, but we'll make some rectangle
@@ -19,18 +22,127 @@ type ExamplesSection = {
   queries: ExampleQuery[]; // Cards in section.
 };
 
+/** Name and SQL text of a generated example; layout fields are filled in by `exampleQueries`. */
+type ExampleInfo = {
+  sql: string;
+  name: string;
+};
+
 function lines(...lines: string[]) {
   return lines.join('\n');
 }
 
-export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSection[] {
-  const { fields, display_name } = table; // TODO: iterate and inspect fields
+/** `database_type` values that `makeExampleInfos` treats as numeric. */
+const numberFieldTypes = ['int2', 'int4', 'int8', 'float4', 'float8', 'numeric'];
+
+/** Example counting rows per distinct value of `column`. */
+function rawGroupBySQL(column: string, table: string, displayName: string): ExampleInfo {
+  const quotedColumn = postgresQuote(column);
+
+  return {
+    name: `${displayName} by ${column}`,
+    sql: lines(`SELECT ${quotedColumn}, count(*)`, `FROM ${postgresQuote(table)}`, `GROUP BY ${quotedColumn}`),
+  };
+}
 
-  let name = table.name;
-  // if (requiresQuoting(name)) {
-  //   name = `"${name}"`
-  // }
+/** Example counting the distinct values of `column`. */
+function countDistinctSQL(column: string, table: string): ExampleInfo {
+  // The alias embeds the column name, so it needs the same quoting treatment as any identifier.
+  const alias = postgresQuote(`distinct_${column}`);
+
+  return {
+    name: `Distinct ${column}`,
+    sql: lines(`SELECT count(distinct ${postgresQuote(column)}) as ${alias}`, `FROM ${postgresQuote(table)}`),
+  };
+}
+
+/** Example computing the average of `column`. */
+function avgSQL(column: string, table: string): ExampleInfo {
+  const alias = postgresQuote(`avg_${column}`);
+
+  return {
+    name: `Average ${column}`,
+    sql: lines(`SELECT avg(${postgresQuote(column)}) as ${alias}`, `FROM ${postgresQuote(table)}`),
+  };
+}
+
+/**
+ * Example counting rows per leading prefix of a text `column`.
+ * The prefix spans about a quarter of `averageLength`; the remainder is shown as `*`.
+ */
+function textGeneralizedSQL(column: string, table: string, displayName: string, averageLength: number): ExampleInfo {
+  const nChars = Math.ceil(averageLength / 4);
+  const stars = '*'.repeat(Math.ceil(averageLength - nChars));
+  const bucket = `substring(${postgresQuote(column)}, 1, ${nChars})`;
+
+  return {
+    name: `${displayName} by ${column}`,
+    // `stars` must be a quoted SQL string literal; a bare run of `*` is a syntax error.
+    sql: lines(`SELECT ${bucket} || '${stars}', count(*)`, `FROM ${postgresQuote(table)}`, `GROUP BY ${bucket}`),
+  };
+}
 
+/** Example counting rows per year of a timestamp `column`. */
+function yearlyGeneralizedSQL(column: string, table: string, displayName: string): ExampleInfo {
+  const bucket = `extract(year from ${postgresQuote(column)})`;
+  const alias = postgresQuote(`${column}_year`);
+
+  return {
+    name: `${displayName} by ${column} year`,
+    sql: lines(`SELECT ${bucket} as ${alias}, count(*)`, `FROM ${postgresQuote(table)}`, `GROUP BY ${bucket}`),
+  };
+}
+
+/**
+ * Picks the example queries for one column from its type, fingerprint and AID status.
+ * Returns an empty list for row-ID columns or when building the example throws.
+ */
+function makeExampleInfos(field: Field, table: Table, aidColumns: string[]): ExampleInfo[] {
+  try {
+    if (field.semantic_type === 'type/PK' || field.database_type === 'serial') {
+      // No sensible example for columns being just row IDs.
+      return [];
+    } else if (aidColumns.includes(field.name)) {
+      // Never SELECT/GROUP BY AIDs directly, also no point in generalizing.
+      return [countDistinctSQL(field.name, table.name)];
+    } else if (field.database_type === 'text' && field.fingerprint) {
+      if (field.fingerprint.global['distinct-count'] && field.fingerprint.global['distinct-count'] < 10) {
+        // Few distinct values - can GROUP BY directly.
+        return [rawGroupBySQL(field.name, table.name, table.display_name)];
+      } else {
+        const averageLength = field.fingerprint.type?.['type/Text']?.['average-length'];
+
+        // The `< 20`: we want to generalize surnames and categories but not sentences, paragraphs or addresses.
+        if (averageLength && averageLength < 20) {
+          return [textGeneralizedSQL(field.name, table.name, table.display_name, averageLength)];
+        } else {
+          return [countDistinctSQL(field.name, table.name)];
+        }
+      }
+    } else if (numberFieldTypes.includes(field.database_type) && field.fingerprint) {
+      if (field.fingerprint.global['distinct-count'] && field.fingerprint.global['distinct-count'] < 10) {
+        // Few distinct values - can GROUP BY directly.
+        return [rawGroupBySQL(field.name, table.name, table.display_name)];
+      } else {
+        // TODO: Construct stable generalization. Temporarily revert to the average.
+        return [avgSQL(field.name, table.name)];
+      }
+    } else if (field.database_type === 'timestamp') {
+      // TODO: using timestamps fingerprint is possible, but we need to pull in some datetime lib.
+      return [yearlyGeneralizedSQL(field.name, table.name, table.display_name)];
+    } else {
+      // Fallback to the count distinct for anything else.
+      return [countDistinctSQL(field.name, table.name)];
+    }
+  } catch (err) {
+    console.warn(`Unable to make example query for ${table.name}, ${field.name}`, err);
+    return [];
+  }
+}
+
+/** Builds the example sections for `table`: a row-count overview plus one card per generated column example. */
+export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSection[] {
+  const exampleInfos = table.fields.flatMap((field) => makeExampleInfos(field, table, aidColumns));
   // const t = getT('example-queries'); // Let's worry about i18n later...
 
   return [
@@ -38,8 +150,8 @@ export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSect
       title: 'Overview',
       queries: [
         {
-          name: `Count of ${display_name}`,
-          sql: lines('SELECT count(*)', `FROM ${name}`),
+          name: `Count of ${table.display_name}`,
+          sql: lines('SELECT count(*)', `FROM ${postgresQuote(table.name)}`),
           sizeX: 6, // 6 is a good default (3 cards per row).
           sizeY: 4, // 4 is a good default.
           display: 'scalar',
@@ -48,19 +160,17 @@ export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSect
         ],
       },
       {
-        // GROUP BY examples
-        title: `Distribution of ${display_name}`,
-        queries: [
-          {
-            name: `${display_name} by `,
-            sql: lines('SELECT , count(*)', `FROM ${name}`, 'GROUP BY '),
+        title: `Overview of ${table.display_name} columns`,
+        queries: exampleInfos.map(({ name, sql }): ExampleQuery => {
+          return {
+            name,
+            sql,
             sizeX: 6,
-            sizeY: 4, // For a table we might need something taller.
-            display: 'table', // For now we show results only as 'table'.
+            sizeY: 4, // TODO: For a table we might need something taller.
+            display: 'table', // For now we show results only as 'table'.
             visualizationSettings: {},
-          },
-        ],
+          };
+        }),
       },
-      // ...
     ];
 }
diff --git a/src/shared/utils.ts b/src/shared/utils.ts
index c64fac8..088fdb0 100644
--- a/src/shared/utils.ts
+++ b/src/shared/utils.ts
@@ -71,3 +71,11 @@ const tableNameRE = /^[a-z_][a-z0-9$_]*$/;
 export function isPostgresIdentifier(name: string): boolean {
   return !postgresReservedKeywords.includes(name) && tableNameRE.test(name);
 }
+
+/**
+ * Returns `name` ready for use as an identifier in SQL text: unchanged when it is a plain
+ * identifier, otherwise wrapped in double quotes with embedded double quotes doubled.
+ */
+export function postgresQuote(name: string): string {
+  return isPostgresIdentifier(name) ? name : `"${name.replace(/"/g, '""')}"`;
+}