diff --git a/.env.example b/.env.example index 5fd4d62bf..a7c0dbea0 100644 --- a/.env.example +++ b/.env.example @@ -35,6 +35,9 @@ DAGSTER__CLICKHOUSE__HOST= DAGSTER__CLICKHOUSE__USER= DAGSTER__CLICKHOUSE__PASSWORD= +## sqlmesh +SQLMESH_DUCKDB_LOCAL_PATH=/tmp/oso.duckdb + ################### # DEPRECATED ################### diff --git a/warehouse/metrics_mesh/README.md b/warehouse/metrics_mesh/README.md new file mode 100644 index 000000000..4dce3977a --- /dev/null +++ b/warehouse/metrics_mesh/README.md @@ -0,0 +1,34 @@ +# OSO sqlmesh pipeline + +## Setup + +Make sure to set the following environment variables +in your .env file (at the root of the oso repo) + +``` +GOOGLE_PROJECT_ID=opensource-observer +SQLMESH_DUCKDB_LOCAL_PATH=/tmp/oso.duckdb +``` + +Make sure you've logged into Google Cloud on your terminal + +```bash +gcloud auth application-default login +``` + +Now install dependencies and download playground data into +a local DuckDB instance. + +```bash +poetry install +poetry shell +oso metrics local initialize +``` + +## Run + +```bash +cd warehouse/metrics_mesh +sqlmesh plan dev --start 2024-07-01 --end 2024-08-01 # to run for specific date ranges (fast) +sqlmesh plan # to run the entire pipeline (slow) +``` diff --git a/warehouse/metrics_mesh/lib/local/utils.py b/warehouse/metrics_mesh/lib/local/utils.py index f1244cf53..af6ecb700 100644 --- a/warehouse/metrics_mesh/lib/local/utils.py +++ b/warehouse/metrics_mesh/lib/local/utils.py @@ -1,14 +1,16 @@ import typing as t import duckdb +import os from google.cloud import bigquery +project_id = os.getenv("GOOGLE_PROJECT_ID") def bq_to_duckdb(table_mapping: t.Dict[str, str], duckdb_path: str): """Copies the tables in table_mapping to tables in duckdb The table_mapping is in the form { "bigquery_table_fqn": "duckdb_table_fqn" } """ - bqclient = bigquery.Client() + bqclient = bigquery.Client(project=project_id) conn = duckdb.connect(duckdb_path) conn.sql("CREATE SCHEMA IF NOT EXISTS sources;") diff --git 
a/warehouse/oso_lets_go/cli.py b/warehouse/oso_lets_go/cli.py index dc9e7537d..1a1b5470d 100644 --- a/warehouse/oso_lets_go/cli.py +++ b/warehouse/oso_lets_go/cli.py @@ -2,15 +2,13 @@ A catchall for development environment tools related to the python tooling. """ -import os - -import click import dotenv +dotenv.load_dotenv() +import os +import click from metrics_mesh.lib.local.utils import initialize_local_duckdb, reset_local_duckdb -dotenv.load_dotenv() - @click.group() @click.option("--debug/--no-debug", default=False)