diff --git a/.gitignore b/.gitignore index 67bc771f84c1a..04c2d92769192 100644 --- a/.gitignore +++ b/.gitignore @@ -117,7 +117,8 @@ superset/translations/**/messages.mo docker/requirements-local.txt cache/ -docker/*local* +docker/.env-local +docker/.env .temp_cache diff --git a/README-UCLH.md b/README-UCLH.md index 87eabdc57f23f..c90830c086d5f 100644 --- a/README-UCLH.md +++ b/README-UCLH.md @@ -1,40 +1,81 @@ ## Docker & Compose Settings -[docs](https://superset.apache.org/docs/installation/docker-compose) + > Note that docker/.env sets the default environment variables for all the docker images used by docker compose, and that docker/.env-local can be used to override those defaults. Also note that docker/.env-local is referenced in our .gitignore, preventing developers from risking committing potentially sensitive configuration to the repository. -Create a _docker/.env-local_ file with the following keys: +via [docs](https://superset.apache.org/docs/installation/docker-compose) + +## Environment Variables + +You **must** copy `docker/.env.example` to `docker/.env` + +```bash +cp docker/.env.example docker/.env ``` -COMPOSE_PROJECT_NAME=EMAP-Insights +Then make local edits to `docker/.env-local` file which overrides values from `docker/.env`. 
+ +For example, create a _docker/.env-local_ file with the following keys: +``` +# Must be lowercase with only alphanumeric characters, hyphens, and underscores +COMPOSE_PROJECT_NAME=emap-insights + +# Provide the name of the host machine (also HOSTNAME) HOST_NAME= # Set this to a unique secure random value on production DATABASE_PASSWORD=superset -SUPERSET_LOAD_EXAMPLES=false +SUPERSET_LOAD_EXAMPLES=yes # Make sure you set this to a unique secure random value on production # using something like `openssl rand -base64 42` SUPERSET_SECRET_KEY=TEST_NON_DEV_SECRET +# Specify the Superset image tag to use +TAG=4.1.1 +``` + +## Build or run +The shell scripts simply specify the tag, and then call docker compose with the appropriate docker-compose.yml file. + +```bash +./build.sh +``` -## Run +```bash +./up.sh +``` +is a quick way of writing ```bash +TAG=4.1.1 docker compose -f docker-compose-image-tag.yml up +``` +## Add DuckDB databases +You can now do this from the UI. +It's simplest to use a sqlalchemy connection string. +You must copy the database into `./data/duckdb` (which is a mounted volume). + +``` +duckdb:////var/data/duckdb/camino-gold.db ``` + ## Notes -See the details here for tag specification -https://superset.apache.org/docs/installation/docker-builds +- See the details here for tag specification: https://superset.apache.org/docs/installation/docker-builds + - e.g. 4.1.1 is lean ... 250MB ish, 4.1.1-dev is not! (but includes postgres drivers and more) ... 1GB +- You may get warnings during initiation about flask migrations. -e.g -- 4.1.1 is lean ... 250MB ish -- 4.1.1-dev is not! (but includes postgres drivers and more) ... 1GB +```bash +superset_init | ERROR [flask_migrate] Error: Can't locate revision identified by '74ad1125881c' +``` +These can probably be ignored but you can always delete the `emap-insights_db_data` volume if you want to be sure. -A lean start ... 
```bash -TAG=4.1.1 docker compose -f docker-compose-image-tag.yml up -``` \ No newline at end of file +docker compose down +docker volume rm emap-insights_db_data +``` + + diff --git a/build.sh b/build.sh new file mode 100755 index 0000000000000..7d0f168101379 --- /dev/null +++ b/build.sh @@ -0,0 +1,8 @@ +# Run the docker compose file +set -a # automatically export all variables +source docker/.env +source docker/.env-local +set +a # stop automatically exporting + +echo "Building Superset at tag $TAG" +docker compose -f docker-compose-image-tag.yml build \ No newline at end of file diff --git a/docker-compose-image-tag.yml b/docker-compose-image-tag.yml index 037763e4bb6fb..8090377cb1ba1 100644 --- a/docker-compose-image-tag.yml +++ b/docker-compose-image-tag.yml @@ -15,10 +15,10 @@ # limitations under the License. # -x-host-name: &host-name ${HOST_NAME} +x-host-name: &host-name ${HOSTNAME} x-http-proxy: &http-proxy ${HTTP_PROXY} x-https-proxy: &https-proxy ${HTTPS_PROXY} -x-no-proxy: &no-proxy ${HOST_NAME},${HOST_NAME}.xuclh.nhs.uk,localhost,127.0.0.1 +x-no-proxy: &no-proxy ${HOSTNAME}.xuclh.nhs.uk,localhost,127.0.0.1 x-network: &network HOST_NAME: *host-name HTTP_PROXY: *http-proxy @@ -43,7 +43,7 @@ x-superset-volumes: - ./docker:/app/docker - superset_home:/app/superset_home # https://github.com/apache/superset/issues/9748#issuecomment-2099107789 - - ./data:/var/mydata + - ./data:/var/data - ./dashboards:/var/dashboards services: @@ -58,9 +58,13 @@ services: env_file: - docker/.env # default - docker/.env-local # optional override + environment: + <<: *network image: postgres:15 container_name: superset_db restart: unless-stopped + ports: + - 8089:5432 volumes: - db_home:/var/lib/postgresql/data - ./docker/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d @@ -69,6 +73,8 @@ services: env_file: - docker/.env # default - docker/.env-local # optional override + environment: + <<: *network image: *superset-image container_name: superset_app command: 
["/app/docker/docker-bootstrap.sh", "app-gunicorn"] @@ -87,6 +93,8 @@ services: - docker/.env # default - docker/.env-local # optional override depends_on: *superset-depends-on + environment: + <<: *network user: "root" volumes: *superset-volumes healthcheck: diff --git a/docker-compose.yml b/docker-compose.yml index 6b91ff7691e94..96e59c447495c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -42,7 +42,7 @@ x-superset-volumes: &superset-volumes - superset_home:/app/superset_home - ./tests:/app/tests # https://github.com/apache/superset/issues/9748#issuecomment-2099107789 - - ./data:/var/mydata + - ./data:/var/data x-common-build: &common-build context: . diff --git a/docker/.env-local.example b/docker/.env-local.example new file mode 100644 index 0000000000000..97711f3e015c7 --- /dev/null +++ b/docker/.env-local.example @@ -0,0 +1,24 @@ +# Must be lowercase with only alphanumeric characters, hyphens, and underscores +COMPOSE_PROJECT_NAME=emap-insights + +# HOST_NAME is the name of the host machine (also HOSTNAME) +HOST_NAME= +HOSTNAME= + +# Set this to a unique secure random value on production +# DATABASE_PASSWORD= + +SUPERSET_LOAD_EXAMPLES=yes +SUPERSET_ENV=production +DEV_MODE=false + +# Make sure you set this to a unique secure random value on production +# using something like `openssl rand -base64 42` +# SUPERSET_SECRET_KEY= + +# Specify the Superset image tag to use +# 4.1.1 is lean (250MB) +# 4.1.1-dev includes postgres drivers and more (1GB) +TAG=4.1.1 + +PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev diff --git a/docker/.env b/docker/.env.example similarity index 92% rename from docker/.env rename to docker/.env.example index c0e61e8f1d76d..a7c1feabdd6e5 100644 --- a/docker/.env +++ b/docker/.env.example @@ -67,3 +67,7 @@ ENABLE_PLAYWRIGHT=false PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true BUILD_SUPERSET_FRONTEND_IN_DOCKER=true SUPERSET_LOG_LEVEL=info + +# You need this for docker/superset_config_docker.py to work +# ... 
again edits here will override docker/superset_config.py +PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev \ No newline at end of file diff --git a/docker/pythonpath_dev/.gitignore b/docker/pythonpath_dev/.gitignore index 376bb61ea96c0..f7f0ab2f4039c 100644 --- a/docker/pythonpath_dev/.gitignore +++ b/docker/pythonpath_dev/.gitignore @@ -20,4 +20,5 @@ # DON'T ignore the .gitignore !.gitignore !superset_config.py +!superset_config_docker.py !superset_config_local.example diff --git a/docker/pythonpath_dev/superset_config.py b/docker/pythonpath_dev/superset_config.py index 255cfd3fba856..100164592eb69 100644 --- a/docker/pythonpath_dev/superset_config.py +++ b/docker/pythonpath_dev/superset_config.py @@ -29,15 +29,6 @@ logger = logging.getLogger() -# Allow sqlite to be used -# via https://github.com/apache/superset/issues/9748 -# Superset configuration file -PREVENT_UNSAFE_DB_CONNECTIONS=False - -ENABLE_PROXY_FIX = False -WTF_CSRF_ENABLED = False -TALISMAN_ENABLED = False - DATABASE_DIALECT = os.getenv("DATABASE_DIALECT") DATABASE_USER = os.getenv("DATABASE_USER") DATABASE_PASSWORD = os.getenv("DATABASE_PASSWORD") diff --git a/docker/pythonpath_dev/superset_config_docker.py b/docker/pythonpath_dev/superset_config_docker.py new file mode 100644 index 0000000000000..b3bb52289f06f --- /dev/null +++ b/docker/pythonpath_dev/superset_config_docker.py @@ -0,0 +1,52 @@ +import logging +import os +import sys +import subprocess + +logger = logging.getLogger() + +# Allow sqlite to be used +# via https://github.com/apache/superset/issues/9748 +# Superset configuration file +PREVENT_UNSAFE_DB_CONNECTIONS=False + +# Add DuckDB setup +def setup_duckdb(): + try: + subprocess.check_call([ + sys.executable, + "-m", + "pip", + "install", + "duckdb-engine>=0.9.5,<0.10", + "--quiet" + ]) + logger.info("DuckDB engine installed successfully") + except subprocess.CalledProcessError as e: + logger.error(f"Failed to install DuckDB engine: {e}") + raise + +# Install DuckDB when config is 
loaded +setup_duckdb() + +# Update DuckDB configuration to use existing mounted volume +DUCKDB_DATA_PATH = "/var/data/duckdb" # Using the existing mount point +DUCKDB_CONN_PATH = os.getenv("DUCKDB_CONN_PATH", os.path.join(DUCKDB_DATA_PATH, "superset.db")) + +# Ensure the directory exists +os.makedirs(DUCKDB_DATA_PATH, exist_ok=True) + +# Add DuckDB to the databases dictionary +DATABASES = { + 'duckdb': { + 'allow_csv_upload': True, + 'allow_ctas': True, + 'allow_cvas': True, + 'allow_dml': True, + 'configuration_method': 'sqlalchemy_form', + 'default_driver': 'duckdb', + } +} + +# Add DuckDB to allowed databases +PREFERRED_DATABASES = ['sqlite', 'postgresql', 'duckdb', 'mssql'] \ No newline at end of file diff --git a/up.sh b/up.sh new file mode 100755 index 0000000000000..18959cdb006c8 --- /dev/null +++ b/up.sh @@ -0,0 +1,7 @@ +# Run the docker compose file +set -a # automatically export all variables +source docker/.env-local +set +a # stop automatically exporting + +echo "Starting Superset at tag $TAG" +docker compose -f docker-compose-image-tag.yml up \ No newline at end of file