Commit
Merge branch 'dynamic_building_outlines' into test_digitaltwin
LukeParky authored Jul 9, 2024
2 parents 5406faf + e6892dd commit edd51f3
Showing 151 changed files with 26,035 additions and 19,127 deletions.
5 changes: 5 additions & 0 deletions .dockerignore
@@ -0,0 +1,5 @@
visualisation
tmp
docs
fredt.tar
build_dep.tar
12 changes: 11 additions & 1 deletion .env.docker-override
@@ -1,6 +1,16 @@
# Ovverrides any values in .env that are being used for local development
# Overrides any values in .env that are being used for local development
# Values here are used inside the contianers using this file
DATA_DIR=/stored_data
DATA_DIR_SLR=/stored_data/slr_data
DATA_DIR_REC=/stored_data/rec_data
DATA_DIR_MODEL_OUTPUT=/stored_data/model_output
DATA_DIR_GEOSERVER=/stored_data/geoserver
FLOOD_MODEL_DIR=/bg_flood

POSTGRES_PORT=5432
POSTGRES_HOST=db_postgres

MESSAGE_BROKER_HOST=message_broker

GEOSERVER_HOST=http://geoserver
GEOSERVER_PORT=8080
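
These overrides only take effect inside the containers that load this file. A quick way to confirm what a running service actually sees is to print its environment; a hedged sketch, where the service name `backend` is an assumption to be replaced with a real name from docker-compose.yml:

```bash
# Sketch: inspect the resolved environment inside a running container.
# "backend" is an assumed service name; substitute one from docker-compose.yml.
docker-compose exec backend env | grep -E 'DATA_DIR|POSTGRES|GEOSERVER|MESSAGE_BROKER'
```
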
13 changes: 6 additions & 7 deletions .env.template
@@ -3,7 +3,9 @@ DATA_DIR=U:/Research/FloodRiskResearch/DigitalTwin/stored_data
DATA_DIR_REC=U:/Research/FloodRiskResearch/DigitalTwin/stored_data/rec_data
DATA_DIR_MODEL_OUTPUT=U:/Research/FloodRiskResearch/DigitalTwin/stored_data/model_output
DATA_DIR_GEOSERVER=U:/Research/FloodRiskResearch/DigitalTwin/stored_data/geoserver
FLOOD_MODEL_DIR=U:/Research/FloodRiskResearch/DigitalTwin/BG-Flood/BG-Flood_v8.0
FLOOD_MODEL_DIR=U:/Research/FloodRiskResearch/DigitalTwin/BG-Flood/BG_Flood_v0-9

DEBUG_TRACEBACK=False

POSTGRES_HOST=localhost
POSTGRES_PORT=5431
@@ -18,12 +20,9 @@ GEOSERVER_PORT=8088
GEOSERVER_ADMIN_NAME=admin
GEOSERVER_ADMIN_PASSWORD=geoserver

STATSNZ_API_KEY=
LINZ_API_KEY=
LRIS_API_KEY=
MFE_API_KEY=
NIWA_API_KEY=

WWW_HOST=http://localhost
WWW_PORT=8080
CESIUM_ACCESS_TOKEN=

# for NewZeaLiDAR
# directory name for source LiDAR data from OpenTopography, parent dir is DATA_DIR
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
*.sh text eol=lf
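
Forcing LF endings on shell scripts keeps them executable inside the Linux containers even when the repository is checked out on Windows. If any scripts were previously committed with CRLF endings, they can be renormalised with a standard git command; a sketch only:

```bash
# Re-apply the .gitattributes rules to files already tracked by git
git add --renormalize .
git commit -m "Normalise shell script line endings"
```
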
3 changes: 2 additions & 1 deletion .github/workflows/build-documentation.yml
@@ -12,9 +12,10 @@ jobs:
working-directory: ./sphinx
steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
fetch-depth: 0
token: ${{ secrets.PUSH_PAGES_PAT }}
- name: Install build dependencies
run: |
python -m pip install -r requirements.txt
15 changes: 4 additions & 11 deletions .github/workflows/run-tests.yml
@@ -44,10 +44,6 @@ jobs:
defaults:
run:
shell: bash -l {0}
strategy:
fail-fast: false
matrix:
python-version: [ 3.11 ]
steps:

- name: Checkout github repo including lfs files
@@ -64,15 +60,12 @@
key: ${{ runner.os }}-conda-${{ env.cache-name }}-${{ hashFiles('environment.yml') }}

- name: Install package dependencies
uses: conda-incubator/setup-miniconda@v2
uses: mamba-org/setup-micromamba@v1
with:
activate-environment: digitaltwin
environment-file: environment.yml
auto-activate-base: false
channels: conda-forge
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
python-version: ${{ matrix.python-version }}
# only cache environment
cache-environment: true
cache-downloads: false
- run: |
conda info
conda list
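
The workflow now provisions the `digitaltwin` environment with micromamba instead of setup-miniconda. Reproducing the CI environment locally should look roughly like the following sketch, assuming micromamba is already installed (CI itself uses the mamba-org/setup-micromamba action):

```bash
# Sketch: recreate the CI conda environment locally with micromamba
micromamba create -n digitaltwin -f environment.yml
micromamba activate digitaltwin
micromamba list   # sanity check, analogous to the conda list step above
```
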
1 change: 1 addition & 0 deletions .gitignore
@@ -128,3 +128,4 @@ dmypy.json

# Pyre type checker
.pyre/
/api_keys.env
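
Ignoring `/api_keys.env` keeps private API credentials (see api_keys.env.template below) out of version control. A quick check that the new rule matches, as a sketch:

```bash
# Should print the .gitignore rule that matches the file
git check-ignore -v api_keys.env
```
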
95 changes: 88 additions & 7 deletions Dockerfile
@@ -1,18 +1,99 @@
FROM continuumio/miniconda3 as base

WORKDIR app/
FROM continuumio/miniconda3:23.10.0-1 AS build
# Miniconda layer for building conda environment
WORKDIR /app

# Install mamba for faster conda solves
RUN conda install -c conda-forge mamba

# Create Conda environment
COPY environment.yml .
RUN conda env create -f environment.yml
RUN mamba env create -f environment.yml

# Make RUN commands use the new environment:
SHELL ["conda", "run", "-n", "digitaltwin", "/bin/bash", "-c"]

# Test that conda environment worked successfully
RUN echo "Check GeoFabrics is installed to test environment"
RUN python -c "import geofabrics"

COPY selected_polygon.geojson .
COPY src/ src/
# Pack conda environment to be shared to runtime image
RUN conda-pack --ignore-missing-files -n digitaltwin -o /tmp/env.tar \
&& mkdir /venv \
&& cd /venv \
&& tar xf /tmp/env.tar \
&& rm /tmp/env.tar
RUN /venv/bin/conda-unpack


FROM lparkinson/bg_flood:v0.9 AS runtime-base
# BG_Flood stage for running the digital twin. Reduces image size significantly if we use a multi-stage build
WORKDIR /app

USER root

# Install dependencies
RUN apt-get update \
&& apt-get install -y --no-install-recommends ca-certificates curl wget acl \
# Install firefox from mozilla .deb repository, not snap package as is default for ubuntu (snap does not work for docker)
&& wget -q https://packages.mozilla.org/apt/repo-signing-key.gpg -O- | tee /etc/apt/keyrings/packages.mozilla.org.asc > /dev/null \
&& echo "deb [signed-by=/etc/apt/keyrings/packages.mozilla.org.asc] https://packages.mozilla.org/apt mozilla main" | tee -a /etc/apt/sources.list.d/mozilla.list > /dev/null \
&& echo $' \n\
Package: * \n\
Pin: origin packages.mozilla.org \n\
Pin-Priority: 1000 \n\
' | tee /etc/apt/preferences.d/mozilla \
&& cat /etc/apt/preferences.d/mozilla \
&& apt-get update \
&& apt-get install -y --no-install-recommends firefox \
# Install geckodriver, webdriver for firefox, needed for selenium
&& curl --proto "=https" -L https://github.com/mozilla/geckodriver/releases/download/v0.30.0/geckodriver-v0.30.0-linux64.tar.gz | tar xz -C /usr/local/bin \
# Install health-checker tool that allows us to run commands when checking root endpoint to check if service is available
&& wget -q https://github.com/gruntwork-io/health-checker/releases/download/v0.0.8/health-checker_linux_amd64 -O /usr/local/bin/health-checker \
&& chmod +x /usr/local/bin/health-checker \
# Cleanup image and remove junk
&& rm -fr /var/lib/apt/lists/* \
# Remove unused packages. Keep curl for health checking in docker-compose
&& apt-get purge -y ca-certificates wget

# Create stored data dir inside image, in case it does not get mounted (such as when deploying on AWS)
RUN mkdir /stored_data && setfacl -R -m u:nonroot:rwx /stored_data

USER nonroot

# Copy python virtual environment from build layer
COPY --chown=nonroot:nonroot --chmod=544 --from=build /venv /venv

# Using python virtual environment, preload selenium with firefox so that first runtime is faster.
SHELL ["/bin/bash", "-c"]
RUN source /venv/bin/activate && \
selenium-manager --browser firefox --debug

# Copy source files and essential runtime files
COPY --chown=nonroot:nonroot --chmod=444 selected_polygon.geojson .
COPY --chown=nonroot:nonroot --chmod=644 instructions.json .
COPY --chown=nonroot:nonroot --chmod=544 src/ src/


FROM runtime-base AS backend
# Image build target for backend
# Using separate build targets for each image because the Orbica platform does not allow for modifying entrypoints
# and using multiple dockerfiles was creating increase complexity problems keeping things in sync
EXPOSE 5000
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "digitaltwin", "gunicorn", "--bind", "0.0.0.0:5000", "src.app:app"]

SHELL ["/bin/bash", "-c"]
ENTRYPOINT source /venv/bin/activate && \
gunicorn --bind 0.0.0.0:5000 src.app:app


FROM runtime-base AS celery_worker
# Image build target for celery_worker

EXPOSE 5001

SHELL ["/bin/bash", "-c"]
# Activate environment and run the health-checker in background and celery worker in foreground
ENTRYPOINT source /venv/bin/activate && \
health-checker --listener 0.0.0.0:5001 --log-level error --script-timeout 10 \
--script "celery -A src.tasks inspect ping" & \
source /venv/bin/activate && \
celery -A src.tasks worker -P threads --loglevel=INFO
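
The Dockerfile now defines separate `backend` and `celery_worker` build targets on top of a shared runtime stage. If the images need to be built outside of docker-compose, the targets can be selected explicitly; a sketch with illustrative tags only (the real build configuration lives in docker-compose.yml):

```bash
# Sketch: build each target separately; the image tags here are illustrative
docker build --target backend -t fredt-backend .
docker build --target celery_worker -t fredt-celery-worker .
```
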
59 changes: 23 additions & 36 deletions README.md
@@ -1,4 +1,7 @@
# Flood Resilience Digital Twin (FReDT)
![image](https://github.com/GeospatialResearch/Digital-Twins/assets/41398636/b7b9da6c-3895-46f5-99dc-4094003b2946)


## Introduction

According to the National Emergency Management Agency, flooding is the greatest hazard in New Zealand, in terms of frequency, losses and civil defence emergencies.
@@ -13,7 +16,7 @@ The Flood Resilience Digital Twin can provide a better understanding of the degr
Digital Twin not only represents the current status of the visualised assets but also how they will perform/react to future situations.
The build twin when used to run flood models combined with other sources of information can allow us to make predictions.

Data is collected from an open data portal provided by multiple organisations or data providers such as LINZ, StatsNZ, opentopography, NIWA, MFE, and more.
Data is collected from open data portals provided by multiple organisations or data providers such as LINZ, StatsNZ, opentopography, NIWA, MFE, and more.
The collected data is stored in the application database using PostgreSQL

The reason for implementing a database are:
@@ -28,61 +31,44 @@ The following list defines the basic steps required to setup and run the digital

## Requirements
* [Docker](https://www.docker.com/)
* [Anaconda](https://www.anaconda.com/download)
* [Node.js / NPM](https://nodejs.org/)


## Required Credentials:
Create API keys for each of these services. You may need to create an account and log in
* [Stats NZ API Key](https://datafinder.stats.govt.nz/my/api/)
* [LINZ API Key](https://data.linz.govt.nz/my/api/)
* [MFE API Key](https://data.mfe.govt.nz/my/api/)
* [NIWA Application API Key](https://developer.niwa.co.nz/) - Create an app that has the Tide API enabled
* [Cesium access token](https://cesium.com/ion/tokens)
* [NIWA Application API Key](https://developer.niwa.co.nz/) - Create an app that has the Tide API enabled


## Starting the Digital Twin application (localhost)
1. Set up Docker, Anaconda, and NPM to work on your system.

1. Clone this repository to your local machine (may be best to avoid network drives for software development since they are much slower)

1. In the project root, in an Anaconda prompt, run the following commands to initialise the environment:
```bash
#!/usr/bin/env bash
conda env create -f environment.yml
conda activate digitaltwin
```
_While the environment is being created, you can continue with the other steps until using the environment._
1. Clone this repository to your local machine.

1. Create a file called `.env` in the project root, copy the contents of `.env.template` and fill in all blank fields unless a comment says you can leave it blank.
Blank fields to fill in include things like the `POSTGRES_PASSWORD` variable and `CESIUM_ACCESS_TOKEN`. You may configure other variables as needed.

1. Set any file paths in `.env` if needed, for example `FLOOD_MODEL_DIR` references a Geospatial Research Institute
network drive, so you may need to provide your own implementation of `BG_flood` here.
Multiple instances of the digital twin can point to the same directories and share the cached data to improve speed.
1. Create a file `visualisation/.env.local`. In this, fill in
`VUE_APP_CESIUM_ACCESS_TOKEN=[your_token_here]`, replace `[your_token_here]` with the Cesium Access Token
1. Configure `DATA_DIRx` variables in `.env` such that they point to real directories accessible to your file system.
We have these mounted on UC network drives, so we can share lidar data between FReDT instances.

1. Create a file called `api_keys.env`, copy the contents of `api_keys.env.template` and fill in the blank values with API credentials.
1. Set any file paths in `.env` if needed. Multiple instances of the digital twin can point to the same directories and share the cached data to improve speed.

1. From project root, run the command `docker-compose up --build -d` to run the database, backend web servers, and helper services.
1. From project root, run the command `docker-compose up -d` to run the database, backend web servers, and helper services.
**If this fails on a WindowsToastNotification error on windows, just run it again and it should work.**

1. Currently, the `visualisation` and `celery_worker` services are not set up to work with Docker, so these will be set up manually.
1. In one terminal, with the conda environment activated, go to the project root directory and run `celery -A src.tasks worker --loglevel=INFO --pool=solo` to run the backend celery service.
1. In another terminal open the `visualisation` directory and run `npm ci && npm run serve` to start the development visualisation server.

1. You may inspect the logs of the backend in the celery window.
1. You may inspect the logs of the backend using `docker-compose logs -f backend celery_worker`

1. You may inspect the PostgreSQL database by logging in using the credentials you stored in the `.env` file and a database client such as `psql` or pgAdmin or DBeaver or PyCharm Professional.


## Using the Digital Twin application
1. With the visualisation server running, visit the address shown in the visualisation server window, default [http://localhost:8080](http://localhost:8080)
1. To run a flood model, hold SHIFT and hold the left mouse button to drag a box around the area you wish to run the model for.
1. Once the model has completed running, you may need to click the button at the bottom of the screen requesting you to reload the flood model.
1. To see a graph for flood depths over time at a location, hold CTRL and click the left mouse button on the area you wish to query.
The current application is running only in headless mode. Meaning, the front-end website is not active.
To interact with the application you send calls to the REST API. Example calls are shown in api_calls.py, and they can be replicated in other http clients such as Postman.


## Setup for developers
Set up environment variables as above.

### Run single Docker service e.g. database
To run only one isolated service (services defined in `docker-compose.yml`) use the following command:
@@ -94,22 +80,23 @@ e.g. To run only the database in detached mode:
docker-compose up --build -d db_postgres
```

### Run Celery locally (reccomended, since BG Flood does not yet work on Docker)
### Run Celery locally (without docker)
With the conda environment activated run:
```bash
#!/usr/bin/env bash
celery -A src.tasks worker --loglevel=INFO --pool=solo
celery -A src.tasks worker -P threads --loglevel=INFO
```

### Running the backend without web interface.
### Running the backend as a processing script instead of web interface
It will likely be useful to run processing using the digital twin, without running the web interface.
To do so:
1. Run `db_postgres` and `geoserver` services in docker.
```bash
#!/usr/bin/env bash
docker-compose up --build -d db_postgres geoserver
```
2. For local testing, it may be useful to use the `src.run_all.py` script to run the processing.
2. For local testing, it may be useful to use the `src.run_all.py` script to run the processing. From the project root run
`python -m src.run_all`


## Tests
25 changes: 18 additions & 7 deletions api_calls.py
@@ -30,10 +30,10 @@ def generate_flood_model() -> str:
# Create request data for getting flood model data from a region over Kaiapoi
request_data = {
"bbox": {
"lat1": -43.38205648955185,
"lng1": 172.6487081332888,
"lng2": 172.66,
"lat2": -43.40
"lat1": -43.370613130921434,
"lng1": 172.65156000179044,
"lng2": 172.71678302522903,
"lat2": -43.400136655560765
},
"scenarioOptions": {
"Projected Year": 2050,
@@ -60,12 +60,12 @@ def poll_for_completion(task_id: str) -> int:
# 5 Second delay before retrying
time.sleep(5)
print("Polling backend for task completion...")

# Get status of a task
task_status_response = requests.get(f"{backend_url}/tasks/{task_id}")
response_body = task_status_response.json()
print(response_body)
task_status_response.raise_for_status()
# Load the body JSON into a python dict
response_body = json.loads(task_status_response.text)
task_status = response_body["taskStatus"]
task_value = response_body['taskValue']
print(f"Task completed with value {task_value}")
@@ -83,7 +83,6 @@ def get_building_statuses(model_id: int) -> GeoDataFrame:
return GeoDataFrame.from_features(building_json["features"])



def get_depths_at_point(task_id: str):
point = {"lat": -43.39, "lng": 172.65}
# Send a request to get the depths at a point for a flood model associated with a task
@@ -97,6 +96,18 @@ def get_depths_at_point(task_id: str):
print(response_body)


def fetch_new_dataset_table():
# Update LiDAR datasets, takes a long time.
print("Refreshing LiDAR OpenTopography URLs to get newest LiDAR data")
update_datasets_response = requests.post(f"{backend_url}/datasets/update")
# Check for errors (400/500 codes)
update_datasets_response.raise_for_status()
# Load the body JSON into a python dict
response_body = json.loads(update_datasets_response.text)
# Read the task id
return response_body["taskId"]


def stop_task(task_id: str):
# Send a request to stop the task
requests.delete(f"{backend_url}/tasks/{task_id}")
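
The helpers in api_calls.py are thin wrappers over the REST API, so the same calls can be reproduced from any HTTP client. A hedged sketch using curl, assuming the backend is listening on localhost:5000 (the port gunicorn binds to in the Dockerfile); `<task_id>` is a placeholder for an id returned by an earlier call:

```bash
# Poll the status of a task
curl http://localhost:5000/tasks/<task_id>

# Refresh the LiDAR dataset table (long-running); the response contains a new taskId
curl -X POST http://localhost:5000/datasets/update

# Stop a running task
curl -X DELETE http://localhost:5000/tasks/<task_id>
```
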
5 changes: 5 additions & 0 deletions api_keys.env.template
@@ -0,0 +1,5 @@
STATSNZ_API_KEY=
LINZ_API_KEY=
LRIS_API_KEY=
MFE_API_KEY=
NIWA_API_KEY=
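
As the README above describes, this template is meant to be copied to `api_keys.env` and each blank value filled in with a real credential; a minimal sketch of that step, with a placeholder value:

```bash
cp api_keys.env.template api_keys.env
# then edit api_keys.env and fill in each key, for example:
#   LINZ_API_KEY=<your LINZ API key>
```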