Skip to content

Commit

Permalink
Master merge conflict resolve
Browse files Browse the repository at this point in the history
  • Loading branch information
ahandan-crim committed Feb 5, 2024
2 parents 16c2bb9 + cd6157b commit 899d8d4
Show file tree
Hide file tree
Showing 109 changed files with 110,003 additions and 2,457 deletions.
106 changes: 106 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Run the test suites of every domain sub-directory against each supported Python version.

name: Tests
on:
  - pull_request
  - push

jobs:
  # Decide whether this run duplicates an earlier one, so the 'tests' job can be skipped.
  # see: https://github.com/fkirc/skip-duplicate-actions
  skip_duplicate:
    continue-on-error: true
    runs-on: ubuntu-latest
    outputs:
      should_skip: ${{ steps.skip_check.outputs.should_skip }}
    steps:
      - id: skip_check
        # pinned to a release tag instead of the moving 'master' branch
        uses: fkirc/skip-duplicate-actions@v5
        with:
          concurrent_skipping: "same_content"
          skip_after_successful_duplicate: "true"
          do_not_skip: '["pull_request", "workflow_dispatch", "schedule"]'

  # NOTE:
  #   Run all the steps even if there are no tests defined for a given domain sub-directory.
  #   This is to make sure that the environment definition is at the very least buildable.
  tests:
    needs: skip_duplicate
    if: ${{ needs.skip_duplicate.outputs.should_skip != 'true' }}
    runs-on: ${{ matrix.os }}
    continue-on-error: ${{ matrix.allow-failure }}
    env:
      CACHE_NUMBER: 0  # increment to reset cache

    # ensure conda env activation is performed automatically
    defaults:
      run:
        shell: bash -el {0}

    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # somehow mamba with python 3.12 doesn't resolve spacy although available...
        python-version: ["3.9", "3.10", "3.11"]
        allow-failure: [false]
        domain: ["eo", "nlp"]

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: "0"

      # NOTE(review): the 'Mambaforge' installer variant is reportedly discontinued upstream;
      #   confirm it still resolves with 'miniforge-version: latest' or switch to 'Miniforge3'.
      - name: Setup Mamba
        uses: conda-incubator/setup-miniconda@v3
        with:
          auto-update-conda: true
          python-version: ${{ matrix.python-version }}
          miniforge-variant: Mambaforge
          miniforge-version: latest
          activate-environment: github-ci-test-python${{ matrix.python-version }}-${{ matrix.domain }}
          use-mamba: true
          use-only-tar-bz2: true

      # the date is part of the cache key, so a cached environment is reused for one day at most
      - name: Set cache date
        run: echo "DATE=$(date +'%Y%m%d')" >> "$GITHUB_ENV"

      - name: Cache environment
        uses: actions/cache@v4
        id: cache
        with:
          path: ${{ env.CONDA }}/envs/github-ci-test-python${{ matrix.python-version }}-${{ matrix.domain }}
          # expressions cannot be nested inside a string literal:
          # build the per-domain file path with format() before hashing it
          key: conda-python${{ matrix.python-version }}-${{ matrix.domain }}-${{ hashFiles(format('{0}/environment.yml', matrix.domain)) }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}

      - name: Display Python
        run: which python

      # pin the matrix python version so that the environment update cannot replace it
      - name: Update environment
        if: steps.cache.outputs.cache-hit != 'true'
        run: |
          echo "python=${{ matrix.python-version }}" > "${{ env.CONDA }}/envs/github-ci-test-python${{ matrix.python-version }}-${{ matrix.domain }}/conda-meta/pinned"
          mamba env update \
            -n github-ci-test-python${{ matrix.python-version }}-${{ matrix.domain }} \
            -f ${{ matrix.domain }}/environment.yml

      - name: Display Packages
        run: pip freeze

      - name: Display Environment Variables
        run: |
          hash -r
          env | sort

      # expose whether the domain defines a 'tests' directory, to gate the following steps
      - name: Check Tests
        id: check_tests
        run: |
          echo "HAS_TEST_DIR=$(test -d ${{ matrix.domain }}/tests && echo 'true' || echo 'false')" >> "$GITHUB_OUTPUT"

      - name: Install Tests Dependencies
        if: ${{ steps.check_tests.outputs.HAS_TEST_DIR == 'true' }}
        run: pip install -r requirements-dev.txt

      # tests are started from the notebooks directory of the domain
      - name: Run Tests
        if: ${{ steps.check_tests.outputs.HAS_TEST_DIR == 'true' }}
        run: |
          cd ${{ matrix.domain }}/notebooks
          python -m pytest -vvv ../tests
20 changes: 20 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
### IDE
**/.idea/
**/.vscode/
**/*.code-workspace

### Caches
**/__pycache__/
**/cache/
**/*tree-tagger-linux*
**/.pytest_cache
**/condaenv.*.requirements.txt

## Chroma VDB caches
**/*.bin
**/*.pickle
**/*.sqlite3

### Binaries
**/*.jar

### Notebooks
# expect examples per domain
# disallow notebooks at root
# (a leading '/' anchors the pattern to the directory of this file; a './' prefix never matches)
/*.ipynb
**/.ipynb_checkpoints/

### Outputs
*.log
18 changes: 18 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Included custom configs change the value of MAKEFILE_LIST
# Extract the required reference beforehand so we can use it for help target
MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST))
# Include custom config if it is available
-include Makefile.config

# Application
APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..)
APP_NAME := $(shell basename $(APP_ROOT))
APP_DOMAINS ?= eo nlp
DOCKER_REPO ?= crim-ca/pavics-jupyter-images

# One 'docker-build-<domain>' target per entry of APP_DOMAINS.
# Each one builds the image from the '<domain>' sub-directory, tags it
# '$(DOCKER_REPO)/<domain>:latest' and copies the build output to 'make-docker-build-<domain>.log'.
DOCKER_BUILDS := $(addprefix docker-build-, $(APP_DOMAINS))

# These targets never produce a file of the same name: always run them.
.PHONY: $(DOCKER_BUILDS)
# The recipe runs under 'bash -o pipefail' so that a failing 'docker build' fails the target;
# otherwise the exit status of the pipeline would be the one of 'tee'.
$(DOCKER_BUILDS): docker-build-%:
	bash -o pipefail -c 'docker build -t "$(DOCKER_REPO)/$*:latest" "$(APP_ROOT)/$*" 2>&1 | tee "$(APP_ROOT)/make-$@.log"'

# Build the images of all domains.
.PHONY: docker-build
docker-build: $(DOCKER_BUILDS)
1 change: 0 additions & 1 deletion eo/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ dependencies:
- intake-stac
- pyproj
- rasterio
- sat-search
- shapely

# TODO: These next packages could possibly be added to a more generic 'vision' image, from which 'eo' would be built
Expand Down
12 changes: 9 additions & 3 deletions nlp/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,22 @@ Changes

Unreleased (latest)
===================
0.4.1 (2023-11-24)

- n/a

0.5.0 (2023-12-13)
===================

Changes:
--------
- Update NLU demo notebook with latest pipeline improvements and a STAC wrapper to convert NL queries to STAC requests.
- Add `duckling` installation in the Docker to allow running it as child process rather than sibling Docker service.
- Update base image version in Dockerfile
- Add `mamba` cache cleanup to reduce image size

Fixes:
------
- ...
- Fix dependencies to make them functional across multiple Python versions.

0.4.0 (2022-12-23)
===================
Expand Down Expand Up @@ -127,4 +133,4 @@ Changes:

Fixes:
------
- na
- na
41 changes: 30 additions & 11 deletions nlp/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ FROM birdhouse/pavics-jupyter-base:0.5.0

# must update conda env as root, because of a permission error when having pip dependencies in the input yml file
USER root
# install pre-requirement for compiling some dependencies
RUN apt install -y pkg-config libpcre++-dev

COPY environment.yml /environment.yml
COPY notebook_config.yml /notebook_config.yml

# update env "birdy"
# use umask 0000 so that package files for the updated environment are usable by the user for the jupyter-conda-extension
RUN umask 0000 && mamba env update -f /environment.yml \
&& mamba clean -a
RUN umask 0000 && \
mamba env update -f /environment.yml && \
mamba clean -y -a

# Set the encoding to UTF-8, this is needed for heideltime to work properly
ENV LANG=C.UTF-8
Expand All @@ -19,21 +21,27 @@ RUN python -m spacy download en_core_web_trf

# Downloading the ner-large flair model
RUN mkdir flair_models && \
curl -L -o flair_models/ner-large https://huggingface.co/flair/ner-english-large/resolve/main/pytorch_model.bin > /dev/null
curl -L -o flair_models/ner-large \
https://huggingface.co/flair/ner-english-large/resolve/main/pytorch_model.bin > /dev/null

# Heideltime Tree-tagger Installation
RUN mkdir -p heideltime/tree-tagger-linux-3.2.3 && cd heideltime/tree-tagger-linux-3.2.3 && \
curl -o tree-tagger-linux-3.2.3.tar.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tree-tagger-linux-3.2.3.tar.gz && \
curl -o tagger-scripts.tar.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz && \
curl -o english.par.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/english.par.gz && \
curl -o install-tagger.sh https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/install-tagger.sh && \
curl -o tree-tagger-linux-3.2.3.tar.gz \
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tree-tagger-linux-3.2.3.tar.gz && \
curl -o tagger-scripts.tar.gz \
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz && \
curl -o english.par.gz \
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/english.par.gz && \
curl -o install-tagger.sh \
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/install-tagger.sh && \
tar -xvzf tree-tagger-linux-3.2.3.tar.gz && \
sh install-tagger.sh && \
rm tree-tagger-linux-3.2.3.tar.gz tagger-scripts.tar.gz english.par.gz install-tagger.sh

# Download and extract heideltime.standalone.jar
RUN cd heideltime && \
curl -L -o heideltime-standalone-2.2.1.tar.gz https://github.com/HeidelTime/heideltime/releases/download/VERSION2.2.1/heideltime-standalone-2.2.1.tar.gz && \
RUN cd heideltime && \
curl -L -o heideltime-standalone-2.2.1.tar.gz \
https://github.com/HeidelTime/heideltime/releases/download/VERSION2.2.1/heideltime-standalone-2.2.1.tar.gz && \
tar -xzvf heideltime-standalone-2.2.1.tar.gz heideltime-standalone/de.unihd.dbs.heideltime.standalone.jar && \
mv heideltime-standalone/de.unihd.dbs.heideltime.standalone.jar . && \
rmdir heideltime-standalone && \
Expand All @@ -42,9 +50,20 @@ RUN cd heideltime && \
# Give read&write permission to jenkins for config
RUN chown -R jenkins heideltime

# Setup Haskell for Duckling server
RUN curl -sSL https://get.haskellstack.org/ | bash && \
git clone https://github.com/facebook/duckling && \
cd duckling && \
stack build && \
stack install && \
cd .. && \
rm -fr duckling
ENV PATH="/root/.local/bin:$PATH"

# Give ownership of the conda cache folder to jenkins, to enable installing packages by the user from JupyterLab
RUN mkdir /opt/conda/pkgs/cache && chown -R 1000:1000 /opt/conda/pkgs/cache

COPY notebook_config.yml /notebook_config.yml

# specify user because of problem running start-notebook.sh when being root
USER jenkins

File renamed without changes.
33 changes: 20 additions & 13 deletions nlp/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,26 @@ channels:
- conda-forge

dependencies:
- intake-esm==2021.1.15
- intake-stac==0.3.0
- sat-search==0.3.0
- intake-esm
- intake-stac>=0.4.0
- threddsclient==0.4.2
- openjdk==8.0.152
# python-flair=0.8 only works with numpy<=1.19.5
- python-flair=0.8
- numpy<=1.19.5
- spacy==3.1.0
- python-dateutil==2.7.5
- python-levenshtein==0.12.2
- requests=2.25.1
- pip==20.3.3
- openjdk==8.0.152
- python-flair
- numpy
- pydantic<2
- python-levenshtein
- requests
- pip>=22
- pip:
- textsearch==0.0.21
- spacy==3.1.0
- osmnx
- langchain
- spacy>=3.5,<4
- spacy-transformers
- transformers<4.31
- sentence_transformers
- chromadb
- shapely
- ipywidgets
- nltk
- pystac_client
27,431 changes: 27,431 additions & 0 deletions nlp/notebooks/NLU_demo.ipynb

Large diffs are not rendered by default.

File renamed without changes.
Loading

0 comments on commit 899d8d4

Please sign in to comment.