diff --git a/.env.example b/.env.example
index 30fe89a..763ba82 100644
--- a/.env.example
+++ b/.env.example
@@ -3,9 +3,8 @@
 ANTHROPIC_API_KEY=""
 AZURE_OPENAI_KEY=
 AZURE_OPENAI_ENDPOINT=
-AZURE_OPENAI_API_VERSION="2024-02-15-preview"
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME="gpt-35-turbo-16k"
-
+AZURE_OPENAI_API_VERSION=2024-02-01
+AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=gpt-4o
 TOKENIZERS_PARALLELISM=false
 
 # === Frontend ===
@@ -37,7 +36,7 @@ MINIO_HOST=minio
 MINIO_PORT=9000
 MINIO_ACCESS_KEY=minioadmin
 MINIO_SECRET_KEY=minioadmin
-BUCKET_NAME='default-bucket-3'
+BUCKET_NAME='scout-bucket'
 
 # === Deployment ===
 DOCKER_BUILDER_CONTAINER=ipa-scout
diff --git a/Makefile b/Makefile
index 33c29c3..e00d273 100644
--- a/Makefile
+++ b/Makefile
@@ -3,12 +3,25 @@ export
 
 # Setting up your python environment - 3.12.2
 PYTHON_VERSION=3.12.2
+PYTHON = poetry run python
+
+install: ## Install all dependencies including dev packages
+	poetry install --with dev
+	poetry run pre-commit install
+
+
+setup: install ## Complete setup including pre-commit hooks and NLTK data
+	$(PYTHON) -c "import nltk; nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger_eng')"
+	brew install poppler
+	test -f .env || cp .env.example .env
+	@echo "Don't forget to update your .env file with your API keys!"
+
 
 alembic-upgrade:
-	alembic upgrade head
+	poetry run alembic upgrade head
 
 alembic-revision:
-	alembic revision --autogenerate
+	poetry run alembic revision --autogenerate
 
 # Resetting system to make testing easier --------------
 # Make sure you have psql installed through brew `brew install postgresql`
diff --git a/README.md b/README.md
index 4a8b08d..8440315 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 # 🔍 IPA scout
 
-> ⚠️ Incubation Project: This project is an incubation project; as such, we DON’T recommend using it in any critical use case. This project is in active development and a work in progress.
-
-Scout automatically analyses new project documents w.r.t to IPA guidance and gate workbooks, flagging potential problems as soon as possible. This tool improves and expedites the work of assurance review teams.
+> ⚠️ Incubation Project: This project is an incubation project; as such, we encourage teams to evaluate its performance before applying it to their assurance process.
+>
+Scout automatically analyses new project documents with respect to IPA guidance and gate workbooks, flagging potential problems as soon as possible. This tool improves and expedites the work of assurance review teams.
 
 There is a pipeline that ingests project documents, criteria, then uses an LLM to evaluate projects against the criteria. The project data, criteria and evaluation results are saved in a database. There is then an app that allows users to explore this data and identify potential issues with projects.
 
@@ -22,31 +22,11 @@ cd scout
 ```
 
 Make sure you have [poetry installed](https://python-poetry.org/docs/) for dependency management.
-Install packages:
-```
-poetry install --with dev
-```
-
-Set up pre-commit for linting, checking for secrets etc:
-```
-pre-commit install
-```
-
-Install `nltk` data:
-```
-poetry run python -c "import nltk; nltk.download('punkt_tab');nltk.download('averaged_perceptron_tagger_eng');nltk.download('averaged_perceptron_tagger_eng')"
-```
-
-Copy the `.env` file and add your environment variables e.g. API keys:
-```
-cp .env.example .env
-```
-
-You may need to install `poppler` (this is a reqirement for the `pdf2image` Python library):
+Install dependencies and copy the `.env` file:
 ```
-brew install poppler
+make setup
 ```
-(assuming you are using a Mac with Homebrew).
+This `make` target assumes you are using a Mac with Homebrew.
 
 Run using:
 ```
@@ -96,7 +76,7 @@ You may wish to use the example data in the `example_data` folder - this contain
 3. Make sure Python packages are installed: `poetry install`.
 4. Make sure your database, minio and libreoffice services are running: `docker compose up db minio libreoffice`.
 5. In the script, change your `project_directory_name`, `gate_review`, and `llm` to reflect your project (if you are using the example data, you won't need to change anything).
-6. Run the script (outside Docker): `poetry run python scripts/analyse_project.py` (this takes a few minutes with the example data).
+6. Run the script (outside Docker): `poetry run python scripts/analyse_project.py` (this takes a few minutes with the example data). The first time you run this script, it downloads an ML model used to process documents, which makes the first run particularly slow.
 7. View your results in the frontend - run the app in Docker `docker compose up` and go to http://localhost:3000.
 
 More detailed documentation can be found in `docs/analyse_projects.md`.
diff --git a/scout/utils/storage/filesystem.py b/scout/utils/storage/filesystem.py
index 8929d43..d972e9a 100644
--- a/scout/utils/storage/filesystem.py
+++ b/scout/utils/storage/filesystem.py
@@ -20,19 +20,6 @@
 
 load_dotenv()
 
-
-class TypeAdapter:
-    def __init__(self, model):
-        self.model = model
-
-    def validate_python(self, data):
-        try:
-            return self.model(**data)
-        except ValidationError as e:
-            print(f"Validation error: {e}")
-            return None
-
-
 class S3StorageHandler(BaseStorageHandler):
     def __init__(
         self,
@@ -62,11 +49,11 @@ def __init__(
             # Create the bucket if it doesn't exist
             try:
                 self.s3_client.create_bucket(Bucket=self.bucket_name)
-                print(f"Successfully created bucket: {self.bucket_name}")
+                logger.info(f"Successfully created bucket: {self.bucket_name}")
             except self.s3_client.exceptions.BucketAlreadyOwnedByYou:
-                print(f"Bucket {self.bucket_name} already exists and is owned by you.")
+                logger.info(f"Bucket {self.bucket_name} already exists and is owned by you.")
             except Exception as e:
-                print(f"Error creating bucket: {e}")
+                logger.error(f"Error creating bucket: {e}")
         else:
             # Use no authentication for production mode
             logger.info("Connecting to S3...")
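
Reviewer note on the `.env` changes above: `2024-02-01` is a GA (non-preview) Azure OpenAI API version, and on Azure the chat `model` argument takes the deployment name (here `gpt-4o`) rather than a base model id. The code that consumes these variables is not part of this diff, so the snippet below is only a minimal sketch of the usual wiring, assuming the project uses the official `openai` (>=1.0) Python package; the helper name and test message are illustrative, not taken from the repo:

```python
import os

from openai import AzureOpenAI  # assumption: the openai >= 1.0 SDK is a dependency


def smoke_test_azure_chat() -> str:
    """Hypothetical helper: check that the Azure OpenAI values in .env work."""
    client = AzureOpenAI(
        api_key=os.environ["AZURE_OPENAI_KEY"],
        api_version=os.environ["AZURE_OPENAI_API_VERSION"],  # 2024-02-01 per this diff
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    )
    # On Azure, `model` is the *deployment name* (gpt-4o here), not the model family.
    response = client.chat.completions.create(
        model=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
        messages=[{"role": "user", "content": "ping"}],
    )
    return response.choices[0].message.content or ""
```

Running something like this once after `make setup` (outside Docker, with real keys in `.env`) is a quick way to confirm the new defaults before exercising the full pipeline.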