diff --git a/.github/workflows/basic-tests.yml b/.github/workflows/basic-tests.yml
index 5cddd078..e635dd29 100644
--- a/.github/workflows/basic-tests.yml
+++ b/.github/workflows/basic-tests.yml
@@ -1,6 +1,6 @@
-name: 'Basic (Unit) Tests'
+# Install Python and Node. Run lint and unit tests.
-# **What it does**: Setups up python dependencies and runs tests.
+name: 'Basic (Unit) Tests'
# **Why we have it**: Automatically run tests to ensure code doesn't introduce regressions.
# **Who does it impact**: Python small-scale "unit" tests.
diff --git a/.github/workflows/external-pr-open.yml b/.github/workflows/external-pr-open.yml
index 5dea2f58..d3b76d31 100644
--- a/.github/workflows/external-pr-open.yml
+++ b/.github/workflows/external-pr-open.yml
@@ -1,7 +1,6 @@
# Ambuda workflow to build and publish docker image
-name: Build and test incoming
-
+name: Basic image build and test
env:
AMBUDA_VERSION: v0.1
AMBUDA_HOST_IP: 127.0.0.1
@@ -19,7 +18,6 @@ jobs:
build_and_test:
name: Build external pr image
runs-on: ubuntu-22.04
- environment: staging
permissions:
packages: write
contents: read
@@ -38,7 +36,7 @@ jobs:
- name: Build and Test docker image
id: build-to-test
- uses: docker/build-push-action@v3.2.0
+ uses: docker/build-push-action@v4.0.0
with:
context: .
file: build/containers/Dockerfile.final
diff --git a/.github/workflows/rel-pr-create.yml b/.github/workflows/rel-pr-create.yml
index 0b90ac38..9947a14d 100644
--- a/.github/workflows/rel-pr-create.yml
+++ b/.github/workflows/rel-pr-create.yml
@@ -1,14 +1,13 @@
-# Ambuda workflow to build and publish docker image
-
-name: Create Release PR on seeing new code
+# Check for changes in "main". Push changes to "release".
+name: Push changes from main to release
env:
GH_TOKEN: ${{ github.token }}
# head/branch where current changes reside before merge
- PR_SOURCE_BRANCH: development
+ PR_SOURCE_BRANCH: main
# base - branch intended to change once the proposed changes are merged.
- PR_TARGET_BRANCH: releases
+ PR_TARGET_BRANCH: release
on:
workflow_dispatch:
@@ -28,16 +27,16 @@ jobs:
- uses: actions/checkout@v3
with:
ref: ${{ env.PR_TARGET_BRANCH }}
- - name: Find the recent PR merge on development
+ - name: Find the recent PR merge on ${{ env.PR_SOURCE_BRANCH }}
id: find_pr
run: |
- LAST_RUNTIME=$(date +'%Y-%m-%dT%H:%M:%S' --date '-30000 min')
+ LAST_RUNTIME=$(date +'%Y-%m-%dT%H:%M:%S' --date '-1800 min')
gh repo set-default ambuda-org/ambuda
echo "PR_NUMBER=$(gh pr list --state merged --base ${{ env.PR_SOURCE_BRANCH }} --search "merged:>$LAST_RUNTIME" -L 1 --json number| jq '.[].number')" >> $GITHUB_OUTPUT
create_pr:
runs-on: ubuntu-22.04
- name: Create PR on releases branch
+ name: Create PR on release branch
environment: staging
permissions:
packages: write
@@ -61,4 +60,4 @@ jobs:
base: ${{ env.PR_TARGET_BRANCH }}
branch: ${{ env.PR_TARGET_BRANCH }}-${{ env.PR_NUMBER }}
title: PR-${{ env.PR_NUMBER }} - merge
- body: development/PR-${{ env.PR_NUMBER }} merge is open
+ body: ${{ env.PR_SOURCE_BRANCH }}/PR-${{ env.PR_NUMBER }} merge is open
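For reference, the 30-hour lookback above (`--date '-1800 min'`) and the `gh pr list` search filter can be sketched in Python. A minimal sketch; the helper name is illustrative, not part of the workflow:

```python
# Sketch of the merge-window filter built in the find_pr step above.
from datetime import datetime, timedelta

def merged_since_filter(window_minutes: int = 1800) -> str:
    """Reproduce: date +'%Y-%m-%dT%H:%M:%S' --date '-1800 min'."""
    last_runtime = datetime.now() - timedelta(minutes=window_minutes)
    return f"merged:>{last_runtime:%Y-%m-%dT%H:%M:%S}"

# Passed to: gh pr list --state merged --base main --search "<filter>" -L 1
print(merged_since_filter())
```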
diff --git a/.github/workflows/rel-pr-merged.yml b/.github/workflows/rel-pr-merged.yml
index e634645a..af3ca7cf 100644
--- a/.github/workflows/rel-pr-merged.yml
+++ b/.github/workflows/rel-pr-merged.yml
@@ -1,7 +1,6 @@
-# Ambuda workflow to build and publish docker image
-
-name: Teardown staging
+# Clean up staging. Usually runs after a PR is merged or closed.
+name: Teardown staging deployment
env:
AMBUDA_VERSION: v0.1
@@ -13,7 +12,7 @@ on:
# - 'v*'
pull_request:
branches:
- - 'releases'
+ - 'release'
types:
- closed
diff --git a/.github/workflows/rel-pr-open.yml b/.github/workflows/rel-pr-open.yml
index 2888db20..1b9eb173 100644
--- a/.github/workflows/rel-pr-open.yml
+++ b/.github/workflows/rel-pr-open.yml
@@ -1,7 +1,6 @@
-# Ambuda workflow to build and publish docker image
-
-name: Build publish and staging
+# Build release image. Push image to ghcr.io. Deploy to staging environment.
+name: Release image build & publish
env:
AMBUDA_VERSION: v0.1
AMBUDA_HOST_IP: 127.0.0.1
@@ -9,9 +8,10 @@ env:
REGISTRY: ghcr.io
on:
+ workflow_dispatch:
pull_request:
branches:
- - 'releases'
+ - 'release'
types: [opened, reopened, synchronize]
jobs:
@@ -60,7 +60,7 @@ jobs:
- name: Build and push Docker images
id: publish
- uses: docker/build-push-action@v3.2.0
+ uses: docker/build-push-action@v4.0.0
with:
context: .
file: build/containers/Dockerfile.final
diff --git a/.gitignore b/.gitignore
index 7700e201..0cc3a8a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
# Large data files, including texts, parse data, and (in local dev)
# image uploads.
data/
+deploy/data_database
+deploy/data_files
# Autogenerated files (compiled CSS, testing, documentation, ...)
.coverage
diff --git a/Makefile b/Makefile
index 4bb2e287..ede40895 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,34 @@
+# Environment. Valid values are: local, staging, and prod
+AMBUDA_DEPLOYMENT_ENV=local
+AMBUDA_HOST_IP=0.0.0.0
+AMBUDA_HOST_PORT=5000
+
+# Control the verbosity of messages using a flag
+ifdef mode
+ ifeq ("$(origin mode)", "command line")
+ BUILD_MODE = $(mode)
+ endif
+else
+ BUILD_MODE = default
+endif
+
+ifeq ($(BUILD_MODE),dev)
+ IO_REDIRECT =
+ DOCKER_VERBOSITY =
+ DOCKER_LOG_LEVEL =
+ DOCKER_DETACH =
+else ifeq ($(BUILD_MODE),quiet)
+ IO_REDIRECT = &> /dev/null
+ DOCKER_VERBOSITY = -qq
+ DOCKER_LOG_LEVEL = --log-level ERROR
+ DOCKER_DETACH = --detach
+else ifeq ($(BUILD_MODE),default)
+ IO_REDIRECT =
+ DOCKER_VERBOSITY =
+	DOCKER_LOG_LEVEL =
+ DOCKER_DETACH = --detach
+endif
+
# Needed because we have folders called "docs" and "test" that confuse `make`.
.PHONY: docs test py-venv-check clean
@@ -11,11 +42,6 @@ AMBUDA_NAME=ambuda
AMBUDA_IMAGE=${AMBUDA_NAME}:${AMBUDA_VERSION}-${GITBRANCH}-${GITCOMMIT}
AMBUDA_IMAGE_LATEST="$(AMBUDA_NAME)-rel:latest"
-# Environment. Valid values are: local, staging, and prod
-AMBUDA_DEPLOYMENT_ENV=local
-AMBUDA_HOST_IP=0.0.0.0
-AMBUDA_HOST_PORT=5090
-
py-venv-check:
ifeq ("$(VIRTUAL_ENV)","")
@echo "Error! Python venv not activated. Activate venv to proceed. Run: "
@@ -96,16 +122,25 @@ db-seed-all: py-venv-check
python -m ambuda.seed.dictionaries.shabdasagara
python -m ambuda.seed.dictionaries.vacaspatyam
-
-# Common development commands
+# Local run commands
# ===============================================
+.PHONY: devserver celery
+devserver:
+	$(MAKE) mode=dev docker-start
+
+# Run a local Celery instance for background tasks.
+celery:
+ celery -A ambuda.tasks worker --loglevel=INFO
+# Docker commands
+# ===============================================
+.PHONY: docker-setup-db docker-build docker-start docker-stop docker-logs
# Start DB using Docker.
docker-setup-db: docker-build
ifneq ("$(wildcard $(DB_FILE))","")
@echo "Ambuda using your existing database!"
else
- @docker --log-level ERROR compose -p ambuda-${AMBUDA_DEPLOYMENT_ENV} -f deploy/${AMBUDA_DEPLOYMENT_ENV}/docker-compose-dbsetup.yml up &> /dev/null
+ @docker ${DOCKER_LOG_LEVEL} compose -p ambuda-${AMBUDA_DEPLOYMENT_ENV} -f deploy/${AMBUDA_DEPLOYMENT_ENV}/docker-compose-dbsetup.yml up ${IO_REDIRECT}
@echo "Ambuda Database : ✔ "
endif
@@ -114,12 +149,12 @@ endif
docker-build:
@echo "> Ambuda build is in progress. Expect it to take 2-5 minutes."
@printf "%0.s-" {1..21} && echo
- @docker build -q -t ${AMBUDA_IMAGE} -t ${AMBUDA_IMAGE_LATEST} -f build/containers/Dockerfile.final ${PWD} > /dev/null
+	@docker build ${DOCKER_VERBOSITY} -t ${AMBUDA_IMAGE} -t ${AMBUDA_IMAGE_LATEST} -f build/containers/Dockerfile.final ${PWD} ${IO_REDIRECT}
@echo "Ambuda Image : ✔ (${AMBUDA_IMAGE}, ${AMBUDA_IMAGE_LATEST})"
# Start Docker services.
docker-start: docker-build docker-setup-db
- @docker --log-level ERROR compose -p ambuda-${AMBUDA_DEPLOYMENT_ENV} -f deploy/${AMBUDA_DEPLOYMENT_ENV}/docker-compose.yml up --detach &> /dev/null
+ @docker ${DOCKER_LOG_LEVEL} compose -p ambuda-${AMBUDA_DEPLOYMENT_ENV} -f deploy/${AMBUDA_DEPLOYMENT_ENV}/docker-compose.yml up ${DOCKER_DETACH} ${IO_REDIRECT}
@echo "Ambuda WebApp : ✔ "
@echo "Ambuda URL : http://${AMBUDA_HOST_IP}:${AMBUDA_HOST_PORT}"
@printf "%0.s-" {1..21} && echo
@@ -127,18 +162,18 @@ docker-start: docker-build docker-setup-db
# Stop docker services
docker-stop:
- @docker --log-level ERROR compose -p ambuda-${AMBUDA_DEPLOYMENT_ENV} -f deploy/${AMBUDA_DEPLOYMENT_ENV}/docker-compose.yml stop
- @docker --log-level ERROR compose -p ambuda-${AMBUDA_DEPLOYMENT_ENV} -f deploy/${AMBUDA_DEPLOYMENT_ENV}/docker-compose.yml rm
+ @docker ${DOCKER_LOG_LEVEL} compose -p ambuda-${AMBUDA_DEPLOYMENT_ENV} -f deploy/${AMBUDA_DEPLOYMENT_ENV}/docker-compose.yml stop
+ @docker ${DOCKER_LOG_LEVEL} compose -p ambuda-${AMBUDA_DEPLOYMENT_ENV} -f deploy/${AMBUDA_DEPLOYMENT_ENV}/docker-compose.yml rm
@echo "Ambuda URL stopped"
# Show docker logs
docker-logs:
@docker compose -p ambuda-${AMBUDA_DEPLOYMENT_ENV} -f deploy/${AMBUDA_DEPLOYMENT_ENV}/docker-compose.yml logs
-# Run a local Celery instance for background tasks.
-celery:
- celery -A ambuda.tasks worker --loglevel=INFO
+
+# Lint commands
+# ===============================================
# Check imports in Python code
lint-isort:
@echo "Running Python isort to organize module imports"
@@ -163,6 +198,8 @@ lint-check: js-lint py-lint
black . --diff
@echo 'Lint completed'
+# Test, coverage and documentation commands
+# ===============================================
# Run all Python unit tests.
test: py-venv-check
pytest .
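The `mode` flag added at the top of the Makefile drives the Docker flags (for example, `make mode=quiet docker-start`). A rough Python rendering of that dispatch, for illustration only:

```python
# Illustrative rendering of the Makefile's BUILD_MODE dispatch.
FLAGS_BY_MODE = {
    # mode=dev: verbose output, stay in the foreground.
    "dev": {"io_redirect": "", "docker_log_level": "", "docker_detach": ""},
    # mode=quiet: suppress output, detach from the terminal.
    "quiet": {
        "io_redirect": "&> /dev/null",
        "docker_log_level": "--log-level ERROR",
        "docker_detach": "--detach",
    },
    # default: verbose output, but still detach.
    "default": {"io_redirect": "", "docker_log_level": "", "docker_detach": "--detach"},
}

def flags_for(mode: str = "default") -> dict:
    # Unknown modes fall back to "default", mirroring the Makefile's else chain.
    return FLAGS_BY_MODE.get(mode, FLAGS_BY_MODE["default"])
```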
diff --git a/ambuda/seed/lookup/__init__.py b/ambuda/seed/lookup/__init__.py
index 3050d1a0..f631330a 100644
--- a/ambuda/seed/lookup/__init__.py
+++ b/ambuda/seed/lookup/__init__.py
@@ -11,6 +11,7 @@ def run():
create_bot_user.run()
except Exception as ex:
raise Exception(
- "Error: Failed to create page statuses, "
+ "Error: Failed to create page statuses, "
"create roles, and creat bot user."
- f"Error: {ex}")
+ f"Error: {ex}"
+ )
diff --git a/ambuda/seed/texts/gretil.py b/ambuda/seed/texts/gretil.py
index 1b67244e..f55ba4cd 100644
--- a/ambuda/seed/texts/gretil.py
+++ b/ambuda/seed/texts/gretil.py
@@ -116,11 +116,10 @@ def run():
for spec in ALLOW:
add_document(engine, spec)
except Exception as ex:
- raise Exception(
- "Error: Failed to get latest from GRETIL. "
- f"Error: {ex}")
+ raise Exception("Error: Failed to get latest from GRETIL. " f"Error: {ex}")
log("Done.")
+
if __name__ == "__main__":
run()
diff --git a/ambuda/templates/about/people.html b/ambuda/templates/about/people.html
index 387dc400..53723fa9 100644
--- a/ambuda/templates/about/people.html
+++ b/ambuda/templates/about/people.html
@@ -57,7 +57,7 @@
{{ _('People') }}
Ashwin has worked on a variety of projects around open source software
development, digital humanities, cloud infrastructure and architecture, and
cybersecurity. His interests in Sanskrit include Vedic texts and Vedantic
-commentaries. Ashwin holds a B.S. in Computer Science in Stanford University
+commentaries. Ashwin holds a B.S. in Computer Science from Stanford University
and is currently pursuing a J.D. degree at Georgetown University Law Center,
where he works on technology law and policy.
{% endtrans %}
diff --git a/ambuda/templates/proofing/projects/edit.html b/ambuda/templates/proofing/projects/edit.html
index 71fbe0b8..988a18b7 100644
--- a/ambuda/templates/proofing/projects/edit.html
+++ b/ambuda/templates/proofing/projects/edit.html
@@ -15,12 +15,14 @@
{{ m.project_nav(project=project, active='edit') }}
{% set search_url = url_for("proofing.project.search", slug=project.slug) %}
+{% set replace_url = url_for("proofing.project.replace", slug=project.slug) %}
{% set ocr_url = url_for("proofing.project.batch_ocr", slug=project.slug) %}
diff --git a/ambuda/templates/proofing/projects/replace.html b/ambuda/templates/proofing/projects/replace.html
new file mode 100644
index 00000000..3bbb8f98
--- /dev/null
+++ b/ambuda/templates/proofing/projects/replace.html
@@ -0,0 +1,47 @@
+{% extends 'proofing/base.html' %}
+{% from "macros/forms.html" import field %}
+{% import "macros/proofing.html" as m %}
+
+{% block title %}Search and Replace {{ project.title }} | Ambuda{% endblock %}
+
+{% block content %}
+
+{{ m.project_header_nested('Search and Replace', project) }}
+{{ m.project_nav(project=project, active='edit') }}
+
+
+<p>Use this simple search and replace form to make edits across this project.</p>
+
+<form method="GET">
+  {{ field(form.query) }}
+  {{ field(form.replace) }}
+  <input type="submit" value="Replace">
+</form>
+
+{% if query %}
+
+{% macro sp(s, p, n) %}{% if n == 1 %}{{ s }}{% else %}{{ p }}{% endif %}{% endmacro %}
+
+{% set nr = results|length %}
+
+<p>Found {{ nr }} {{ sp("page", "pages", nr) }} that {{ sp("contains", "contain", nr) }} {{ query }}.</p>
+
+<ul>
+{% for page in results %}
+{% set page_url = url_for("proofing.page.edit", project_slug=project.slug, page_slug=page.slug) %}
+<li>
+  <a href="{{ page_url }}">{{ project.title }}/{{ page.slug }}</a>
+
+  {% for match in page.matches %}
+  <p>{{ match.query }}</p>
+  <p>{{ match.update }}</p>
+  {%- endfor %}
+</li>
+{% endfor %}
+</ul>
+
+{% endif %}
+{% endblock %}
\ No newline at end of file
diff --git a/ambuda/views/auth.py b/ambuda/views/auth.py
index 47d5c5dd..7de9a360 100644
--- a/ambuda/views/auth.py
+++ b/ambuda/views/auth.py
@@ -5,12 +5,17 @@
https://www.uxmatters.com/mt/archives/2018/09/signon-signoff-and-registration.php
Security reference:
-
- https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html
- https://cheatsheetseries.owasp.org/cheatsheets/Forgot_Password_Cheat_Sheet.html
+
+Max lengths:
+- https://cheatsheetseries.owasp.org/cheatsheets/Authentication_Cheat_Sheet.html suggests at least 64 characters for the maximum password length
+- https://www.rfc-editor.org/errata_search.php?rfc=3696 specifies a 254-character maximum for email addresses
+
"""
import secrets
+import sys
from datetime import datetime, timedelta
from typing import Optional
@@ -27,7 +32,15 @@
bp = Blueprint("auth", __name__)
+# minimum and maximum lengths of authentication fields
+MIN_EMAIL_ADDRESS_LEN = 4
+MAX_EMAIL_ADDRESS_LEN = 254
+MIN_PASSWORD_LEN = 8
+MAX_PASSWORD_LEN = 256
+MIN_USERNAME_LEN = 6
+MAX_USERNAME_LEN = 64
+# token lifetime
MAX_TOKEN_LIFESPAN_IN_HOURS = 24
# FIXME: redirect to site.index once user accounts are more useful.
POST_AUTH_ROUTE = "proofing.index"
@@ -79,51 +92,107 @@ def _is_valid_reset_token(row: db.PasswordResetToken, raw_token: str, now=None):
return True
-class SignupForm(FlaskForm):
- username = StringField(
- _l("Username"), [val.Length(min=6, max=25), val.DataRequired()]
+# The native val.Length() validator silently snips the username and
+# password to the maximum length, so the database stores the snipped
+# values and information is lost. For instance, if a user pastes 240
+# characters, the form stores only the first MAX_##_LEN of them.
+# Our own validator instead raises a clear error saying the field
+# cannot exceed MAX_##_LEN characters. It is unclear whether the
+# val.Length() behavior is a bug or a feature.
+# Copied from https://wtforms.readthedocs.io/en/2.3.x/validators/
+class FieldLength(object):
+ def __init__(self, min=None, max=None, message=None):
+ self.min = min or 0
+ self.max = max or sys.maxsize
+ if not message:
+ message = f"Field must be between {min} and {max} characters long."
+ self.message = message
+
+ def __call__(self, form, field):
+        field_len = len(field.data or "")
+ if not (self.min <= field_len <= self.max):
+ raise val.ValidationError(self.message)
+
+
+def get_field_validators(field_name: str, min_len: int, max_len: int):
+ field_name_capitalized = field_name.capitalize()
+ return [
+ val.DataRequired(),
+ FieldLength(
+ min=min_len,
+ max=max_len,
+            message=f"{field_name_capitalized} must be between {min_len} and {max_len} characters long",
+ ),
+ ]
+
+
+def get_username_validators():
+ validators = get_field_validators("username", MIN_USERNAME_LEN, MAX_USERNAME_LEN)
+ validators.append(
+        val.Regexp(r"^[^\s]*$", message="Username must not contain spaces")
)
- password = PasswordField(_l("Password"), [val.Length(min=8), val.DataRequired()])
- email = StringField(_l("Email address"), [val.DataRequired(), val.Email()])
+ return validators
+
+
+def get_legacy_username_validators():
+ return get_field_validators("username", MIN_USERNAME_LEN, MAX_USERNAME_LEN)
+
+
+def get_password_validators():
+ return get_field_validators("password", MIN_PASSWORD_LEN, MAX_PASSWORD_LEN)
+
+
+def get_email_validators():
+ validators = get_field_validators(
+ "email", MIN_EMAIL_ADDRESS_LEN, MAX_EMAIL_ADDRESS_LEN
+ )
+ validators.append(val.Email())
+ return validators
+
+
+class SignupForm(FlaskForm):
+ username = StringField(_l("Username"), get_username_validators())
+ password = PasswordField(_l("Password"), get_password_validators())
+ email = EmailField(_l("Email address"), get_email_validators())
recaptcha = RecaptchaField()
def validate_username(self, username):
+ # TODO: make username case insensitive
user = q.user(username.data)
if user:
raise val.ValidationError("Please use a different username.")
def validate_email(self, email):
session = q.get_session()
+ # TODO: make email case insensitive
user = session.query(db.User).filter_by(email=email.data).first()
if user:
raise val.ValidationError("Please use a different email address.")
class SignInForm(FlaskForm):
- username = StringField(
- _l("Username"), [val.Length(min=6, max=25), val.DataRequired()]
- )
- password = PasswordField(_l("Password"), [val.Length(min=8), val.DataRequired()])
+ username = StringField(_l("Username"), get_legacy_username_validators())
+ password = PasswordField(_l("Password"), get_password_validators())
class ResetPasswordForm(FlaskForm):
- email = EmailField("Email", [val.DataRequired()])
+ email = EmailField(_l("Email address"), get_email_validators())
recaptcha = RecaptchaField()
class ChangePasswordForm(FlaskForm):
#: Old password. No validation requirements, in case we change our password
#: criteria in the future.
- old_password = PasswordField(_l("Old password"), [val.DataRequired()])
+    old_password = PasswordField(_l("Old password"), get_password_validators())
+
#: New password.
- new_password = PasswordField(
- _l("New password"), [val.Length(min=8), val.DataRequired()]
- )
+ new_password = PasswordField(_l("New password"), get_password_validators())
class ResetPasswordFromTokenForm(FlaskForm):
- password = PasswordField(_l("Password"), [val.DataRequired()])
- confirm_password = PasswordField(_l("Confirm password"), [val.DataRequired()])
+ password = PasswordField(_l("Password"), get_password_validators())
+ confirm_password = PasswordField(_l("Confirm password"), get_password_validators())
@bp.route("/register", methods=["GET", "POST"])
@@ -133,6 +202,7 @@ def register():
return redirect(url_for("site.index"))
form = SignupForm()
+    # TODO: save username and email in lowercase
if form.validate_on_submit():
user = q.create_user(
username=form.username.data,
@@ -157,6 +227,7 @@ def sign_in():
return redirect(url_for("site.index"))
form = SignInForm()
+ # TODO: make username case insensitive
if form.validate_on_submit():
user = q.user(form.username.data)
if user and user.check_password(form.password.data):
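A minimal sketch of the new validator's behavior, assuming `FieldLength` is importable from `ambuda.views.auth`: unlike `val.Length`, an over-long value fails validation outright instead of being accepted and truncated.

```python
# Minimal sketch: FieldLength rejects over-long input instead of truncating it.
from wtforms import Form, StringField

from ambuda.views.auth import FieldLength  # assumes the module imports cleanly

class DemoForm(Form):
    username = StringField("Username", [FieldLength(min=6, max=64)])

# A 240-character paste fails validation with a clear message.
form = DemoForm(data={"username": "x" * 240})
assert not form.validate()
print(form.errors)
# {'username': ['Field must be between 6 and 64 characters long.']}
```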
diff --git a/ambuda/views/proofing/project.py b/ambuda/views/proofing/project.py
index 6e7f554b..9a375790 100644
--- a/ambuda/views/proofing/project.py
+++ b/ambuda/views/proofing/project.py
@@ -96,6 +96,13 @@ class DeleteProjectForm(FlaskForm):
slug = StringField("Slug", validators=[DataRequired()])
+class ReplaceForm(SearchForm):
+ class Meta:
+ csrf = False
+
+ replace = StringField(_l("Replace"), validators=[DataRequired()])
+
+
@bp.route("/
/")
def summary(slug):
"""Show basic information about the project."""
@@ -279,6 +286,64 @@ def search(slug):
)
+@bp.route("//replace")
+@login_required
+def replace(slug):
+ """Search and replace a string across all of the project's pages.
+
+ This is useful to replace a string across the project in one shot.
+ """
+ project_ = q.project(slug)
+ if project_ is None:
+ abort(404)
+
+ form = ReplaceForm(request.args)
+ if not form.validate():
+ return render_template(
+ "proofing/projects/replace.html", project=project_, form=form
+ )
+
+ # search for "query" string and replace with "update" string
+ query = form.query.data
+ update = form.replace.data
+
+ results = []
+ for page_ in project_.pages:
+ if not page_.revisions:
+ continue
+
+ matches = []
+
+ latest = page_.revisions[-1]
+ for line in latest.content.splitlines():
+ if query in line:
+ matches.append(
+ {
+                    "query": escape(line).replace(
+                        query, Markup(f"<mark>{escape(query)}</mark>")
+                    ),
+                    "update": escape(line).replace(
+                        query, Markup(f"<mark>{escape(update)}</mark>")
+                    ),
+ }
+ )
+ if matches:
+ results.append(
+ {
+ "slug": page_.slug,
+ "matches": matches,
+ }
+ )
+ return render_template(
+ "proofing/projects/replace.html",
+ project=project_,
+ form=form,
+ query=query,
+ update=update,
+ results=results,
+ )
+
+
@bp.route("//batch-ocr", methods=["GET", "POST"])
@p2_required
def batch_ocr(slug):
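The escape-then-replace pattern in the `replace` view can be exercised on its own. A small sketch with markupsafe and an illustrative input line:

```python
# Sketch of the preview-highlighting pattern used in the replace view.
from markupsafe import Markup, escape

line = "the <b>quick</b> brown fox"
query, update = "quick", "swift"

# Escape the whole line first so page content cannot inject HTML; a Markup
# replacement is then spliced in without being re-escaped.
before = escape(line).replace(query, Markup(f"<mark>{escape(query)}</mark>"))
after = escape(line).replace(query, Markup(f"<mark>{escape(update)}</mark>"))

print(before)  # the &lt;b&gt;<mark>quick</mark>&lt;/b&gt; brown fox
print(after)   # the &lt;b&gt;<mark>swift</mark>&lt;/b&gt; brown fox
```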
diff --git a/build/containers/Dockerfile.final b/build/containers/Dockerfile.final
index 7fc6eaee..9f895069 100755
--- a/build/containers/Dockerfile.final
+++ b/build/containers/Dockerfile.final
@@ -113,6 +113,7 @@ COPY wsgi.py /app/
COPY alembic.ini /app/
COPY migrations/ /app/migrations/
COPY scripts/ /app/scripts/
+COPY cli.py /app/scripts/
RUN echo "************* Ambuda application build succeeded! ****************"
# ####################################
@@ -127,5 +128,5 @@ WORKDIR /app
RUN echo "************* Building FINAL ${BUILD_TYPE}*****************"
COPY --from=build-ambuda /venv /venv
COPY --from=build-ambuda /app /app
-CMD ["/app/scripts/start-server.sh"]
+CMD ["/app/scripts/start_server.sh"]
RUN echo "************* Ambuda deploy succeeded! *****************"
diff --git a/config.py b/config.py
index e0018011..bc8bf057 100644
--- a/config.py
+++ b/config.py
@@ -30,6 +30,10 @@
TESTING = "testing"
#: The development environment. For local development.
DEVELOPMENT = "development"
+#: The build environment. For builds on GitHub.
+BUILD = "build"
+#: The staging environment. For testing on staging.
+STAGING = "staging"
#: The production environment. For production serving.
PRODUCTION = "production"
@@ -180,6 +184,32 @@ class DevelopmentConfig(BaseConfig):
LOG_LEVEL = logging.INFO
+class BuildConfig(BaseConfig):
+    """For builds on GitHub."""
+
+ AMBUDA_ENVIRONMENT = BUILD
+ DEBUG = True
+ #: If set, automatically reload Flask templates (including imports) when
+ #: they change on disk.
+ TEMPLATES_AUTO_RELOAD = False
+
+ #: Logger setup
+ LOG_LEVEL = logging.INFO
+
+
+class StagingConfig(BaseConfig):
+ """For staging."""
+
+ AMBUDA_ENVIRONMENT = STAGING
+ DEBUG = True
+ #: If set, automatically reload Flask templates (including imports) when
+ #: they change on disk.
+ TEMPLATES_AUTO_RELOAD = False
+
+ #: Logger setup
+ LOG_LEVEL = logging.INFO
+
+
class ProductionConfig(BaseConfig):
"""For production."""
@@ -204,7 +234,13 @@ def _validate_config(config: BaseConfig):
:param config: the config to test
"""
- assert config.AMBUDA_ENVIRONMENT in {TESTING, DEVELOPMENT, PRODUCTION}
+ assert config.AMBUDA_ENVIRONMENT in {
+ TESTING,
+ DEVELOPMENT,
+ BUILD,
+ STAGING,
+ PRODUCTION,
+ }
if not config.SQLALCHEMY_DATABASE_URI:
raise ValueError("This config does not define SQLALCHEMY_DATABASE_URI")
@@ -239,6 +275,8 @@ def load_config_object(name: str):
config_map = {
TESTING: UnitTestConfig,
DEVELOPMENT: DevelopmentConfig,
+ BUILD: BuildConfig,
+ STAGING: StagingConfig,
PRODUCTION: ProductionConfig,
}
config = config_map[name]
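With `BuildConfig` and `StagingConfig` registered, the new environments resolve through the same lookup. A quick sketch, assuming the required environment variables (e.g. `SQLALCHEMY_DATABASE_URI`) are set:

```python
# Quick sketch: the new environments resolve through the same config map.
import config

staging = config.load_config_object("staging")
assert staging.AMBUDA_ENVIRONMENT == config.STAGING
assert staging.TEMPLATES_AUTO_RELOAD is False
```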
diff --git a/deploy/local/docker-compose-dbsetup.yml b/deploy/local/docker-compose-dbsetup.yml
index 3181a150..6ff8a272 100644
--- a/deploy/local/docker-compose-dbsetup.yml
+++ b/deploy/local/docker-compose-dbsetup.yml
@@ -3,9 +3,10 @@ version: '3.4'
services:
dbsetup:
image: ${AMBUDA_IMAGE}
- command: "/app/scripts/start-database.sh"
+ command: "/app/scripts/initialize_data.sh"
volumes:
- ${PWD}/deploy/data_database/:/app/data/database/
+ - ${PWD}/deploy/data_files/vidyut:/app/data/vidyut/
environment:
- FLASK_ENV=development
- FLASK_UPLOAD_FOLDER=/app/data/file-uploads
@@ -15,5 +16,6 @@ services:
- REDIS_URL=redis://redis:6579/0
- VIRTUAL_ENV=.
- AMBUDA_BOT_PASSWORD=insecure bot password
+ - VIDYUT_DATA_DIR=/app/data/vidyut/
- PATH=$PATH:/venv/bin/
-
\ No newline at end of file
+
diff --git a/deploy/local/docker-compose.yml b/deploy/local/docker-compose.yml
index 207da6be..04d25cba 100644
--- a/deploy/local/docker-compose.yml
+++ b/deploy/local/docker-compose.yml
@@ -5,7 +5,10 @@ services:
image: ${AMBUDA_IMAGE}
volumes:
- ${PWD}/deploy/data_database/:/app/data/database/
- - ${PWD}/deploy/data_files/:/app/data/file-uploads/
+ - ${PWD}/deploy/data_files/uploads:/app/data/file-uploads/
+ - ${PWD}/deploy/data_files/vidyut:/app/data/vidyut/
+ - ${PWD}/ambuda/static/:/app/ambuda/static/
+
ports:
- target: 5000
host_ip: ${AMBUDA_HOST_IP}
@@ -21,11 +24,13 @@ services:
- REDIS_URL=redis://redis:6579/0
- VIRTUAL_ENV=.
- AMBUDA_BOT_PASSWORD=insecure bot password
+ - VIDYUT_DATA_DIR=/app/data/vidyut/
- PATH=$PATH:/venv/bin/
+
celery:
image: ${AMBUDA_IMAGE}
- command: /app/scripts/start-celery.sh
+ command: /app/scripts/start_celery.sh
environment:
- REDIS_URL=redis://redis:6579/0
depends_on:
diff --git a/deploy/staging/docker-compose-dbsetup.yml b/deploy/staging/docker-compose-dbsetup.yml
index 6c4f981a..5a72e58a 100644
--- a/deploy/staging/docker-compose-dbsetup.yml
+++ b/deploy/staging/docker-compose-dbsetup.yml
@@ -3,9 +3,10 @@ version: '3.4'
services:
dbsetup:
image: ${AMBUDA_IMAGE}
- command: "/app/scripts/start-database.sh"
+ command: "/app/scripts/initialize_data.sh"
volumes:
- ${HOME}/deploy/data_database/:/app/data/database/
+ - ${HOME}/deploy/data_files/vidyut:/app/data/vidyut/
environment:
- FLASK_ENV=development
- FLASK_UPLOAD_FOLDER=/app/data/file-uploads
@@ -15,4 +16,6 @@ services:
- REDIS_URL=redis://redis:6579/0
- VIRTUAL_ENV=.
- AMBUDA_BOT_PASSWORD=insecure bot password
+ - VIDYUT_DATA_DIR=/app/data/vidyut/
+ - PATH=$PATH:/venv/bin/
\ No newline at end of file
diff --git a/deploy/staging/docker-compose.yml b/deploy/staging/docker-compose.yml
index 7fd34d0f..d8afb583 100644
--- a/deploy/staging/docker-compose.yml
+++ b/deploy/staging/docker-compose.yml
@@ -1,11 +1,13 @@
version: '3.4'
services:
- app:
+ ambuda-web:
image: ${AMBUDA_IMAGE}
volumes:
- ${HOME}/deploy/data_database/:/app/data/database/
- - ${HOME}/deploy/data_files/:/app/data/file-uploads
+ - ${HOME}/deploy/data_files/uploads:/app/data/file-uploads/
+ - ${HOME}/deploy/data_files/vidyut:/app/data/vidyut/
+
ports:
- target: 5000
host_ip: ${AMBUDA_HOST_IP}
@@ -13,7 +15,7 @@ services:
protocol: tcp
mode: host
environment:
- - FLASK_ENV=development
+ - FLASK_ENV=staging
- FLASK_UPLOAD_FOLDER=/app/data/file-uploads
- SQLALCHEMY_DATABASE_URI=sqlite:////app/data/database/database.db
- SECRET_KEY=insecure development secret key
@@ -21,15 +23,17 @@ services:
- REDIS_URL=redis://redis:6579/0
- VIRTUAL_ENV=.
- AMBUDA_BOT_PASSWORD=insecure bot password
-
+ - VIDYUT_DATA_DIR=/app/data/vidyut/
+ - PATH=$PATH:/venv/bin/
+
celery:
image: ${AMBUDA_IMAGE}
- command: /app/scripts/start-celery.sh
+ command: /app/scripts/start_celery.sh
environment:
- REDIS_URL=redis://redis:6579/0
depends_on:
- redis
-
+
redis:
image: redis:7.0.4
command: --port 6579
diff --git a/docs/creating-data-from-the-command-line.rst b/docs/creating-data-from-the-command-line.rst
index 7e345c29..97fb4341 100644
--- a/docs/creating-data-from-the-command-line.rst
+++ b/docs/creating-data-from-the-command-line.rst
@@ -10,7 +10,7 @@ Create a new user::
Make that user an administrator::
- ./cli.py add-role admin
+    ./cli.py add-role --username <username> --role admin
Create a fake proofing project::
diff --git a/docs/installation.rst b/docs/installation.rst
index da395ee1..5b956767 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -79,7 +79,7 @@ Docker setup (beta)
This feature is still under development and may change. You can alternatively
run a local development environment using Docker by running:
- make start-docker
+ make docker-start
Data dependencies
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index c14c0c6e..84fdc832 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -15,7 +15,7 @@ following command::
Next, run the following commands to create a new admin user::
./cli.py create-user
- ./cli.py add-role admin
+    ./cli.py add-role --username <username> --role admin
After that, you can bring up the development server::
diff --git a/scripts/initialize_data.sh b/scripts/initialize_data.sh
new file mode 100755
index 00000000..a0d3cf1a
--- /dev/null
+++ b/scripts/initialize_data.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+# Build database with dictionaries, texts and other data. Initialize Vidyut data.
+
+set -e
+
+. /venv/bin/activate
+
+export PATH=$PATH:/venv/bin/
+export PYTHONPATH=$PYTHONPATH:/app
+
+
+function init_vidyut_data ()
+{
+ VIDYUT_DATA_URL="https://github.com/ambuda-org/vidyut-py/releases/download/0.2.0/data-0.2.0.zip"
+
+ if [ -z "${VIDYUT_DATA_DIR}" ];
+ then
+ echo "Error! VIDYUT_DATA_DIR is not set. Please set environment variable VIDYUT_DATA_DIR"
+ return 1
+ fi
+
+ if [ -z "${VIDYUT_DATA_URL}" ];
+ then
+ echo "Error! URL to fetch Vidyut data is not set. Please set environment variable VIDYUT_DATA_URL"
+ return 1
+ fi
+
+
+ VIDYUT_MARKER="${VIDYUT_DATA_DIR}/vidyut_is_here"
+ if [ -f $VIDYUT_MARKER ];
+ then
+ # TODO: calculate SHA256 of installed files and compare
+ echo "Vidyut data found!"
+ return 0
+ fi
+
+ echo "Fetching Vidyut data from ${VIDYUT_DATA_URL} to ${VIDYUT_DATA_DIR}."
+ mkdir -p $VIDYUT_DATA_DIR
+
+ VIDYUT_DATA_FILE=$(basename -- "$VIDYUT_DATA_URL")
+ VIDYUT_DATA_FILENAME_BASE="${VIDYUT_DATA_FILE%.*}"
+
+    wget -P ${VIDYUT_DATA_DIR} ${VIDYUT_DATA_URL} -q
+    # Test the unzip directly instead of inspecting $? afterwards.
+    if ! unzip -d ${VIDYUT_DATA_DIR} -j ${VIDYUT_DATA_DIR}/${VIDYUT_DATA_FILE}; then
+        echo "Error! Failed to fetch from ${VIDYUT_DATA_URL}"
+        return 1
+    fi
+
+ # Successfully installed. Leave a mark.
+ touch $VIDYUT_MARKER
+
+ return 0
+}
+
+# Git, wget, and unzip are needed only for this run. Installing them here, instead of baking them into the image, curbs the Docker image size.
+apt-get -qq update && apt-get -qq install -y git wget unzip > /dev/null
+
+# Initialize SQLite database
+python scripts/initialize_database.py && echo "Database set up complete." || exit 1
+
+# Initialize Vidyut data
+init_vidyut_data && echo "Vidyut data initialization completed" || echo "Error pulling from Vidyut. Fetch vidyut data later."
+
+exit 0
\ No newline at end of file
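The marker file in `init_vidyut_data` is a plain idempotency guard: download once, then skip on every later run. The same pattern in Python, with illustrative names:

```python
# Sketch of the marker-file idempotency guard used by init_vidyut_data.
from pathlib import Path

def fetch_once(data_dir: Path, fetch) -> None:
    marker = data_dir / "vidyut_is_here"
    if marker.exists():
        print("Vidyut data found!")  # already installed; skip the download
        return
    data_dir.mkdir(parents=True, exist_ok=True)
    fetch(data_dir)   # download + unzip step
    marker.touch()    # leave a mark so the next run is a no-op
```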
diff --git a/scripts/setup_database.py b/scripts/initialize_database.py
similarity index 51%
rename from scripts/setup_database.py
rename to scripts/initialize_database.py
index 9668fcd1..38bb2874 100755
--- a/scripts/setup_database.py
+++ b/scripts/initialize_database.py
@@ -1,13 +1,12 @@
#! /usr/bin/python3
-"""Initializes the development database by creating all tables.
+"""Initializes the database by creating all tables.
-This module is used in `scripts/initialize_from_scratch.sh`.
+This module is called from `scripts/initialize_data.sh`.
-TODO: what are the implications of running `create_all` on app startup?
"""
import subprocess
-from os.path import exists as file_exists
+from pathlib import Path
from dotenv import load_dotenv
from sqlalchemy import create_engine
@@ -24,8 +23,9 @@
def get_sqlalchemy_uri():
- """parse sql alchemy db uri from config file """
+ """parse sql alchemy db uri from config file"""
+ # TODO: don't hard code to dev.
conf = config.load_config_object("development")
sql_uri = conf.SQLALCHEMY_DATABASE_URI
return sql_uri
@@ -37,18 +37,16 @@ def get_db_file_path(sql_uri):
db_file_path = sql_uri.replace("sqlite:///", "")
if db_file_path == sql_uri:
print(f"Error! Invalid SQLALCHEMY_DATABASE_URI {sql_uri}")
- return db_file_path
+ raise ValueError(f"Invalid SQLALCHEMY_DATABASE_URI: {sql_uri}")
+ return Path(db_file_path)
def run_module(module_name):
print(f'{"#"}' * 20)
print(f"Intializing {module_name}")
- if not module_name.run():
- print(f"Error! {module_name}.run() failed")
- return False
+ module_name.run()
print(f"{module_name} initialization successful!")
print(f'{"#"}' * 20)
- return True
def init_database(sql_uri, db_file_path):
@@ -64,58 +62,42 @@ def init_database(sql_uri, db_file_path):
run_module(texts.gretil)
run_module(dcs)
run_module(monier)
- except Exception as ex:
- print("Error: Failed to initialize database"
- f"Error: {ex}")
- return False
-
- if not alembic_migrations():
- return False
-
+ alembic_migrations()
+ except Exception as init_ex:
+ print(f"Error: Failed to initialize database. Error: {init_ex}")
+ raise init_ex
print(f"Success! Database initialized at {db_file_path}")
- return True
def alembic_migrations():
try:
subprocess.run(["/venv/bin/alembic", "ensure_version"])
- except subprocess.CalledProcessError as err:
- print(f"Error processing alembic ensure_versions - {err}")
- return False
- try:
subprocess.run(["/venv/bin/alembic", "stamp", "head"])
- except subprocess.CalledProcessError as err:
- print(f"Error processing alembic stamp head - {err}")
- return False
- return True
+ print("Success! Database version check completed.")
+ except subprocess.CalledProcessError as mig_ex:
+ print(f"Error processing alembic commands - {mig_ex}")
+ raise mig_ex
-def setup_database(db_file_path):
- """Lookup and Update to the latest migration."""
- if not file_exists(db_file_path):
- print(f"Database found at {db_file_path}...")
- return False
+def load_database(db_file_path):
+    """Load the existing database: run the lookup module (TODO: legacy step; check why it is needed) and update to the latest migration."""
+ if not db_file_path.exists():
+ print(f"Database not found at {db_file_path}...")
+ raise FileNotFoundError("Database file not found")
try:
run_module(lookup)
- except Exception as ex:
- print("Error: Failed to initialize database"
- f"Error: {ex}")
- return False
- # Set the most recent revision as the current one.
- try:
subprocess.run(["/venv/bin/alembic", "upgrade", "head"])
- except subprocess.CalledProcessError as err:
- print(f"Error processing alembic upgrade head - {err}")
- return False
-
- print(f"Success! Database setup at {db_file_path}")
- return True
+ print(f"Success! Database is ready at {db_file_path}")
+ except Exception as load_ex:
+ print(f"Error: Failed to load database. Error: {load_ex}")
+ raise load_ex
def run():
"""
- Initialize db for fresh installs. Bootup db on restarts
+ Initialize db for fresh installs. Load db on restarts.
+    The return value is a boolean because the caller is a shell script.
"""
load_dotenv()
@@ -124,18 +106,26 @@ def run():
db_file_path = get_db_file_path(sql_uri)
except Exception as err:
print(f"Failed to get db path - {err}")
+ return False
- if file_exists(db_file_path):
+ if db_file_path.exists():
print(f"Database found at {db_file_path}..")
- ret_setup = setup_database(db_file_path)
- if not ret_setup:
- print(f"Error! Database setup at {db_file_path}..")
+ try:
+ load_database(db_file_path)
+ except Exception as load_ex:
+ print(
+ f"Error! Failed to load database from {db_file_path}. Error: {load_ex}"
+ )
return False
else:
- print("Initialize Database not found")
- ret_init = init_database(sql_uri, db_file_path)
- if not ret_init:
- print(f"Error! Database setup at {db_file_path}..")
+ # This is a new deployment.
+ print("Initialize database")
+ try:
+ init_database(sql_uri, db_file_path)
+ except Exception as init_ex:
+ print(
+ f"Error! Failed to initialize database at {db_file_path}. Error: {init_ex}"
+ )
return False
return True
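`get_db_file_path` extracts the file path with a plain prefix strip. A short sketch of the expected behavior, using the URI from the compose files:

```python
# Expected behavior of get_db_file_path (sqlite URIs only).
from pathlib import Path

uri = "sqlite:////app/data/database/database.db"
assert uri.replace("sqlite:///", "") == "/app/data/database/database.db"
assert Path(uri.replace("sqlite:///", "")).name == "database.db"

# A non-sqlite URI is left unchanged by the strip, which the script
# detects and reports as an invalid SQLALCHEMY_DATABASE_URI.
```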
diff --git a/scripts/install_from_scratch.sh b/scripts/install_from_scratch.sh
index 5155ffbd..660236d9 100755
--- a/scripts/install_from_scratch.sh
+++ b/scripts/install_from_scratch.sh
@@ -101,7 +101,7 @@ To create some sample data for our proofing interface, try the commands below.
In these commands, arguments in <angle brackets> must be supplied by you:
./cli.py create-user
- ./cli.py add-role admin
+    ./cli.py add-role --username <username> --role admin
./cli.py create-project
To start the development server, run the following commands:
diff --git a/scripts/start-database.sh b/scripts/start-database.sh
deleted file mode 100755
index 521b33f4..00000000
--- a/scripts/start-database.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env bash
-
-# Entrypoint for running the devserver from within Docker. Before running
-# "make devserver", this file runs database setup / initialization scripts if a
-# database has not already been created.
-
-
-set -e
-
-. /venv/bin/activate
-
-export PATH=$PATH:/venv/bin/
-export PYTHONPATH=$PYTHONPATH:/app
-
-apt-get -qq update && apt-get -qq install -y git > /dev/null
-python scripts/setup_database.py
\ No newline at end of file
diff --git a/scripts/start-server.sh b/scripts/start-server.sh
deleted file mode 100755
index 8c7ed572..00000000
--- a/scripts/start-server.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-
-# Entrypoint for running the devserver from within Docker. Before running
-# "make devserver", this file runs database setup / initialization scripts if a
-# database has not already been created.
-
-
-set -e
-
-. /venv/bin/activate
-
-export PATH=$PATH:/venv/bin/
-
-# Extract file path from sqlite:///[file path]
-
-DB_FILE_PATH="${SQLALCHEMY_DATABASE_URI/sqlite:\/\/\//}"
-
-echo "Loading Database from $DB_FILE_PATH"
-
-# Update to the latest migration.
-python -m ambuda.seed.lookup
-
-/venv/bin/alembic upgrade head
-
-# Run the devserver, and live reload our CSS and JS.
-# "npx concurrently" does not work on Docker, but ./node_modules/.bin/concurrently does.
-# We also need to add "--host=0.0.0.0" to "flask run" to allow the host to access the
-# website that is running from the Docker container.
-echo "Flask start from /venv/bin/flask with 0.0.0.0 on port 5000"
-./node_modules/.bin/concurrently "/venv/bin/flask run -h 0.0.0.0 -p 5000"
-
diff --git a/scripts/start-celery.sh b/scripts/start_celery.sh
similarity index 100%
rename from scripts/start-celery.sh
rename to scripts/start_celery.sh
diff --git a/scripts/start_server.sh b/scripts/start_server.sh
new file mode 100755
index 00000000..285a6ab4
--- /dev/null
+++ b/scripts/start_server.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Docker entrypoint.
+set -e
+
+# Switch to python venv
+. /venv/bin/activate
+# Set PATH
+export PATH=$PATH:/venv/bin/
+
+echo "[$FLASK_ENV] Flask start from /venv/bin/flask with 0.0.0.0 on port 5000"
+if [ "$FLASK_ENV" == "development" ]
+then
+    # Start the Flask server in development mode and live-reload CSS and JS
+    # changes. Docker Compose mounts the host's ambuda/static directory.
+ ./node_modules/.bin/concurrently "/venv/bin/flask run -h 0.0.0.0 -p 5000" "npx tailwindcss -i /app/ambuda/static/css/style.css -o /app/ambuda/static/gen/style.css --watch" "npx esbuild /app/ambuda/static/js/main.js --outfile=/app/ambuda/static/gen/main.js --bundle --watch"
+else
+    # Build, staging, and production take this route and serve the static files baked into the container.
+ ./node_modules/.bin/concurrently "/venv/bin/flask run -h 0.0.0.0 -p 5000"
+fi
\ No newline at end of file