Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump fabric from 2.7.0 to 3.1.0 #112

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion ambuda/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,14 @@ class Locale:
"saundaranandam",
"hamsadutam",
],
"upanishat": ["shivopanishat"],
"upanishat": [
"shivopanishat",
"isa"
],
"anye": [
"bodhicaryavatara",
"catuhshloki",

],
}

Expand Down
59 changes: 3 additions & 56 deletions ambuda/seed/texts/gretil.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,9 @@

import logging
import subprocess
from dataclasses import dataclass
from pathlib import Path

from sqlalchemy.orm import Session

import ambuda.database as db
from ambuda.seed.utils.data_utils import create_db
from ambuda.utils.tei_parser import Document, parse_document


@dataclass
class Spec:
    """Describe one text to be seeded into the database."""

    # Identifier used to look up and create the text record.
    slug: str
    # Title stored on the created text record.
    title: str
    # File name of the TEI source, joined onto the TEI data directory.
    filename: str

from ambuda.seed.utils.data_utils import Spec, add_document, create_db

REPO = "https://github.com/ambuda-org/gretil.git"
PROJECT_DIR = Path(__file__).resolve().parents[3]
Expand Down Expand Up @@ -62,47 +49,6 @@ def fetch_latest_data():
subprocess.call("git reset --hard origin/main", shell=True, cwd=DATA_DIR)


def _create_new_text(session, spec: Spec, document: Document):
    """Store `document` as a new text with its sections and blocks.

    One text row is created from `spec` and the document header, then one
    section row per parsed section and one block row per parsed block.
    Blocks are numbered consecutively from 1 across the whole text, not
    per section. Everything is committed once at the end.
    """
    new_text = db.Text(slug=spec.slug, title=spec.title, header=document.header)
    session.add(new_text)
    # Flush first: the text id is read below when creating child rows.
    session.flush()

    position = 1
    for parsed_section in document.sections:
        # The section slug is used for both the slug and the title.
        section_row = db.TextSection(
            text_id=new_text.id,
            slug=parsed_section.slug,
            title=parsed_section.slug,
        )
        session.add(section_row)
        # Flush again: the section id is read for each block below.
        session.flush()

        for parsed_block in parsed_section.blocks:
            session.add(
                db.TextBlock(
                    text_id=new_text.id,
                    section_id=section_row.id,
                    slug=parsed_block.slug,
                    xml=parsed_block.blob,
                    n=position,
                )
            )
            position += 1

    session.commit()


def add_document(engine, spec: Spec):
    """Create the text described by `spec` unless its slug already exists.

    The TEI source is read from DATA_DIR / "1_sanskr" / "tei" / spec.filename.
    Existing texts are left untouched and only reported as skipped.
    """
    document_path = DATA_DIR / "1_sanskr" / "tei" / spec.filename

    with Session(engine) as session:
        existing = session.query(db.Text).filter_by(slug=spec.slug).first()
        if not existing:
            document = parse_document(document_path)
            _create_new_text(session, spec, document)
            log(f"- Created {spec.slug}")
            return

        # FIXME: update existing texts in-place so that we can capture
        # changes. As a workaround for now, we can delete then re-create.
        log(f"- Skipped {spec.slug} (already exists)")


def run():
logging.getLogger().setLevel(0)
log("Downloading the latest data ...")
Expand All @@ -114,7 +60,8 @@ def run():
engine = create_db()

for spec in ALLOW:
add_document(engine, spec)
document_path = DATA_DIR / "1_sanskr" / "tei" / spec.filename
add_document(engine, spec, document_path)
except Exception as ex:
raise Exception("Error: Failed to get latest from GRETIL.") from ex

Expand Down
59 changes: 56 additions & 3 deletions ambuda/seed/utils/data_utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,25 @@
import hashlib
import io
import logging
import os
import zipfile

import requests
from sqlalchemy import create_engine
from dataclasses import dataclass

import config
import requests
from ambuda import database as db
from ambuda.seed.utils.itihasa_utils import CACHE_DIR
from ambuda.utils.tei_parser import Document, parse_document
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

LOG = logging.getLogger(__name__)

@dataclass
class Spec:
    """Describe one text to be added to the database."""

    # Identifier used to look up and create the text record.
    slug: str
    # Title stored on the created text record.
    title: str
    # File name (or path) of the source document, as supplied by the caller.
    filename: str


def fetch_text(url: str, read_from_cache: bool = True) -> str:
Expand Down Expand Up @@ -83,3 +94,45 @@ def create_db():

db.Base.metadata.create_all(engine)
return engine


def _create_new_text(session, spec: Spec, document: Document):
    """Create a new text in the database from a parsed document.

    Adds one text row built from `spec` and the document header, one
    section row per parsed section, and one block row per parsed block.
    Block numbers start at 1 and keep counting across sections. A single
    commit is issued after all rows have been added.
    """
    text_row = db.Text(slug=spec.slug, title=spec.title, header=document.header)
    session.add(text_row)
    # Flush first: the text id is read below when creating child rows.
    session.flush()

    block_number = 1
    for parsed_section in document.sections:
        # The section slug is used for both the slug and the title.
        section_row = db.TextSection(
            text_id=text_row.id,
            slug=parsed_section.slug,
            title=parsed_section.slug,
        )
        session.add(section_row)
        # Flush again: the section id is read for each block below.
        session.flush()

        for parsed_block in parsed_section.blocks:
            session.add(
                db.TextBlock(
                    text_id=text_row.id,
                    section_id=section_row.id,
                    slug=parsed_block.slug,
                    xml=parsed_block.blob,
                    n=block_number,
                )
            )
            block_number += 1

    session.commit()


def add_document(engine, spec: Spec, document_path):
    """Add a document to the database unless its slug already exists.

    `document_path` is handed to `parse_document` only when no text with
    `spec.slug` is found; existing texts are left untouched and logged as
    skipped.
    """
    with Session(engine) as session:
        existing = session.query(db.Text).filter_by(slug=spec.slug).first()
        if not existing:
            document = parse_document(document_path)
            _create_new_text(session, spec, document)
            LOG.info(f"- Created {spec.slug}")
            return

        # FIXME: update existing texts in-place so that we can capture
        # changes. As a workaround for now, we can delete then re-create.
        LOG.info(f"- Skipped {spec.slug} (already exists)")
12 changes: 11 additions & 1 deletion cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import ambuda
from ambuda import database as db
from ambuda import queries as q
from ambuda.seed.utils.data_utils import create_db
from ambuda.seed.utils.data_utils import Spec, add_document, create_db
from ambuda.tasks.projects import create_project_inner
from ambuda.tasks.utils import LocalTaskStatus

Expand Down Expand Up @@ -107,5 +107,15 @@ def create_project(title, pdf_path):
)


@cli.command()
@click.option("--title", required=True, help="title of the new text")
@click.option("--slug", required=True, help="slug of the new text")
@click.option("--tei-path", required=True, help="path to the source TEI-XML file")
def publish_text(slug, title, tei_path):
    """Publish a proofread text from a TEI-XML."""
    # All three options are required: Spec declares each field as `str`, so
    # an omitted option (None) would only fail later and less clearly.
    # The TEI path is stored as the spec's filename and is also passed
    # separately as the document to parse.
    spec = Spec(slug, title, tei_path)
    # NOTE(review): `engine` is not defined in the visible part of this
    # file -- presumably a module-level engine; confirm.
    add_document(engine, spec, tei_path)


# Run the command group when this file is executed as a script.
if __name__ == "__main__":
    cli()
54 changes: 25 additions & 29 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ celery = "5.2.7"
click = "8.1.3"
conllu = "4.5.2"
email-validator = "1.2.1"
fabric = "2.7.0"
fabric = "3.1.0"
Flask = "2.1.2"
Flask-Admin = "1.6.0"
Flask-Babel = "3.0.1"
Expand Down