kvchitrapu · dependabot · Jun 5, 2023 · Jun 5, 2023
diff --git a/ambuda/consts.py b/ambuda/consts.py
@@ -55,10 +55,14 @@ class Locale:
         "saundaranandam",
         "hamsadutam",
     ],
-    "upanishat": ["shivopanishat"],
+    "upanishat": [
+        "shivopanishat", 
+        "isa"
+    ],
     "anye": [
         "bodhicaryavatara",
         "catuhshloki",
+
     ],
 }
 

diff --git a/ambuda/seed/texts/gretil.py b/ambuda/seed/texts/gretil.py
@@ -2,22 +2,9 @@
 
 import logging
 import subprocess
-from dataclasses import dataclass
 from pathlib import Path
 
-from sqlalchemy.orm import Session
-
-import ambuda.database as db
-from ambuda.seed.utils.data_utils import create_db
-from ambuda.utils.tei_parser import Document, parse_document
-
-
-@dataclass
-class Spec:
-    slug: str
-    title: str
-    filename: str
-
+from ambuda.seed.utils.data_utils import Spec, add_document, create_db
 
 REPO = "https://github.com/ambuda-org/gretil.git"
 PROJECT_DIR = Path(__file__).resolve().parents[3]
@@ -62,47 +49,6 @@ def fetch_latest_data():
     subprocess.call("git reset --hard origin/main", shell=True, cwd=DATA_DIR)
 
 
-def _create_new_text(session, spec: Spec, document: Document):
-    text = db.Text(slug=spec.slug, title=spec.title, header=document.header)
-    session.add(text)
-    session.flush()
-
-    n = 1
-    for section in document.sections:
-        db_section = db.TextSection(
-            text_id=text.id, slug=section.slug, title=section.slug
-        )
-        session.add(db_section)
-        session.flush()
-
-        for block in section.blocks:
-            db_block = db.TextBlock(
-                text_id=text.id,
-                section_id=db_section.id,
-                slug=block.slug,
-                xml=block.blob,
-                n=n,
-            )
-            session.add(db_block)
-            n += 1
-
-    session.commit()
-
-
-def add_document(engine, spec: Spec):
-    document_path = DATA_DIR / "1_sanskr" / "tei" / spec.filename
-
-    with Session(engine) as session:
-        if session.query(db.Text).filter_by(slug=spec.slug).first():
-            # FIXME: update existing texts in-place so that we can capture
-            # changes. As a workaround for now, we can delete then re-create.
-            log(f"- Skipped {spec.slug} (already exists)")
-        else:
-            document = parse_document(document_path)
-            _create_new_text(session, spec, document)
-            log(f"- Created {spec.slug}")
-
-
 def run():
     logging.getLogger().setLevel(0)
     log("Downloading the latest data ...")
@@ -114,7 +60,8 @@ def run():
         engine = create_db()
 
         for spec in ALLOW:
-            add_document(engine, spec)
+            document_path = DATA_DIR / "1_sanskr" / "tei" / spec.filename
+            add_document(engine, spec, document_path)
     except Exception as ex:
         raise Exception("Error: Failed to get latest from GRETIL.") from ex
 

diff --git a/ambuda/seed/utils/data_utils.py b/ambuda/seed/utils/data_utils.py
@@ -1,14 +1,25 @@
 import hashlib
 import io
+import logging
 import os
 import zipfile
-
-import requests
-from sqlalchemy import create_engine
+from dataclasses import dataclass
 
 import config
+import requests
 from ambuda import database as db
 from ambuda.seed.utils.itihasa_utils import CACHE_DIR
+from ambuda.utils.tei_parser import Document, parse_document
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+
+LOG = logging.getLogger(__name__)
+
+@dataclass
+class Spec:
+    slug: str  # unique text slug; add_document skips slugs already stored
+    title: str  # title stored on the created db.Text
+    filename: str  # source TEI file name or path; resolved by the caller
 
 
 def fetch_text(url: str, read_from_cache: bool = True) -> str:
@@ -83,3 +94,45 @@ def create_db():
 
     db.Base.metadata.create_all(engine)
     return engine
+
+
+def _create_new_text(session, spec: Spec, document: Document):
+    """Store `document` as a new text with its sections and blocks, then commit."""
+    new_text = db.Text(slug=spec.slug, title=spec.title, header=document.header)
+    session.add(new_text)
+    # Flush before `new_text.id` is read below (presumably this assigns the id).
+    session.flush()
+    block_index = 0  # `n` is 1-based and keeps counting across sections.
+    for parsed_section in document.sections:
+        section_row = db.TextSection(
+            text_id=new_text.id, slug=parsed_section.slug, title=parsed_section.slug
+        )
+        session.add(section_row)
+        # Flush again before `section_row.id` is read by the blocks below.
+        session.flush()
+        for parsed_block in parsed_section.blocks:
+            block_index += 1
+            session.add(
+                db.TextBlock(
+                    text_id=new_text.id,
+                    section_id=section_row.id,
+                    slug=parsed_block.slug,
+                    xml=parsed_block.blob,
+                    n=block_index,
+                )
+            )
+    session.commit()
+
+
+def add_document(engine, spec: Spec, document_path):
+    """Add the document at `document_path` to the database if `spec.slug` is new."""
+    with Session(engine) as session:
+        if session.query(db.Text).filter_by(slug=spec.slug).first():
+            # FIXME: update existing texts in-place so that we can capture
+            # changes. As a workaround for now, we can delete then re-create.
+            LOG.info("- Skipped %s (already exists)", spec.slug)
+            return
+        # Parse only when the text is actually going to be created.
+        document = parse_document(document_path)
+        _create_new_text(session, spec, document)
+        LOG.info("- Created %s", spec.slug)
diff --git a/cli.py b/cli.py
@@ -11,7 +11,7 @@
 import ambuda
 from ambuda import database as db
 from ambuda import queries as q
-from ambuda.seed.utils.data_utils import create_db
+from ambuda.seed.utils.data_utils import Spec, add_document, create_db
 from ambuda.tasks.projects import create_project_inner
 from ambuda.tasks.utils import LocalTaskStatus
 
@@ -107,5 +107,15 @@ def create_project(title, pdf_path):
         )
 
 
+@cli.command()
+@click.option("--title", help="title of the new text")
+@click.option("--slug", help="slug of the new text")
+@click.option("--tei-path", help="path to the source TEI-XML file")
+def publish_text(slug, title, tei_path):
+    """Publish a proofread text from a TEI-XML."""
+    spec = Spec(slug, title, tei_path)  # Spec.filename holds the TEI path itself
+    add_document(engine, spec, tei_path)
+
+
 if __name__ == "__main__":
     cli()
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,7 +22,7 @@ celery = "5.2.7"
 click = "8.1.3"
 conllu = "4.5.2"
 email-validator = "1.2.1"
-fabric = "2.7.0"
+fabric = "3.1.0"
 Flask = "2.1.2"
 Flask-Admin = "1.6.0"
 Flask-Babel = "3.0.1"