Skip to content

Commit

Permalink
Fix syncing area citations in translations
Browse files Browse the repository at this point in the history
  • Loading branch information
saviit committed Aug 30, 2023
1 parent ffff7d5 commit 623c641
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 37 deletions.
37 changes: 19 additions & 18 deletions timApp/document/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,17 @@ def apply_citation(new_doc: DocInfo, src_doc: Document):


def find_lang_matching_cite_source(
rd: str, rp: str, tr_doc: Document
) -> tuple[Document, str] | tuple[None, None]:
tr_doc: Document, rd: str, rp: str | None = None, ra: str | None = None
) -> tuple[Document | None, str | None]:
"""
Find the document and paragraph id in the cited source's Translation whose language matches
the Translation we are currently creating.
Note that the return value may be (None, None).
Note that some elements of the return value may be None.
:param rd: source document id
:param rp: source document paragraph id
:param ra: source document paragraph area name
:param tr_doc: Translation that is citing the source document
:return: the matched source Translation and paragraph id as a tuple, or (None, None).
:return: the matched source Translation and paragraph id as a tuple.
"""
matched_doc = None
par_id = None
Expand All @@ -65,7 +66,9 @@ def find_lang_matching_cite_source(
matched_doc = source_tr
# Find matching paragraph hash for translated citation par
for p in source_tr.document:
if p.get_attr("rp") == rp:
if (rp and p.get_attr("rp") == rp) or (
ra and p.get_attr("area") == ra
):
par_id = p.id
break
break
Expand All @@ -87,24 +90,23 @@ def add_reference_pars(
# If one is not found, the original citation should be used.
citation_doc_id = par.get_attr("rd")
citation_par_id = par.get_attr("rp")
area_citation = par.get_attr("ra")

if citation_doc_id:
matched_doc, citation_par_id = find_lang_matching_cite_source(
citation_doc_id, citation_par_id, doc
doc, citation_doc_id, citation_par_id
)
if not matched_doc or not citation_par_id:
# cited document or paragraph doesn't exist, so just use the original citation
matched_doc = original_doc
citation_par_id = par.id

# can also be an area reference
area_citation = par.get_attr("ra")

if area_citation:
if area_citation and matched_doc:
ref_par = par.create_area_reference(
doc, area_citation, r="tr", rd=matched_doc.doc_id
)
else:
if not matched_doc or not citation_par_id:
# cited document or paragraph doesn't exist, so just use the original citation
matched_doc = original_doc
citation_par_id = par.id

from timApp.document.docparagraph import create_reference

ref_par = create_reference(
Expand All @@ -120,10 +122,9 @@ def add_reference_pars(
# For area citations to work correctly in translations,
# we need to add explicit area/area_end tags to translated
# area paragraphs
is_translated_par = r == "tr"
area_start = par.get_attr("area")
area_end = par.get_attr("area_end")
if is_translated_par:
if r == "tr":
area_start = par.get_attr("area")
area_end = par.get_attr("area_end")
if area_start:
ref_par.set_attr("area", area_start)
elif area_end:
Expand Down
61 changes: 42 additions & 19 deletions timApp/document/translation/synchronize_translations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from difflib import SequenceMatcher

from timApp.document.document import Document
from timApp.document.docparagraph import create_reference
from timApp.document.docparagraph import create_reference, DocParagraph
from timApp.document.editing.documenteditresult import DocumentEditResult
from timApp.document.docinfo import DocInfo
from timApp.document.documents import find_lang_matching_cite_source
Expand Down Expand Up @@ -30,34 +30,57 @@ def update_par_content(
# behaviour be controllable to end user via a document setting? Current behaviour
# might not be desirable, e.g. if a citation should actually be in the original
# source language (one _can_ avoid this by creating another document which has no translations).
# TODO: Fix area reference citations: area citations do not currently
# have the correct 'rd' attr. Perhaps fix in 'find_lang_matching_cite_source'?
ref_par = orig.get_paragraph(par_id)
rd = ref_par.get_attr("rd", None)
rp = ref_par.get_attr("rp", None)
ra = ref_par.get_attr("ra", None)

matched_doc, rp = find_lang_matching_cite_source(rd, rp, tr_doc)
matched_doc, rp = find_lang_matching_cite_source(tr_doc, rd, rp, ra)
rd = matched_doc.id if matched_doc else None

tr_par = create_reference(
tr_doc,
doc_id=rd if rd else orig.doc_id,
par_id=rp if rp else par_id,
r="tr",
add_rd=ref_par.is_citation_par(),
)
if ra:
# Only add the area citation if it doesn't already exist,
# or replace it with a translated area citation if it was
# from the original but a corresponding translated one exists.
# TODO: in order to keep the (translated) citation up-to-date,
# we may eventually want to replace the existing area
# with the one re-created here
area_par = None
for p in tr_doc.get_paragraphs():
area_par = p if p.get_attr("ra") == ra else None
if area_par and not area_par.get_attr("rd") == rd:
# the citation points to the original, delete it
tr_doc.delete_paragraph(area_par.id)

if orig.get_paragraph(par_id).is_setting():
tr_par.set_attr("settings", "")
tr_doc.insert_paragraph_obj(
tr_par,
insert_before_id=tr_ids[before_i] if before_i < len(tr_ids) else None,
)
tr_par = DocParagraph.create_area_reference(
tr_doc,
area_name=ra,
r="tr",
rd=matched_doc.id,
)
else:
tr_par = None
else:
tr_par = create_reference(
tr_doc,
doc_id=rd if rd else orig.doc_id,
par_id=rp if rp else par_id,
r="tr",
add_rd=ref_par.is_citation_par(),
)

if tr_par:
if orig.get_paragraph(par_id).is_setting():
tr_par.set_attr("settings", "")
tr_doc.insert_paragraph_obj(
tr_par,
insert_before_id=tr_ids[before_i] if before_i < len(tr_ids) else None,
)


def synchronize_translations(doc: DocInfo, edit_result: DocumentEditResult):
"""Synchronizes the translations of a document by adding missing paragraphs to the translations and deleting non-existing
paragraphs.
"""Synchronizes the translations of a document by adding missing paragraphs to the translations
and deleting non-existing paragraphs.
:param edit_result: The changes that were made to the document.
:param doc: The document that was edited and whose translations need to be synchronized.
Expand Down

0 comments on commit 623c641

Please sign in to comment.