Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update site to support manual translation via Rosetta portal #280

Merged
merged 19 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 26 additions & 56 deletions backend/app/api_views.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import json
import random
from math import ceil

from rest_framework import status
from rest_framework.decorators import api_view
from rest_framework.response import Response

from django.db.models import Q, Prefetch
from django.core.paginator import Paginator
from django.views.decorators.csrf import csrf_exempt
from django.utils.translation import gettext_lazy as _

from app.view_helpers import (
get_map_squares_by_arondissement,
get_arrondissement_geojson,
tag_confidence
tag_confidence, tag_helper
)

from .models import (
Expand All @@ -36,6 +35,8 @@
CorpusAnalysisResultsSerializer
)

from .translation_db import TRANSLATIONS, translate_tag


# TODO(ra): See if we can move this elsewhere.
PHOTOGRAPHER_SEARCH_ORDER_BY = [
Expand All @@ -48,52 +49,6 @@
]


def tag_helper(tag_name, page=None):
    """
    Find the photos whose 'yolo_model' analysis contains the label ``tag_name``.

    :param tag_name: label looked up in each parsed result's ``labels``
    :param page: optional 1-based page number; when falsy, every matching
        result is returned instead of a single page
    :return: a tuple ``(photos, result_count, page_count)``. ``photos`` holds
        the ``photo`` of each selected result, ordered by the highest
        confidence recorded for ``tag_name`` (descending); ``result_count`` is
        the total number of matching results and ``page_count`` the number of
        pages at 20 results per page. When nothing matches the tuple is
        ``([], 0, 0)`` so callers can always unpack three values.
    """
    results_per_page = 20

    # Parse each analysis result once, keeping the best confidence for the tag
    # next to the result so the ranking below needs no second parse.
    matches = []
    for result in PhotoAnalysisResult.objects.filter(name='yolo_model'):
        data = result.parsed_result()
        if tag_name not in data['labels']:
            continue
        # An image may have several boxes labelled tag_name; keep the greatest
        # confidence, never going below 0.
        confidence = max(
            [0] + [
                box['confidence']
                for box in data['boxes']
                if box['label'] == tag_name
            ]
        )
        matches.append((result, confidence))

    # TODO(ra) Fix the results per page math... it looks like it's stepping
    # through src photo indexes
    result_count = len(matches)
    page_count = ceil(result_count / results_per_page)

    if page:
        first_result = results_per_page * (page - 1)
        matches = matches[first_result:first_result + results_per_page]

    # Sort the selected results by confidence, highest first.
    ranked = sorted(matches, key=lambda match: match[1], reverse=True)
    return [result.photo for result, _confidence in ranked], result_count, page_count


@api_view(['GET'])
def photo(request, map_square_number, folder_number, photo_number):
"""
Expand All @@ -105,6 +60,14 @@ def photo(request, map_square_number, folder_number, photo_number):
return Response(serializer.data)


@api_view(['GET'])
def translation(request, language_code):
    """
    API endpoint that serves the text translation dictionary.

    The whole TRANSLATIONS object is returned as the response payload;
    ``language_code`` is accepted from the route but is not used here.
    """
    translations = TRANSLATIONS
    return Response(translations)


@api_view(['GET'])
def previous_next_photos(request, map_square_number, folder_number, photo_number):
"""
Expand Down Expand Up @@ -159,7 +122,10 @@ def all_map_squares(request):
"""
map_square_obj = MapSquare.objects.all().prefetch_related("photo_set")
serializer = MapSquareSerializerWithoutPhotos(map_square_obj, many=True)
return Response(serializer.data)
return Response({
map_square["number"]: map_square
for map_square in serializer.data
})


@api_view(['GET'])
Expand Down Expand Up @@ -346,6 +312,7 @@ def get_photos_by_analysis(request, analysis_name, object_name=None):
serializer = PhotoSerializer(sorted_photo_obj, many=True)
return Response(serializer.data)


@api_view(['GET'])
def get_images_with_text(request):
"""
Expand Down Expand Up @@ -381,7 +348,8 @@ def get_photos_by_tag(request, tag_name):
"""
API endpoint to get all photos associated with a tag (specified by tag_name)
"""
sorted_photo_obj, _, _ = tag_helper(tag_name)
en_tag = translate_tag(tag_name)
sorted_photo_obj, _, _ = tag_helper(en_tag)
serializer = PhotoSerializer(sorted_photo_obj, many=True)
return Response(serializer.data)

Expand All @@ -395,6 +363,7 @@ def photo_tag_helper(map_square_number, folder_number, photo_number):
else:
return None


@api_view(['GET'])
def get_random_photos(request):

Expand All @@ -404,6 +373,7 @@ def get_random_photos(request):
serializer = SimplePhotoSerializerForCollage(random_photos, many=True)
return Response(serializer.data)


@api_view(['GET'])
def get_photo_tags(request, map_square_number, folder_number, photo_number):
"""
Expand Down Expand Up @@ -476,19 +446,19 @@ def explore(request):
API endpoint for the explore view, which gives users a filtered view
to all of the photos in the collection
"""
tag = request.data.get('selectedTag')
ALL = _("ALL")

tag = translate_tag(
request.data.get('selectedTag'), default=ALL
)
page = int(request.data.get('page', 1))
page_size = int(request.data.get('pageSize', 10))

ALL = 'All'

query = Q()

# Filter by tags
if tag != ALL:
query |= Q(analyses__name='yolo_model', analyses__result__icontains=tag)

if tag != ALL:
prefetch = Prefetch('analyses', queryset=PhotoAnalysisResult.objects.filter(name='yolo_model'))
photos = Photo.objects.filter(query).prefetch_related(prefetch).distinct()
photos_with_analysis = [
Expand Down
119 changes: 119 additions & 0 deletions backend/app/management/commands/compile_translations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
Django management command compile_translations
"""

import io
import os
import shutil

from tqdm import tqdm
from translate_po.main import (
recognize_po_file, read_lines,
translate as translate_line
)

from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management import call_command

def save_lines(file: str, lines: list):
    """ Write in-memory .po entries out to a file.

    The file starts with an empty ``msgid``/``msgstr`` header pair, followed
    by one quoted ``"key:value\\n"`` line per item of ``lines.metadata``, a
    blank line, and then the text of every entry in ``lines``.

    :parameter file: path of the file to (over)write, encoded as UTF-8
    :parameter lines: iterable of entries exposing ``__unicode__()``; it must
        also carry a ``metadata`` mapping
    """
    header = '\nmsgid ""\nmsgstr ""\n'
    with io.open(file, 'w', encoding='utf8') as po_file:
        po_file.write(header)
        for key, value in lines.metadata.items():
            po_file.write(f'"{key}:{value}\\n"\n')
        # Blank line separating the metadata header from the entries.
        po_file.write('\n')
        for entry in lines:
            po_file.write(entry.__unicode__())

def translate(fro, to, src_dir, dest_dir, fuzzy=False):
    """
    Auto-translate the untranslated entries of the .po files in ``src_dir``.

    Every file in ``src_dir`` accepted by ``recognize_po_file`` is read, each
    entry that is not already translated, obsolete or fuzzy gets its ``msgid``
    translated line by line, and the result is saved under the same file name
    in ``dest_dir``.

    :param fro: language code to translate from
    :param to: language code to translate to
    :param src_dir: directory the .po files are read from
    :param dest_dir: directory the translated .po files are written to
        (may be the same as ``src_dir`` to update the files in place)
    :param fuzzy: when true, flag every auto-translated entry as "fuzzy"
    """
    # Work around using parser-based translate_po.main.run function
    # due to conflict with Django BaseCommand parser
    class Arguments:
        def __init__(self, **kwargs):
            for attr, val in kwargs.items():
                setattr(self, attr, val)

    arguments = Arguments(fro=fro, to=to, src=src_dir, dest=dest_dir)

    for file in os.listdir(src_dir):
        if not recognize_po_file(file):
            continue
        # Read from the source directory and write to the destination one.
        old_file = os.path.join(arguments.src, file)
        new_file = os.path.join(arguments.dest, file)

        print(f"Translating {old_file}...")
        entries = read_lines(old_file)
        for entry in tqdm(entries):
            # Leave existing, obsolete and to-be-reviewed entries untouched.
            if entry.translated() or entry.obsolete or entry.fuzzy:
                continue
            # Translate each line separately so the msgid's line breaks are
            # kept; blank parts are passed through unchanged.
            line_parts = entry.msgid.split('\n')
            translated_line_parts = [
                translate_line(line_part, arguments)
                if line_part.strip(" ") else line_part
                for line_part in line_parts
            ]
            entry.msgstr = '\n'.join(translated_line_parts)
            if fuzzy:
                entry.flags.append("fuzzy")

        save_lines(new_file, entries)


class Command(BaseCommand):
    """
    Custom django-admin command that builds the project's translations.

    It prepares one message directory per locale path and language, runs
    ``makemessages``, optionally auto-translates the resulting .po files and
    finally runs ``compilemessages``.

    https://testdriven.io/blog/multiple-languages-in-django/
    """

    help = "Custom django-admin command to compile translations in translation_db.py"

    def add_arguments(self, parser):
        """Register the command-line options of this command."""
        parser.add_argument("--no_auto_trans", action="store_true")
        parser.add_argument("--rebuild", action="store_true")
        parser.add_argument(
            "--main_lang", type=str, action="store", default="en"
        )
        parser.add_argument(
            "--mark_fuzzy",
            action="store_true",
            help="Mark auto-translations as fuzzy",
        )

    def handle(self, *args, **options):
        """Create the locale folders, extract, translate and compile messages."""
        skip_auto_translation: bool = options.get("no_auto_trans")
        source_language: str = options.get("main_lang")
        wipe_existing: bool = options.get("rebuild")
        flag_fuzzy: bool = options.get("mark_fuzzy")

        # Every (locale path, language code) combination from the settings.
        locale_targets = [
            (locale_path, language_code)
            for locale_path in settings.LOCALE_PATHS
            for language_code, _language_name in settings.LANGUAGES
        ]

        # Make locale paths, starting from scratch when --rebuild is given.
        for locale_path, language_code in locale_targets:
            messages_path = os.path.join(locale_path, language_code)
            if wipe_existing and os.path.exists(messages_path):
                shutil.rmtree(messages_path)
            os.makedirs(messages_path, exist_ok=True)

        call_command("makemessages", all=True, ignore=["env"])

        if not skip_auto_translation:
            for locale_path, language_code in locale_targets:
                # The main language is the translation source; nothing to do.
                if language_code != source_language:
                    po_dir = os.path.join(
                        locale_path, language_code, "LC_MESSAGES"
                    )
                    translate(
                        fro=source_language,
                        to=language_code,
                        src_dir=po_dir,
                        dest_dir=po_dir,
                        fuzzy=flag_fuzzy,
                    )

        call_command("compilemessages", ignore=["env"])
Loading
Loading