Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

do not keep all annotations in RAM on backup #9099

Merged
merged 5 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions cvat/apps/engine/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#
# SPDX-License-Identifier: MIT

+import codecs
 import io
+import json
 import mimetypes
 import os
 import re
Expand All @@ -21,6 +23,7 @@
from zipfile import ZipFile

import django_rq
+import json_stream
SpecLad marked this conversation as resolved.
Show resolved Hide resolved
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
Expand Down Expand Up @@ -596,22 +599,21 @@ def serialize_data():
         target_manifest_file = os.path.join(target_dir, self.MANIFEST_FILENAME) if target_dir else self.MANIFEST_FILENAME
         zip_object.writestr(target_manifest_file, data=JSONRenderer().render(task))

-    def _write_annotations(self, zip_object, target_dir=None):
+    def _write_annotations(self, zip_object: ZipFile, target_dir: Optional[str] = None) -> None:
+        @json_stream.streamable_list
         def serialize_annotations():
-            job_annotations = []
             db_jobs = self._get_db_jobs()
             db_job_ids = (j.id for j in db_jobs)
             for db_job_id in db_job_ids:
                 annotations = dm.task.get_job_data(db_job_id)
                 annotations_serializer = LabeledDataSerializer(data=annotations)
                 annotations_serializer.is_valid(raise_exception=True)
-                job_annotations.append(self._prepare_annotations(annotations_serializer.data, self._label_mapping))
-
-            return job_annotations
+                yield self._prepare_annotations(annotations_serializer.data, self._label_mapping)

         annotations = serialize_annotations()
         target_annotations_file = os.path.join(target_dir, self.ANNOTATIONS_FILENAME) if target_dir else self.ANNOTATIONS_FILENAME
-        zip_object.writestr(target_annotations_file, data=JSONRenderer().render(annotations))
+        with zip_object.open(target_annotations_file, 'w') as f:
+            json.dump(annotations, codecs.getwriter('utf-8')(f), separators=(',', ':'))

     def _export_task(self, zip_obj, target_dir=None):
         self._write_data(zip_obj, target_dir)
Expand Down
1 change: 1 addition & 0 deletions cvat/requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ djangorestframework>=3.15.2,<4
drf-spectacular==0.26.2
furl==2.1.0
google-cloud-storage==1.42.0
+json-stream>=2.0
lxml>=5.2.1,<6
natsort==8.0.0
numpy~=1.22.2
Expand Down
20 changes: 11 additions & 9 deletions cvat/requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# SHA1:9c45ee6ba604552349bcaf41a8f35abbc7c62ddd
+# SHA1:02cd495ccf64874404b603b505a50d84acc316cc
#
# This file is autogenerated by pip-compile-multi
# To update, run:
Expand Down Expand Up @@ -29,7 +29,7 @@ botocore==1.20.112
# s3transfer
cachetools==5.5.1
# via google-auth
-certifi==2024.12.14
+certifi==2025.1.31
# via
# clickhouse-connect
# msrest
Expand All @@ -52,7 +52,7 @@ coreschema==0.0.4
# via coreapi
crontab==1.0.1
# via rq-scheduler
-cryptography==44.0.0
+cryptography==44.0.1
# via
# azure-storage-blob
# datumaro
Expand All @@ -71,7 +71,7 @@ dj-pagination==2.5.0
# via -r cvat/requirements/base.in
dj-rest-auth[with-social]==5.0.2
# via -r cvat/requirements/base.in
-django==4.2.18
+django==4.2.19
# via
# -r cvat/requirements/base.in
# dj-rest-auth
Expand Down Expand Up @@ -119,7 +119,7 @@ easyprocess==1.1
# via pyunpack
entrypoint2==1.1
# via pyunpack
-fonttools==4.55.8
+fonttools==4.56.0
# via matplotlib
freezegun==1.5.1
# via rq-scheduler
Expand All @@ -142,7 +142,7 @@ google-crc32c==1.6.0
# via google-resumable-media
google-resumable-media==2.7.2
# via google-cloud-storage
-googleapis-common-protos==1.66.0
+googleapis-common-protos==1.67.0
# via google-api-core
h5py==3.12.1
# via datumaro
Expand Down Expand Up @@ -175,7 +175,9 @@ joblib==1.4.2
# nltk
# scikit-learn
json-stream==2.3.3
-    # via datumaro
+    # via
+    #   -r cvat/requirements/base.in
+    #   datumaro
json-stream-rs-tokenizer==0.4.27
# via json-stream
jsonschema==4.17.3
Expand All @@ -184,7 +186,7 @@ kiwisolver==1.4.7
# via matplotlib
limits==4.0.1
# via python-logstash-async
-lxml==5.3.0
+lxml==5.3.1
# via
# -r cvat/requirements/base.in
# datumaro
Expand Down Expand Up @@ -285,7 +287,7 @@ python3-openid==3.2.0
# via django-allauth
python3-saml==1.16.0
# via django-allauth
-pytz==2024.2
+pytz==2025.1
# via
# clickhouse-connect
# pandas
Expand Down
Loading