Skip to content

Commit

Permalink
do not keep all annotations in RAM on backup
Browse files Browse the repository at this point in the history
  • Loading branch information
Eldies committed Feb 12, 2025
1 parent df5678a commit 9dfbced
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions cvat/apps/engine/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# SPDX-License-Identifier: MIT

import io
import json
import mimetypes
import os
import re
Expand All @@ -21,11 +22,13 @@
from zipfile import ZipFile

import django_rq
import json_stream
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.utils import timezone
from rest_framework import serializers, status
from rest_framework.compat import SHORT_SEPARATORS
from rest_framework.exceptions import ValidationError
from rest_framework.parsers import JSONParser
from rest_framework.renderers import JSONRenderer
Expand Down Expand Up @@ -596,22 +599,24 @@ def serialize_data():
target_manifest_file = os.path.join(target_dir, self.MANIFEST_FILENAME) if target_dir else self.MANIFEST_FILENAME
zip_object.writestr(target_manifest_file, data=JSONRenderer().render(task))

def _write_annotations(self, zip_object, target_dir=None):
def _write_annotations(self, zip_object: ZipFile, target_dir: Optional[str] = None) -> None:
@json_stream.streamable_list
def serialize_annotations():
job_annotations = []
db_jobs = self._get_db_jobs()
db_job_ids = (j.id for j in db_jobs)
for db_job_id in db_job_ids:
annotations = dm.task.get_job_data(db_job_id)
annotations_serializer = LabeledDataSerializer(data=annotations)
annotations_serializer.is_valid(raise_exception=True)
job_annotations.append(self._prepare_annotations(annotations_serializer.data, self._label_mapping))

return job_annotations
yield self._prepare_annotations(annotations_serializer.data, self._label_mapping)

annotations = serialize_annotations()
target_annotations_file = os.path.join(target_dir, self.ANNOTATIONS_FILENAME) if target_dir else self.ANNOTATIONS_FILENAME
zip_object.writestr(target_annotations_file, data=JSONRenderer().render(annotations))
with TmpDirManager.get_tmp_directory() as temp_dir:
tmp_json_file = os.path.join(temp_dir, "tmp.json")
with open(tmp_json_file, 'w') as f:
json.dump(annotations, f, separators=SHORT_SEPARATORS)
zip_object.write(tmp_json_file, arcname=target_annotations_file)

def _export_task(self, zip_obj, target_dir=None):
self._write_data(zip_obj, target_dir)
Expand Down

0 comments on commit 9dfbced

Please sign in to comment.