class DatasetInstanceSerializerOptimized(serializers.ModelSerializer):
    """Serialize ``DatasetInstance`` objects using an explicit field list.

    Only the attributes named in ``Meta.fields`` are emitted, in the order
    they are listed below.
    """

    class Meta:
        model = DatasetInstance
        # NOTE(review): both "id" and "instance_id" are listed; confirm that
        # each name resolves to a field on the model.
        fields = [
            # Identity and descriptive attributes.
            "id",
            "instance_id",
            "name",
            "description",
            "dataset_type",
            # Visibility flags and ownership.
            "is_archived",
            "public_to_managers",
            "organisation_id",
            "workspace_id",
            "created_by",
            # Timestamps.
            "created_at",
            "last_updated",
            # Storage details and processing state.
            "file_format",
            "file_path",
            "size",
            "record_count",
            "status",
        ]
def list_optimized(self, request):
    """Return the datasets visible to the requesting user, with upload status.

    The base queryset depends on the caller:

    * superusers get every ``DatasetInstance``;
    * organization owners get every dataset of their organization;
    * everyone else gets the datasets of their organization that are either
      public to managers or explicitly shared with them.

    Optional query parameters:

    * ``dataset_visibility``: ``"all_public_datasets"`` (narrows to
      ``public_to_managers=True`` for non-superuser workspace managers) or
      ``"my_datasets"`` (narrows to datasets the user is attached to).
    * ``dataset_type``: exact match on ``dataset_type``.
    * ``archived_datasets``: ``"true"`` (any letter case) selects archived
      datasets; any other value selects non-archived ones.
    * ``sort_type``: ``"recently_updated"`` orders by ``last_updated``
      descending with NULLs last; otherwise results are ordered by
      ``instance_id`` ascending.

    Each serialized item is extended with ``last_upload_status``,
    ``last_upload_date``, ``last_upload_time`` and ``last_upload_result`` as
    returned by ``get_dataset_upload_status``.

    Returns:
        A 200 ``Response`` holding the list of serialized datasets, or a 400
        ``Response`` with a ``message`` key if anything raises.

    NOTE(review): no ``@action`` decorator is visible on this method; confirm
    it is routed explicitly, otherwise a DRF router will not expose it.
    """
    # Local import: the file's top-level import block is not visible here.
    import logging

    try:
        user = request.user
        params = request.query_params

        # Base queryset determination based on the user's role.
        if user.is_superuser:
            queryset = DatasetInstance.objects.all()
        else:
            queryset = DatasetInstance.objects.filter(
                organisation_id=user.organization
            )
            if user.role != User.ORGANIZATION_OWNER:
                queryset = queryset.filter(
                    Q(public_to_managers=True) | Q(users__id=user.id)
                )

        # Optional visibility narrowing.
        visibility = params.get("dataset_visibility")
        if visibility == "all_public_datasets":
            if user.role == User.WORKSPACE_MANAGER and not user.is_superuser:
                queryset = queryset.filter(public_to_managers=True)
        elif visibility == "my_datasets":
            queryset = queryset.filter(users__id=user.id)

        if "dataset_type" in params:
            queryset = queryset.filter(dataset_type__exact=params["dataset_type"])

        if "archived_datasets" in params:
            # Compare case-insensitively so "True"/"TRUE" are not silently
            # treated as a request for non-archived datasets.
            wants_archived = params["archived_datasets"].lower() == "true"
            queryset = queryset.filter(is_archived=wants_archived)

        if params.get("sort_type") == "recently_updated":
            queryset = queryset.order_by(F("last_updated").desc(nulls_last=True))
        else:
            queryset = queryset.order_by(F("instance_id").asc())

        serializer = DatasetInstanceSerializerOptimized(
            queryset.distinct(), many=True
        )
        # Bind once: the loop below mutates these dicts and the same objects
        # are then returned to the client.
        payload = serializer.data

        for entry in payload:
            # Look up by "instance_id", matching what ``list`` passes to the
            # same helper.
            (
                upload_status,
                upload_date,
                upload_time,
                upload_result,
            ) = get_dataset_upload_status(entry["instance_id"])

            entry["last_upload_status"] = upload_status
            entry["last_upload_date"] = upload_date
            entry["last_upload_time"] = upload_time
            entry["last_upload_result"] = upload_result

        return Response(payload, status=status.HTTP_200_OK)
    except Exception as e:
        # NOTE(review): this reports every failure, including server-side
        # ones, as a 400 and echoes the exception text to the client. The
        # response is kept as-is for existing callers; the traceback is now
        # logged so failures are no longer invisible on the server.
        logging.getLogger(__name__).exception("list_optimized failed")
        return Response(
            {"message": "An error occurred: {}".format(str(e))},
            status=status.HTTP_400_BAD_REQUEST,
        )