validator added #116

Open · wants to merge 3 commits into dev
29 changes: 0 additions & 29 deletions .env.example

This file was deleted.

3 changes: 3 additions & 0 deletions .gitignore
@@ -1,6 +1,7 @@
config.env
all_data
vhosts
.env.example
# Ignore all json and csv files
*.json
*.csv
@@ -19,6 +20,8 @@ frontend/

# Env
.env*
.env.dev*
.env.example*
virtualenv/
myvenv/
venv/
2 changes: 1 addition & 1 deletion backend/Dockerfile
@@ -15,4 +15,4 @@ COPY . /usr/src/backend/

RUN ["chmod", "+x", "/usr/src/backend/entrypoint.sh"]

ENTRYPOINT ["sh", "/usr/src/backend/entrypoint.sh"]
ENTRYPOINT ["sh", "/usr/src/backend/entrypoint.sh"]
3 changes: 0 additions & 3 deletions backend/dataset/tests.py
@@ -1,3 +0,0 @@
from django.test import TestCase

# Create your tests here.
89 changes: 89 additions & 0 deletions backend/dataset/validatemod.py
@@ -0,0 +1,89 @@
import csv
import json
from datetime import datetime

def MultiModelInteractionValidator(file):
    """Validate a MultiModelInteraction CSV upload and return a list of error messages."""
    required_fields = {
        "instance_id",
        "parent_interaction_ids",
        "multiple_interaction_json",
        "language",
        "datetime",
    }
    optional_fields = {
        "eval_form_json",
        "no_of_turns",
        "no_of_models",
    }
    valid_languages = [
        "English", "Assamese", "Bengali", "Bodo", "Dogri", "Gujarati",
        "Hindi", "Kannada", "Kashmiri", "Konkani", "Maithili",
        "Malayalam", "Manipuri", "Marathi", "Nepali", "Odia",
        "Punjabi", "Sanskrit", "Santali", "Sindhi", "Sinhala",
        "Tamil", "Telugu", "Urdu",
    ]

    errors = []

    # Accept either an uploaded file object or the raw CSV content (str/bytes)
    content = file.read() if hasattr(file, "read") else file
    if isinstance(content, bytes):
        content = content.decode("utf-8-sig")
    reader = csv.DictReader(content.splitlines())

    for index, row in enumerate(reader, start=1):
        row_fields = set(row.keys())

        # Check required fields
        missing_required = required_fields - row_fields
        if missing_required:
            errors.append(f"Row {index} missing required fields: {missing_required}")

        # Check unexpected fields
        unexpected_fields = row_fields - (required_fields | optional_fields)
        if unexpected_fields:
            errors.append(f"Row {index} has unexpected fields: {unexpected_fields}")

        # Validate instance_id
        if not row.get("instance_id"):
            errors.append(f"Row {index} must have 'instance_id'")

        # Validate parent_interaction_ids
        parent_ids = row.get("parent_interaction_ids")
        if parent_ids and not (parent_ids.startswith("[") and parent_ids.endswith("]")):
            errors.append(f"Row {index}: parent_interaction_ids should be a JSON array format")

        # Validate multiple_interaction_json
        try:
            interactions = json.loads(row.get("multiple_interaction_json") or "")
        except json.JSONDecodeError:
            errors.append(f"Row {index}: multiple_interaction_json must be valid JSON")
            interactions = []
        if not isinstance(interactions, list):
            errors.append(f"Row {index}: multiple_interaction_json must be a JSON array")
            interactions = []
        for interaction in interactions:
            if not isinstance(interaction, dict):
                errors.append(f"Row {index}: Each interaction must be a JSON object")
                continue
            if "prompt" not in interaction:
                errors.append(f"Row {index}: Each interaction must contain 'prompt'")
            if "prompt_output_pair_id" not in interaction:
                errors.append(f"Row {index}: Each interaction must contain 'prompt_output_pair_id'")
            if "model_responses_json" not in interaction:
                errors.append(f"Row {index}: Each interaction must contain 'model_responses_json'")
            for model_response in interaction.get("model_responses_json", []):
                for response_key, response_value in model_response.items():
                    if "model_name" not in response_value:
                        errors.append(f"Row {index}: {response_key} should contain 'model_name'")
                    if "output" not in response_value:
                        errors.append(f"Row {index}: {response_key} should contain 'output'")

        # Validate language
        language = row.get("language")
        if language not in valid_languages:
            errors.append(f"Row {index}: Invalid language '{language}'")

        # Validate datetime (ISO 8601 expected)
        if row.get("datetime"):
            try:
                datetime.fromisoformat(row["datetime"])
            except ValueError:
                errors.append(f"Row {index}: Invalid datetime format for '{row['datetime']}'")

        # Validate optional integer fields
        for field in ("no_of_turns", "no_of_models"):
            if row.get(field) and not row[field].isdigit():
                errors.append(f"Row {index}: {field} should be an integer if provided")

    return errors
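
For reviewers, here is a minimal sketch of how the new validator can be exercised on its own, using a hand-built single-row CSV. The sample values and the import path (dataset.validatemod) are assumptions for illustration, not part of this PR.

import csv
import io
import json

from dataset.validatemod import MultiModelInteractionValidator  # assumed import path

# Hypothetical well-formed row; every value below is made up for the sketch.
sample_row = {
    "instance_id": "1",
    "parent_interaction_ids": "[]",
    "multiple_interaction_json": json.dumps([
        {
            "prompt": "Hello",
            "prompt_output_pair_id": "p1",
            "model_responses_json": [
                {"model_1": {"model_name": "demo-model", "output": "Hi there"}}
            ],
        }
    ]),
    "language": "Hindi",
    "datetime": "2024-05-01T10:00:00",
}

# Serialise the row to CSV in memory, then hand the encoded bytes to the validator
buffer = io.StringIO()
writer = csv.DictWriter(buffer, fieldnames=list(sample_row.keys()))
writer.writeheader()
writer.writerow(sample_row)

errors = MultiModelInteractionValidator(io.BytesIO(buffer.getvalue().encode("utf-8")))
print(errors)  # expected to be [] for a well-formed row
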
14 changes: 12 additions & 2 deletions backend/dataset/views.py
@@ -3,7 +3,7 @@
import re
from base64 import b64encode
from urllib.parse import parse_qsl

from .validatemod import MultiModelInteractionValidator
from django.apps import apps
from django.db.models import Q
from django.http import StreamingHttpResponse, JsonResponse, HttpResponse
@@ -382,7 +382,17 @@ def upload(self, request, pk):
},
status=status.HTTP_400_BAD_REQUEST,
)


# Run the MultiModelInteraction validator for that dataset type only
# (adjust the identifier below if the project uses a different type name)
if dataset_type == "MultiModelInteraction":
    validation_errors = MultiModelInteractionValidator(dataset_string)
    if validation_errors:
        return Response(
            {"message": "Validation failed", "errors": validation_errors},
            status=status.HTTP_400_BAD_REQUEST,
        )

# Upload the dataset to the dataset instance
upload_data_to_data_instance.delay(
pk=pk,
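
To show the error path that the new branch in upload() surfaces to clients, here is a hedged sketch that feeds the validator a deliberately malformed row; the row contents are invented and the import path is the same assumption as above. Whatever messages the validator returns end up in the "errors" list of the 400 response.

import io

from dataset.validatemod import MultiModelInteractionValidator  # assumed import path

# One row that violates several rules at once (all values are made up)
bad_csv = (
    "instance_id,parent_interaction_ids,multiple_interaction_json,language,datetime\n"
    "1,not-a-list,{bad json,Klingon,yesterday\n"
)

errors = MultiModelInteractionValidator(io.BytesIO(bad_csv.encode("utf-8")))
for message in errors:
    print(message)
# Expected complaints: parent_interaction_ids is not a JSON array,
# multiple_interaction_json is not valid JSON, 'Klingon' is not a supported
# language, and 'yesterday' is not an ISO datetime.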