Skip to content

Commit

Permalink
feat(file): write checksum of file in hook view
Browse files Browse the repository at this point in the history
  • Loading branch information
anehx authored Nov 10, 2023
1 parent 99c4851 commit 0011c81
Show file tree
Hide file tree
Showing 10 changed files with 169 additions and 29 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ A list of configuration options which you need
- Check the docker-compose file for an example on how to set up generation with s3 hooks
- `ALEXANDRIA_THUMBNAIL_WIDTH`: Width of generated thumbnails
- `ALEXANDRIA_THUMBNAIL_HEIGHT`: Height of generated thumbnails
- `ALEXANDRIA_ENABLE_CHECKSUM`: Set to `false` to disable file checksums. Checksums are calculated after upload to allow later verification (not implemented in Alexandria)

For development, you can also set the following environment variables to help you:

Expand Down
19 changes: 19 additions & 0 deletions alexandria/core/migrations/0009_file_checksum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 3.2.22 on 2023-11-09 14:58

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the optional ``checksum`` column to the ``file`` model."""

    # Must run after the migration that introduced the document date.
    dependencies = [("alexandria_core", "0008_document_date")]

    operations = [
        migrations.AddField(
            model_name="file",
            name="checksum",
            # Nullable and blank-able: no checksum value is supplied for
            # rows that already exist when this column is added.
            field=models.CharField(
                verbose_name="checksum",
                max_length=255,
                null=True,
                blank=True,
            ),
        ),
    ]
1 change: 1 addition & 0 deletions alexandria/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ class File(UUIDModel):
document = models.ForeignKey(
Document, on_delete=models.CASCADE, related_name="files"
)
checksum = models.CharField(_("checksum"), max_length=255, null=True, blank=True)

UNDEFINED = "undefined"
COMPLETED = "completed"
Expand Down
1 change: 1 addition & 0 deletions alexandria/core/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ class Meta:
"download_url",
"upload_url",
"upload_status",
"checksum",
)


Expand Down
39 changes: 23 additions & 16 deletions alexandria/core/tests/snapshots/snap_test_api.py

Large diffs are not rendered by default.

82 changes: 82 additions & 0 deletions alexandria/core/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def test_hook_view(
],
}

settings.ALEXANDRIA_ENABLE_CHECKSUM = False
settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION = enabled

if status_code == HTTP_201_CREATED:
Expand Down Expand Up @@ -233,6 +234,7 @@ def test_manual_thumbnail(
file_name,
status_code,
):
settings.ALEXANDRIA_ENABLE_CHECKSUM = False
settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION = enabled

file = file_factory(variant=variant, name=file_name)
Expand Down Expand Up @@ -416,3 +418,83 @@ def test_document_delete_some_tags(admin_client, tag_factory, document_factory):
assert set(Tag.objects.all().values_list("slug", flat=True)) == set(
[tag_1.slug, tag_2.slug, tag_3.slug]
)


@pytest.mark.parametrize(
    "enabled,status_code", [(True, HTTP_201_CREATED), (False, HTTP_403_FORBIDDEN)]
)
def test_checksum(
    admin_client,
    minio_mock,
    mock_s3storage,
    document_factory,
    settings,
    enabled,
    status_code,
):
    """Post an ``s3:ObjectCreated:Put`` event to the hook and check the checksum.

    Thumbnail generation is always switched off here, so the ``enabled``
    parameter alone decides whether the hook is active: with checksums on the
    hook must answer 201 and store a ``sha256:`` prefixed digest on the file,
    with checksums off it must answer 403.
    """
    settings.ALEXANDRIA_ENABLE_CHECKSUM = enabled
    settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION = False

    # Description of the uploaded object as carried inside the event record.
    s3_object = {
        "key": "218b2504-1736-476e-9975-dc5215ef4f01_test.png",
        "size": 299758,
        "eTag": "af1421c17294eed533ec99eb82b468fb",
        "contentType": "application/pdf",
        "userMetadata": {"content-variant": "application/pdf"},
        "versionId": "1",
        "sequencer": "162276DB83A9F895",
    }
    s3_bucket = {
        "name": "alexandria-media",
        "ownerIdentity": {"principalId": "minio"},
        "arn": "arn:aws:s3:::alexandria-media",
    }
    record = {
        "eventVersion": "2.0",
        "eventSource": "minio:s3",
        "awsRegion": "",
        "eventTime": "2020-07-17T06:38:23.221Z",
        "eventName": "s3:ObjectCreated:Put",
        "userIdentity": {"principalId": "minio"},
        "requestParameters": {
            "accessKey": "minio",
            "region": "",
            "sourceIPAddress": "172.20.0.1",
        },
        "responseElements": {
            "x-amz-request-id": "162276DB8350E531",
            "x-minio-deployment-id": "5db7c8da-79cb-4d3a-8d40-189b51ca7aa6",
            "x-minio-origin-endpoint": "http://172.20.0.2:9000",
        },
        "s3": {
            "s3SchemaVersion": "1.0",
            "configurationId": "Config",
            "bucket": s3_bucket,
            "object": s3_object,
        },
        "source": {
            "host": "172.20.0.1",
            "port": "",
            "userAgent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) QtWebEngine/5.15.0 Chrome/80.0.3987.163 Safari/537.36",
        },
    }
    payload = {
        "EventName": "s3:ObjectCreated:Put",
        "Key": "alexandria-media/218b2504-1736-476e-9975-dc5215ef4f01_test.png",
        "Records": [record],
    }

    # The primary key matches the UUID prefix of the object key in the event.
    uploaded_file = File.objects.create(
        document=document_factory(),
        name="test.png",
        pk="218b2504-1736-476e-9975-dc5215ef4f01",
    )

    response = admin_client.post(reverse("hook"), data=payload)
    assert response.status_code == status_code

    if status_code != HTTP_201_CREATED:
        # Hook disabled: nothing further to verify.
        return

    uploaded_file.refresh_from_db()
    expected_checksum = (
        "sha256:778caf7d8d81a7ff8041003ef01afe00a85750d15086a3cb267fd8d23d8dd285"
    )
    assert uploaded_file.checksum == expected_checksum
16 changes: 13 additions & 3 deletions alexandria/core/thumbs.py → alexandria/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import hashlib
import os
from pathlib import Path
from tempfile import TemporaryDirectory
Expand All @@ -13,7 +14,7 @@
from .storage_clients import client


def create_thumbnail(file):
def get_file(file):
# TODO: this should be run by a task queue
data = client.get_object(file.object_name)

Expand All @@ -24,6 +25,10 @@ def create_thumbnail(file):
for d in data.stream(32 * 1024):
f.write(d)

return temp_dir, temp_filepath


def create_thumbnail(file, temp_dir, temp_filepath):
manager = PreviewManager(str(temp_dir.name))

preview_kwargs = {}
Expand Down Expand Up @@ -55,6 +60,11 @@ def create_thumbnail(file):
thumb_file.upload_status = File.COMPLETED if result.ok else File.ERROR
thumb_file.save()

temp_dir.cleanup()

return result.ok


def get_checksum(temp_filepath, chunk_size=1024 * 1024):
    """Return the SHA-256 checksum of a file as ``sha256:<hexdigest>``.

    The file is hashed in fixed-size chunks so that a large upload is never
    held in memory in one piece.

    :param temp_filepath: Path of the local file to hash.
    :param chunk_size: Number of bytes to read per iteration (must be >= 1).
    :return: The hex digest of the file content, prefixed with ``sha256:``.
    :raises ValueError: If ``chunk_size`` is smaller than 1.
    :raises OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) would end the loop immediately and read(-1) would load the
        # entire file, so neither is an acceptable chunk size.
        raise ValueError("chunk_size must be a positive integer")

    digest = hashlib.sha256()

    with open(temp_filepath, "rb") as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)

    return f"sha256:{digest.hexdigest()}"
32 changes: 24 additions & 8 deletions alexandria/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from . import models, serializers
from .filters import CategoryFilterSet, DocumentFilterSet, FileFilterSet, TagFilterSet
from .storage_clients import client
from .thumbs import create_thumbnail
from .utils import create_thumbnail, get_checksum, get_file


class PermissionViewMixin:
Expand Down Expand Up @@ -185,7 +185,8 @@ def generate_thumbnail(self, request, pk=None):
_("File already has thumbnail, cannot generate multiple thumbnails.")
)

etag = create_thumbnail(file)
temp_dir, temp_filepath = get_file(file)
etag = create_thumbnail(file, temp_dir, temp_filepath)

if not etag:
return Response(
Expand All @@ -199,8 +200,11 @@ def generate_thumbnail(self, request, pk=None):


@require_http_methods(["HEAD", "POST"])
def hook_view(request):
if not settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION:
def hook_view(request): # noqa: C901
if (
not settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION
and not settings.ALEXANDRIA_ENABLE_CHECKSUM
):
return HttpResponse(status=HTTP_403_FORBIDDEN)

if request.method == "HEAD":
Expand All @@ -226,13 +230,25 @@ def hook_view(request):
response_statuses.append(HTTP_200_OK)
continue

temp_dir, temp_filepath = get_file(file)

file.upload_status = models.File.COMPLETED
update_fields = ["upload_status"]

if settings.ALEXANDRIA_ENABLE_CHECKSUM:
file.checksum = get_checksum(temp_filepath)
update_fields.append("checksum")

file.save()

created = create_thumbnail(file)
if created is False:
response_statuses.append(HTTP_200_OK)
continue
if settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION:
created = create_thumbnail(file, temp_dir, temp_filepath)

if created is False:
response_statuses.append(HTTP_200_OK)
continue

temp_dir.cleanup()

response_statuses.append(HTTP_201_CREATED)

Expand Down
3 changes: 3 additions & 0 deletions alexandria/settings/alexandria.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,6 @@ def default(default_dev=env.NOTSET, default_prod=env.NOTSET):
)
ALEXANDRIA_THUMBNAIL_WIDTH = env.int("ALEXANDRIA_THUMBNAIL_WIDTH", default=None)
ALEXANDRIA_THUMBNAIL_HEIGHT = env.int("ALEXANDRIA_THUMBNAIL_HEIGHT", default=None)

# Checksums
ALEXANDRIA_ENABLE_CHECKSUM = env.bool("ALEXANDRIA_ENABLE_CHECKSUM", default=True)
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ services:
# same as postgres password above
# - DATABASE_PASSWORD=
minio:
image: minio/minio:RELEASE.2023-07-21T21-12-44Z
image: minio/minio:RELEASE.2023-11-06T22-26-08Z
volumes:
- minio_data:/data
ports:
Expand All @@ -40,7 +40,7 @@ services:
- MINIO_NOTIFY_WEBHOOK_ENDPOINT_ALEXANDRIA=https://your-endpoint.tdl
command: server data --console-address ":9090"
mc:
image: minio/mc:RELEASE.2023-07-21T20-44-27Z
image: minio/mc:RELEASE.2023-11-06T04-19-23Z
restart: on-failure
environment:
- MINIO_ROOT_USER=very
Expand Down

0 comments on commit 0011c81

Please sign in to comment.