Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor test_capture_job.py #3415

Merged
merged 6 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions perma_web/api/tests/test_current_user_authorization.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from .utils import ApiResourceTestCase
from perma.tests.test_capture_job import create_capture_job
from perma.models import LinkUser, Link
from perma.models import LinkUser, Link, CaptureJob


def create_capture_job(user, human=True):
link = Link(created_by=user, submitted_url="http://example.com")
link.save()
capture_job = CaptureJob(created_by=user, link=link, human=human, status='pending')
capture_job.save()
return capture_job


class CurrentUserAuthorizationTestCase(ApiResourceTestCase):
Expand Down
129 changes: 129 additions & 0 deletions perma_web/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,132 @@ def logged_in_user(page, urls, user):
password.type(user.password)
page.locator("button.btn.login").click()
return page


### ###
### New Fixtures ###
### ###

# As we modernize the test suite, we can start putting new fixtures here.
# The separation should make it easier to work out, going forward, what can be deleted.

import factory
from factory.django import DjangoModelFactory
import humps

from datetime import timezone as tz
from django.db.models import signals

from perma.models import LinkUser, Link, CaptureJob


### internal helpers ###

# functions used within this file to set up fixtures

def register_factory(cls):
"""
Decorator to take a factory class and inject test fixtures. For example,
@register_factory
class UserFactory
will inject the fixtures "user_factory" (equivalent to UserFactory) and "user" (equivalent to UserFactory()).
This is basically the same as the @register decorator provided by the pytest_factoryboy package,
but because it's simpler it seems to work better with RelatedFactory and SubFactory.
"""
snake_case_name = humps.decamelize(cls.__name__)

@pytest.fixture
def factory_fixture(db):
return cls

@pytest.fixture
def instance_fixture(db):
return cls()

globals()[snake_case_name] = factory_fixture
globals()[snake_case_name.rsplit('_factory', 1)[0]] = instance_fixture

return cls


### model factories ###

@register_factory
class LinkUserFactory(DjangoModelFactory):
class Meta:
model = LinkUser

first_name = factory.Faker('first_name')
last_name = factory.Faker('last_name')
email = factory.Sequence(lambda n: 'user%[email protected]' % n)
is_active = True
is_confirmed=True

password = factory.PostGenerationMethodCall('set_password', 'pass')


@register_factory
class CaptureJobFactory(DjangoModelFactory):
class Meta:
model = CaptureJob
exclude = ('create_link',)

created_by = factory.SubFactory(LinkUserFactory)
submitted_url = factory.Faker('url')

create_link = True
link = factory.Maybe(
'create_link',
yes_declaration=factory.RelatedFactory(
'conftest.LinkFactory',
factory_related_name='capture_job',
create_pending_capture_job=False,
created_by=factory.SelfAttribute('..created_by'),
submitted_url=factory.SelfAttribute('..submitted_url'),
),
no_declaration=None
)


@register_factory
class InvalidCaptureJobFactory(CaptureJobFactory):
submitted_url = 'not-a-valid-url'
status = 'invalid'
message = {'url': ['Not a valid URL.']}
create_link = False


@register_factory
class PendingCaptureJobFactory(CaptureJobFactory):
status = 'pending'


@register_factory
class InProgressCaptureJobFactory(PendingCaptureJobFactory):
status = 'in_progress'
capture_start_time = factory.Faker('future_datetime', end_date='+1m', tzinfo=tz.utc)
step_count = factory.Faker('pyfloat', min_value=1, max_value=10)
step_description = factory.Faker('text', max_nb_chars=15)


@register_factory
@factory.django.mute_signals(signals.pre_save)
class LinkFactory(DjangoModelFactory):
class Meta:
model = Link
exclude = ('create_pending_capture_job', 'created_by')

created_by = factory.SubFactory(LinkUserFactory)
submitted_url = factory.Faker('url')

create_pending_capture_job = True
capture_job = factory.Maybe(
'create_pending_capture_job',
yes_declaration=factory.SubFactory(
PendingCaptureJobFactory,
created_by=factory.SelfAttribute('..created_by'),
submitted_url=factory.SelfAttribute('..submitted_url'),
create_link=False
),
no_declaration=None
)
171 changes: 78 additions & 93 deletions perma_web/perma/tests/test_capture_job.py
Original file line number Diff line number Diff line change
@@ -1,124 +1,109 @@
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
import json
from multiprocessing.pool import ThreadPool
import pytest

from django.conf import settings
from django.test import TransactionTestCase
from django.db import connections
from django.utils import timezone
from rest_framework.settings import api_settings

from perma.models import CaptureJob, Link, LinkUser
from perma.models import CaptureJob
from perma.celery_tasks import clean_up_failed_captures

# TODO:
# - check retry behavior

# lives outside CaptureJobTestCase so it can be used by other tests
def create_capture_job(user, human=True):
link = Link(created_by=user, submitted_url="http://example.com")
link.save()
capture_job = CaptureJob(created_by=user, link=link, human=human, status='pending')
capture_job.save()
return capture_job
def test_job_queue_order(link_user_factory, pending_capture_job_factory):
""" Jobs should be processed round-robin, one per user. """

user_one = link_user_factory()
user_two = link_user_factory()

class CaptureJobTestCase(TransactionTestCase):
jobs = [
pending_capture_job_factory(created_by=user_one, human=True),
pending_capture_job_factory(created_by=user_one, human=True),
pending_capture_job_factory(created_by=user_one, human=True),
pending_capture_job_factory(created_by=user_two, human=True),

fixtures = [
'fixtures/users.json',
'fixtures/folders.json',
]

def setUp(self):
super(CaptureJobTestCase, self).setUp()

self.user_one = LinkUser.objects.get(pk=1)
self.user_two = LinkUser.objects.get(pk=2)

self.maxDiff = None # let assertListEqual compare large lists

### TESTS ###
pending_capture_job_factory(created_by=user_two),

def test_job_queue_order(self):
""" Jobs should be processed round-robin, one per user. """
pending_capture_job_factory(created_by=user_one, human=True),
pending_capture_job_factory(created_by=user_one, human=True),
pending_capture_job_factory(created_by=user_one, human=True),
pending_capture_job_factory(created_by=user_two, human=True),
]

jobs = [
create_capture_job(self.user_one),
create_capture_job(self.user_one),
create_capture_job(self.user_one),
create_capture_job(self.user_two),
expected_order = [
0, 3, # u1, u2
1, 8, # u1, u2
2, 5, 6, 7, # remaining u1 jobs
4 # robots queue
]

create_capture_job(self.user_two, human=False),
# test CaptureJob.queue_position
for i, job in enumerate(jobs):
queue_position = job.queue_position()
expected_queue_position = expected_order.index(i)+1
assert queue_position == expected_queue_position, f"Job {i} has queue position {queue_position}, should be {expected_queue_position}."

create_capture_job(self.user_one),
create_capture_job(self.user_one),
create_capture_job(self.user_one),
create_capture_job(self.user_two),
]
# test CaptureJob.get_next_job
expected_next_jobs = [jobs[i] for i in expected_order]
next_jobs = [CaptureJob.get_next_job(reserve=True) for i in range(len(jobs))]
assert next_jobs == expected_next_jobs

expected_order = [
0, 3, # u1, u2
1, 8, # u1, u2
2, 5, 6, 7, # remaining u1 jobs
4 # robots queue
]

# test CaptureJob.queue_position
for i, job in enumerate(jobs):
queue_position = job.queue_position()
expected_queue_position = expected_order.index(i)+1
self.assertEqual(queue_position, expected_queue_position, "Job %s has queue position %s, should be %s." % (i, queue_position, expected_queue_position))
@pytest.mark.django_db(transaction=True)
def test_race_condition_prevented(pending_capture_job_factory):
""" Fetch two jobs at the same time in threads and make sure same job isn't returned to both. """
jobs = [
pending_capture_job_factory(),
pending_capture_job_factory()
]

# test CaptureJob.get_next_job
expected_next_jobs = [jobs[i] for i in expected_order]
next_jobs = [CaptureJob.get_next_job(reserve=True) for i in range(len(jobs))]
self.assertListEqual(next_jobs, expected_next_jobs)
def get_next_job(i):
job = CaptureJob.get_next_job(reserve=True)
for connection in connections.all():
connection.close()
return job

def test_race_condition_prevented(self):
""" Fetch two jobs at the same time in threads and make sure same job isn't returned to both. """
jobs = [
create_capture_job(self.user_one),
create_capture_job(self.user_one)
]
CaptureJob.TEST_PAUSE_TIME = .1
with ThreadPoolExecutor(max_workers=2) as e:
fetched_jobs = e.map(get_next_job, range(2))
CaptureJob.TEST_PAUSE_TIME = 0

def get_next_job(i):
return CaptureJob.get_next_job(reserve=True)
assert set(jobs) == set(fetched_jobs)

CaptureJob.TEST_PAUSE_TIME = .1
fetched_jobs = ThreadPool(2).map(get_next_job, range(2))
CaptureJob.TEST_PAUSE_TIME = 0

self.assertSetEqual(set(jobs), set(fetched_jobs))
@pytest.mark.django_db(transaction=True)
def test_race_condition_not_prevented(pending_capture_job_factory):
"""
Make sure that test_race_condition_prevented is passing for the right reason --
should fail if race condition protection is disabled.
"""
CaptureJob.TEST_ALLOW_RACE = True
with pytest.raises(AssertionError, match="Extra items in the left set"):
test_race_condition_prevented(pending_capture_job_factory)
CaptureJob.TEST_ALLOW_RACE = False

def test_race_condition_not_prevented(self):
"""
Make sure that test_race_condition_prevented is passing for the right reason --
should fail if race condition protection is disabled.
"""
CaptureJob.TEST_ALLOW_RACE = True
self.assertRaisesRegex(AssertionError, r'^Items in the', self.test_race_condition_prevented)
CaptureJob.TEST_ALLOW_RACE = False

def test_hard_timeout(self):
create_capture_job(self.user_one)
def test_hard_timeout(pending_capture_job):

# simulate a failed run_next_capture()
job = CaptureJob.get_next_job(reserve=True)
# simulate a failed run_next_capture()
job = CaptureJob.get_next_job(reserve=True)

# capture_start_time should be set accurately on the server side
self.assertLess((job.capture_start_time - timezone.now()).total_seconds(), 60)
# capture_start_time should be set accurately on the server side
assert (job.capture_start_time - timezone.now()).total_seconds() < 60

# clean_up_failed_captures shouldn't affect job, since timeout hasn't passed
clean_up_failed_captures()
job.refresh_from_db()
self.assertEqual(job.status, "in_progress")
# clean_up_failed_captures shouldn't affect job, since timeout hasn't passed
clean_up_failed_captures()
job.refresh_from_db()
assert job.status == "in_progress"

# once job is sufficiently old, clean_up_failed_captures should mark it as failed
job.capture_start_time -= timedelta(seconds=settings.CELERY_TASK_TIME_LIMIT+60)
job.save()
clean_up_failed_captures()
job.refresh_from_db()
self.assertEqual(job.status, "failed")
# once job is sufficiently old, clean_up_failed_captures should mark it as failed
job.capture_start_time -= timedelta(seconds=settings.CELERY_TASK_TIME_LIMIT+60)
job.save()
clean_up_failed_captures()
job.refresh_from_db()
assert job.status == "failed"

# failed jobs will have a message indicating failure reason
self.assertEqual(json.loads(job.message)[api_settings.NON_FIELD_ERRORS_KEY][0], "Timed out.")
# failed jobs will have a message indicating failure reason
assert json.loads(job.message)[api_settings.NON_FIELD_ERRORS_KEY][0] == "Timed out."
2 changes: 2 additions & 0 deletions perma_web/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ PyNaCl # encryption
beautifulsoup4 # parses html of responses
coverage # record code coverage
django-admin-smoke-tests # basic tests for the Django admin
factory-boy # create django model fixtures on demand
fakeredis[lua] # simulate redis backend for tests
flake8 # code linting
hypothesis # run tests with lots of generated input
Expand All @@ -88,6 +89,7 @@ pytest-django # tools for django in pytest
pytest-django-ordering # runs transaction-wrapped tests before table truncating tests
pytest-xdist # run tests in parallel
pytest # test runner
pyhumps # a helper for dealing with camel case and snake case
pytest-playwright # integration tests
pytest-django-liveserver-ssl # integration tests w/SSL
django-extensions # runserver_plus for SSL
Expand Down
Loading
Loading