Skip to content

Commit

Permalink
Unique jobs key
Browse files Browse the repository at this point in the history
  • Loading branch information
davegaeddert committed Jan 17, 2024
1 parent 79830e0 commit 84d3c9d
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 40 deletions.
22 changes: 0 additions & 22 deletions bolt-jobs/bolt/jobs/gid.py

This file was deleted.

104 changes: 86 additions & 18 deletions bolt-jobs/bolt/jobs/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

from bolt.db.models import Model

from .gid import GlobalID


def load_job(job_class_path, parameters):
module_path, class_name = job_class_path.rsplit(".", 1)
Expand All @@ -21,14 +19,14 @@ def to_json(args, kwargs):
serialized_args = []
for arg in args:
if isinstance(arg, Model):
serialized_args.append(GlobalID.from_instance(arg))
serialized_args.append(ModelInstanceParameter.from_instance(arg))
else:
serialized_args.append(arg)

serialized_kwargs = {}
for key, value in kwargs.items():
if isinstance(value, Model):
serialized_kwargs[key] = GlobalID.from_instance(value)
serialized_kwargs[key] = ModelInstanceParameter.from_instance(value)
else:
serialized_kwargs[key] = value

Expand All @@ -38,21 +36,49 @@ def to_json(args, kwargs):
def from_json(data):
args = []
for arg in data["args"]:
if GlobalID.is_gid(arg):
args.append(GlobalID.to_instance(arg))
if ModelInstanceParameter.is_gid(arg):
args.append(ModelInstanceParameter.to_instance(arg))
else:
args.append(arg)

kwargs = {}
for key, value in data["kwargs"].items():
if GlobalID.is_gid(value):
kwargs[key] = GlobalID.to_instance(value)
if ModelInstanceParameter.is_gid(value):
kwargs[key] = ModelInstanceParameter.to_instance(value)
else:
kwargs[key] = value

return args, kwargs


class ModelInstanceParameter:
"""
A string representation of a model instance,
so we can convert a single parameter (model instance itself)
into a string that can be serialized and stored in the database.
"""

@staticmethod
def from_instance(instance):
return f"gid://{instance._meta.package_label}/{instance._meta.model_name}/{instance.pk}"

@staticmethod
def to_instance(s):
if not s.startswith("gid://"):
raise ValueError("Invalid ModelInstanceParameter string")
package, model, pk = s[6:].split("/")
from bolt.packages import packages

model = packages.get_model(package, model)
return model.objects.get(pk=pk)

@staticmethod
def is_gid(x):
if not isinstance(x, str):
return False
return x.startswith("gid://")


class JobType(type):
"""
Metaclass allows us to capture the original args/kwargs
Expand All @@ -68,7 +94,15 @@ def __call__(self, *args, **kwargs):


class Job(metaclass=JobType):
def run(self):
raise NotImplementedError

def run_in_background(self, start_at: datetime.datetime | None = None):
from .models import JobRequest

if unique_existing := self._get_existing_unique_job_or_request():
return unique_existing

try:
# Try to automatically annotate the source of the job
caller = inspect.stack()[1]
Expand All @@ -78,22 +112,56 @@ def run_in_background(self, start_at: datetime.datetime | None = None):

parameters = JobParameters.to_json(self._init_args, self._init_kwargs)

from .models import JobRequest

priority = self.get_priority()
retries = self.get_retries()

return JobRequest.objects.create(
job_class=f"{self.__module__}.{self.__class__.__name__}",
job_class=self._job_class_str(),
parameters=parameters,
priority=priority,
priority=self.get_priority(),
source=source,
retries=retries,
retries=self.get_retries(),
start_at=start_at,
)

def run(self):
raise NotImplementedError
def _job_class_str(self):
return f"{self.__module__}.{self.__class__.__name__}"

def _get_existing_unique_job_or_request(self):
"""
Find pending or running versions of this job that already exist.
Note this doesn't include instances that may have failed and are
not yet queued for retry.
"""
from .models import Job, JobRequest

job_class = self._job_class_str()
unique_key = self.get_unique_key()

if not unique_key:
return None

try:
return JobRequest.objects.get(
job_class=job_class,
unique_key=unique_key,
)
except JobRequest.DoesNotExist:
pass

try:
return Job.objects.get(
job_class=job_class,
unique_key=unique_key,
)
except Job.DoesNotExist:
pass

return None

def get_unique_key(self) -> str:
"""
A unique key to prevent duplicate jobs from being queued.
Enabled by returning a non-empty string.
"""
raise ""

def get_priority(self) -> int:
return 0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Generated by Bolt 5.0.dev20240117193239 on 2024-01-17 19:41

from bolt.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("boltqueue", "0013_alter_job_options_alter_jobresult_options_and_more"),
]

operations = [
migrations.AddField(
model_name="job",
name="unique_key",
field=models.CharField(blank=True, db_index=True, max_length=255),
),
migrations.AddField(
model_name="jobrequest",
name="unique_key",
field=models.CharField(blank=True, db_index=True, max_length=255),
),
migrations.AddField(
model_name="jobresult",
name="unique_key",
field=models.CharField(blank=True, db_index=True, max_length=255),
),
migrations.AddIndex(
model_name="job",
index=models.Index(
fields=["job_class", "unique_key"], name="job_class_unique_key"
),
),
migrations.AddIndex(
model_name="jobrequest",
index=models.Index(
fields=["job_class", "unique_key"], name="job_request_class_unique_key"
),
),
]
20 changes: 20 additions & 0 deletions bolt-jobs/bolt/jobs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class JobRequest(models.Model):
retries = models.IntegerField(default=0)
retry_attempt = models.IntegerField(default=0)

unique_key = models.CharField(max_length=255, blank=True, db_index=True)

start_at = models.DateTimeField(blank=True, null=True, db_index=True)

# context
Expand All @@ -48,6 +50,12 @@ class JobRequest(models.Model):

class Meta:
ordering = ["priority", "-created_at"]
indexes = [
# Used to dedupe unique in-process jobs
models.Index(
name="job_request_class_unique_key", fields=["job_class", "unique_key"]
),
]

def __str__(self):
return f"{self.job_class} [{self.uuid}]"
Expand All @@ -65,6 +73,7 @@ def convert_to_job(self):
source=self.source,
retries=self.retries,
retry_attempt=self.retry_attempt,
unique_key=self.unique_key,
)

# Delete the pending JobRequest now
Expand Down Expand Up @@ -107,11 +116,18 @@ class Job(models.Model):
source = models.TextField(blank=True)
retries = models.IntegerField(default=0)
retry_attempt = models.IntegerField(default=0)
unique_key = models.CharField(max_length=255, blank=True, db_index=True)

objects = JobQuerySet.as_manager()

class Meta:
ordering = ["-created_at"]
indexes = [
# Used to dedupe unique in-process jobs
models.Index(
name="job_class_unique_key", fields=["job_class", "unique_key"]
),
]

def run(self):
# This is how we know it has been picked up
Expand Down Expand Up @@ -149,6 +165,7 @@ def convert_to_result(self, *, status, error=""):
source=self.source,
retries=self.retries,
retry_attempt=self.retry_attempt,
unique_key=self.unique_key,
)

# Delete the Job now
Expand Down Expand Up @@ -219,6 +236,7 @@ class JobResult(models.Model):
source = models.TextField(blank=True)
retries = models.IntegerField(default=0)
retry_attempt = models.IntegerField(default=0)
unique_key = models.CharField(max_length=255, blank=True, db_index=True)

# Retries
retry_job_request_uuid = models.UUIDField(blank=True, null=True)
Expand Down Expand Up @@ -250,6 +268,8 @@ def retry_job(self, delay: int | None = None):
priority=self.priority,
source=self.source,
retries=self.retries,
unique_key=self.unique_key,
# For the retry
retry_attempt=retry_attempt,
start_at=start_at,
)
Expand Down

0 comments on commit 84d3c9d

Please sign in to comment.