-
-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Improves metadata about batches. * Supports multiple batches per downloader/processor job. * Adds organisms model for retrieving NCBI taxonomy ids.
- Loading branch information
1 parent
629709f
commit ce380e5
Showing
45 changed files
with
1,683 additions
and
597 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# Project specific files | ||
workers/volume | ||
foreman/volume | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from data_refinery_models.models.surveys import SurveyJob, SurveyJobKeyValue | ||
from data_refinery_models.models.batches import ( | ||
BatchStatuses, | ||
Batch, | ||
BatchKeyValue | ||
) | ||
from data_refinery_models.models.jobs import ( | ||
DownloaderJob, | ||
ProcessorJob, | ||
DownloaderJobsToBatches, | ||
ProcessorJobsToBatches | ||
) | ||
from data_refinery_models.models.organism import Organism |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from django.db import models | ||
from django.utils import timezone | ||
|
||
|
||
class TimeTrackedModel(models.Model):
    """Abstract base model that maintains created_at and updated_at.

    The timestamps are assigned inside save(), so they are only kept
    current for writes that go through save().
    """

    created_at = models.DateTimeField(editable=False)
    updated_at = models.DateTimeField()

    def save(self, *args, **kwargs):
        """Refresh the timestamps, then delegate to Model.save().

        created_at is set only while the instance has no id yet;
        updated_at is set on every call. All positional and keyword
        arguments are forwarded to Model.save() and its return value
        is returned.
        """
        current_time = timezone.now()
        if not self.id:
            self.created_at = current_time
        self.updated_at = current_time

        # With a partial save, Django writes only the named fields, so
        # the fresh updated_at would be dropped unless it is listed too.
        # An empty or missing update_fields is left untouched to keep
        # Django's "skip the save" / "save everything" semantics.
        # Only the keyword form is handled; a positional update_fields
        # is passed through unchanged.
        update_fields = kwargs.get("update_fields")
        if update_fields:
            kwargs["update_fields"] = set(update_fields) | {"updated_at"}

        return super(TimeTrackedModel, self).save(*args, **kwargs)

    class Meta:
        abstract = True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from enum import Enum | ||
from django.db import models | ||
from data_refinery_models.models.base_models import TimeTrackedModel | ||
from data_refinery_models.models.surveys import SurveyJob | ||
|
||
|
||
class BatchStatuses(Enum):
    """Enumerates the values allowed in the Batch model's status field.

    Each member's value is the same string as its name.
    """

    NEW = "NEW"
    DOWNLOADED = "DOWNLOADED"
    PROCESSED = "PROCESSED"
|
||
|
||
class Batch(TimeTrackedModel):
    """A batch of data discovered by a survey job.

    What counts as a batch is deliberately left loose because it
    differs between sources: it may be a single file, or several files
    that belong together logically, such as an experiment.
    """

    # Deleting a SurveyJob is blocked while batches still point at it.
    survey_job = models.ForeignKey(to=SurveyJob, on_delete=models.PROTECT)

    source_type = models.CharField(max_length=256)
    size_in_bytes = models.IntegerField()
    download_url = models.CharField(max_length=4096)

    # The two format columns are the only nullable descriptive fields.
    raw_format = models.CharField(max_length=256, null=True)
    processed_format = models.CharField(max_length=256, null=True)
    pipeline_required = models.CharField(max_length=256)

    platform_accession_code = models.CharField(max_length=32)
    experiment_accession_code = models.CharField(max_length=32)
    experiment_title = models.CharField(max_length=256)

    # BatchStatuses documents the valid values for this column.
    status = models.CharField(max_length=20)

    release_date = models.DateField()
    last_uploaded_date = models.DateField()
    name = models.CharField(max_length=1024)

    # Where the file can be found inside our own system.
    internal_location = models.CharField(max_length=256, null=True)

    # Organism taxonomy ID as assigned by NCBI.
    organism_id = models.IntegerField()
    # Organism name exactly as the experiment reported it.
    organism_name = models.CharField(max_length=256)

    class Meta:
        db_table = "batches"
|
||
|
||
class BatchKeyValue(TimeTrackedModel):
    """An extra key/value attribute attached to a Batch.

    Intended for data that would be sparsely populated as dedicated
    columns, e.g. a field or two that only one source provides but
    that is still worth recording.
    """

    # Rows are removed together with the Batch they describe.
    batch = models.ForeignKey(to=Batch, on_delete=models.CASCADE)
    key = models.CharField(max_length=256)
    value = models.CharField(max_length=256)

    class Meta:
        db_table = "batch_key_values"
Oops, something went wrong.