diff --git a/.gitignore b/.gitignore index 5fbeb8b24..5073009a8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Project specific files workers/volume +foreman/volume # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/README.md b/README.md index 8fce14b0e..7305e1a17 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,9 @@ supported by Greene Lab. ## Getting Started -Note: The following steps assume you have already installed PostgreSQL (>=9.4) and Python (Most versions should work, but this has been tested with Python 3.5) on Ubuntu (Tested with 16.04. It should be possible to use other versions or even a Mac though). +Note: The following steps assume you have already installed PostgreSQL (>=9.4) +and Python (>=3.5) on Ubuntu (Tested with 16.04. It should be possible to use +other versions or even a Mac though). Run `./install.sh` to set up the virtualenv. It will activate the `dr_env` for you the first time. This virtualenv is valid for the entire data_refinery @@ -18,6 +20,18 @@ instructions on doing so. ## Development +R files in this repo follow +[Google's R Style Guide](https://google.github.io/styleguide/Rguide.xml). +Python Files in this repo follow +[PEP 8](https://www.python.org/dev/peps/pep-0008/). All files (including +python and R) have a line limit of 100 characters. + +A `setup.cfg` file has been included in the root of this repo which specifies +the line length limit for the autopep8 and flake8 linters. If you run either +of those programs from anywhere within the project's directory tree they will +enforce a limit of 100 instead of 80. This will also be true for editors which +rely on them. + It can be useful to have an interactive python interpreter running within the context of the Docker container. The `run_shell.sh` script has been provided for this purpose. 
It is in the top level directory so that if you wish to diff --git a/data_models/data_refinery_models/migrations/0001_initial.py b/data_models/data_refinery_models/migrations/0001_initial.py index 78cc7d03c..92850c9fb 100644 --- a/data_models/data_refinery_models/migrations/0001_initial.py +++ b/data_models/data_refinery_models/migrations/0001_initial.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Generated by Django 1.11 on 2017-05-02 18:50 +# Generated by Django 1.10.6 on 2017-05-26 15:12 from __future__ import unicode_literals from django.db import migrations, models @@ -22,14 +22,20 @@ class Migration(migrations.Migration): ('updated_at', models.DateTimeField()), ('source_type', models.CharField(max_length=256)), ('size_in_bytes', models.IntegerField()), - ('download_url', models.CharField(max_length=2048)), + ('download_url', models.CharField(max_length=4096)), ('raw_format', models.CharField(max_length=256, null=True)), ('processed_format', models.CharField(max_length=256, null=True)), ('pipeline_required', models.CharField(max_length=256)), - ('accession_code', models.CharField(max_length=32)), + ('platform_accession_code', models.CharField(max_length=32)), + ('experiment_accession_code', models.CharField(max_length=32)), + ('experiment_title', models.CharField(max_length=256)), ('status', models.CharField(max_length=20)), + ('release_date', models.DateField()), + ('last_uploaded_date', models.DateField()), + ('name', models.CharField(max_length=1024)), ('internal_location', models.CharField(max_length=256, null=True)), - ('organism', models.IntegerField()), + ('organism_id', models.IntegerField()), + ('organism_name', models.CharField(max_length=256)), ], options={ 'db_table': 'batches', @@ -60,12 +66,38 @@ class Migration(migrations.Migration): ('success', models.NullBooleanField()), ('num_retries', models.IntegerField(default=0)), ('worker_id', models.CharField(max_length=256, null=True)), - ('batch', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='data_refinery_models.Batch')), ], options={ 'db_table': 'downloader_jobs', }, ), + migrations.CreateModel( + name='DownloaderJobsToBatches', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_at', models.DateTimeField(editable=False)), + ('updated_at', models.DateTimeField()), + ('batch', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='data_refinery_models.Batch')), + ('downloader_job', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='data_refinery_models.DownloaderJob')), + ], + options={ + 'db_table': 'downloader_jobs_to_batches', + }, + ), + migrations.CreateModel( + name='Organism', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_at', models.DateTimeField(editable=False)), + ('updated_at', models.DateTimeField()), + ('name', models.CharField(max_length=256)), + ('taxonomy_id', models.IntegerField()), + ('is_scientific_name', models.BooleanField(default=False)), + ], + options={ + 'db_table': 'organisms', + }, + ), migrations.CreateModel( name='ProcessorJob', fields=[ @@ -78,12 +110,24 @@ class Migration(migrations.Migration): ('pipeline_applied', models.CharField(max_length=256)), ('num_retries', models.IntegerField(default=0)), ('worker_id', models.CharField(max_length=256, null=True)), - ('batch', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='data_refinery_models.Batch')), ], options={ 'db_table': 
'processor_jobs', }, ), + migrations.CreateModel( + name='ProcessorJobsToBatches', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_at', models.DateTimeField(editable=False)), + ('updated_at', models.DateTimeField()), + ('batch', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='data_refinery_models.Batch')), + ('processor_job', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='data_refinery_models.ProcessorJob')), + ], + options={ + 'db_table': 'processor_jobs_to_batches', + }, + ), migrations.CreateModel( name='SurveyJob', fields=[ @@ -118,6 +162,6 @@ class Migration(migrations.Migration): migrations.AddField( model_name='batch', name='survey_job', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='data_refinery_models.SurveyJob'), + field=models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, to='data_refinery_models.SurveyJob'), ), ] diff --git a/data_models/data_refinery_models/models.py b/data_models/data_refinery_models/models.py deleted file mode 100644 index 4111afb1e..000000000 --- a/data_models/data_refinery_models/models.py +++ /dev/null @@ -1,138 +0,0 @@ -from django.db import models -from django.utils import timezone -from enum import Enum - - -class TimeTrackedModel(models.Model): - created_at = models.DateTimeField(editable=False) - updated_at = models.DateTimeField() - - def save(self, *args, **kwargs): - """ On save, update timestamps """ - current_time = timezone.now() - if not self.id: - self.created_at = current_time - self.updated_at = current_time - return super(TimeTrackedModel, self).save(*args, **kwargs) - - class Meta: - abstract = True - - -class SurveyJob(TimeTrackedModel): - source_type = models.CharField(max_length=256) - success = models.NullBooleanField(null=True) - - # The start time of the query used to replicate - replication_started_at = models.DateTimeField(null=True) - - # The end time of the query used to replicate - replication_ended_at = models.DateTimeField(null=True) - - # The start time of the job - start_time = models.DateTimeField(null=True) - - # The end time of the job - end_time = models.DateTimeField(null=True) - - class Meta: - db_table = "survey_jobs" - - -class SurveyJobKeyValue(TimeTrackedModel): - """ - This table is used for tracking fields onto a SurveyJob record that - would be sparsely populated if it was its own column. - I.e. one source may have an extra field or two that are worth - tracking but are specific to that source. 
- """ - survey_job = models.ForeignKey(SurveyJob, on_delete=models.CASCADE) - key = models.CharField(max_length=256) - value = models.CharField(max_length=256) - - class Meta: - db_table = "survey_job_key_values" - - -class BatchStatuses(Enum): - NEW = "NEW" - DOWNLOADED = "DOWNLOADED" - PROCESSED = "PROCESSED" - - -class Batch(TimeTrackedModel): - survey_job = models.ForeignKey(SurveyJob) - source_type = models.CharField(max_length=256) - size_in_bytes = models.IntegerField() - download_url = models.CharField(max_length=2048) - raw_format = models.CharField(max_length=256, null=True) - processed_format = models.CharField(max_length=256, null=True) - pipeline_required = models.CharField(max_length=256) - accession_code = models.CharField(max_length=32) - status = models.CharField(max_length=20) - - # This field will denote where in our system the file can be found - internal_location = models.CharField(max_length=256, null=True) - - # This will utilize the organism taxonomy ID from NCBI - organism = models.IntegerField() - - class Meta: - db_table = "batches" - - -class BatchKeyValue(TimeTrackedModel): - """ - This table is used for tracking fields onto a Batch record that would - be sparsely populated if it was its own column. - I.e. one source may have an extra field or two that are worth tracking - but are specific to that source. - """ - batch = models.ForeignKey(Batch, on_delete=models.CASCADE) - key = models.CharField(max_length=256) - value = models.CharField(max_length=256) - - class Meta: - db_table = "batch_key_values" - - -class ProcessorJob(TimeTrackedModel): - batch = models.ForeignKey(Batch, on_delete=models.CASCADE) - start_time = models.DateTimeField(null=True) - end_time = models.DateTimeField(null=True) - success = models.NullBooleanField(null=True) - - # This field will contain an enumerated value specifying which processor - # pipeline was applied during the processor job. - pipeline_applied = models.CharField(max_length=256) - - # This field represents how many times this job has been retried. It starts - # at 0 and each time the job has to be retried it will be incremented. - # At some point there will probably be some code like: - # if job.num_retries >= 3: - # # do a bunch of logging - # else: - # # retry the job - num_retries = models.IntegerField(default=0) - - # This point of this field is to identify which worker ran the job. - # A few fields may actually be required or something other than just an id. 
- worker_id = models.CharField(max_length=256, null=True) - - class Meta: - db_table = "processor_jobs" - - -class DownloaderJob(TimeTrackedModel): - batch = models.ForeignKey(Batch, on_delete=models.CASCADE) - start_time = models.DateTimeField(null=True) - end_time = models.DateTimeField(null=True) - success = models.NullBooleanField(null=True) - - # These two fields are analagous to the fields with the same names - # in ProcessorJob, see their descriptions for more information - num_retries = models.IntegerField(default=0) - worker_id = models.CharField(max_length=256, null=True) - - class Meta: - db_table = "downloader_jobs" diff --git a/data_models/data_refinery_models/models/__init__.py b/data_models/data_refinery_models/models/__init__.py new file mode 100644 index 000000000..5f4af8c27 --- /dev/null +++ b/data_models/data_refinery_models/models/__init__.py @@ -0,0 +1,13 @@ +from data_refinery_models.models.surveys import SurveyJob, SurveyJobKeyValue +from data_refinery_models.models.batches import ( + BatchStatuses, + Batch, + BatchKeyValue +) +from data_refinery_models.models.jobs import ( + DownloaderJob, + ProcessorJob, + DownloaderJobsToBatches, + ProcessorJobsToBatches +) +from data_refinery_models.models.organism import Organism diff --git a/data_models/data_refinery_models/models/base_models.py b/data_models/data_refinery_models/models/base_models.py new file mode 100644 index 000000000..cf9df7afc --- /dev/null +++ b/data_models/data_refinery_models/models/base_models.py @@ -0,0 +1,20 @@ +from django.db import models +from django.utils import timezone + + +class TimeTrackedModel(models.Model): + """Base model with auto created_at and updated_at fields.""" + + created_at = models.DateTimeField(editable=False) + updated_at = models.DateTimeField() + + def save(self, *args, **kwargs): + """ On save, update timestamps """ + current_time = timezone.now() + if not self.id: + self.created_at = current_time + self.updated_at = current_time + return super(TimeTrackedModel, self).save(*args, **kwargs) + + class Meta: + abstract = True diff --git a/data_models/data_refinery_models/models/batches.py b/data_models/data_refinery_models/models/batches.py new file mode 100644 index 000000000..2c7c0563f --- /dev/null +++ b/data_models/data_refinery_models/models/batches.py @@ -0,0 +1,64 @@ +from enum import Enum +from django.db import models +from data_refinery_models.models.base_models import TimeTrackedModel +from data_refinery_models.models.surveys import SurveyJob + + +class BatchStatuses(Enum): + """Valid values for the status field of the Batch model.""" + + NEW = "NEW" + DOWNLOADED = "DOWNLOADED" + PROCESSED = "PROCESSED" + + +class Batch(TimeTrackedModel): + """Represents a batch of data. + + The definition of a Batch is intentionally that vague. What a batch + is will vary from source to source. It could be a single file, or + a group of files with some kind of logical grouping such as an + experiment. 
+ """ + + survey_job = models.ForeignKey(SurveyJob, on_delete=models.PROTECT) + source_type = models.CharField(max_length=256) + size_in_bytes = models.IntegerField() + download_url = models.CharField(max_length=4096) + raw_format = models.CharField(max_length=256, null=True) + processed_format = models.CharField(max_length=256, null=True) + pipeline_required = models.CharField(max_length=256) + platform_accession_code = models.CharField(max_length=32) + experiment_accession_code = models.CharField(max_length=32) + experiment_title = models.CharField(max_length=256) + status = models.CharField(max_length=20) + release_date = models.DateField() + last_uploaded_date = models.DateField() + name = models.CharField(max_length=1024) + + # This field will denote where in our system the file can be found. + internal_location = models.CharField(max_length=256, null=True) + + # This corresponds to the organism taxonomy ID from NCBI. + organism_id = models.IntegerField() + # This is the organism name as it appeared in the experiment. + organism_name = models.CharField(max_length=256) + + class Meta: + db_table = "batches" + + +class BatchKeyValue(TimeTrackedModel): + """Tracks additional fields for Batches. + + Useful for fields that would be sparsely populated if they were + their own columns. I.e. one source may have an extra field or two + that are worth tracking but are specific to that source. + """ + + batch = models.ForeignKey(Batch, on_delete=models.CASCADE) + key = models.CharField(max_length=256) + value = models.CharField(max_length=256) + + class Meta: + db_table = "batch_key_values" diff --git a/data_models/data_refinery_models/models/jobs.py b/data_models/data_refinery_models/models/jobs.py new file mode 100644 index 000000000..f6827ffc3 --- /dev/null +++ b/data_models/data_refinery_models/models/jobs.py @@ -0,0 +1,73 @@ +from django.db import models +from data_refinery_models.models.base_models import TimeTrackedModel +from data_refinery_models.models.batches import Batch + + +class ProcessorJob(TimeTrackedModel): + """Records information about running a processor.""" + + start_time = models.DateTimeField(null=True) + end_time = models.DateTimeField(null=True) + success = models.NullBooleanField(null=True) + + # This field will contain an enumerated value specifying which processor + # pipeline was applied during the processor job. + pipeline_applied = models.CharField(max_length=256) + + # This field represents how many times this job has been retried. It starts + # at 0 and each time the job has to be retried it will be incremented. + # At some point there will probably be some code like: + # if job.num_retries >= 3: + # # do a bunch of logging + # else: + # # retry the job + num_retries = models.IntegerField(default=0) + + # This point of this field is to identify which worker ran the job. + # A few fields may actually be required or something other than just an id. + worker_id = models.CharField(max_length=256, null=True) + + class Meta: + db_table = "processor_jobs" + + +class ProcessorJobsToBatches(TimeTrackedModel): + """Represents a many to many relationship. + + Maps between ProcessorJobs and Batches. 
+ """ + + batch = models.ForeignKey(Batch, on_delete=models.CASCADE) + processor_job = models.ForeignKey(ProcessorJob, on_delete=models.CASCADE) + + class Meta: + db_table = "processor_jobs_to_batches" + + +class DownloaderJob(TimeTrackedModel): + """Records information about running a Downloader.""" + + start_time = models.DateTimeField(null=True) + end_time = models.DateTimeField(null=True) + success = models.NullBooleanField(null=True) + + # These two fields are analagous to the fields with the same names + # in ProcessorJob, see their descriptions for more information + num_retries = models.IntegerField(default=0) + worker_id = models.CharField(max_length=256, null=True) + + class Meta: + db_table = "downloader_jobs" + + +class DownloaderJobsToBatches(TimeTrackedModel): + """Represents a many to many relationship. + + Maps between DownloaderJobs and Batches. + """ + + batch = models.ForeignKey(Batch, on_delete=models.CASCADE) + downloader_job = models.ForeignKey(DownloaderJob, on_delete=models.CASCADE) + + class Meta: + db_table = "downloader_jobs_to_batches" diff --git a/data_models/data_refinery_models/models/organism.py b/data_models/data_refinery_models/models/organism.py new file mode 100644 index 000000000..a81d7921a --- /dev/null +++ b/data_models/data_refinery_models/models/organism.py @@ -0,0 +1,129 @@ +import requests +from xml.etree import ElementTree +from django.db import models +from data_refinery_models.models.base_models import TimeTrackedModel + +# Import and set logger +import logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +NCBI_ROOT_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +ESEARCH_URL = NCBI_ROOT_URL + "esearch.fcgi" +EFETCH_URL = NCBI_ROOT_URL + "efetch.fcgi" +TAXONOMY_DATABASE = "taxonomy" + + +class UnscientificNameError(BaseException): + pass + + +class InvalidNCBITaxonomyId(BaseException): + pass + + +def get_scientific_name(taxonomy_id: int) -> str: + parameters = {"db": TAXONOMY_DATABASE, "id": str(taxonomy_id)} + response = requests.get(EFETCH_URL, parameters) + + root = ElementTree.fromstring(response.text) + taxon_list = root.findall("Taxon") + + if len(taxon_list) == 0: + logger.error("No names returned by ncbi.nlm.nih.gov for organism " + + "with taxonomy ID %d.", + taxonomy_id) + raise InvalidNCBITaxonomyId + + return taxon_list[0].find("ScientificName").text.upper() + + +def get_taxonomy_id(organism_name: str) -> int: + parameters = {"db": TAXONOMY_DATABASE, "term": organism_name} + response = requests.get(ESEARCH_URL, parameters) + + root = ElementTree.fromstring(response.text) + id_list = root.find("IdList").findall("Id") + + if len(id_list) == 0: + logger.error("Unable to retrieve NCBI taxonomy ID number for organism " + + "with name: %s", + organism_name) + return 0 + elif len(id_list) > 1: + logger.warn("Organism with name %s returned multiple NCBI taxonomy ID " + + "numbers.", + organism_name) + + return int(id_list[0].text) + + +def get_taxonomy_id_scientific(organism_name: str) -> int: + parameters = {"db": TAXONOMY_DATABASE, "field": "scin", "term": organism_name} + response = requests.get(ESEARCH_URL, parameters) + + root = ElementTree.fromstring(response.text) + id_list = root.find("IdList").findall("Id") + + if len(id_list) == 0: + raise UnscientificNameError + elif len(id_list) > 1: + logger.warn("Organism with name %s returned multiple NCBI taxonomy ID " + + "numbers.", + organism_name) + + return int(id_list[0].text) + + +class Organism(TimeTrackedModel): + """Provides a lookup 
between organism name and taxonomy ids. + + Should only be used via the two class methods get_name_for_id and + get_id_for_name. These methods will populate the database table + with any missing values by accessing the NCBI API. + """ + + name = models.CharField(max_length=256) + taxonomy_id = models.IntegerField() + is_scientific_name = models.BooleanField(default=False) + + @classmethod + def get_name_for_id(cls, taxonomy_id: int) -> str: + try: + organism = (cls.objects + .filter(taxonomy_id=taxonomy_id) + .order_by("-is_scientific_name") + [0]) + except IndexError: + name = get_scientific_name(taxonomy_id).upper() + organism = Organism(name=name, + taxonomy_id=taxonomy_id, + is_scientific_name=True) + organism.save() + + return organism.name + + @classmethod + def get_id_for_name(cls, name: str) -> id: + name = name.upper() + try: + organism = (cls.objects + .filter(name=name) + [0]) + except IndexError: + is_scientific_name = False + try: + taxonomy_id = get_taxonomy_id_scientific(name) + is_scientific_name = True + except UnscientificNameError: + taxonomy_id = get_taxonomy_id(name) + + organism = Organism(name=name, + taxonomy_id=taxonomy_id, + is_scientific_name=is_scientific_name) + organism.save() + + return organism.taxonomy_id + + class Meta: + db_table = "organisms" diff --git a/data_models/data_refinery_models/models/surveys.py b/data_models/data_refinery_models/models/surveys.py new file mode 100644 index 000000000..80e5d8f74 --- /dev/null +++ b/data_models/data_refinery_models/models/surveys.py @@ -0,0 +1,40 @@ +from django.db import models +from data_refinery_models.models.base_models import TimeTrackedModel + + +class SurveyJob(TimeTrackedModel): + """Records information about a Surveyor Job.""" + + source_type = models.CharField(max_length=256) + success = models.NullBooleanField(null=True) + + # The start time of the query used to replicate + replication_started_at = models.DateTimeField(null=True) + + # The end time of the query used to replicate + replication_ended_at = models.DateTimeField(null=True) + + # The start time of the job + start_time = models.DateTimeField(null=True) + + # The end time of the job + end_time = models.DateTimeField(null=True) + + class Meta: + db_table = "survey_jobs" + + +class SurveyJobKeyValue(TimeTrackedModel): + """Tracks additional fields for SurveyJobs. + + Useful for fields that would be sparsely populated if they were + their own columns. I.e. one source may have an extra field or two + that are worth tracking but are specific to that source. + """ + + survey_job = models.ForeignKey(SurveyJob, on_delete=models.CASCADE) + key = models.CharField(max_length=256) + value = models.CharField(max_length=256) + + class Meta: + db_table = "survey_job_key_values" diff --git a/data_models/data_refinery_models/models/test_organisms.py b/data_models/data_refinery_models/models/test_organisms.py new file mode 100644 index 000000000..f48216393 --- /dev/null +++ b/data_models/data_refinery_models/models/test_organisms.py @@ -0,0 +1,239 @@ +from unittest.mock import Mock, patch, call +from django.test import TestCase +from data_refinery_models.models.organism import ( + Organism, + ESEARCH_URL, + EFETCH_URL, + InvalidNCBITaxonomyId, +) + +ESEARCH_RESPONSE_XML = """ + + + 1 + 1 + 0 + + 9606 + + + + + homo sapiens[Scientific Name] + Scientific Name + 1 + N + + GROUP + + homo sapiens[Scientific Name] +""" + +ESEARCH_NOT_FOUND_XML = """ + + + 0 + 0 + 0 + + + (man[Scientific Name]) + + blah + + + No items found. 
+ +""" + +EFETCH_RESPONSE_XML = """ + + + 9606 + Homo sapiens + + human + man + + authority + Homo sapiens Linnaeus, 1758 + + + 9605 + species + Primates + + 1 + Standard + + + 2 + Vertebrate Mitochondrial + + cellular organisms + + + 131567 + cellular organisms + no rank + + + 1995/02/27 09:24:00 + 2017/02/28 16:38:58 + 1992/05/26 01:00:00 + + +""" + +EFETCH_NOT_FOUND_XML = """ + + +ID list is empty! Possibly it has no correct IDs. +""" + + +def mocked_requests_get(url, parameters): + mock = Mock(ok=True) + if url is not ESEARCH_URL: + mock.text = "This is wrong." + else: + try: + if parameters["field"] is "scin": + mock.text = ESEARCH_NOT_FOUND_XML + else: + mock.text = "This is also wrong." + except KeyError: + mock.text = ESEARCH_RESPONSE_XML + + return mock + + +class OrganismModelTestCase(TestCase): + def tearDown(self): + Organism.objects.all().delete() + + @patch('data_refinery_models.models.organism.requests.get') + def test_cached_names_are_found(self, mock_get): + Organism.objects.create(name="HOMO SAPIENS", + taxonomy_id=9606, + is_scientific_name=True) + + name = Organism.get_name_for_id(9606) + + self.assertEqual(name, "HOMO SAPIENS") + mock_get.assert_not_called() + + @patch('data_refinery_models.models.organism.requests.get') + def test_cached_ids_are_found(self, mock_get): + Organism.objects.create(name="HOMO SAPIENS", + taxonomy_id=9606, + is_scientific_name=True) + + id = Organism.get_id_for_name("Homo Sapiens") + + self.assertEqual(id, 9606) + mock_get.assert_not_called() + + @patch('data_refinery_models.models.organism.requests.get') + def test_uncached_scientific_names_are_found(self, mock_get): + mock_get.return_value = Mock(ok=True) + mock_get.return_value.text = ESEARCH_RESPONSE_XML + + taxonomy_id = Organism.get_id_for_name("Homo Sapiens") + + self.assertEqual(taxonomy_id, 9606) + mock_get.assert_called_once_with( + ESEARCH_URL, + {"db": "taxonomy", "field": "scin", "term": "HOMO%20SAPIENS"} + ) + + # The first call should have stored the organism record in the + # database so this call should not make a request. + mock_get.reset_mock() + new_id = Organism.get_id_for_name("Homo Sapiens") + + self.assertEqual(new_id, 9606) + mock_get.assert_not_called() + + @patch('data_refinery_models.models.organism.requests.get') + def test_uncached_other_names_are_found(self, mock_get): + mock_get.side_effect = mocked_requests_get + + taxonomy_id = Organism.get_id_for_name("Human") + + self.assertEqual(taxonomy_id, 9606) + mock_get.assert_has_calls([ + call(ESEARCH_URL, + {"db": "taxonomy", "field": "scin", "term": "HUMAN"}), + call(ESEARCH_URL, + {"db": "taxonomy", "term": "HUMAN"})]) + + # The first call should have stored the organism record in the + # database so this call should not make a request. + mock_get.reset_mock() + new_id = Organism.get_id_for_name("Human") + + self.assertEqual(new_id, 9606) + mock_get.assert_not_called() + + @patch('data_refinery_models.models.organism.requests.get') + def test_uncached_ids_are_found(self, mock_get): + mock_get.return_value = Mock(ok=True) + mock_get.return_value.text = EFETCH_RESPONSE_XML + + organism_name = Organism.get_name_for_id(9606) + + self.assertEqual(organism_name, "HOMO SAPIENS") + mock_get.assert_called_once_with( + EFETCH_URL, + {"db": "taxonomy", "id": "9606"} + ) + + # The first call should have stored the organism record in the + # database so this call should not make a request. 
+ mock_get.reset_mock() + new_name = Organism.get_name_for_id(9606) + + self.assertEqual(new_name, "HOMO SAPIENS") + mock_get.assert_not_called() + + @patch('data_refinery_models.models.organism.requests.get') + def test_invalid_ids_cause_exceptions(self, mock_get): + mock_get.return_value = Mock(ok=True) + mock_get.return_value.text = EFETCH_NOT_FOUND_XML + + with self.assertRaises(InvalidNCBITaxonomyId): + Organism.get_name_for_id(0) + + @patch('data_refinery_models.models.organism.requests.get') + def test_unfound_names_return_0(self, mock_get): + """If we can't find an NCBI taxonomy ID for an organism name + we can keep things moving for a while without it. + get_taxonomy_id will log an error message which will prompt + a developer to investigate what the organism name that was + unable to be found is. Therefore setting the ID to 0 is the + right thing to do in this case despite not seeming like it. + """ + mock_get.return_value = Mock(ok=True) + mock_get.return_value.text = ESEARCH_NOT_FOUND_XML + + taxonomy_id = Organism.get_id_for_name("blah") + + self.assertEqual(taxonomy_id, 0) + mock_get.assert_has_calls([ + call(ESEARCH_URL, + {"db": "taxonomy", "field": "scin", "term": "BLAH"}), + call(ESEARCH_URL, + {"db": "taxonomy", "term": "BLAH"})]) + + # The first call should have stored the organism record in the + # database so this call should not make a request. + mock_get.reset_mock() + new_id = Organism.get_id_for_name("BLAH") + + self.assertEqual(new_id, 0) + mock_get.assert_not_called() diff --git a/data_models/data_refinery_models/tests.py b/data_models/data_refinery_models/models/test_time_tracked_models.py similarity index 100% rename from data_models/data_refinery_models/tests.py rename to data_models/data_refinery_models/models/test_time_tracked_models.py diff --git a/data_models/requirements.in b/data_models/requirements.in index 08088127e..48852bf73 100644 --- a/data_models/requirements.in +++ b/data_models/requirements.in @@ -1,2 +1,3 @@ django psycopg2 +requests diff --git a/data_models/requirements.txt b/data_models/requirements.txt index 380d9d088..a30db3f5a 100644 --- a/data_models/requirements.txt +++ b/data_models/requirements.txt @@ -6,3 +6,4 @@ # django==1.10.6 psycopg2==2.7.1 +requests==2.13.0 diff --git a/foreman/data_refinery_foreman/surveyor/array_express.py b/foreman/data_refinery_foreman/surveyor/array_express.py index 5cebee15d..7f32ca60e 100644 --- a/foreman/data_refinery_foreman/surveyor/array_express.py +++ b/foreman/data_refinery_foreman/surveyor/array_express.py @@ -1,10 +1,11 @@ import requests from typing import List + from data_refinery_models.models import ( Batch, BatchKeyValue, - SurveyJob, - SurveyJobKeyValue + SurveyJobKeyValue, + Organism ) from data_refinery_foreman.surveyor.external_source import ( ExternalSourceSurveyor, @@ -16,68 +17,111 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +EXPERIMENTS_URL = "https://www.ebi.ac.uk/arrayexpress/json/v3/experiments/" +SAMPLES_URL = EXPERIMENTS_URL + "{}/samples" -class ArrayExpressSurveyor(ExternalSourceSurveyor): - # Files API endpoint for ArrayExpress - FILES_URL = "http://www.ebi.ac.uk/arrayexpress/json/v2/files" - DOWNLOADER_TASK = "data_refinery_workers.downloaders.array_express.download_array_express" +class ArrayExpressSurveyor(ExternalSourceSurveyor): def source_type(self): return "ARRAY_EXPRESS" def downloader_task(self): - return self.DOWNLOADER_TASK + return "data_refinery_workers.downloaders.array_express.download_array_express" def 
determine_pipeline(self, batch: Batch, key_values: List[BatchKeyValue] = []): return ProcessorPipeline.AFFY_TO_PCL - def survey(self, survey_job: SurveyJob): - accession_code = (SurveyJobKeyValue - .objects - .filter(survey_job_id=survey_job.id, - key__exact="accession_code") - [:1] - .get() - .value) - parameters = {"raw": "true", "array": accession_code} - - r = requests.get(self.FILES_URL, params=parameters) - response_dictionary = r.json() - - try: - experiments = response_dictionary["files"]["experiment"] - except KeyError: # If the platform does not exist or has no files... - logger.info( - "No files were found with this platform accession code: %s", - accession_code - ) - return True - - logger.info("Found %d new experiments for Survey Job #%d.", - len(experiments), - survey_job.id) - - for experiment in experiments: - data_files = experiment["file"] - - # If there is only one file object in data_files, - # ArrayExpress does not put it in a list of size 1 - if (type(data_files) != list): - data_files = [data_files] - - for data_file in data_files: - if (data_file["kind"] == "raw"): - url = data_file["url"].replace("\\", "") - # This is another place where this is still a POC. - # More work will need to be done to determine some - # of these additional metadata fields. - self.handle_batch(Batch(size_in_bytes=data_file["size"], - download_url=url, - raw_format="MICRO_ARRAY", - processed_format="PCL", - accession_code=accession_code, - organism=1)) - - return True + @staticmethod + def get_experiment_metadata(experiment_accession_code): + experiment_request = requests.get(EXPERIMENTS_URL + experiment_accession_code) + parsed_json = experiment_request.json()["experiments"]["experiment"][0] + + experiment = {} + + experiment["name"] = parsed_json["name"] + experiment["experiment_accession_code"] = experiment_accession_code + + # If there is more than one arraydesign listed in the experiment + # then there is no other way to determine which array was used + # for which sample other than looking at the header of the CEL + # file. That obviously cannot happen until the CEL file has been + # downloaded so we can just mark it as UNKNOWN and let the + # downloader inspect the downloaded file to determine the + # array then. 
+ if len(parsed_json["arraydesign"]) == 0: + logger.warn("Experiment %s has no arraydesign listed.", experiment_accession_code) + experiment["platform_accession_code"] = "UNKNOWN" + elif len(parsed_json["arraydesign"]) > 1: + experiment["platform_accession_code"] = "UNKNOWN" + else: + experiment["platform_accession_code"] = \ + parsed_json["arraydesign"][0]["accession"] + + experiment["release_date"] = parsed_json["releasedate"] + + if "lastupdatedate" in parsed_json: + experiment["last_update_date"] = parsed_json["lastupdatedate"] + else: + experiment["last_update_date"] = parsed_json["releasedate"] + + return experiment + + def survey(self): + experiment_accession_code = ( + SurveyJobKeyValue + .objects + .get(survey_job_id=self.survey_job.id, + key__exact="experiment_accession_code") + .value + ) + + logger.info("Surveying experiment with accession code: %s.", experiment_accession_code) + + experiment = self.get_experiment_metadata(experiment_accession_code) + + r = requests.get(SAMPLES_URL.format(experiment_accession_code)) + samples = r.json()["experiment"]["sample"] + + batches = [] + for sample in samples: + if "file" not in sample: + continue + + organism_name = "UNKNOWN" + for characteristic in sample["characteristic"]: + if characteristic["category"].upper() == "ORGANISM": + organism_name = characteristic["value"].upper() + + if organism_name == "UNKNOWN": + logger.error("Sample from experiment %s did not specify the organism name.", + experiment_accession_code) + organism_id = 0 + else: + organism_id = Organism.get_id_for_name(organism_name) + + for sample_file in sample["file"]: + if sample_file["type"] != "data": + continue + + batches.append(Batch( + size_in_bytes=-1, # Will have to be determined later + download_url=sample_file["comment"]["value"], + raw_format=sample_file["name"].split(".")[-1], + processed_format="PCL", + platform_accession_code=experiment["platform_accession_code"], + experiment_accession_code=experiment_accession_code, + organism_id=organism_id, + organism_name=organism_name, + experiment_title=experiment["name"], + release_date=experiment["release_date"], + last_uploaded_date=experiment["last_update_date"], + name=sample_file["name"] + )) + + # Group batches based on their download URL and handle each group. + download_urls = {batch.download_url for batch in batches} + for url in download_urls: + batches_with_url = [batch for batch in batches if batch.download_url == url] + self.handle_batches(batches_with_url) diff --git a/foreman/data_refinery_foreman/surveyor/external_source.py b/foreman/data_refinery_foreman/surveyor/external_source.py index 62bca47cb..0be944435 100644 --- a/foreman/data_refinery_foreman/surveyor/external_source.py +++ b/foreman/data_refinery_foreman/surveyor/external_source.py @@ -3,13 +3,16 @@ from enum import Enum from typing import List from retrying import retry +from django.db import transaction from data_refinery_models.models import ( Batch, BatchStatuses, - BatchKeyValue, DownloaderJob, + DownloaderJobsToBatches, SurveyJob ) +from data_refinery_foreman.surveyor.message_queue import app + # Import and set logger import logging @@ -25,7 +28,8 @@ class PipelineEnums(Enum): """An abstract class to enumerate valid processor pipelines. Enumerations which extend this class are valid values for the - pipeline_required field of the Batches table.""" + pipeline_required field of the Batches table. 
+ """ pass @@ -35,8 +39,7 @@ class ProcessorPipeline(PipelineEnums): class DiscoveryPipeline(PipelineEnums): - """Pipelines which discover what kind of processing is appropriate - for the data.""" + """Pipelines which discover appropriate processing for the data.""" pass @@ -52,56 +55,69 @@ def source_type(self): @abc.abstractproperty def downloader_task(self): - """This property should return the Celery Downloader Task name - from the data_refinery_workers project which should be queued - to download Batches discovered by this surveyor.""" + """Abstract property representing the downloader task. + + Should return the Celery Downloader Task name from the + data_refinery_workers project which should be queued to + download Batches discovered by this surveyor. + """ return @abc.abstractmethod def determine_pipeline(self, - batch: Batch, - key_values: List[BatchKeyValue] = []): - """Determines the appropriate processor pipeline for the batch - and returns a string that represents a processor pipeline. - Must return a member of PipelineEnums.""" + batch: Batch): + """Determines the appropriate pipeline for the batch. + + Returns a string that represents a processor pipeline. + Must return a member of PipelineEnums. + """ return - def handle_batch(self, batch: Batch, key_values: BatchKeyValue = None): - batch.survey_job = self.survey_job - batch.source_type = self.source_type() - batch.status = BatchStatuses.NEW.value + def handle_batches(self, batches: List[Batch]): + for batch in batches: + batch.survey_job = self.survey_job + batch.source_type = self.source_type() + batch.status = BatchStatuses.NEW.value - pipeline_required = self.determine_pipeline(batch, key_values) - if (pipeline_required is DiscoveryPipeline) or batch.processed_format: - batch.pipeline_required = pipeline_required.value - else: - message = ("Batches must have the processed_format field set " - "unless the pipeline returned by determine_pipeline " - "is of the type DiscoveryPipeline.") - raise InvalidProcessedFormatError(message) + pipeline_required = self.determine_pipeline(batch) + if (pipeline_required is DiscoveryPipeline) or batch.processed_format: + batch.pipeline_required = pipeline_required.value + else: + message = ("Batches must have the processed_format field set " + "unless the pipeline returned by determine_pipeline " + "is of the type DiscoveryPipeline.") + raise InvalidProcessedFormatError(message) - batch.internal_location = os.path.join(batch.accession_code, - batch.pipeline_required) + batch.internal_location = os.path.join(batch.platform_accession_code, + batch.pipeline_required) @retry(stop_max_attempt_number=3) - def save_batch_start_job(): - batch.save() - downloader_job = DownloaderJob(batch=batch) + @transaction.atomic + def save_batches_start_job(): + downloader_job = DownloaderJob() downloader_job.save() - self.downloader_task().delay(downloader_job.id) + + for batch in batches: + batch.save() + downloader_job_to_batch = DownloaderJobsToBatches(batch=batch, + downloader_job=downloader_job) + downloader_job_to_batch.save() + + app.send_task(self.downloader_task(), args=[downloader_job.id]) try: - save_batch_start_job() - except Exception as e: - logger.error("Failed to save batch to database three times " - + "because error: %s. Terminating survey job #%d.", - type(e).__name__, - self.survey_job.id) + save_batches_start_job() + except Exception: + logger.exception(("Failed to save batches to database three times. 
" + "Terminating survey job #%d."), + self.survey_job.id) raise @abc.abstractmethod - def survey(self, survey_job: SurveyJob): - """Implementations of this function should do the following: + def survey(self): + """Abstract method to survey a source. + + Implementations of this method should do the following: 1. Query the external source to discover batches that should be downloaded. 2. Create a Batch object for each discovered batch and optionally diff --git a/foreman/data_refinery_foreman/surveyor/management/commands/start.py b/foreman/data_refinery_foreman/surveyor/management/commands/start.py deleted file mode 100644 index 7bc5e4a89..000000000 --- a/foreman/data_refinery_foreman/surveyor/management/commands/start.py +++ /dev/null @@ -1,7 +0,0 @@ -from django.core.management.base import BaseCommand -from data_refinery_foreman.surveyor import surveyor - - -class Command(BaseCommand): - def handle(self, *args, **options): - surveyor.test() diff --git a/foreman/data_refinery_foreman/surveyor/management/commands/survey.py b/foreman/data_refinery_foreman/surveyor/management/commands/survey.py new file mode 100644 index 000000000..42de9d798 --- /dev/null +++ b/foreman/data_refinery_foreman/surveyor/management/commands/survey.py @@ -0,0 +1,31 @@ +""" +This command will create and run survey jobs for each experiment in the +experiment_list. experiment list should be a file containing one +experiment accession code per line. +""" + +from django.core.management.base import BaseCommand +from data_refinery_foreman.surveyor import surveyor + +# Import and set logger +import logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "experiment_list", + help=("A file containing a list of experiment accession codes to " + "survey, download, and process. These should be listed one " + "per line. 
Should be a path relative to the foreman " + "directory.")) + + def handle(self, *args, **options): + if options["experiment_list"] is None: + logger.error("You must specify an experiment list.") + return 1 + else: + surveyor.survey_experiments(options["experiment_list"]) + return 0 diff --git a/foreman/data_refinery_foreman/surveyor/surveyor.py b/foreman/data_refinery_foreman/surveyor/surveyor.py index af7b4a49f..5823da04a 100644 --- a/foreman/data_refinery_foreman/surveyor/surveyor.py +++ b/foreman/data_refinery_foreman/surveyor/surveyor.py @@ -57,7 +57,7 @@ def run_job(survey_job: SurveyJob): return survey_job try: - job_success = surveyor.survey(survey_job) + job_success = surveyor.survey() except Exception as e: logger.error("Exception caught while running job #%d with message: %s", survey_job.id, @@ -73,8 +73,20 @@ def test(): survey_job = SurveyJob(source_type="ARRAY_EXPRESS") survey_job.save() key_value_pair = SurveyJobKeyValue(survey_job=survey_job, - key="accession_code", - value="A-AFFY-1") + key="experiment_accession_code", + value="E-MTAB-3050") key_value_pair.save() run_job(survey_job) return + + +def survey_experiments(experiments_list_file): + with open(experiments_list_file, "r") as experiments: + for experiment in experiments: + survey_job = SurveyJob(source_type="ARRAY_EXPRESS") + survey_job.save() + key_value_pair = SurveyJobKeyValue(survey_job=survey_job, + key="experiment_accession_code", + value=experiment.rstrip()) + key_value_pair.save() + run_job(survey_job) diff --git a/foreman/data_refinery_foreman/surveyor/test_array_express.py b/foreman/data_refinery_foreman/surveyor/test_array_express.py index 8c7f28d7d..7cb26d9cb 100644 --- a/foreman/data_refinery_foreman/surveyor/test_array_express.py +++ b/foreman/data_refinery_foreman/surveyor/test_array_express.py @@ -1,151 +1,298 @@ import json +import datetime from unittest.mock import Mock, patch from django.test import TestCase from data_refinery_models.models import ( Batch, + DownloaderJob, SurveyJob, - SurveyJobKeyValue + SurveyJobKeyValue, + Organism +) +from data_refinery_foreman.surveyor.array_express import ( + ArrayExpressSurveyor, + EXPERIMENTS_URL, ) -from data_refinery_foreman.surveyor.array_express import ArrayExpressSurveyor - - -class MockTask(): - def delay(self, id): - return True -class SurveyTestCase(TestCase): - experiments_json = """ - { - "files": { +EXPERIMENTS_JSON = """ +{ + "experiments": { "api-revision": "091015", - "api-version": 2, + "api-version": 3, "experiment": [ { "accession": "E-MTAB-3050", + "arraydesign": [ + { + "accession": "A-AFFY-1", + "count": 5, + "id": 11048, + "legacy_id": 5728564, + "name": "Affymetrix GeneChip Human U95Av2 [HG_U95Av2]" + } + ], + "bioassaydatagroup": [ + { + "arraydesignprovider": null, + "bioassaydatacubes": 5, + "bioassays": 5, + "dataformat": "rawData", + "id": null, + "isderived": 0, + "name": "rawData" + } + ], + "description": [ + { + "id": null, + "text": "description tex" + } + ], + "experimentalvariable": [ + { + "name": "cell type", + "value": [ + "differentiated", + "expanded", + "freshly isolated" + ] + } + ], + "experimentdesign": [ + "cell type comparison design", + "development or differentiation design" + ], + "experimenttype": [ + "transcription profiling by array" + ], + "id": 511696, + "lastupdatedate": "2014-10-30", + "name": "Microarray analysis of in vitro differentiation", + "organism": [ + "Homo sapiens" + ], + "protocol": [ + { + "accession": "P-MTAB-41859", + "id": 1092859 + } + ], + "provider": [ + { + "contact": "Joel 
Habener", + "email": "jhabener@partners.org", + "role": "submitter" + } + ], + "releasedate": "2014-10-31", + "samplecharacteristic": [ + { + "category": "age", + "value": [ + "38 year", + "54 year" + ] + } + ] + } + ], + "revision": "091015", + "total": 1, + "total-assays": 5, + "total-samples": 2, + "version": 3.0 + } +} """ + +SAMPLES_JSON = """ +{ + "experiment": { + "accession": "E-MTAB-3050", + "api-revision": "091015", + "api-version": 3, + "revision": "091015", + "sample": [ + { + "assay": { + "name": "1007409-C30057" + }, + "characteristic": [ + { + "category": "organism", + "value": "Homo sapiens" + } + ], + "extract": { + "name": "donor A islets RNA" + }, "file": [ { - "extension": "zip", - "kind": "raw", - "lastmodified": "2014-10-30T10:15:00", - "location": "E-MTAB-3050.raw.1.zip", - "name": "E-MTAB-3050.raw.1.zip", - "size": 14876114, - "url": - "http://www.ebi.ac.uk/arrayexpress/files/E-MTAB-3050/E-MTAB-3050.raw.1.zip" + "comment": { + "name": "ArrayExpress FTP file", + "value": "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3050/E-MTAB-3050.raw.1.zip" + }, + "name": "C30057.CEL", + "type": "data", + "url": "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3050/E-MTAB-3050.raw.1.zip/C30057.CEL" }, { - "extension": "xls", - "kind": "adf", - "lastmodified": "2010-03-14T02:31:00", - "location": "A-AFFY-1.adf.xls", - "name": "A-AFFY-1.adf.xls", - "size": 2040084, - "url": - "http://www.ebi.ac.uk/arrayexpress/files/A-AFFY-1/A-AFFY-1.adf.xls" + "comment": { + "name": "Derived ArrayExpress FTP file", + "value": "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3050/E-MTAB-3050.processed.1.zip" + }, + "name": "C30057.txt", + "type": "derived data", + "url": "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3050/E-MTAB-3050.processed.1.zip/C30057.txt" + } + ], + "labeled-extract": { + "label": "biotin", + "name": "donor A islets LEX" + }, + "source": { + "name": "donor A islets" + }, + "variable": [ + { + "name": "cell type", + "value": "freshly isolated" } ] }, { - "accession": "E-MTAB-3042", + "assay": { + "name": "1007409-C30058" + }, + "characteristic": [ + { + "category": "organism", + "value": "Homo sapiens" + } + ], + "extract": { + "name": "donor A expanded cells RNA" + }, "file": [ { - "extension": "txt", - "kind": "idf", - "lastmodified": "2014-10-28T10:15:00", - "location": "E-MTAB-3042.idf.txt", - "name": "E-MTAB-3042.idf.txt", - "size": 5874, - "url": - "http://www.ebi.ac.uk/arrayexpress/files/E-MTAB-3042/E-MTAB-3042.idf.txt" + "comment": { + "name": "ArrayExpress FTP file", + "value": "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3050/E-MTAB-3050.raw.1.zip" + }, + "name": "C30058.CEL", + "type": "data", + "url": "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3050/E-MTAB-3050.raw.1.zip/C30058.CEL" }, { - "extension": "zip", - "kind": "raw", - "lastmodified": "2014-10-28T10:15:00", - "location": "E-MTAB-3042.raw.1.zip", - "name": "E-MTAB-3042.raw.1.zip", - "size": 5525709, - "url": - "http://www.ebi.ac.uk/arrayexpress/files/E-MTAB-3042/E-MTAB-3042.raw.1.zip" + "comment": { + "name": "Derived ArrayExpress FTP file", + "value": "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3050/E-MTAB-3050.processed.1.zip" + }, + "name": "C30058.txt", + "type": "derived data", + "url": "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3050/E-MTAB-3050.processed.1.zip/C30058.txt" + } + ], 
+ "labeled-extract": { + "label": "biotin", + "name": "donor A expanded cells LEX" + }, + "source": { + "name": "donor A islets" + }, + "variable": [ + { + "name": "cell type", + "value": "expanded" } ] } ], - "revision": 130311, - "total-experiments": 108, - "version": 1.2 + "version": 1.0 } -} -""" +}""" # noqa + + +def mocked_requests_get(url): + mock = Mock(ok=True) + if url == (EXPERIMENTS_URL + "E-MTAB-3050"): + mock.json.return_value = json.loads(EXPERIMENTS_JSON) + else: + mock.json.return_value = json.loads(SAMPLES_JSON) + return mock + + +class SurveyTestCase(TestCase): def setUp(self): survey_job = SurveyJob(source_type="ARRAY_EXPRESS") survey_job.save() self.survey_job = survey_job key_value_pair = SurveyJobKeyValue(survey_job=survey_job, - key="accession_code", - value="A-AFFY-1") + key="experiment_accession_code", + value="E-MTAB-3050") key_value_pair.save() + # Insert the organism into the database so the model doesn't call the + # taxonomy API to populate it. + organism = Organism(name="HOMO SAPIENS", + taxonomy_id=9606, + is_scientific_name=True) + organism.save() + def tearDown(self): SurveyJob.objects.all().delete() SurveyJobKeyValue.objects.all().delete() Batch.objects.all().delete - @patch("data_refinery_foreman.surveyor.array_express.requests.get") - @patch("data_refinery_foreman.surveyor.array_express.ArrayExpressSurveyor.downloader_task") # noqa - def test_multiple_experiements(self, mock_task, mock_get): - """Multiple experiments are turned into multiple batches""" + @patch('data_refinery_foreman.surveyor.array_express.requests.get') + def test_experiment_object(self, mock_get): + """The get_experiment_metadata function extracts all experiment metadata + from the experiments API.""" mock_get.return_value = Mock(ok=True) - mock_get.return_value.json.return_value = json.loads( - self.experiments_json) - mock_task.return_value = MockTask() + mock_get.return_value.json.return_value = json.loads(EXPERIMENTS_JSON) - ae_surveyor = ArrayExpressSurveyor(self.survey_job) - self.assertTrue(ae_surveyor.survey(self.survey_job)) - self.assertEqual(2, Batch.objects.all().count()) - - # Note that this json has the `file` key mapped to a dictionary - # instead of a list. This is behavior exhibited by the API - # and is being tested on purpose here. 
- experiment_json = """ - { - "files": { - "api-revision": "091015", - "api-version": 2, - "experiment": [ - { - "accession": "E-MTAB-3050", - "file": { - "extension": "zip", - "kind": "raw", - "lastmodified": "2014-10-30T10:15:00", - "location": "E-MTAB-3050.raw.1.zip", - "name": "E-MTAB-3050.raw.1.zip", - "size": 14876114, - "url": - "http://www.ebi.ac.uk/arrayexpress/files/E-MTAB-3050/E-MTAB-3050.raw.1.zip" - } - } - ], - "revision": 130311, - "total-experiments": 108, - "version": 1.2 - } -} -""" + experiment = ArrayExpressSurveyor.get_experiment_metadata("E-MTAB-3050") + self.assertEqual("Microarray analysis of in vitro differentiation", experiment["name"]) + self.assertEqual("E-MTAB-3050", experiment["experiment_accession_code"]) + self.assertEqual("A-AFFY-1", experiment["platform_accession_code"]) + self.assertEqual("2014-10-31", experiment["release_date"]) + self.assertEqual("2014-10-30", experiment["last_update_date"]) @patch('data_refinery_foreman.surveyor.array_express.requests.get') - @patch("data_refinery_foreman.surveyor.array_express.ArrayExpressSurveyor.downloader_task") # noqa - def test_single_experiment(self, mock_task, mock_get): - """A single experiment is turned into a single batch.""" - mock_get.return_value = Mock(ok=True) - mock_get.return_value.json.return_value = json.loads( - self.experiment_json) - mock_task.return_value = MockTask() + @patch('data_refinery_foreman.surveyor.message_queue.app.send_task') + def test_survey(self, mock_send_task, mock_get): + """survey generates one Batch per sample with all possible fields populated. + This test also tests the handle_batches method of ExternalSourceSurveyor + which isn't tested on its own because it is an abstract class.""" + mock_send_task.return_value = Mock(ok=True) + mock_get.side_effect = mocked_requests_get ae_surveyor = ArrayExpressSurveyor(self.survey_job) - self.assertTrue(ae_surveyor.survey(self.survey_job)) - self.assertEqual(1, Batch.objects.all().count()) + ae_surveyor.survey() + + self.assertEqual(2, len(mock_send_task.mock_calls)) + batches = Batch.objects.all() + self.assertEqual(2, len(batches)) + downloader_jobs = DownloaderJob.objects.all() + self.assertEqual(2, len(downloader_jobs)) + + batch = batches[0] + self.assertEqual(batch.survey_job.id, self.survey_job.id) + self.assertEqual(batch.source_type, "ARRAY_EXPRESS") + self.assertEqual(batch.size_in_bytes, -1) + self.assertEqual(batch.download_url, "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3050/E-MTAB-3050.raw.1.zip/C30057.CEL") # noqa + self.assertEqual(batch.raw_format, "CEL") + self.assertEqual(batch.processed_format, "PCL") + self.assertEqual(batch.pipeline_required, "AFFY_TO_PCL") + self.assertEqual(batch.platform_accession_code, "A-AFFY-1") + self.assertEqual(batch.experiment_accession_code, "E-MTAB-3050") + self.assertEqual(batch.experiment_title, "Microarray analysis of in vitro differentiation") + self.assertEqual(batch.status, "NEW") + self.assertEqual(batch.release_date, datetime.date(2014, 10, 31)) + self.assertEqual(batch.last_uploaded_date, datetime.date(2014, 10, 30)) + self.assertEqual(batch.name, "C30057.CEL") + self.assertEqual(batch.internal_location, "A-AFFY-1/AFFY_TO_PCL/") + self.assertEqual(batch.organism_id, 9606) + self.assertEqual(batch.organism_name, "HOMO SAPIENS") diff --git a/foreman/data_refinery_foreman/surveyor/test_surveyor.py b/foreman/data_refinery_foreman/surveyor/test_surveyor.py index 359496b8d..4662a03b8 100644 --- a/foreman/data_refinery_foreman/surveyor/test_surveyor.py 
+++ b/foreman/data_refinery_foreman/surveyor/test_surveyor.py @@ -26,7 +26,7 @@ def test_calls_survey(self, survey_method): job.save() surveyor.run_job(job) - survey_method.assert_called_with(job) + survey_method.assert_called() self.assertIsInstance(job.replication_ended_at, datetime.datetime) self.assertIsInstance(job.start_time, datetime.datetime) self.assertIsInstance(job.end_time, datetime.datetime) diff --git a/foreman/requirements.in b/foreman/requirements.in index 4095eb400..61c1f238e 100644 --- a/foreman/requirements.in +++ b/foreman/requirements.in @@ -2,4 +2,5 @@ django celery psycopg2 requests +GEOparse retrying diff --git a/foreman/requirements.txt b/foreman/requirements.txt index 60a1060f3..40caa0728 100644 --- a/foreman/requirements.txt +++ b/foreman/requirements.txt @@ -8,10 +8,15 @@ amqp==2.1.4 # via kombu billiard==3.5.0.2 # via celery celery==4.0.2 django==1.10.6 +GEOparse==0.1.10 kombu==4.0.2 # via celery +numpy==1.12.1 # via geoparse, pandas +pandas==0.19.2 # via geoparse psycopg2==2.7.1 -pytz==2016.10 # via celery +python-dateutil==2.6.0 # via pandas +pytz==2017.2 # via celery, django, pandas requests==2.13.0 retrying==1.3.3 -six==1.10.0 # via retrying +six==1.10.0 # via python-dateutil, retrying vine==1.1.3 # via amqp +wgetter==0.6 # via geoparse diff --git a/foreman/run_surveyor.sh b/foreman/run_surveyor.sh index d941d228f..e88b381f0 100755 --- a/foreman/run_surveyor.sh +++ b/foreman/run_surveyor.sh @@ -11,12 +11,20 @@ cd $script_directory # move up a level cd .. +# Set up the data volume directory if it does not already exist +volume_directory="$script_directory/volume" +if [ ! -d "$volume_directory" ]; then + mkdir $volume_directory + chmod 775 $volume_directory +fi + docker build -t dr_foreman -f foreman/Dockerfile . HOST_IP=$(ip route get 8.8.8.8 | awk '{print $NF; exit}') docker run \ - --link some-rabbit:rabbit \ + --link message-queue:rabbit \ --add-host=database:$HOST_IP \ --env-file foreman/environments/dev \ - dr_foreman + --volume $volume_directory:/home/user/data_store \ + dr_foreman survey "$@" diff --git a/requirements.in b/requirements.in index bf848a288..71a4ddceb 100644 --- a/requirements.in +++ b/requirements.in @@ -2,3 +2,5 @@ django psycopg2 pip-tools autopep8 +flake8 +requests diff --git a/requirements.txt b/requirements.txt index a5296e1de..6d68f38b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,11 @@ autopep8==1.3.1 click==6.7 # via pip-tools django==1.10.6 first==2.0.1 # via pip-tools +flake8==3.3.0 +mccabe==0.6.1 # via flake8 pip-tools==1.9.0 psycopg2==2.7.1 -pycodestyle==2.3.1 # via autopep8 +pycodestyle==2.3.1 # via autopep8, flake8 +pyflakes==1.5.0 # via flake8 +requests==2.13.0 six==1.10.0 # via pip-tools diff --git a/run_rabbitmq.sh b/run_rabbitmq.sh new file mode 100755 index 000000000..7477b963d --- /dev/null +++ b/run_rabbitmq.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +docker run -d --hostname rabbit-queue --name message-queue rabbitmq:3 diff --git a/run_shell.sh b/run_shell.sh index 3e45fdf24..557a50703 100755 --- a/run_shell.sh +++ b/run_shell.sh @@ -5,21 +5,30 @@ # By default the Docker container will be for the foreman project. # This can be changed by modifying the --env-file command line arg, # changing foreman/Dockerfile to the appropriate Dockerfile, +# changing the volume_directory path, # and by modifying the Dockerfile.shell file appropriately. # This script should always run as if it were being called from # the directory it lives in. 
-script_directory=`dirname "${BASH_SOURCE[0]}"` +script_directory=`cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd` cd $script_directory +# Set up the data volume directory if it does not already exist +volume_directory="$script_directory/foreman/volume" +if [ ! -d "$volume_directory" ]; then + mkdir $volume_directory + chmod 775 $volume_directory +fi + docker build -t dr_shell -f foreman/Dockerfile . HOST_IP=$(ip route get 8.8.8.8 | awk '{print $NF; exit}') docker run \ - --link some-rabbit:rabbit \ + --link message-queue:rabbit \ --add-host=database:$HOST_IP \ --env-file foreman/environments/dev \ --volume /tmp:/tmp \ + --volume $volume_directory:/home/user/data_store \ --entrypoint ./manage.py \ --interactive dr_shell shell diff --git a/schema.png b/schema.png new file mode 100644 index 000000000..d4e22c16b Binary files /dev/null and b/schema.png differ diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..2be2471d9 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,5 @@ +[pep8] +max-line-length: 99 + +[flake8] +max-line-length: 99 diff --git a/workers/Dockerfile b/workers/Dockerfile index 4a37b858d..b696681e5 100644 --- a/workers/Dockerfile +++ b/workers/Dockerfile @@ -18,4 +18,4 @@ COPY workers/ . USER user -ENTRYPOINT ["celery", "-l", "info", "-A", "data_refinery_workers", "-c", "1", "worker"] +CMD ["celery", "-l", "info", "-A", "data_refinery_workers", "-c", "1", "worker"] diff --git a/workers/Dockerfile.tests b/workers/Dockerfile.tests new file mode 100644 index 000000000..3c55740aa --- /dev/null +++ b/workers/Dockerfile.tests @@ -0,0 +1,23 @@ +FROM continuumio/miniconda3:4.3.11 + +RUN groupadd user && useradd --create-home --home-dir /home/user -g user user +WORKDIR /home/user + +COPY workers/environment.yml . + +RUN conda env create -f environment.yml + +ENV PATH "/opt/conda/envs/workers/bin:$PATH" + +COPY data_models/dist/data-refinery-models-* data_models/ + +# Get the latest version from the dist directory. +RUN pip install data_models/$(ls data_models -1 | tail -1) + +COPY workers/ . 
+
+USER user
+
+ENTRYPOINT ["python3.6", "manage.py"]
+
+CMD ["test", "--no-input"]
diff --git a/workers/data_refinery_workers/downloaders/array_express.py b/workers/data_refinery_workers/downloaders/array_express.py
index e88d9e7bf..f51179ddc 100644
--- a/workers/data_refinery_workers/downloaders/array_express.py
+++ b/workers/data_refinery_workers/downloaders/array_express.py
@@ -1,10 +1,19 @@
 from __future__ import absolute_import, unicode_literals
-import requests
+import urllib.request
+import os
+import shutil
+import zipfile
+from typing import List
+from contextlib import closing
 from celery import shared_task
 from celery.utils.log import get_task_logger
 from django.core.exceptions import ObjectDoesNotExist
-from data_refinery_models.models import Batch, DownloaderJob
+from data_refinery_models.models import (
+    Batch,
+    DownloaderJob,
+    DownloaderJobsToBatches
+)
 from data_refinery_workers.downloaders import utils
 
 logger = get_task_logger(__name__)
 
@@ -12,8 +21,47 @@
 CHUNK_SIZE = 1024 * 256
 
 
+def _verify_batch_grouping(batches: List[Batch], job_id):
+    """All batches in the same job should have the same downloader url."""
+    for batch in batches:
+        if batch.download_url != batches[0].download_url:
+            logger.error(("A Batch doesn't have the same download URL as the other batches"
+                          " in downloader job #%d."),
+                         job_id)
+            raise ValueError("A batch doesn't have the same download url as other batches.")
+
+
+def _download_file(download_url, file_path, job_id):
+    try:
+        logger.debug("Downloading file from %s to %s. (Job #%d)",
+                     download_url,
+                     file_path,
+                     job_id)
+        with open(file_path, "wb") as target_file:
+            with closing(urllib.request.urlopen(download_url)) as request:
+                shutil.copyfileobj(request, target_file, CHUNK_SIZE)
+    except Exception:
+        logger.exception("Exception caught while running Job #%d.",
+                         job_id)
+        raise
+
+
+def _extract_file(file_path, job_id):
+    try:
+        with zipfile.ZipFile(file_path, 'r') as zip_ref:
+            zip_ref.extractall(os.path.dirname(file_path))
+    except Exception:
+        logger.exception("Exception caught while extracting %s during Job #%d.",
+                         file_path,
+                         job_id)
+        raise
+    finally:
+        os.remove(file_path)
+
+
 @shared_task
 def download_array_express(job_id):
+    logger.debug("Starting job with id: %s.", job_id)
     try:
         job = DownloaderJob.objects.get(id=job_id)
     except ObjectDoesNotExist:
@@ -23,44 +71,34 @@ def download_array_express(job_id):
     success = True
     utils.start_job(job)
 
-    try:
-        batch = Batch.objects.get(id=job.batch_id)
-    except ObjectDoesNotExist:
-        logger.error("Cannot find batch record with ID %d.", job.batch_id)
-        utils.end_job(job, None, False)
-        return
+    batch_relations = DownloaderJobsToBatches.objects.filter(downloader_job_id=job_id)
+    batches = [br.batch for br in batch_relations]
 
-    target_file_path = utils.prepare_destination(batch)
+    if len(batches) > 0:
+        target_file_path = utils.prepare_destination(batches[0])
+        download_url = batches[0].download_url
+    else:
+        logger.error("No batches found for job #%d.",
+                     job_id)
+        success = False
 
-    logger.info("Downloading file from %s to %s. 
(Batch #%d, Job #%d)", - batch.download_url, - target_file_path, - batch.id, - job_id) + if success: + try: + _verify_batch_grouping(batches, job_id) - try: - target_file = open(target_file_path, "wb") - request = requests.get(batch.download_url, stream=True) - - for chunk in request.iter_content(CHUNK_SIZE): - if chunk: - target_file.write(chunk) - target_file.flush() - except Exception as e: - success = False - logger.error("Exception caught while running Job #%d for Batch #%d " - + "with message: %s", - job_id, - batch.id, - e) - finally: - target_file.close() - request.close() + # The files for all of the batches in the grouping are + # contained within the same zip file. Therefore only + # download the one. + _download_file(download_url, target_file_path, job_id) + _extract_file(target_file_path, job_id) + except Exception: + # Exceptions are already logged and handled. + # Just need to mark the job as failed. + success = False if success: - logger.info("File %s (Batch #%d) downloaded successfully in Job #%d.", - batch.download_url, - batch.id, - job_id) + logger.debug("File %s downloaded and extracted successfully in Job #%d.", + download_url, + job_id) - utils.end_job(job, batch, success) + utils.end_job(job, batches, success) diff --git a/workers/data_refinery_workers/downloaders/management/commands/queue_downloader.py b/workers/data_refinery_workers/downloaders/management/commands/queue_downloader.py index 856956197..61c20cac0 100644 --- a/workers/data_refinery_workers/downloaders/management/commands/queue_downloader.py +++ b/workers/data_refinery_workers/downloaders/management/commands/queue_downloader.py @@ -8,7 +8,8 @@ SurveyJob, Batch, BatchStatuses, - DownloaderJob + DownloaderJob, + DownloaderJobsToBatches ) from data_refinery_workers.downloaders.array_express \ import download_array_express @@ -33,18 +34,27 @@ def handle(self, *args, **options): survey_job=survey_job, source_type="ARRAY_EXPRESS", size_in_bytes=0, - download_url="http://www.ebi.ac.uk/arrayexpress/files/E-MTAB-3050/E-MTAB-3050.raw.1.zip", # noqa - raw_format="MICRO_ARRAY", + download_url="ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip", # noqa + raw_format="CEL", processed_format="PCL", - pipeline_required="MICRO_ARRAY_TO_PCL", - accession_code="A-AFFY-1", - internal_location="A-AFFY-1/MICRO_ARRAY_TO_PCL/", - organism=1, + pipeline_required="AFFY_TO_PCL", + platform_accession_code="A-AFFY-141", + experiment_accession_code="E-GEOD-59071", + experiment_title="It doesn't really matter.", + name="GSM1426072_CD_colon_active_2.CEL", + internal_location="A-AFFY-141/AFFY_TO_PCL/", + organism_id=9606, + organism_name="HOMO SAPIENS", + release_date="2017-05-05", + last_uploaded_date="2017-05-05", status=BatchStatuses.NEW.value ) batch.save() - downloader_job = DownloaderJob(batch=batch) + downloader_job = DownloaderJob() downloader_job.save() + downloader_job_to_batch = DownloaderJobsToBatches(batch=batch, + downloader_job=downloader_job) + downloader_job_to_batch.save() logger.info("Queuing a task.") download_array_express.delay(downloader_job.id) diff --git a/workers/data_refinery_workers/downloaders/management/commands/queue_task.py b/workers/data_refinery_workers/downloaders/management/commands/queue_task.py deleted file mode 100644 index aedb718ac..000000000 --- a/workers/data_refinery_workers/downloaders/management/commands/queue_task.py +++ /dev/null @@ -1,47 +0,0 @@ -from django.core.management.base import BaseCommand -from data_refinery_models.models 
import ( - SurveyJob, - Batch, - BatchStatuses, - DownloaderJob -) -from data_refinery_workers.downloaders.array_express \ - import download_array_express - - -# Import and set logger -import logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -# Just a temporary way to queue a celery task -# without running the surveyor. -class Command(BaseCommand): - def handle(self, *args, **options): - # Create all the dummy data that would have been created - # before a downloader job could have been generated. - survey_job = SurveyJob( - source_type="ARRAY_EXPRESS" - ) - survey_job.save() - - batch = Batch( - survey_job=survey_job, - source_type="ARRAY_EXPRESS", - size_in_bytes=0, - download_url="http://www.ebi.ac.uk/arrayexpress/files/E-MTAB-3050/E-MTAB-3050.raw.1.zip", # noqa - raw_format="MICRO_ARRAY", - processed_format="PCL", - pipeline_required="MICRO_ARRAY_TO_PCL", - accession_code="A-AFFY-1", - internal_location="expression_data/array_express/A-AFFY-1/", - organism=1, - status=BatchStatuses.NEW.value - ) - batch.save() - - downloader_job = DownloaderJob(batch=batch) - downloader_job.save() - logger.info("Queuing a task.") - download_array_express.delay(downloader_job.id) diff --git a/workers/data_refinery_workers/downloaders/management/commands/queue_test_task.py b/workers/data_refinery_workers/downloaders/management/commands/queue_test_task.py deleted file mode 100644 index 9befd38b3..000000000 --- a/workers/data_refinery_workers/downloaders/management/commands/queue_test_task.py +++ /dev/null @@ -1,48 +0,0 @@ -from django.core.management.base import BaseCommand -from data_refinery_models.models import ( - SurveyJob, - Batch, - BatchStatuses, - DownloaderJob -) -from data_refinery_workers.downloaders.array_express \ - import download_array_express - - -# Import and set logger -import logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class Command(BaseCommand): - """Just a temporary way to queue a celery task without running - the surveyor.""" - - def handle(self, *args, **options): - # Create all the dummy data that would have been created - # before a downloader job could have been generated. 
- survey_job = SurveyJob( - source_type="ARRAY_EXPRESS" - ) - survey_job.save() - - batch = Batch( - survey_job=survey_job, - source_type="ARRAY_EXPRESS", - size_in_bytes=0, - download_url="http://www.ebi.ac.uk/arrayexpress/files/E-MTAB-3050/E-MTAB-3050.raw.1.zip", # noqa - raw_format="MICRO_ARRAY", - processed_format="PCL", - pipeline_required="MICRO_ARRAY_TO_PCL", - accession_code="A-AFFY-1", - internal_location="expression_data/array_express/A-AFFY-1/", - organism=1, - status=BatchStatuses.NEW.value - ) - batch.save() - - downloader_job = DownloaderJob(batch=batch) - downloader_job.save() - logger.info("Queuing a test task.") - download_array_express.delay(downloader_job.id) diff --git a/workers/data_refinery_workers/downloaders/test_array_express.py b/workers/data_refinery_workers/downloaders/test_array_express.py new file mode 100644 index 000000000..e2e825469 --- /dev/null +++ b/workers/data_refinery_workers/downloaders/test_array_express.py @@ -0,0 +1,117 @@ +import copy +from unittest.mock import patch, MagicMock +from django.test import TestCase +from data_refinery_models.models import ( + SurveyJob, + Batch, + BatchStatuses, + DownloaderJob, + DownloaderJobsToBatches, + ProcessorJob, + ProcessorJobsToBatches +) +from data_refinery_workers.downloaders import array_express + + +class DownloadArrayExpressTestCase(TestCase): + def test_good_batch_grouping(self): + """Returns true if all batches have the same download_url.""" + batches = [Batch(download_url="https://example.com"), + Batch(download_url="https://example.com"), + Batch(download_url="https://example.com")] + job_id = 1 + self.assertIsNone(array_express._verify_batch_grouping(batches, job_id)) + + def test_bad_batch_grouping(self): + """Raises exception if all batches don't have the same download_url.""" + batches = [Batch(download_url="https://example.com"), + Batch(download_url="https://example.com"), + Batch(download_url="https://wompwomp.com")] + job_id = 1 + with self.assertRaises(ValueError): + array_express._verify_batch_grouping(batches, job_id) + + @patch("data_refinery_workers.downloaders.array_express.utils.processor_pipeline_registry") + @patch("data_refinery_workers.downloaders.array_express._verify_batch_grouping") + @patch("data_refinery_workers.downloaders.array_express._download_file") + @patch("data_refinery_workers.downloaders.array_express._extract_file") + @patch("data_refinery_workers.downloaders.array_express.utils.prepare_destination") + def test_download(self, + prepare_destination, + _extract_file, + _download_file, + _verify_batch_grouping, + pipeline_registry): + # Set up mocks: + mock_processor_task = MagicMock() + mock_processor_task.delay = MagicMock() + mock_processor_task.delay.return_value = None + pipeline_registry.__getitem__ = MagicMock() + pipeline_registry.__getitem__.return_value = mock_processor_task + target_file_path = "target_file_path" + prepare_destination.return_value = "target_file_path" + + # Set up database records: + survey_job = SurveyJob( + source_type="ARRAY_EXPRESS" + ) + survey_job.save() + + download_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip/GSM1426072_CD_colon_active_2.CEL" # noqa + batch = Batch( + survey_job=survey_job, + source_type="ARRAY_EXPRESS", + size_in_bytes=0, + download_url=download_url, + raw_format="CEL", + processed_format="PCL", + pipeline_required="AFFY_TO_PCL", + platform_accession_code="A-AFFY-1", + experiment_accession_code="E-MTAB-3050", + experiment_title="It doesn't really 
matter.", + name="CE1234", + internal_location="A-AFFY-1/MICRO_ARRAY_TO_PCL/", + organism_id=9606, + organism_name="HOMO SAPIENS", + release_date="2017-05-05", + last_uploaded_date="2017-05-05", + status=BatchStatuses.NEW.value + ) + batch2 = copy.deepcopy(batch) + batch2.name = "CE2345" + batch.save() + batch2.save() + + downloader_job = DownloaderJob() + downloader_job.save() + downloader_job_to_batch = DownloaderJobsToBatches(batch=batch, + downloader_job=downloader_job) + downloader_job_to_batch.save() + downloader_job_to_batch2 = DownloaderJobsToBatches(batch=batch2, + downloader_job=downloader_job) + downloader_job_to_batch2.save() + + # Call the task we're testing: + array_express.download_array_express.apply(args=(downloader_job.id,)).get() + + # Verify that all expected functionality is run: + prepare_destination.assert_called_once() + _verify_batch_grouping.assert_called_once() + _download_file.assert_called_with(download_url, target_file_path, downloader_job.id) + _extract_file.assert_called_with(target_file_path, downloader_job.id) + + mock_processor_task.delay.assert_called() + + # Verify that the database has been updated correctly: + batches = Batch.objects.all() + for batch in batches: + self.assertEqual(batch.status, BatchStatuses.DOWNLOADED.value) + + downloader_job = DownloaderJob.objects.get() + self.assertTrue(downloader_job.success) + self.assertIsNotNone(downloader_job.end_time) + + processor_jobs = ProcessorJob.objects.all() + self.assertEqual(len(processor_jobs), 2) + processor_jobs_to_batches = ProcessorJobsToBatches.objects.all() + self.assertEqual(len(processor_jobs_to_batches), 2) diff --git a/workers/data_refinery_workers/downloaders/utils.py b/workers/data_refinery_workers/downloaders/utils.py index baa054e4d..4a70d4073 100644 --- a/workers/data_refinery_workers/downloaders/utils.py +++ b/workers/data_refinery_workers/downloaders/utils.py @@ -2,11 +2,13 @@ import urllib from retrying import retry from django.utils import timezone +from django.db import transaction from data_refinery_models.models import ( Batch, BatchStatuses, DownloaderJob, - ProcessorJob + ProcessorJob, + ProcessorJobsToBatches ) from data_refinery_workers.processors.processor_registry \ import processor_pipeline_registry @@ -27,21 +29,23 @@ def start_job(job: DownloaderJob): job.save() -def end_job(job: DownloaderJob, batch: Batch, success): - """Record in the database that this job has completed, - create a processor job, and queue a processor task.""" - job.success = success - job.end_time = timezone.now() - job.save() +def end_job(job: DownloaderJob, batches: Batch, success): + """Record in the database that this job has completed. + Create a processor job and queue a processor task for each batch + if the job was successful. 
+ """ @retry(stop_max_attempt_number=3) - def save_batch_create_job(): + def save_batch_create_job(batch): batch.status = BatchStatuses.DOWNLOADED.value batch.save() - logger.info("Creating processor job for batch #%d.", batch.id) - processor_job = ProcessorJob(batch=batch) + logger.debug("Creating processor job for batch #%d.", batch.id) + processor_job = ProcessorJob() processor_job.save() + processor_job_to_batch = ProcessorJobsToBatches(batch=batch, + processor_job=processor_job) + processor_job_to_batch.save() return processor_job @retry(stop_max_attempt_number=3) @@ -49,14 +53,23 @@ def queue_task(processor_job): processor_task = processor_pipeline_registry[batch.pipeline_required] processor_task.delay(processor_job.id) - if batch is not None: - processor_job = save_batch_create_job() - queue_task(processor_job) + if success: + for batch in batches: + with transaction.atomic(): + processor_job = save_batch_create_job(batch) + queue_task(processor_job) + + job.success = success + job.end_time = timezone.now() + job.save() def prepare_destination(batch: Batch): - """Prepare the destination directory and return the full - path the Batch's file should be downloaded to.""" + """Prepare the destination directory for the batch. + + Also returns the full path the Batch's file should be downloaded + to. + """ target_directory = os.path.join(ROOT_URI, batch.internal_location) os.makedirs(target_directory, exist_ok=True) diff --git a/workers/data_refinery_workers/processors/array_express.py b/workers/data_refinery_workers/processors/array_express.py index 492eebd8b..0836864e9 100644 --- a/workers/data_refinery_workers/processors/array_express.py +++ b/workers/data_refinery_workers/processors/array_express.py @@ -1,6 +1,15 @@ +"""This processor is currently out of date. It is designed to process +multiple CEL files at a time, but that is not how we are going to +process Array Express files. I have rewritten the Array Express +surveyor/downloader to support this, but we don't have the new +processor yet. This will run, which is good enough for testing +the system, however since it will change so much the processor +itself is not yet tested. +""" + + from __future__ import absolute_import, unicode_literals import os -import zipfile from typing import Dict import rpy2.robjects as ro from celery import shared_task @@ -11,33 +20,18 @@ def cel_to_pcl(kwargs: Dict): - batch = kwargs["batch"] + # Array Express processor jobs have one batch per job. + batch = kwargs["batches"][0] - temp_directory = utils.ROOT_URI + "temp/" + batch.internal_location + from_directory = utils.ROOT_URI + "raw/" + batch.internal_location target_directory = utils.ROOT_URI + "processed/" + batch.internal_location - os.makedirs(temp_directory, exist_ok=True) os.makedirs(target_directory, exist_ok=True) - - raw_file_name = batch.download_url.split('/')[-1] - zip_location = (utils.ROOT_URI + "raw/" + batch.internal_location - + raw_file_name) - - if os.path.isfile(zip_location): - zip_ref = zipfile.ZipFile(zip_location, 'r') - zip_ref.extractall(temp_directory) - zip_ref.close() - else: - logger.error("Missing file: %s", zip_location) - return {"success": False} - - # Experiment code should be added to the batches data model - experiment_code = raw_file_name.split('/')[0] - new_name = experiment_code + ".pcl" + new_name = batch.name + "." 
+ batch.processed_format ro.r('source("/home/user/r_processors/process_cel_to_pcl.R")') ro.r['ProcessCelFiles']( - temp_directory, - "Hs", # temporary until organism discovery is working + from_directory, + "Hs", # temporary until organism handling is more defined target_directory + new_name) return kwargs @@ -48,5 +42,4 @@ def affy_to_pcl(job_id): utils.run_pipeline({"job_id": job_id}, [utils.start_job, cel_to_pcl, - utils.cleanup_temp_data, utils.end_job]) diff --git a/workers/data_refinery_workers/processors/management/commands/queue_processor.py b/workers/data_refinery_workers/processors/management/commands/queue_processor.py index eae9bbade..a75f3c3bb 100644 --- a/workers/data_refinery_workers/processors/management/commands/queue_processor.py +++ b/workers/data_refinery_workers/processors/management/commands/queue_processor.py @@ -1,20 +1,21 @@ """This command is intended for development purposes. It creates the database records necessary for a processor job to run and queues one. It assumes that the file -/home/user/data_store/raw/A-AFFY-1/MICRO_ARRAY_TO_PCL/E-MTAB-3050.raw.1.zip +/home/user/data_store/raw/A-AFFY-141/AFFY_TO_PCL/GSM1426072_CD_colon_active_2.CEL exists. The easiest way to run this is with the tester.sh script. -(Changing queue_downloader to queue_processor.)""" +(Changing queue_downloader to queue_processor.) +""" from django.core.management.base import BaseCommand from data_refinery_models.models import ( SurveyJob, Batch, BatchStatuses, - ProcessorJob + ProcessorJob, + ProcessorJobsToBatches ) -from data_refinery_workers.processors.array_express \ - import process_array_express +from data_refinery_workers.processors.array_express import affy_to_pcl # Import and set logger @@ -36,18 +37,27 @@ def handle(self, *args, **options): survey_job=survey_job, source_type="ARRAY_EXPRESS", size_in_bytes=0, - download_url="http://www.ebi.ac.uk/arrayexpress/files/E-MTAB-3050/E-MTAB-3050.raw.1.zip", # noqa - raw_format="MICRO_ARRAY", + download_url="ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip", # noqa + raw_format="CEL", processed_format="PCL", - pipeline_required="MICRO_ARRAY_TO_PCL", - accession_code="A-AFFY-1", - internal_location="A-AFFY-1/MICRO_ARRAY_TO_PCL/", - organism=1, + pipeline_required="AFFY_TO_PCL", + platform_accession_code="A-AFFY-141", + experiment_accession_code="E-GEOD-59071", + experiment_title="It doesn't really matter.", + name="GSM1426072_CD_colon_active_2.CEL", + internal_location="A-AFFY-141/AFFY_TO_PCL/", + organism_id=9606, + organism_name="HOMO SAPIENS", + release_date="2017-05-05", + last_uploaded_date="2017-05-05", status=BatchStatuses.NEW.value ) batch.save() - processor_job = ProcessorJob(batch=batch) + processor_job = ProcessorJob() processor_job.save() + downloader_job_to_batch = ProcessorJobsToBatches(batch=batch, + processor_job=processor_job) + downloader_job_to_batch.save() logger.info("Queuing a processor job.") - process_array_express.delay(processor_job.id) + affy_to_pcl.delay(processor_job.id) diff --git a/workers/data_refinery_workers/processors/test_utils.py b/workers/data_refinery_workers/processors/test_utils.py new file mode 100644 index 000000000..c88fe3400 --- /dev/null +++ b/workers/data_refinery_workers/processors/test_utils.py @@ -0,0 +1,180 @@ +import copy +from unittest.mock import patch, MagicMock +from django.test import TestCase +from data_refinery_models.models import ( + SurveyJob, + Batch, + BatchStatuses, + DownloaderJob, + DownloaderJobsToBatches, + ProcessorJob, 
+ ProcessorJobsToBatches +) +from data_refinery_workers.processors import utils + + +def init_batch(): + survey_job = SurveyJob( + source_type="ARRAY_EXPRESS" + ) + survey_job.save() + + return Batch( + survey_job=survey_job, + source_type="ARRAY_EXPRESS", + size_in_bytes=0, + download_url="ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip/GSM1426072_CD_colon_active_2.CEL", # noqa + raw_format="CEL", + processed_format="PCL", + pipeline_required="AFFY_TO_PCL", + platform_accession_code="A-AFFY-1", + experiment_accession_code="E-MTAB-3050", + experiment_title="It doesn't really matter.", + name="CE1234", + internal_location="A-AFFY-1/MICRO_ARRAY_TO_PCL/", + organism_id=9606, + organism_name="HOMO SAPIENS", + release_date="2017-05-05", + last_uploaded_date="2017-05-05", + status=BatchStatuses.DOWNLOADED.value + ) + + +class StartJobTestCase(TestCase): + def test_success(self): + batch = init_batch() + batch2 = copy.deepcopy(batch) + batch2.name = "CE2345" + batch.save() + batch2.save() + + processor_job = ProcessorJob() + processor_job.save() + processor_job_to_batch = ProcessorJobsToBatches(batch=batch, + processor_job=processor_job) + processor_job_to_batch.save() + processor_job_to_batch2 = ProcessorJobsToBatches(batch=batch2, + processor_job=processor_job) + processor_job_to_batch2.save() + + kwargs = utils.start_job({"job": processor_job}) + # start_job preserves the "job" key + self.assertEqual(kwargs["job"], processor_job) + + # start_job finds the batches and returns them + self.assertEqual(len(kwargs["batches"]), 2) + + def test_failure(self): + """Fails because there are no batches for the job.""" + processor_job = ProcessorJob() + processor_job.save() + + kwargs = utils.start_job({"job": processor_job}) + self.assertFalse(kwargs["success"]) + + +class EndJobTestCase(TestCase): + def test_success(self): + batch = init_batch() + batch2 = copy.deepcopy(batch) + batch2.name = "CE2345" + batch.save() + batch2.save() + + processor_job = ProcessorJob() + processor_job.save() + + utils.end_job({"job": processor_job, + "batches": [batch, batch2]}) + + processor_job.refresh_from_db() + self.assertTrue(processor_job.success) + self.assertIsNotNone(processor_job.end_time) + + batches = Batch.objects.all() + for batch in batches: + self.assertEqual(batch.status, BatchStatuses.PROCESSED.value) + + def test_failure(self): + batch = init_batch() + batch2 = copy.deepcopy(batch) + batch2.name = "CE2345" + batch.save() + batch2.save() + + processor_job = ProcessorJob() + processor_job.save() + + utils.end_job({"success": False, + "job": processor_job, + "batches": [batch, batch2]}) + + processor_job.refresh_from_db() + self.assertFalse(processor_job.success) + self.assertIsNotNone(processor_job.end_time) + + batches = Batch.objects.all() + for batch in batches: + self.assertEqual(batch.status, BatchStatuses.DOWNLOADED.value) + + +class RunPipelineTestCase(TestCase): + def test_no_job(self): + mock_processor = MagicMock() + utils.run_pipeline({"job_id": 100}, [mock_processor]) + mock_processor.assert_not_called() + + def test_processor_failure(self): + processor_job = ProcessorJob() + processor_job.save() + job_dict = {"job_id": processor_job.id, + "job": processor_job} + + mock_processor = MagicMock() + mock_processor.__name__ = "Fake processor." 
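+        # MagicMock instances do not define __name__, so it is set explicitly
+        # above; run_pipeline presumably refers to processor.__name__ (e.g. for
+        # logging), which would otherwise raise an AttributeError.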
+ return_dict = copy.copy(job_dict) + return_dict["success"] = False + mock_processor.return_value = return_dict + + utils.run_pipeline(job_dict, [mock_processor]) + mock_processor.assert_called_once() + processor_job.refresh_from_db() + self.assertFalse(processor_job.success) + self.assertIsNotNone(processor_job.end_time) + + def test_value_passing(self): + """The keys added to kwargs and returned by processors will be + passed through to other processors. + """ + batch = init_batch() + batch.save() + processor_job = ProcessorJob() + processor_job.save() + processor_jobs_to_batches = ProcessorJobsToBatches(batch=batch, + processor_job=processor_job) + processor_jobs_to_batches.save() + + mock_processor = MagicMock() + mock_dict = {"something_to_pass_along": True, + "job": processor_job, + "batches": [batch]} + mock_processor.return_value = mock_dict + + def processor_function(kwargs): + self.assertTrue(kwargs["something_to_pass_along"]) + return kwargs + + test_processor = MagicMock(side_effect=processor_function) + + utils.run_pipeline({"job_id": processor_job.id}, + [utils.start_job, + mock_processor, + test_processor, + utils.end_job]) + + processor_job.refresh_from_db() + self.assertTrue(processor_job.success) + self.assertIsNotNone(processor_job.end_time) + + batch.refresh_from_db() + self.assertEqual(batch.status, BatchStatuses.PROCESSED.value) diff --git a/workers/data_refinery_workers/processors/utils.py b/workers/data_refinery_workers/processors/utils.py index be402223f..b803056f8 100644 --- a/workers/data_refinery_workers/processors/utils.py +++ b/workers/data_refinery_workers/processors/utils.py @@ -1,9 +1,6 @@ -import os -import urllib -import shutil from django.utils import timezone from typing import List, Dict, Callable -from data_refinery_models.models import Batch, BatchStatuses, ProcessorJob +from data_refinery_models.models import BatchStatuses, ProcessorJob, ProcessorJobsToBatches # Import and set logger import logging @@ -15,27 +12,34 @@ def start_job(kwargs: Dict): - """Record in the database that this job is being started and - retrieves the job's batch from the database and - adds it to the dictionary passed in with the key 'batch'.""" + """A processor function to start jobs. + + Record in the database that this job is being started and + retrieves the job's batches from the database and adds them to the + dictionary passed in with the key 'batches'. + """ job = kwargs["job"] job.worker_id = "For now there's only one. For now..." job.start_time = timezone.now() job.save() - try: - batch = Batch.objects.get(id=job.batch_id) - except Batch.DoesNotExist: - logger.error("Cannot find batch record with ID %d.", job.batch_id) + batch_relations = ProcessorJobsToBatches.objects.filter(processor_job_id=job.id) + batches = [br.batch for br in batch_relations] + + if len(batches) == 0: + logger.error("No batches found for job #%d.", job.id) return {"success": False} - kwargs["batch"] = batch + kwargs["batches"] = batches return kwargs def end_job(kwargs: Dict): - """Record in the database that this job has completed and that - the batch has been processed if successful.""" + """A processor function to end jobs. + + Record in the database that this job has completed and that + the batch has been processed if successful. 
+ """ job = kwargs["job"] if "success" in kwargs: @@ -48,51 +52,34 @@ def end_job(kwargs: Dict): job.save() if job.success: - batch = kwargs["batch"] - batch.status = BatchStatuses.PROCESSED.value - batch.save() + batches = kwargs["batches"] + for batch in batches: + batch.status = BatchStatuses.PROCESSED.value + batch.save() # Every processor returns a dict, however end_job is always called # last so it doesn't need to contain anything. return {} -def cleanup_temp_data(kwargs: Dict): - """Removes data from raw/ and temp/ directories related to the batch.""" - batch = kwargs["batch"] - - path = urllib.parse.urlparse(batch.download_url).path - raw_file_name = os.path.basename(path) - raw_file_location = os.path.join(ROOT_URI, - "raw", - batch.internal_location, - raw_file_name) - temp_directory = os.path.join(ROOT_URI, "temp", batch.internal_location) - os.remove(raw_file_location) - shutil.rmtree(temp_directory) - - return kwargs - - def run_pipeline(start_value: Dict, pipeline: List[Callable]): """Runs a pipeline of processor functions. - start_value must contain a key 'job_id' which is a valid id - for a ProcessorJob record. + start_value must contain a key 'job_id' which is a valid id for a + ProcessorJob record. Each processor fuction must accept a dictionary and return a dictionary. - Any processor function which returns a dictionary - containing a key of 'success' with a value of False will cause - the pipeline to terminate with a call to utils.end_job. + Any processor function which returns a dictionary containing a key + of 'success' with a value of False will cause the pipeline to + terminate with a call to utils.end_job. - The key 'job' is reserved for the ProcessorJob currently being run. - The key 'batch' is reserved for the Batch that is currently being - processed. - It is required that the dictionary returned by each processor - function preserve the mappings for 'job' and 'batch' that were - passed into it. + The key 'job' is reserved for the ProcessorJob currently being + run. The key 'batches' is reserved for the Batches that are + currently being processed. It is required that the dictionary + returned by each processor function preserve the mappings for + 'job' and 'batches' that were passed into it. """ job_id = start_value["job_id"] @@ -102,6 +89,10 @@ def run_pipeline(start_value: Dict, pipeline: List[Callable]): logger.error("Cannot find processor job record with ID %d.", job_id) return + if len(pipeline) == 0: + logger.error("Empty pipeline specified for job #%d.", + job_id) + last_result = start_value last_result["job"] = job for processor in pipeline: diff --git a/workers/run_tests.sh b/workers/run_tests.sh new file mode 100755 index 000000000..6691e1beb --- /dev/null +++ b/workers/run_tests.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Script for executing Django PyUnit tests within a Docker container. + +# This script should always run as if it were being called from +# the directory it lives in. +script_directory=`dirname "${BASH_SOURCE[0]}" | xargs realpath` +cd $script_directory + +# However in order to give Docker access to all the code we have to +# move up a level +cd .. + +docker build -t dr_worker -f workers/Dockerfile.tests . 
+
+HOST_IP=$(ip route get 8.8.8.8 | awk '{print $NF; exit}')
+
+docker run \
+    --add-host=database:$HOST_IP \
+    --env-file workers/environments/test \
+    -i dr_worker test --no-input "$@"
diff --git a/workers/tester.sh b/workers/tester.sh
index 69af5cfb4..f2f78abef 100755
--- a/workers/tester.sh
+++ b/workers/tester.sh
@@ -16,7 +16,7 @@ docker build -t test_master -f workers/Dockerfile .
 HOST_IP=$(ip route get 8.8.8.8 | awk '{print $NF; exit}')
 
 docker run \
-    --link some-rabbit:rabbit \
+    --link message-queue:rabbit \
     --add-host=database:$HOST_IP \
     --env-file workers/environments/dev \
     --entrypoint ./manage.py \
diff --git a/workers/worker.sh b/workers/worker.sh
index b628ff007..ee5e49937 100755
--- a/workers/worker.sh
+++ b/workers/worker.sh
@@ -23,7 +23,7 @@ docker build -t dr_worker -f workers/Dockerfile .
 HOST_IP=$(ip route get 8.8.8.8 | awk '{print $NF; exit}')
 
 docker run \
-    --link some-rabbit:rabbit \
+    --link message-queue:rabbit \
     --name worker1 \
     --add-host=database:$HOST_IP \
     --env-file workers/environments/dev \
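The processor pipeline protocol documented in workers/data_refinery_workers/processors/utils.py can be summarized with a short sketch: every processor function accepts and returns a dict, must preserve the reserved 'job' and 'batches' keys that run_pipeline and utils.start_job populate, and can stop the pipeline by returning a dict whose 'success' key is False. The log_batches and run_example_pipeline names below are hypothetical illustrations and are not part of this changeset; only utils.start_job, utils.end_job, and utils.run_pipeline come from the code above.

import logging
from typing import Dict

from data_refinery_workers.processors import utils

logger = logging.getLogger(__name__)


def log_batches(kwargs: Dict) -> Dict:
    """Hypothetical processor: logs each batch and passes the dict along."""
    # 'job' and 'batches' are reserved keys populated by run_pipeline and
    # utils.start_job; they must be preserved in the dict returned here.
    for batch in kwargs["batches"]:
        logger.info("Processor job #%d received batch %s.",
                    kwargs["job"].id,
                    batch.name)
    # Returning a dict containing {"success": False} instead would terminate
    # the pipeline with a call to utils.end_job, marking the job as failed.
    return kwargs


def run_example_pipeline(processor_job_id):
    # utils.start_job loads the job's batches; utils.end_job records the
    # outcome and marks each batch as processed on success.
    utils.run_pipeline({"job_id": processor_job_id},
                       [utils.start_job, log_batches, utils.end_job])

In the changeset itself, affy_to_pcl wraps the equivalent run_pipeline call in a Celery shared_task so that queue_processor can queue it with affy_to_pcl.delay(processor_job.id).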