diff --git a/.gitignore b/.gitignore
index 88aa4b6..e138b05 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,12 +31,6 @@ _site/
 *.sqlite3
 !sample/sample.sqlite3
 
-# Crawl files, except for the sample crawl #
-############################################
-*.warc
-*.warc.gz
-!sample/crawl.warc.gz
-
 # OS generated files #
 ######################
 .DS_Store
diff --git a/crawler/management/commands/warc_to_csv.py b/crawler/management/commands/warc_to_csv.py
deleted file mode 100644
index 882842a..0000000
--- a/crawler/management/commands/warc_to_csv.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import csv
-
-import djclick as click
-
-from crawler.models import Component, Error, Link, Page, Redirect
-from crawler.reader import generate_instances
-
-
-@click.command()
-@click.argument("warc", type=click.File("rb"))
-@click.option(
-    "--pages-csv",
-    type=click.File("w", encoding="utf-8-sig"),
-    default="pages.csv",
-    show_default=True,
-)
-@click.option(
-    "--errors-csv",
-    type=click.File("w", encoding="utf-8-sig"),
-    default="errors.csv",
-    show_default=True,
-)
-@click.option(
-    "--redirects-csv",
-    type=click.File("w", encoding="utf-8-sig"),
-    default="redirects.csv",
-    show_default=True,
-)
-@click.option(
-    "--links-csv",
-    type=click.File("w", encoding="utf-8-sig"),
-    default="links.csv",
-    show_default=True,
-)
-@click.option(
-    "--components-csv",
-    type=click.File("w", encoding="utf-8-sig"),
-    default="components.csv",
-    show_default=True,
-)
-@click.option(
-    "--max-pages", type=int, help="Maximum number of pages to read from the archive"
-)
-def command(
-    warc, pages_csv, errors_csv, redirects_csv, links_csv, components_csv, max_pages
-):
-    writers = {
-        model: csv.writer(model_csv, csv.QUOTE_ALL)
-        for model, model_csv in {
-            Page: pages_csv,
-            Error: errors_csv,
-            Redirect: redirects_csv,
-            Link: links_csv,
-            Component: components_csv,
-        }.items()
-    }
-
-    for instance in generate_instances(warc, max_pages=max_pages):
-        if isinstance(instance, Page):
-            writers[Page].writerow(
-                [
-                    instance.timestamp,
-                    instance.url,
-                    instance.title,
-                    instance.language,
-                ]
-            )
-
-            for component in instance.components.all():
-                writers[Component].writerow(
-                    [
-                        instance.url,
-                        component.class_name,
-                    ]
-                )
-
-            for link in instance.links.all():
-                writers[Link].writerow(
-                    [
-                        instance.url,
-                        link.href,
-                    ]
-                )
-        elif isinstance(instance, Error):
-            writers[Error].writerow(
-                [
-                    instance.timestamp,
-                    instance.url,
-                    instance.status_code,
-                    instance.referrer,
-                ]
-            )
-        elif isinstance(instance, Redirect):
-            writers[Redirect].writerow(
-                [
-                    instance.timestamp,
-                    instance.url,
-                    instance.status_code,
-                    instance.referrer,
-                    instance.location,
-                ]
-            )
-        else:
-            raise ValueError(instance)
diff --git a/crawler/management/commands/warc_to_db.py b/crawler/management/commands/warc_to_db.py
deleted file mode 100644
index 43f348c..0000000
--- a/crawler/management/commands/warc_to_db.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import os
-import os.path
-from collections import defaultdict
-
-from django.db import connections
-from django.conf import settings
-from django.core.management import call_command
-from django.test import override_settings
-
-import djclick as click
-
-from crawler.reader import generate_instances
-from crawler.writer import DatabaseWriter
-
-
-@click.command()
-@click.argument("warc", type=click.File("rb"))
-@click.argument("db_filename", type=click.Path())
-@click.option(
-    "--max-pages", type=int, help="Maximum number of pages to read from the archive"
-)
-@click.option(
-    "--recreate",
-    is_flag=True,
-    show_default=True,
-    default=False,
-    help="Recreate database file if it already exists.",
-)
-@click.option(
-    "--noinput",
-    "--no-input",
-    is_flag=True,
-    default=False,
-    help="Do not prompt the user for input of any kind.",
-)
-@click.option(
-    "--multiple-domains/--no-multiple-domains",
-    is_flag=True,
-    show_default=True,
-    default=False,
-    help="Limit pages to the first domain seen.",
-)
-def command(warc, db_filename, max_pages, recreate, noinput, multiple_domains):
-    if os.path.exists(db_filename):
-        if not recreate:
-            if noinput:
-                raise click.ClickException(
-                    f"File {db_filename} already exists, use --recreate to recreate."
-                )
-
-            click.confirm(
-                f"File {db_filename} already exists, do you wish to recreate?",
-                abort=True,
-            )
-
-        os.remove(db_filename)
-
-    db_alias = "warc_to_db"
-
-    connections.databases[db_alias] = {
-        "ENGINE": "django.db.backends.sqlite3",
-        "NAME": db_filename,
-    }
-
-    click.echo("Creating empty database tables...")
-    call_command("migrate", database=db_alias, app_label="crawler", run_syncdb=True)
-
-    click.echo("Reading WARC content into database tables...")
-    writer = DatabaseWriter(db_alias)
-
-    for instance in generate_instances(
-        warc, max_pages=max_pages, single_domain_only=not multiple_domains
-    ):
-        writer.write(instance)
-
-    writer.analyze()
diff --git a/crawler/reader.py b/crawler/reader.py
deleted file mode 100644
index a2eb866..0000000
--- a/crawler/reader.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import os.path
-import re
-from urllib.parse import urlparse, urlunparse
-
-import click
-import lxml.html
-from warcio.archiveiterator import ArchiveIterator
-
-from crawler.models import Component, Error, Link, Page, Redirect
-
-
-WHITESPACE = re.compile(r"\s+")
-
-
-COMPONENT_SEARCH = re.compile(r"(?:(?:class=\")|\s)((?:o|m|a)-[\w\-]*)")
-
-
-def read_warc_records(warc, silent=False):
-    iterator = ArchiveIterator(warc)
-    progress_bar = None
-    progress_last = 0
-    warc_request = None
-
-    if not silent:
-        file_size = os.path.getsize(warc.name)
-        progress_bar = click.progressbar(length=file_size)
-
-    for warc_record in iterator:
-        if warc_record.rec_type == "request":
-            warc_request = warc_record
-        else:
-            if warc_record.rec_type == "response":
-                yield warc_request, warc_record
-
-            warc_request = None
-
-        if progress_bar:
-            progress_current = iterator.fh.tell()
-            progress_step = progress_current - progress_last
-            progress_bar.update(progress_step)
-            progress_last = progress_current
-
-
-def get_body(tree):
-    body = tree.find("./body")
-
-    if body is not None:
-        drop_element_selectors = [
-            ".o-header",
-            ".o-footer",
-            ".skip-nav",
-            "img",
-            "script",
-            "style",
-        ]
-
-        for drop_element_selector in drop_element_selectors:
-            for element in body.cssselect(drop_element_selector):
-                element.drop_tree()
-
-    return body
-
-
-def make_instance_from_warc_record(
-    warc_request, warc_response, seen_urls, limit_domain
-):
-    url = warc_response.rec_headers.get_header("WARC-Target-URI")
-
-    # Skip non-HTTP responses (e.g. DNS lookups).
-    if not warc_response.http_headers:
-        return
-
-    # This code is needed because, surprisingly, WARCs may contain multiple
-    # records pointing to the same URL. This can happen if multiple redirects
-    # or relative links point to the same target URL. We only want to generate
-    # records for each URL a single time, so we keep a record of which ones
-    # we've already seen.
-    if url in seen_urls:
-        return
-
-    seen_urls.add(url)
-
-    status_code = int(warc_response.http_headers.get_statuscode())
-    content_type = warc_response.http_headers.get_header("Content-Type")
-    timestamp = warc_response.rec_headers.get_header("WARC-Date")
-
-    if warc_request:
-        referrer = warc_request.http_headers.get_header("Referer")
-    else:
-        referrer = None
-
-    if status_code >= 300:
-        if status_code < 400:
-            location = warc_response.http_headers.get("Location")
-            return Redirect(
-                timestamp=timestamp,
-                url=url,
-                status_code=status_code,
-                referrer=referrer,
-                location=location,
-            )
-        else:
-            return Error(
-                timestamp=timestamp, url=url, status_code=status_code, referrer=referrer
-            )
-
-        return
-
-    if 200 != status_code:
-        raise ValueError(f"Unexpected status code {status_code} for {url}")
-
-    if not content_type:
-        raise ValueError(f"Missing content type for {url}")
-
-    if not content_type.startswith("text/html"):
-        return
-
-    if limit_domain and not url.startswith(limit_domain):
-        return
-
-    html = warc_response.content_stream().read().decode("utf-8")
-    tree = lxml.html.fromstring(html)
-    title_tag = tree.find(".//title")
-    title = title_tag.text.strip() if title_tag is not None else None
-    language = tree.find(".").get("lang")
-
-    if title is None:
-        return
-
-    body = get_body(tree)
-
-    if body is not None:
-        text = WHITESPACE.sub(" ", body.text_content()).strip()
-    else:
-        text = None
-
-    page = Page(
-        timestamp=timestamp,
-        url=url,
-        title=title,
-        language=language,
-        html=html,
-        text=text,
-    )
-
-    hrefs = set(
-        href
-        for element, attribute, href, pos in body.iterlinks()
-        if "a" == element.tag and "href" == attribute
-    )
-
-    page.links = [Link(href=href) for href in sorted(hrefs)]
-
-    body_html = lxml.etree.tostring(body, encoding="unicode")
-
-    class_names = set(COMPONENT_SEARCH.findall(body_html))
-    page.components = [
-        Component(class_name=class_name) for class_name in sorted(class_names)
-    ]
-
-    return page
-
-
-def generate_instances(warc, max_pages=None, single_domain_only=True, silent=False):
-    page_count = 0
-    seen_urls = set()
-    limit_domain = None
-
-    for warc_request, warc_response in read_warc_records(warc, silent=silent):
-        instance = make_instance_from_warc_record(
-            warc_request, warc_response, seen_urls, limit_domain
-        )
-
-        if not instance:
-            continue
-
-        yield instance
-
-        if isinstance(instance, Page):
-            page_count += 1
-
-            if max_pages and page_count >= max_pages:
-                break
-
-            if single_domain_only and not limit_domain:
-                parsed = urlparse(instance.url)
-                limit_domain = urlunparse(
-                    (parsed.scheme, parsed.netloc, "/", "", "", "")
-                )
diff --git a/crawler/wpull_plugin.py b/crawler/wpull_plugin.py
index af85d08..78993bd 100644
--- a/crawler/wpull_plugin.py
+++ b/crawler/wpull_plugin.py
@@ -68,7 +68,7 @@ def deactivate(self):
         self.db_writer.analyze()
 
     def init_db(self):
-        db_alias = "warc_to_db"
+        db_alias = "crawler"
 
         connections.databases[db_alias] = {
             "ENGINE": "django.db.backends.sqlite3",
diff --git a/list_qs_params.py b/list_qs_params.py
deleted file mode 100755
index 48749b7..0000000
--- a/list_qs_params.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-
-# Given a pages.csv file generated by "read_warc.py dump_csvs", output a list
-# of query string parameters used across all page URLs. Useful for creating a
-# proper --reject-regex parameter for wget, for example:
-#
-# ./list_qs_params.py pages.csv \
-#     --ignore-param=page \
-#     --ignore-param=ext_url \
-#     --ignore-param=signature \
-#     --wget-reject-regex
-#
-# This command will dump out a list of query string parameters, excluding
-# "page", "ext_url", and "signature" in a format compatible with
-# wget --reject-regex:
-#
-# CatID=|NavCode=|_gl=|activity_type=|...
-#
-# See relevant wget documentation at:
-#
-# https://www.gnu.org/software/wget/manual/html_node/Recursive-Accept_002fReject-Options.html
-import argparse
-import csv
-from itertools import chain
-from operator import itemgetter
-from urllib.parse import parse_qs, urlparse
-
-
-def list_qs_params(pages_csv, ignore_param, wget_reject_regex):
-    reader = csv.reader(pages_csv)
-    urls = list(map(itemgetter(1), reader))
-    params = sorted(set(chain(*(parse_qs(urlparse(url).query).keys() for url in urls))))
-    params = [p for p in params if p not in ignore_param]
-
-    if wget_reject_regex:
-        print("|".join(f"{param}=" for param in params))
-    else:
-        print(params)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument("pages_csv", type=argparse.FileType("r", encoding="utf-8"))
-    parser.add_argument("--ignore-param", nargs="*", default=[])
-    parser.add_argument("--wget-reject-regex", action="store_true")
-
-    args = parser.parse_args()
-
-    list_qs_params(**vars(args))
diff --git a/requirements/base.txt b/requirements/base.txt
index 91679fe..cc5e6c2 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -9,7 +9,6 @@ django-modelcluster==5.3
 djangorestframework==3.13.1
 djangorestframework-csv==2.1.1
 lxml==4.9.1
-warcio==1.7.4
 whitenoise==5.3.0
 wpull==2.0.1
 
diff --git a/wget_crawl.sh b/wget_crawl.sh
deleted file mode 100755
index b8d4300..0000000
--- a/wget_crawl.sh
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env bash
-
-# Recursively crawl a website and save its HTML locally.
-#
-# Example usage:
-#
-# ./wget_crawl.sh [-d depth] https://www.consumerfinance.gov/
-#
-# Optionally specify -d depth to limit the crawl depth.
-
-# If a command fails, stop executing this script and return its error code.
-set -e
-
-depth=0
-
-while getopts ":d:" opt; do
-    case $opt in
-        d )
-            depth="$OPTARG";
-            number_regex='^[0-9]+$'
-            if ! [[ $depth =~ $number_regex ]] ; then
-                echo "Crawl depth must be a number." 1>&2
-                exit 1
-            fi
-            ;;
-        \? )
-            echo "Invalid option: -$OPTARG." 1>&2
-            exit 1
-            ;;
-        : )
-            echo "Invalid option: -$OPTARG requires an argument." 1>&2
-            exit 1
-            ;;
-    esac
-done
-
-shift $((OPTIND -1))
-
-url=$1
-
-if [ -z "$url" ]; then
-    echo "Must specify URL to crawl."
-    exit 1
-fi
-
-echo "Starting crawl at $url."
-
-domain=$url
-domain="${domain#http://}"
-domain="${domain#https://}"
-domain="${domain%%:*}"
-domain="${domain%%\?*}"
-domain="${domain%%/*}"
-echo "Limiting crawl to domain $domain."
-
-if [ $depth -ne 0 ]; then
-    echo "Limiting crawl to depth $depth."
-fi
-
-# Crawl into a temporary directory to avoid potential unexpected overwriting
-# due to use of --trust-server-names.
-# See https://nvd.nist.gov/vuln/detail/CVE-2010-2252.
-tmp_dir=$(mktemp -d -t wget-$(date +%Y-%m-%d-%H-%M-%S)-XXXXXXXX)
-echo "Working in $tmp_dir."
-
-pushd "$tmp_dir" > /dev/null
-
-time wget \
-    --domains="$domain" \
-    --no-verbose \
-    --delete-after \
-    --no-directories \
-    --warc-file=crawl \
-    --warc-cdx=on \
-    --warc-tempdir="$tmp_dir" \
-    --execute robots=off \
-    --wait=0.5 \
-    --random-wait \
-    --ignore-case \
-    --no-hsts \
-    --reject '*.css,*.csv,*.do,*.doc,*.docx,*.epub,*.gif,*.ico,*.jpg,*.js,*.json,*.mp3,*.pdf,*.png,*.pptx,*.py,*.r,*.sas,*.sps,*.svg,*.tmp,*.txt,*.wav,*.webmanifest,*.woff,*.woff2,*.xls,*xlsx,*.xml,*.zip' \
-    --reject-regex "CatID=|NavCode=|_gl=|activity_type=|authors=|book=|categories=|chartType=|charttype=|clhx=|dateInterval=|date_received_min=|dateinterval=|entx=|ext_url=|fdx=|filter1_topics=|filter2_topics=|form-id=|gib=|gpl=|grade_level=|has_narrative=|hltx=|hous=|houx=|insi=|insl=|inst=|iped=|issue=|language=|lens=|mta=|oid=|othg=|othr=|othx=|parl=|pelg=|perl=|pid=|ppl=|product=|prvf=|prvi=|prvl=|q=|regs=|retx=|schg=|school_subject=|searchField=|search_field=|searchfield=|signature=|size=|sort=|stag=|subl=|tab=|taxx=|title=|topic=|topics=|totl=|tran=|trnx=|tuit=|unsl=|utm_campaign=|utm_medium=|utm_source=|wkst=" \
-    --recursive \
-    --level="$depth" \
-    --user-agent="crawsqueal" \
-    "$url" 2>&1 | tee wget.log
-
-popd > /dev/null
-
-# Copy back log and WARC file from temporary directory.
-cp "$tmp_dir"/wget.log .
-cp "$tmp_dir"/crawl.{warc.gz,cdx} .
-
-# Clean up temporary directory.
-rm -rf "$tmp_dir"