diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 00000000..88c9c59b
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,46 @@
+name: Docs Build
+
+on:
+ push:
+ branches:
+ - mkdocs_no_exec
+
+jobs:
+ build_docs:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v2
+
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: '3.10'
+
+ - name: Install dependencies
+ run: |
+ pip install --upgrade pip
+ pip install .
+ pip install -r docs/requirements_docs.txt
+
+ # - name: Convert notebooks to HTML
+ # # if: ${{ github.event_name == 'push' && contains(github.event.head_commit.modified, 'Tutorial/') && contains(github.event.head_commit.modified, '.ipynb') }}
+ # run: |
+ # # jupyter nbconvert --to html --allow-errors --no-input --show-input --template classic --output-dir docs/tutorial Tutorial/*.ipynb
+ # jupyter nbconvert --to html --allow-errors --template classic --output-dir docs/tutorial Tutorial/*.ipynb
+
+ # - name: Build Tutorial Table of Contents
+ # run: |
+ # bash docs/scripts/build_tutorial_toc.sh
+
+ - name: Build Documentation sources
+ run: |
+ bash docs/scripts/build_docs_sources.sh
+
+ - name: Build mkdocs.yml
+ run: |
+ bash docs/scripts/build_mkdocs.sh
+
+ - name: Build and Deploy Docs
+ run: |
+ mkdocs gh-deploy --force --clean --verbose
diff --git a/.github/workflows/publish_package.yml b/.github/workflows/publish_package.yml
index 7f6e1931..fc2f200d 100644
--- a/.github/workflows/publish_package.yml
+++ b/.github/workflows/publish_package.yml
@@ -29,38 +29,4 @@ jobs:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: twine upload dist/*
- build-and-publish-conda:
- needs: build-and-publish-pypi
- runs-on: ubuntu-latest
- steps:
- - name: Checkout code
- uses: actions/checkout@v2
-
- - name: Setup Python
- uses: actions/setup-python@v2
- with:
- python-version: '3.10'
-
- - name: Install dependencies
- run: |
- conda install -c conda-forge grayskull conda-build anaconda-client -y
-
- - name: Build conda recipe from PYPI
- run: |
- $CONDA/bin/grayskull pypi tpot2
-
- - name: Update meta.yaml
- run: |
- sed -i "/tpot2 --help/d" tpot2/meta.yaml
- sed -i "/^ license:/ s/.*/ license: LGPLv3/" tpot2/meta.yaml
- sed -i "/^ license_file:/ s/.*/ license_file: ..\/LICENSE/" tpot2/meta.yaml
-
- - name: Build package with conda-forge
- run: |
- $CONDA/bin/conda-build tpot2 -c conda-forge
-
- - name: Upload to conda-forge
- env:
- CONDA_API_TOKEN: ${{ secrets.CONDA_API_TOKEN }}
- run: |
- $CONDA/bin/anaconda -t $CONDA_API_TOKEN upload -u conda-forge $CONDA/conda-bld/noarch/*.tar.bz2
+
diff --git a/.gitignore b/.gitignore
index 2ca5f1f5..a9df30b5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,6 @@ cache_folder/
dask-worker-space/
.tox/
*.egg-info/
-.coverage
\ No newline at end of file
+.coverage
+target/
+.venv/
\ No newline at end of file
diff --git a/docs/Tutorial b/docs/Tutorial
new file mode 120000
index 00000000..8883c00a
--- /dev/null
+++ b/docs/Tutorial
@@ -0,0 +1 @@
+../Tutorial
\ No newline at end of file
diff --git a/docs/cite.md b/docs/cite.md
new file mode 100644
index 00000000..415482d2
--- /dev/null
+++ b/docs/cite.md
@@ -0,0 +1 @@
+# Citing TPOT2
\ No newline at end of file
diff --git a/docs/contribute.md b/docs/contribute.md
new file mode 100644
index 00000000..ae86a06b
--- /dev/null
+++ b/docs/contribute.md
@@ -0,0 +1,3 @@
+# Contributing
+
+We welcome you to check the existing issues for bugs or enhancements to work on. If you have an idea for an extension to TPOT, please file a new issue so we can discuss it.
\ No newline at end of file
diff --git a/docs/css/extra.css b/docs/css/extra.css
new file mode 100644
index 00000000..f025d58f
--- /dev/null
+++ b/docs/css/extra.css
@@ -0,0 +1,3 @@
+.md-grid {
+ max-width: 100%;
+}
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 00000000..e9047a65
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,3 @@
+{%
+ include-markdown "../README.md"
+%}
\ No newline at end of file
diff --git a/docs/installation.md b/docs/installation.md
new file mode 100644
index 00000000..107687e6
--- /dev/null
+++ b/docs/installation.md
@@ -0,0 +1,34 @@
+# Installation
+
+TPOT2 requires a working installation of Python.
+
+### Creating a conda environment (optional)
+
+We recommend using conda environments for installing TPOT2, though it will also work without one.
+
+[More information on making anaconda environments found here.](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html)
+
+```
+conda create --name tpot2env python=3.10
+conda activate tpot2env
+```
+
+### Note for M1 Mac or other Arm-based CPU users
+
+You need to install the lightgbm package directly from conda using the following command before installing TPOT2.
+
+This is to ensure that you get the version that is compatible with your system.
+
+```
+conda install --yes -c conda-forge 'lightgbm>=3.3.3'
+```
+
+### Developer/Latest Branch Installation
+
+
+```
+pip install -e /path/to/tpot2repo
+```
+
+If you cloned the repository with git, the folder will be named TPOT2. (Note: this folder is the one that includes setup.py inside of it and not the folder of the same name inside it).
+If you downloaded as a zip, the folder may be called tpot2-main.
diff --git a/docs/related.md b/docs/related.md
new file mode 100644
index 00000000..9d2e12dd
--- /dev/null
+++ b/docs/related.md
@@ -0,0 +1,112 @@
+Other Automated Machine Learning (AutoML) tools and related projects:
+
+
+
+| Name | Language | License | Description |
+| ---- | -------- | ------- | ----------- |
+| Auto-WEKA | Java | GPL-v3 | Automated model selection and hyper-parameter tuning for Weka models. |
+| auto-sklearn | Python | BSD-3-Clause | An automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator. |
+| auto_ml | Python | MIT | Automated machine learning for analytics & production. Supports manual feature type declarations. |
+| H2O AutoML | Java with Python, Scala & R APIs and web GUI | Apache 2.0 | Automated: data prep, hyperparameter tuning, random grid search and stacked ensembles in a distributed ML platform. |
+| devol | Python | MIT | Automated deep neural network design via genetic programming. |
+| MLBox | Python | BSD-3-Clause | Accurate hyper-parameter optimization in high-dimensional space with support for distributed computing. |
+| Recipe | C | GPL-v3 | Machine-learning pipeline optimization through genetic programming. Uses grammars to define pipeline structure. |
+| Xcessiv | Python | Apache 2.0 | A web-based application for quick, scalable, and automated hyper-parameter tuning and stacked ensembling in Python. |
+| GAMA | Python | Apache 2.0 | Machine-learning pipeline optimization through asynchronous evaluation based genetic programming. |
+| PyMoo | Python | Apache 2.0 | Multi-objective optimization in Python. |
+| Karoo GP | Python | MIT | A Python based genetic programming application suite with support for symbolic regression and classification. |
+| MABE | C++ | See repository | A modular agent-based evolution framework for digital evolution research. |
+| SBBFramework | Python | BSD-2-Clause | Python implementation of Symbiotic Bid-Based (SBB) framework for problem decomposition using Genetic Programming (GP). |
+| Tiny GP | Python | GPL-v3 | A minimalistic program implementing Koza-style (tree-based) genetic programming to solve a symbolic regression problem. |
+| Baikal | Python | BSD-3-Clause | A graph-based functional API for building complex scikit-learn pipelines. |
+| skdag | Python | MIT | A more flexible alternative to scikit-learn Pipelines. |
+| d6tflow | Python | MIT | A python library which makes building complex data science workflows easy, fast and intuitive. |
+
diff --git a/docs/requirements_docs.txt b/docs/requirements_docs.txt
new file mode 100644
index 00000000..8075cc15
--- /dev/null
+++ b/docs/requirements_docs.txt
@@ -0,0 +1,7 @@
+mkdocs==1.4.2
+mkdocs-material==9.1.6
+mkdocs-include-markdown-plugin==4.0.4
+nbconvert==7.4.0
+mkdocs-jupyter==0.24.1
+mkdocstrings==0.21.2
+mkdocstrings-python==0.10.1
\ No newline at end of file
diff --git a/docs/scripts/build_docs_sources.sh b/docs/scripts/build_docs_sources.sh
new file mode 100644
index 00000000..52089cf7
--- /dev/null
+++ b/docs/scripts/build_docs_sources.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+function iterate_files() {
+ local directory="$1"
+ base_dir="docs/documentation"
+
+ for file in "$directory"/*; do
+ if [ -f "$file" ] && [[ "$file" == *.py ]] && [ "$(basename "$file")" != "__init__.py" ] && \
+ ! echo "$file" | grep -q "test" && [ "$(basename "$file")" != "graph_utils.py" ]; then
+ directories=$base_dir/$(dirname "$file")
+ file_name=$(basename "$file")
+ md_file=$directories/"${file_name%.*}".md
+
+ mkdir -p $directories && touch $md_file
+ include_line=$(dirname "$file")
+ include_line="${include_line//\//.}"."${file_name%.*}"
+ echo "::: $include_line" > $md_file
+
+ elif [ -d "$file" ]; then
+ iterate_files "$file"
+ fi
+ done
+}
+
+iterate_files "tpot2"
diff --git a/docs/scripts/build_mkdocs.sh b/docs/scripts/build_mkdocs.sh
new file mode 100644
index 00000000..4800a52a
--- /dev/null
+++ b/docs/scripts/build_mkdocs.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+
+# NOTE(review): the heredoc body was lost when this diff was captured
+# ("cat > mkdocs.yml <> mkdocs.yml" is not valid shell). Reconstructed a
+# minimal template below -- verify against the original script.
+cat > mkdocs.yml << EOF
+site_name: TPOT2
+theme:
+  name: material
+extra_css:
+  - css/extra.css
+plugins:
+  - search
+  - include-markdown
+  - mkdocs-jupyter
+  - mkdocstrings
+nav:
+  - index.md
+  - installation.md
+  - using.md
+  - TPOT2 API:
+EOF
+echo " - tpot2_api/estimator.md" >> mkdocs.yml
+echo " - tpot2_api/classifier.md" >> mkdocs.yml
+echo " - tpot2_api/regressor.md" >> mkdocs.yml
+echo " - Examples:" >> mkdocs.yml
+for file in docs/Tutorial/*.ipynb; do
+ base=$(basename $file .ipynb)
+ echo " - Tutorial/$base.ipynb" >> mkdocs.yml
+done
+echo " - Documentation:" >> mkdocs.yml
+function iterate_source_files() {
+ local directory="$1"
+
+ for file in "$directory"/*; do
+ if [ -f "$file" ] && [[ "$file" == *.md ]]; then
+ slash_count=$(echo "$file" | grep -o '/' | wc -l)
+ num_spaces=$((slash_count * 2))
+ spaces=$(printf "%*s" $num_spaces)
+ echo "$spaces- ${file#*/}" >> mkdocs.yml
+ fi
+ done
+
+ for file in "$directory"/*; do
+ if [ -d "$file" ]; then
+ slash_count=$(echo "$file" | grep -o '/' | wc -l)
+ num_spaces=$((slash_count * 2))
+ spaces=$(printf "%*s" $num_spaces)
+ last_dir=$(basename "$file")
+ echo "$spaces- $last_dir:" >> mkdocs.yml
+ iterate_source_files "$file"
+ fi
+ done
+}
+iterate_source_files "docs/documentation"
+# make these static instead
+# for file in docs/*.md; do
+# base=$(basename $file .md)
+# if [ "$base" == "index" ]; then
+# continue
+# fi
+# echo " - $base.md" >> mkdocs.yml
+# done
+echo " - contribute.md" >> mkdocs.yml
+echo " - cite.md" >> mkdocs.yml
+echo " - support.md" >> mkdocs.yml
+echo " - related.md" >> mkdocs.yml
+# moved to the top
+# # test docstring
+# # echo " - Tutorials:" >> mkdocs.yml
+# for file in docs/tutorial/*.ipynb; do
+# base=$(basename $file .ipynb)
+# echo " - tutorial/$base.ipynb" >> mkdocs.yml
+# done
diff --git a/docs/scripts/build_tutorial_toc_not_used.sh b/docs/scripts/build_tutorial_toc_not_used.sh
new file mode 100644
index 00000000..948fb089
--- /dev/null
+++ b/docs/scripts/build_tutorial_toc_not_used.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+for file in docs/tutorial/*.html; do
+ base=$(basename "$file" .html)
+ # NOTE(review): the HTML snippet inside the quotes appears to have been
+ # stripped from this diff; originally this likely wrote an <iframe> embed
+ # of the converted notebook -- verify against the original script.
+ echo "<iframe src=\"$base.html\" width=\"100%\" height=\"800px\"></iframe>" > "docs/tutorial/$base.md"
+done
diff --git a/docs/support.md b/docs/support.md
new file mode 100644
index 00000000..aa96d986
--- /dev/null
+++ b/docs/support.md
@@ -0,0 +1,5 @@
+# Support
+
+TPOT2 was developed in the [Artificial Intelligence Innovation (A2I) Lab](http://epistasis.org/) at Cedars-Sinai with funding from the [NIH](http://www.nih.gov/) under grants U01 AG066833 and R01 LM010098. We are incredibly grateful for the support of the NIH and the Cedars-Sinai during the development of this project.
+
+The TPOT logo was designed by Todd Newmuis, who generously donated his time to the project.
\ No newline at end of file
diff --git a/docs/tpot2_api/classifier.md b/docs/tpot2_api/classifier.md
new file mode 100644
index 00000000..b8d81f66
--- /dev/null
+++ b/docs/tpot2_api/classifier.md
@@ -0,0 +1 @@
+::: tpot2.tpot_estimator.templates.tpottemplates.TPOTClassifier
\ No newline at end of file
diff --git a/docs/tpot2_api/estimator.md b/docs/tpot2_api/estimator.md
new file mode 100644
index 00000000..d18b41c9
--- /dev/null
+++ b/docs/tpot2_api/estimator.md
@@ -0,0 +1 @@
+::: tpot2.tpot_estimator.estimator
\ No newline at end of file
diff --git a/docs/tpot2_api/regressor.md b/docs/tpot2_api/regressor.md
new file mode 100644
index 00000000..5013fbef
--- /dev/null
+++ b/docs/tpot2_api/regressor.md
@@ -0,0 +1 @@
+::: tpot2.tpot_estimator.templates.tpottemplates.TPOTRegressor
\ No newline at end of file
diff --git a/docs/using.md b/docs/using.md
new file mode 100644
index 00000000..b69b0693
--- /dev/null
+++ b/docs/using.md
@@ -0,0 +1 @@
+# Using TPOT2
\ No newline at end of file
diff --git a/tpot2/base_evolver.py b/tpot2/base_evolver.py
index 290c39a6..7b96e92a 100644
--- a/tpot2/base_evolver.py
+++ b/tpot2/base_evolver.py
@@ -85,141 +85,100 @@ def __init__( self,
----------
individual_generator : generator
Generator that yields new base individuals. Used to generate initial population.
-
objective_functions : list of callables
list of functions that get applied to the individual and return a float or list of floats
If an objective function returns multiple values, they are all concatenated in order
with respect to objective_function_weights and early_stop_tol.
-
objective_function_weights : list of floats
list of weights for each objective function. Sign flips whether bigger is better or not
-
objective_names : list of strings, default=None
Names of the objectives. If None, objective0, objective1, etc. will be used
-
objective_kwargs : dict, default=None
Dictionary of keyword arguments to pass to the objective function
-
bigger_is_better : bool, default=True
If True, the objective function is maximized. If False, the objective function is minimized. Use negative weights to reverse the direction.
-
population_size : int, default=50
Size of the population
-
initial_population_size : int, default=None
Size of the initial population. If None, population_size will be used.
-
population_scaling : int, default=0.5
Scaling factor to use when determining how fast we move the threshold moves from the start to end percentile.
-
generations_until_end_population : int, default=1
Number of generations until the population size reaches population_size
-
generations : int, default=50
Number of generations to run
-
early_stop : int, default=None
Number of generations without improvement before early stopping. All objectives must have converged within the tolerance for this to be triggered.
-
early_stop_tol : float, list of floats, or None, default=0.001
-list of floats
list of tolerances for each objective function. If the difference between the best score and the current score is less than the tolerance, the individual is considered to have converged
If an index of the list is None, that item will not be used for early stopping
-int
If an int is given, it will be used as the tolerance for all objectives
-
max_time_seconds : float, default=float("inf")
Maximum time to run the optimization. If none or inf, will run until the end of the generations.
-
max_eval_time_seconds : float, default=60*5
Maximum time to evaluate a single individual. If none or inf, there will be no time limit per evaluation.
-
n_jobs : int, default=1
Number of processes to run in parallel.
-
memory_limit : str, default="4GB"
Memory limit for each job. See Dask [LocalCluster documentation](https://distributed.dask.org/en/stable/api.html#distributed.Client) for more information.
-
client : dask.distributed.Client, default=None
A dask client to use for parallelization. If not None, this will override the n_jobs and memory_limit parameters. If None, will create a new client with num_workers=n_jobs and memory_limit=memory_limit.
-
survival_percentage : float, default=1
Percentage of the population size to utilize for mutation and crossover at the beginning of the generation. The rest are discarded. Individuals are selected with the selector passed into survival_selector. The value of this parameter must be between 0 and 1, inclusive.
For example, if the population size is 100 and the survival percentage is .5, 50 individuals will be selected with NSGA2 from the existing population. These will be used for mutation and crossover to generate the next 100 individuals for the next generation. The remainder are discarded from the live population. In the next generation, there will now be the 50 parents + the 100 individuals for a total of 150. Surivival percentage is based of the population size parameter and not the existing population size. Therefore, in the next generation we will still select 50 individuals from the currently existing 150.
-
crossover_probability : float, default=.2
Probability of generating a new individual by crossover between two individuals.
-
mutate_probability : float, default=.7
Probability of generating a new individual by crossover between one individuals.
-
mutate_then_crossover_probability : float, default=.05
Probability of generating a new individual by mutating two individuals followed by crossover.
-
crossover_then_mutate_probability : float, default=.05
Probability of generating a new individual by crossover between two individuals followed by a mutation of the resulting individual.
-
n_parents : int, default=2
Number of parents to use for crossover. Must be greater than 1.
-
survival_selector : function, default=survival_select_NSGA2
Function to use to select individuals for survival. Must take a matrix of scores and return selected indexes.
Used to selected population_size * survival_percentage individuals at the start of each generation to use for mutation and crossover.
-
parent_selector : function, default=parent_select_NSGA2
Function to use to select pairs parents for crossover and individuals for mutation. Must take a matrix of scores and return selected indexes.
-
budget_range : list [start, end], default=None
A starting and ending budget to use for the budget scaling.
-
budget_scaling float : [0,1], default=0.5
A scaling factor to use when determining how fast we move the budget from the start to end budget.
-
generations_until_end_budget : int, default=1
The number of generations to run before reaching the max budget.
-
stepwise_steps : int, default=1
The number of staircase steps to take when scaling the budget and population size.
-
threshold_evaluation_early_stop : list [start, end], default=None
starting and ending percentile to use as a threshold for the evaluation early stopping.
Values between 0 and 100.
-
threshold_evaluation_scaling : float [0,inf), default=0.5
A scaling factor to use when determining how fast we move the threshold moves from the start to end percentile.
Must be greater than zero. Higher numbers will move the threshold to the end faster.
-
min_history_threshold : int, default=0
The minimum number of previous scores needed before using threshold early stopping.
-
selection_evaluation_early_stop : list, default=None
A lower and upper percent of the population size to select each round of CV.
Values between 0 and 1.
-
selection_evaluation_scaling : float, default=0.5
A scaling factor to use when determining how fast we move the threshold moves from the start to end percentile.
Must be greater than zero. Higher numbers will move the threshold to the end faster.
-
evaluation_early_stop_steps : int, default=1
The number of steps that will be taken from the objective function. (e.g., the number of CV folds to evaluate)
-
final_score_strategy : str, default="mean"
The strategy to use when determining the final score for an individual.
"mean": The mean of all objective scores
"last": The score returned by the last call. Currently each objective is evaluated with a clone of the individual.
-
n_initial_optimizations : int, default=0
Number of individuals to optimize before starting the evolution.
-
optimization_objective : function, default=None
Function to optimize the individual with. If None, the first objective function will be used
-
max_optimize_time_seconds : float, default=60*5
Maximum time to run an optimization
-
optimization_steps : int, default=10
Number of steps per optimization
-
verbose : int, default=0
How much information to print during the optimization process. Higher values include the information from lower values.
0. nothing
@@ -228,11 +187,9 @@ def __init__( self,
3. best individual
4. warnings
>=5. full warnings trace
-
periodic_checkpoint_folder : str, default=None
Folder to save the population to periodically. If None, no periodic saving will be done.
If provided, training will resume from this checkpoint.
-
callback : tpot2.CallBackInterface, default=None
Callback object. Not implemented
"""
diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py
index 7f9dd42b..f28d3f31 100644
--- a/tpot2/tpot_estimator/estimator.py
+++ b/tpot2/tpot_estimator/estimator.py
@@ -95,38 +95,27 @@ def __init__(self, scorers,
scorers : (list, scorer)
A scorer or list of scorers to be used in the cross-validation process.
see https://scikit-learn.org/stable/modules/model_evaluation.html
-
scorers_weights : list
A list of weights to be applied to the scorers during the optimization process.
-
classification : bool
If True, the problem is treated as a classification problem. If False, the problem is treated as a regression problem.
Used to determine the CV strategy.
-
cv : int, cross-validator
- (int): Number of folds to use in the cross-validation process. By uses the sklearn.model_selection.KFold cross-validator for regression and StratifiedKFold for classification. In both cases, shuffled is set to True.
- (sklearn.model_selection.BaseCrossValidator): A cross-validator to use in the cross-validation process.
- max_depth (int): The maximum depth from any node to the root of the pipelines to be generated.
-
other_objective_functions : list, default=[tpot2.estimator_objective_functions.average_path_length_objective]
A list of other objective functions to apply to the pipeline.
-
other_objective_functions_weights : list, default=[-1]
A list of weights to be applied to the other objective functions.
-
objective_function_names : list, default=None
A list of names to be applied to the objective functions. If None, will use the names of the objective functions.
-
bigger_is_better : bool, default=True
If True, the objective function is maximized. If False, the objective function is minimized. Use negative weights to reverse the direction.
-
-
max_size : int, default=np.inf
The maximum number of nodes of the pipelines to be generated.
-
linear_pipeline : bool, default=False
If True, the pipelines generated will be linear. If False, the pipelines generated will be directed acyclic graphs.
-
root_config_dict : dict, default='auto'
The configuration dictionary to use for the root node of the model.
If 'auto', will use "classifiers" if classification=True, else "regressors".
@@ -144,7 +133,6 @@ def __init__(self, scorers,
- 'genetic encoders' : Includes Genetic Encoder methods as used in AutoQTL.
- 'FeatureEncodingFrequencySelector': Includes FeatureEncodingFrequencySelector method as used in AutoQTL.
- list : a list of strings out of the above options to include the corresponding methods in the configuration dictionary.
-
inner_config_dict : dict, default=["selectors", "transformers"]
The configuration dictionary to use for the inner nodes of the model generation.
Default ["selectors", "transformers"]
@@ -163,7 +151,6 @@ def __init__(self, scorers,
- 'FeatureEncodingFrequencySelector': Includes FeatureEncodingFrequencySelector method as used in AutoQTL.
- list : a list of strings out of the above options to include the corresponding methods in the configuration dictionary.
- None : If None and max_depth>1, the root_config_dict will be used for the inner nodes as well.
-
leaf_config_dict : dict, default=None
The configuration dictionary to use for the leaf node of the model. If set, leaf nodes must be from this dictionary.
Otherwise leaf nodes will be generated from the root_config_dict.
@@ -183,19 +170,15 @@ def __init__(self, scorers,
- 'FeatureEncodingFrequencySelector': Includes FeatureEncodingFrequencySelector method as used in AutoQTL.
- list : a list of strings out of the above options to include the corresponding methods in the configuration dictionary.
- None : If None, a leaf will not be required (i.e. the pipeline can be a single root node). Leaf nodes will be generated from the inner_config_dict.
-
cross_val_predict_cv : int, default=0
Number of folds to use for the cross_val_predict function for inner classifiers and regressors. Estimators will still be fit on the full dataset, but the following node will get the outputs from cross_val_predict.
-
- 0-1 : When set to 0 or 1, the cross_val_predict function will not be used. The next layer will get the outputs from fitting and transforming the full dataset.
- >=2 : When fitting pipelines with inner classifiers or regressors, they will still be fit on the full dataset.
However, the output to the next node will come from cross_val_predict with the specified number of folds.
-
categorical_features: list or None
Categorical columns to inpute and/or one hot encode during the preprocessing step. Used only if preprocessing is not False.
- None : If None, TPOT2 will automatically use object columns in pandas dataframes as objects for one hot encoding in preprocessing.
- List of categorical features. If X is a dataframe, this should be a list of column names. If X is a numpy array, this should be a list of column indices
-
subsets : str or list, default=None
Sets the subsets that the FeatureSetSeletor will select from if set as an option in one of the configuration dictionaries.
- str : If a string, it is assumed to be a path to a csv file with the subsets.
@@ -203,8 +186,6 @@ def __init__(self, scorers,
- list or np.ndarray : If a list or np.ndarray, it is assumed to be a list of subsets.
- None : If None, each column will be treated as a subset. One column will be selected per subset.
If subsets is None, each column will be treated as a subset. One column will be selected per subset.
-
-
memory: Memory object or string, default=None
If supplied, pipeline will cache each transformer after calling fit. This feature
is used to avoid computing the fit transformers within a pipeline if the parameters
@@ -220,177 +201,131 @@ def __init__(self, scorers,
and TPOT does NOT clean the caching directory up upon shutdown.
- None:
TPOT does not use memory caching.
-
preprocessing : bool or BaseEstimator/Pipeline,
EXPERIMENTAL
A pipeline that will be used to preprocess the data before CV.
- bool : If True, will use a default preprocessing pipeline.
- Pipeline : If an instance of a pipeline is given, will use that pipeline as the preprocessing pipeline.
-
validation_strategy : str, default='none'
EXPERIMENTAL The validation strategy to use for selecting the final pipeline from the population. TPOT2 may overfit the cross validation score. A second validation set can be used to select the final pipeline.
- 'auto' : Automatically determine the validation strategy based on the dataset shape.
- 'reshuffled' : Use the same data for cross validation and final validation, but with different splits for the folds. This is the default for small datasets.
- 'split' : Use a separate validation set for final validation. Data will be split according to validation_fraction. This is the default for medium datasets.
- 'none' : Do not use a separate validation set for final validation. Select based on the original cross-validation score. This is the default for large datasets.
-
validation_fraction : float, default=0.2
- EXPERIMENTAL The fraction of the dataset to use for the validation set when validation_strategy is 'split'. Must be between 0 and 1.
-
+ EXPERIMENTAL The fraction of the dataset to use for the validation set when validation_strategy is 'split'. Must be between 0 and 1.
population_size : int, default=50
Size of the population
-
initial_population_size : int, default=None
Size of the initial population. If None, population_size will be used.
-
population_scaling : int, default=0.5
Scaling factor to use when determining how fast we move the threshold moves from the start to end percentile.
-
generations_until_end_population : int, default=1
Number of generations until the population size reaches population_size
-
generations : int, default=50
Number of generations to run
-
early_stop : int, default=None
Number of generations without improvement before early stopping. All objectives must have converged within the tolerance for this to be triggered.
-
scorers_early_stop_tol :
-list of floats
list of tolerances for each scorer. If the difference between the best score and the current score is less than the tolerance, the individual is considered to have converged
If an index of the list is None, that item will not be used for early stopping
-int
If an int is given, it will be used as the tolerance for all objectives
-
other_objectives_early_stop_tol :
-list of floats
list of tolerances for each of the other objective function. If the difference between the best score and the current score is less than the tolerance, the individual is considered to have converged
If an index of the list is None, that item will not be used for early stopping
-int
If an int is given, it will be used as the tolerance for all objectives
-
max_time_seconds : float, default=float("inf")
Maximum time to run the optimization. If none or inf, will run until the end of the generations.
-
max_eval_time_seconds : float, default=60*5
Maximum time to evaluate a single individual. If none or inf, there will be no time limit per evaluation.
-
n_jobs : int, default=1
Number of processes to run in parallel.
-
memory_limit : str, default="4GB"
Memory limit for each job. See Dask [LocalCluster documentation](https://distributed.dask.org/en/stable/api.html#distributed.Client) for more information.
-
client : dask.distributed.Client, default=None
A dask client to use for parallelization. If not None, this will override the n_jobs and memory_limit parameters. If None, will create a new client with num_workers=n_jobs and memory_limit=memory_limit.
-
survival_percentage : float, default=1
Percentage of the population size to utilize for mutation and crossover at the beginning of the generation. The rest are discarded. Individuals are selected with the selector passed into survival_selector. The value of this parameter must be between 0 and 1, inclusive.
        For example, if the population size is 100 and the survival percentage is .5, 50 individuals will be selected with NSGA2 from the existing population. These will be used for mutation and crossover to generate the next 100 individuals for the next generation. The remainder are discarded from the live population. In the next generation, there will now be the 50 parents + the 100 individuals for a total of 150. Survival percentage is based on the population size parameter and not the existing population size. Therefore, in the next generation we will still select 50 individuals from the currently existing 150.
-
crossover_probability : float, default=.2
Probability of generating a new individual by crossover between two individuals.
-
mutate_probability : float, default=.7
        Probability of generating a new individual by mutation of one individual.
-
mutate_then_crossover_probability : float, default=.05
Probability of generating a new individual by mutating two individuals followed by crossover.
-
crossover_then_mutate_probability : float, default=.05
Probability of generating a new individual by crossover between two individuals followed by a mutation of the resulting individual.
-
n_parents : int, default=2
Number of parents to use for crossover. Must be greater than 1.
-
survival_selector : function, default=survival_select_NSGA2
Function to use to select individuals for survival. Must take a matrix of scores and return selected indexes.
Used to selected population_size * survival_percentage individuals at the start of each generation to use for mutation and crossover.
-
parent_selector : function, default=parent_select_NSGA2
        Function to use to select pairs of parents for crossover and individuals for mutation. Must take a matrix of scores and return selected indexes.
-
budget_range : list [start, end], default=None
A starting and ending budget to use for the budget scaling.
-
budget_scaling float : [0,1], default=0.5
A scaling factor to use when determining how fast we move the budget from the start to end budget.
-
generations_until_end_budget : int, default=1
The number of generations to run before reaching the max budget.
-
stepwise_steps : int, default=1
The number of staircase steps to take when scaling the budget and population size.
-
threshold_evaluation_early_stop : list [start, end], default=None
starting and ending percentile to use as a threshold for the evaluation early stopping.
Values between 0 and 100.
-
threshold_evaluation_scaling : float [0,inf), default=0.5
A scaling factor to use when determining how fast we move the threshold moves from the start to end percentile.
Must be greater than zero. Higher numbers will move the threshold to the end faster.
-
min_history_threshold : int, default=0
The minimum number of previous scores needed before using threshold early stopping.
-
selection_evaluation_early_stop : list, default=None
A lower and upper percent of the population size to select each round of CV.
Values between 0 and 1.
-
selection_evaluation_scaling : float, default=0.5
A scaling factor to use when determining how fast we move the threshold moves from the start to end percentile.
Must be greater than zero. Higher numbers will move the threshold to the end faster.
-
n_initial_optimizations : int, default=0
Number of individuals to optimize before starting the evolution.
-
optimization_cv : int
- Number of folds to use for the optuna optimization's internal cross-validation.
-
+ Number of folds to use for the optuna optimization's internal cross-validation.
max_optimize_time_seconds : float, default=60*5
Maximum time to run an optimization
-
optimization_steps : int, default=10
Number of steps per optimization
-
warm_start : bool, default=False
        If True, will continue the evolutionary algorithm from the last generation of the previous run.
-
subset_column : str or int, default=None
EXPERIMENTAL The column to use for the subset selection. Must also pass in unique_subset_values to GraphIndividual to function.
-
    evolver : tpot2.evolutionary_algorithms.eaNSGA2.eaNSGA2_Evolver, default=eaNSGA2_Evolver
The evolver to use for the optimization process. See tpot2.evolutionary_algorithms
        - type : a type or subclass of a BaseEvolver
- "nsga2" : tpot2.evolutionary_algorithms.eaNSGA2.eaNSGA2_Evolver
-
verbose : int, default=1
How much information to print during the optimization process. Higher values include the information from lower values.
0. nothing
1. progress bar
-
3. best individual
4. warnings
>=5. full warnings trace
        6. evaluations progress bar. (Temporary: This used to be 2. Currently, using the evaluation progress bar may prevent some instances where we terminate a generation early due to it reaching max_time_seconds in the middle of a generation OR a pipeline failed to be terminated normally and we need to manually terminate it.)
-
periodic_checkpoint_folder : str, default=None
Folder to save the population to periodically. If None, no periodic saving will be done.
If provided, training will resume from this checkpoint.
-
callback : tpot2.CallBackInterface, default=None
Callback object. Not implemented
-
processes : bool, default=True
If True, will use multiprocessing to parallelize the optimization process. If False, will use threading.
True seems to perform better. However, False is required for interactive debugging.
-
+
+
Attributes
----------
-
fitted_pipeline_ : GraphPipeline
A fitted instance of the GraphPipeline that inherits from sklearn BaseEstimator. This is fitted on the full X, y passed to fit.
-
evaluated_individuals : A pandas data frame containing data for all evaluated individuals in the run.
Columns:
- *objective functions : The first few columns correspond to the passed in scorers and objective functions
@@ -405,7 +340,6 @@ def __init__(self, scorers,
- Instance : The unfitted GraphPipeline BaseEstimator.
- *validation objective functions : Objective function scores evaluated on the validation set.
- Validation_Pareto_Front : The full pareto front calculated on the validation set. This is calculated for all pipelines with Pareto_Front equal to 0. Unlike the Pareto_Front which only calculates the frontier and the final population, the Validation Pareto Front is calculated for all pipelines tested on the validation set.
-
pareto_front : The same pandas dataframe as evaluated individuals, but containing only the frontier pareto front pipelines.
'''
diff --git a/tpot2/tpot_estimator/templates/tpottemplates.py b/tpot2/tpot_estimator/templates/tpottemplates.py
index 3ba2cd01..0bc017cb 100644
--- a/tpot2/tpot_estimator/templates/tpottemplates.py
+++ b/tpot2/tpot_estimator/templates/tpottemplates.py
@@ -68,6 +68,9 @@ def __init__( self,
callback: tpot2.CallBackInterface=None,
processes = True,
):
+ """
+ See TPOTEstimator for documentation
+ """
super(TPOTRegressor,self).__init__(
scorers=scorers,
scorers_weights=scorers_weights,
@@ -196,6 +199,9 @@ def __init__( self,
callback: tpot2.CallBackInterface=None,
processes = True,
):
+ """
+ See TPOTEstimator for documentation
+ """
super(TPOTClassifier,self).__init__(
scorers=scorers,
scorers_weights=scorers_weights,