From cda631506a2abd71f936a39a618cc124ae716059 Mon Sep 17 00:00:00 2001
From: Kevin Markham <justmarkham@gmail.com>
Date: Sat, 3 Oct 2015 16:24:54 -0400
Subject: [PATCH] remove unnecessary class 5 file

---
 README.md                       |   4 +-
 code/05_pandas_visualization.py | 173 --------------------------------
 2 files changed, 2 insertions(+), 175 deletions(-)
 delete mode 100644 code/05_pandas_visualization.py

diff --git a/README.md b/README.md
index e374f95..bc0b157 100644
--- a/README.md
+++ b/README.md
@@ -151,7 +151,7 @@ Tuesday | Thursday
 ### Class 5: Visualization
 * Python homework with the Chipotle data due ([solution](code/03_python_homework_chipotle.py), [detailed explanation](notebooks/03_python_homework_chipotle_explained.ipynb))
 * Part 2 of Exploratory Data Analysis with Pandas ([code](code/04_pandas.py))
-* Visualization with Pandas and Matplotlib ([code](code/05_pandas_visualization.py), [notebook](notebooks/05_pandas_visualization.ipynb))
+* Visualization with Pandas and Matplotlib ([notebook](notebooks/05_pandas_visualization.ipynb))
 
 **Homework:**
 * Your project question write-up is due on Thursday.
@@ -177,7 +177,7 @@ Tuesday | Thursday
 -----
 
 ### Class 6: Machine Learning
-* Part 2 of Visualization with Pandas and Matplotlib ([code](code/05_pandas_visualization.py), [notebook](notebooks/05_pandas_visualization.ipynb))
+* Part 2 of Visualization with Pandas and Matplotlib ([notebook](notebooks/05_pandas_visualization.ipynb))
 * Brief introduction to the Jupyter/IPython Notebook
 * "Human learning" exercise:
     * [Iris dataset](http://archive.ics.uci.edu/ml/datasets/Iris) hosted by the UCI Machine Learning Repository
diff --git a/code/05_pandas_visualization.py b/code/05_pandas_visualization.py
deleted file mode 100644
index 7b765ac..0000000
--- a/code/05_pandas_visualization.py
+++ /dev/null
@@ -1,173 +0,0 @@
-'''
-CLASS: Visualization with Pandas (and Matplotlib)
-'''
-
-import pandas as pd
-import matplotlib.pyplot as plt
-
-# increase default figure and font sizes for easier viewing
-plt.rcParams['figure.figsize'] = (8, 6)
-plt.rcParams['font.size'] = 16
-
-# read in the drinks data
-drink_cols = ['country', 'beer', 'spirit', 'wine', 'liters', 'continent']
-drinks = pd.read_csv('drinks.csv', header=0, names=drink_cols, na_filter=False)
-
-'''
-Histogram: show the distribution of a numerical variable
-'''
-
-# sort the beer column and mentally split it into 3 groups
-drinks.beer.order().values
-
-# compare with histogram
-drinks.beer.plot(kind='hist', bins=3)
-
-# try more bins
-drinks.beer.plot(kind='hist', bins=20)
-
-# add title and labels
-drinks.beer.plot(kind='hist', bins=20, title='Histogram of Beer Servings')
-plt.xlabel('Beer Servings')
-plt.ylabel('Frequency')
-
-# compare with density plot (smooth version of a histogram)
-drinks.beer.plot(kind='density', xlim=(0, 500))
-
-'''
-Scatter Plot: show the relationship between two numerical variables
-'''
-
-# select the beer and wine columns and sort by beer
-drinks[['beer', 'wine']].sort('beer').values
-
-# compare with scatter plot
-drinks.plot(kind='scatter', x='beer', y='wine')
-
-# add transparency
-drinks.plot(kind='scatter', x='beer', y='wine', alpha=0.3)
-
-# vary point color by spirit servings
-drinks.plot(kind='scatter', x='beer', y='wine', c='spirit', colormap='Blues')
-
-# scatter matrix of three numerical columns
-pd.scatter_matrix(drinks[['beer', 'spirit', 'wine']])
-
-# increase figure size
-pd.scatter_matrix(drinks[['beer', 'spirit', 'wine']], figsize=(10, 8))
-
-'''
-Bar Plot: show a numerical comparison across different categories
-'''
-
-# count the number of countries in each continent
-drinks.continent.value_counts()
-
-# compare with bar plot
-drinks.continent.value_counts().plot(kind='bar')
-
-# calculate the mean alcohol amounts for each continent
-drinks.groupby('continent').mean()
-
-# side-by-side bar plots
-drinks.groupby('continent').mean().plot(kind='bar')
-drinks.groupby('continent').mean().drop('liters', axis=1).plot(kind='bar')
-
-# stacked bar plots
-drinks.groupby('continent').mean().drop('liters', axis=1).plot(kind='bar', stacked=True)
-
-'''
-Box Plot: show quartiles (and outliers) for one or more numerical variables
-
-Five-number summary:
-  min = minimum value
-  25% = first quartile (Q1) = median of the lower half of the data
-  50% = second quartile (Q2) = median of the data
-  75% = third quartile (Q3) = median of the upper half of the data
-  max = maximum value
-(More useful than mean and standard deviation for describing skewed distributions)
-
-Interquartile Range (IQR) = Q3 - Q1
-
-Outliers:
-  below Q1 - 1.5 * IQR
-  above Q3 + 1.5 * IQR
-'''
-
-# sort the spirit column
-drinks.spirit.order().values
-
-# show "five-number summary" for spirit
-drinks.spirit.describe()
-
-# compare with box plot
-drinks.spirit.plot(kind='box')
-
-# include multiple variables
-drinks.drop('liters', axis=1).plot(kind='box')
-
-'''
-Line Plot: show the trend of a numerical variable over time
-'''
-
-# read in the ufo data
-ufo = pd.read_csv('ufo.csv')
-ufo['Time'] = pd.to_datetime(ufo.Time)
-ufo['Year'] = ufo.Time.dt.year
-
-# count the number of ufo reports each year (and sort by year)
-ufo.Year.value_counts().sort_index()
-
-# compare with line plot
-ufo.Year.value_counts().sort_index().plot()
-
-# don't use a line plot when there is no logical ordering
-drinks.continent.value_counts().plot()
-
-'''
-Grouped Box Plots: show one box plot for each group
-'''
-
-# reminder: box plot of beer servings
-drinks.beer.plot(kind='box')
-
-# box plot of beer servings grouped by continent
-drinks.boxplot(column='beer', by='continent')
-
-# box plot of all numeric columns grouped by continent
-drinks.boxplot(by='continent')
-
-'''
-Grouped Histograms: show one histogram for each group
-'''
-
-# reminder: histogram of beer servings
-drinks.beer.plot(kind='hist')
-
-# histogram of beer servings grouped by continent
-drinks.hist(column='beer', by='continent')
-
-# share the x axes
-drinks.hist(column='beer', by='continent', sharex=True)
-
-# share the x and y axes
-drinks.hist(column='beer', by='continent', sharex=True, sharey=True)
-
-# change the layout
-drinks.hist(column='beer', by='continent', sharex=True, layout=(2, 3))
-
-'''
-Assorted Functionality
-'''
-
-# saving a plot to a file: run all four lines at once
-drinks.beer.plot(kind='hist', bins=20, title='Histogram of Beer Servings')
-plt.xlabel('Beer Servings')
-plt.ylabel('Frequency')
-plt.savefig('beer_histogram.png')
-
-# list available plot styles
-plt.style.available
-
-# change to a different style
-plt.style.use('ggplot')