-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSTATHomework.py
168 lines (150 loc) · 4.4 KB
/
STATHomework.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# -*- coding: utf-8 -*-
"""
Created on Sat May 5 12:14:47 2018
@author: fengh
"""
# Q1
import pandas_datareader.data as web
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
# Download the raw frames from the remote providers and echo the first
# five rows of each one as a quick sanity check.
# NOTE(review): every call below needs network access, and whether a given
# data_source is accepted depends on the installed pandas-datareader
# release -- confirm before running.
morningstar = web.DataReader('F', data_source='morningstar')
print(morningstar.head())

robinhood = web.DataReader('F', data_source='robinhood')
print(robinhood.head())

quandl = web.DataReader('F', data_source='quandl')
print(quandl.head())

# The fourth frame is an index ('^DJI') served by Stooq, not the 'F' ticker.
ntsdstooq = web.DataReader('^DJI', data_source='stooq')
print(ntsdstooq.head())
# Q2
# Build one trimmed frame per data source.
# The sources return frames that differ from each other, so only the first
# 200 rows of each one are kept.
# NOTE(review): rows are taken by position, not matched by date -- confirm
# that row i refers to the same trading day in every source.
ohlcv = ['Close', 'Open', 'Volume', 'High', 'Low']

# Morningstar
mor = morningstar[ohlcv].iloc[:200]

# Robinhood
# Its columns carry different names, so pick them in the matching order
# and relabel them to the names used by the other sources.
rob = robinhood[
    ['close_price', 'open_price', 'volume', 'high_price', 'low_price']
].iloc[:200]
rob.columns = ohlcv

# Quandl
qua = quandl[ohlcv].iloc[:200]

# Nasdaq
# NOTE(review): this frame was requested with the '^DJI' symbol -- confirm
# that the "Nasdaq" label used for it is the intended one.
ntsd = ntsdstooq[ohlcv].iloc[:200]
# part a
# Close prices of the four sources side by side, one column per source.
# Values are copied as plain lists, so the columns are paired by row
# position and the result gets a fresh default index.
morClose = mor['Close'].tolist()
robClose = rob['Close'].tolist()
quaClose = qua['Close'].tolist()
ntsdClose = ntsd['Close'].tolist()
dicClose = dict(
    Morningstar=morClose,
    Robinhood=robClose,
    Quandl=quaClose,
    Nasdaq=ntsdClose,
)
CloseData = pd.DataFrame(dicClose)
CloseData.head()
# part b
# Open prices of the four sources side by side, one column per source.
# Values are copied as plain lists, so the columns are paired by row
# position and the result gets a fresh default index.
morOpen = mor['Open'].tolist()
robOpen = rob['Open'].tolist()
quaOpen = qua['Open'].tolist()
ntsdOpen = ntsd['Open'].tolist()
dicOpen = dict(
    Morningstar=morOpen,
    Robinhood=robOpen,
    Quandl=quaOpen,
    Nasdaq=ntsdOpen,
)
OpenData = pd.DataFrame(dicOpen)
OpenData.head()
# part c
# Traded volume of the four sources side by side, one column per source.
# Values are copied as plain lists, so the columns are paired by row
# position and the result gets a fresh default index.
morVolume = mor['Volume'].tolist()
robVolume = rob['Volume'].tolist()
quaVolume = qua['Volume'].tolist()
ntsdVolume = ntsd['Volume'].tolist()
dicVolume = dict(
    Morningstar=morVolume,
    Robinhood=robVolume,
    Quandl=quaVolume,
    Nasdaq=ntsdVolume,
)
VolumeData = pd.DataFrame(dicVolume)
VolumeData.head()
# part d
# High prices of the four sources side by side, one column per source.
# Values are copied as plain lists, so the columns are paired by row
# position and the result gets a fresh default index.
morHigh = mor['High'].tolist()
robHigh = rob['High'].tolist()
quaHigh = qua['High'].tolist()
ntsdHigh = ntsd['High'].tolist()
dicHigh = dict(
    Morningstar=morHigh,
    Robinhood=robHigh,
    Quandl=quaHigh,
    Nasdaq=ntsdHigh,
)
HighData = pd.DataFrame(dicHigh)
HighData.head()
# part e
# Low prices of the four sources side by side, one column per source.
# Values are copied as plain lists, so the columns are paired by row
# position and the result gets a fresh default index.
morLow = mor['Low'].tolist()
robLow = rob['Low'].tolist()
quaLow = qua['Low'].tolist()
ntsdLow = ntsd['Low'].tolist()
dicLow = dict(
    Morningstar=morLow,
    Robinhood=robLow,
    Quandl=quaLow,
    Nasdaq=ntsdLow,
)
LowData = pd.DataFrame(dicLow)
LowData.head()
# Q3
# Time-series plot (index on the x axis, Open on the y axis) drawn from the
# 200-row `ntsd` frame.
plt.figure()
plt.plot(ntsd.index, ntsd['Open'])
plt.title("Nasdaq Time Series Plot for Open")
plt.xlabel("Time")
plt.ylabel("Open")
# Turn the tick labels vertical so neighbouring labels do not overlap.
plt.xticks(rotation=90)
plt.show()
# Q4
# Pairwise correlation between the Volume columns of the sources,
# computed on the 200 rows kept for each source.
# The result is not stored; it is only displayed when run interactively.
VolumeData.corr(method='pearson')
# Q5
# Covariance matrix of the Morningstar and Quandl Close columns,
# computed on the 200 rows kept for each source.
# The result is not stored; it is only displayed when run interactively.
CloseData.loc[:, ['Morningstar', 'Quandl']].cov()
# Q6
# Scatter-matrix of the Open prices of all sources.
# Robinhood's column is not stored as a numeric dtype, so it has to be
# converted to float before it can be plotted.
OpenData['Robinhood'] = OpenData['Robinhood'].astype('float')
# Double-check the dtypes to make sure every column is numeric now.
OpenData.dtypes
pd.plotting.scatter_matrix(OpenData, alpha=0.2)
# Render the scatter matrix now. Without this call its figure stays the
# current one, and the plt.subplot(...) calls that follow in the script
# would draw into it instead of into a figure of their own.
plt.show()
# Q7
# 2x2 grid of histograms of the High price, one panel per source, based on
# the 200 rows kept for each source.
# Robinhood's High column has to be converted to float before plotting.
rob['High'] = rob['High'].astype('float')

# Start from a fresh figure so the panels are never added to whatever
# figure happens to be current (e.g. a plot created earlier in the script
# that has not been shown yet).
plt.figure()

# (subplot position, source frame, x-axis label) for each panel.
panels = [
    (221, mor, "Morningstar"),
    (222, rob, "Robinhood"),
    (223, qua, "Quandl"),
    (224, ntsd, "Nasdaq"),
]
for position, frame, label in panels:
    plt.subplot(position)
    plt.hist(frame['High'])
    plt.xlabel(label)
    plt.grid(True)

# Leave room between the panels so the axis labels do not collide.
plt.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.5,
                    wspace=0.35)
plt.show()
# Q8
# Daily return of the Morningstar close prices.
morClose = CloseData.Morningstar
# The daily return measures the change in a stock's price as a fraction of
# the previous day's closing price:
#     return[t] = close[t] / close[t-1] - 1
# The divisor therefore has to be the earlier row, not the later one.
# `.values` strips the index from the shifted slice so the division pairs
# rows by position (row t with row t-1) instead of aligning on labels.
# Assumes the rows are ordered oldest-first -- TODO confirm for this source.
# The result is not stored; it is only displayed when run interactively.
morClose.iloc[1:] / morClose.iloc[:-1].values - 1