forked from realpython/python-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path26_stock_scraper.py
32 lines (26 loc) · 924 Bytes
/
26_stock_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import requests
from lxml import html
from collections import defaultdict
def get_stocks(url):
# Make Request
page = requests.get(url)
# Parse/Scrape
tree = html.fromstring(page.text)
xpath = '//*[@id="mw-content-text"]/table[1]'
rows = tree.xpath(xpath)[0].findall("tr")
rows = [(row.getchildren()[0], row.getchildren()[3]) for row in rows[1:]]
rows = [(row[0].getchildren()[0].text, row[1].text) for row in rows]
industries = defaultdict(list)
for row in rows:
industries[row[1]].append(row[0])
return industries
def output_data(data_dict):
for industry in data_dict:
print('\n'+industry)
print('-'*len(industry))
for ticker in data_dict[industry]:
print(ticker)
if __name__ == '__main__':
url = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
scraped_data = get_stocks(url)
output_data(scraped_data)