-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathArchivedSquadsFetcher.py
116 lines (93 loc) · 3.37 KB
/
ArchivedSquadsFetcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import urllib.request as __urlreq
from bs4 import BeautifulSoup as __soup
import re
from pandas import DataFrame as __df
from openpyxl import Workbook as __wb
import openpyxl.utils.dataframe as __odf
from openpyxl import load_workbook
import openpyxl.styles as __oxl_styles
# Names of the two squads; used further down as worksheet titles.
_squads = ['England', 'Pakistan']
# Workbook that the squad sheets are written to.
_excel_file = "E:/D11/Next Match Squads.xlsx"
# NOTE(review): _new_wb is assigned here but not read anywhere in this file.
_new_wb = True
_parser = 'html.parser'
# Fetch the match page and parse it inside a ``with`` block so the HTTP
# response is closed as soon as parsing is done instead of staying open for
# the rest of the run. ``_url`` remains bound (to the closed response).
with __urlreq.urlopen("http://www.espncricinfo.com/series/8039/game/1144488/england-vs-pakistan-6th-match-icc-cricket-world-cup-2019") as _url:
    _soup = __soup(_url, _parser)
# Every <table> on the page; index 0 is processed first, index 1 later on.
_table_tag_list = _soup.find_all('table')
# Accumulators filled by parse_html_scrpe_data(), one entry per player row.
_player_name_list = []
_player_role_list = []
_player_id_list = []
# Select the first table and collect its rows for the first parsing pass.
_table_tag = _table_tag_list[0]
# Text of the table's first <th>; not read again in this file.
_player_label = _table_tag.th.string
_tr_tag_list = _table_tag.find_all('tr')
_length_tr_tags = len(_tr_tag_list)
def parse_html_scrpe_data():
    """Collect name, role and ID for every player row of the current table.

    Reads the module-level ``_tr_tag_list`` and, for each row after the
    first one, appends one entry to ``_player_name_list``,
    ``_player_role_list`` and ``_player_id_list``. Returns nothing.

    The name comes from the first ``<td>``, the role from the second, and
    the ID is the first run of digits in the ``href`` of the link inside
    the first ``<td>``.

    Raises:
        IndexError: if a row has fewer than two ``<td>`` cells, or the link
            contains no digits.
        TypeError: if the first cell of a row contains no ``<a>`` tag.
    """
    # Slice off the first (header) row instead of testing the index on each
    # pass; the separately stored row count is therefore not needed here.
    for tr_tag in _tr_tag_list[1:]:
        td_tags = tr_tag.find_all('td')
        name_cell = td_tags[0]
        role_cell = td_tags[1]
        link = name_cell.a['href']
        # get_text() returns the same text as ``.string`` for a single text
        # node, but also works when the cell's text is split over several
        # nodes, where ``.string`` is None and ``.strip()`` would fail.
        player_name = name_cell.get_text().strip()
        player_role = role_cell.get_text().strip()
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        player_id = re.findall(r'\d+', link)[0]
        # Append only after the whole row parsed, so the three lists always
        # stay the same length.
        _player_name_list.append(player_name)
        _player_role_list.append(player_role)
        _player_id_list.append(player_id)
# First pass: fill the three player lists from the rows of the first table.
parse_html_scrpe_data()
# One DataFrame column per list; these keys become the sheet's header row.
_players_df = __df({'Player Name': _player_name_list, 'Player Role': _player_role_list, 'Player ID': _player_id_list})
# Fresh in-memory workbook with a sheet titled after the first squad.
# NOTE(review): a new Workbook presumably also carries its default sheet,
# which is never removed here - confirm whether that is intended.
_wb = __wb()
_ws = _wb.create_sheet(_squads[0])
def appending_rows():
    """Append the players DataFrame to the worksheet bound to ``_ws``.

    The header row is emitted first and the DataFrame index is left out.
    """
    for row_values in __odf.dataframe_to_rows(_players_df, index=False, header=True):
        _ws.append(row_values)
# Write the first squad's rows into its worksheet.
appending_rows()
def bolding_font():
    """Apply a bold font to every cell in row 1 of the worksheet ``_ws``."""
    last_column = _ws.max_column
    # openpyxl columns are 1-based, so start counting at 1.
    for column_index in range(1, last_column + 1):
        header_cell = _ws.cell(row=1, column=column_index)
        header_cell.font = __oxl_styles.Font(bold=True)
# Make the header row of the first squad's sheet bold.
bolding_font()
def align_center():
    """Centre every cell of the worksheet ``_ws`` horizontally and vertically.

    Cells are visited column by column, covering rows 1..max_row and
    columns 1..max_column.
    """
    for column_index in range(1, _ws.max_column + 1):
        for row_index in range(1, _ws.max_row + 1):
            target = _ws.cell(row=row_index, column=column_index)
            target.alignment = __oxl_styles.Alignment(
                horizontal='center',
                vertical='center',
            )
# Centre-align all cells of the first squad's sheet.
align_center()
def adjust_column_width():
    """Set each column width of ``_ws`` to its longest cell text plus 2.

    For every column in the worksheet, the length of the longest cell value
    (converted to text) is measured and the column width is set to that
    length plus two characters of padding. Empty cells (value ``None``)
    count as length 0. Returns nothing.
    """
    for col in _ws.columns:
        # Take the column letter from the cell itself. Parsing it out of the
        # cell's repr picks the wrong letter for columns past "Z" and for
        # sheet titles that contain a letter followed by a digit.
        col_name = col[0].column_letter
        # Measure the text form of every value so numeric cells are counted
        # too, rather than raising on len() and being silently skipped.
        longest = max(
            (len(str(cell.value)) for cell in col if cell.value is not None),
            default=0,
        )
        _ws.column_dimensions[col_name].width = longest + 2
# Finish the first squad's sheet and write the workbook to disk.
adjust_column_width()
_wb.save(_excel_file)
# Empty the shared accumulators in place before the second parsing pass.
_player_name_list.clear()
_player_role_list.clear()
_player_id_list.clear()
# Switch the module-level row list to the second table on the page.
_table_tag = _table_tag_list[1]
_player_label = _table_tag.th.string
_tr_tag_list = _table_tag.find_all('tr')
_length_tr_tags = len(_tr_tag_list)
# Second pass: the parser reads the rebound _tr_tag_list / _length_tr_tags.
parse_html_scrpe_data()
_players_df = __df({'Player Name': _player_name_list, 'Player Role': _player_role_list, 'Player ID': _player_id_list})
# Re-open the file that was just saved and add a sheet for the second squad.
# NOTE(review): the workbook saved above is reloaded from disk rather than
# reused in memory - confirm whether the round trip is needed.
_wb = load_workbook(_excel_file)
_ws = _wb.create_sheet(title=_squads[1])
# Same fill-and-format steps as for the first sheet, now acting on the
# rebound _players_df and _ws.
appending_rows()
bolding_font()
align_center()
adjust_column_width()
_wb.save(_excel_file)