-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_extraction.py
82 lines (58 loc) · 3.07 KB
/
data_extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd
# This function receives the chatbot's answer as a JSON-like dict and returns a list of dictionaries, one per matching player.
# For this prototype the Spanish position names produced by the chatbot are
# generalized to the position codes found in the ``player_positions`` column.
# Keys are stored lower-case; lookups are case-insensitive.
_GENERAL_POSITIONS = {
    'extremo izquierdo': ['LW'],
    'centrocampista izquierdo': ['LM'],
    'centro delantero': ['CF'],
    'centrocampista central': ['CM'],
    'defensor derecho': ['RB'],
    'extremo derecho': ['RW'],
    'defensor central': ['CB'],
    'delantero': ['ST'],
    'arquero': ['GK'],
    'defensor izquierdo': ['LB'],
    # Was 'CM' (copy-paste of the central midfielder entry); the right-sided
    # counterpart of 'LM' is 'RM'.
    'centrocampista derecho': ['RM'],
    # Were 'RWD' / 'LWD', which are not position codes in the dataset, so
    # these requests could never match; the wing-back codes are RWB / LWB.
    'lateral derecho': ['RWB'],
    'lateral izquierdo': ['LWB'],
    'centrocampista ofensivo central': ['CAM'],
    'centrocampista defensivo central': ['CDM'],
}

# Columns included in every returned player record.
_RESULT_COLUMNS = [
    'sofifa_id', 'short_name', 'age', 'nationality_name', 'value_eur',
    'wage_eur', 'player_positions', 'club_name', 'player_face_url',
    'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
]


def player_request(chatGPT_answer):
    """Return the players matching the positions and budget in the chatbot answer.

    Args:
        chatGPT_answer: mapping with two keys:
            ``'posiciones'``: iterable of Spanish position names (matched
            case-insensitively against the known position names).
            ``'valor'``: maximum player value; rows are kept when their
            ``value_eur`` is less than or equal to it.

    Returns:
        A list of dicts (one per player, in dataset order) holding the
        columns in ``_RESULT_COLUMNS``. ``player_positions`` is returned as a
        list of position codes instead of the comma-separated string stored
        in the CSV.

    Raises:
        KeyError: if ``chatGPT_answer`` lacks ``'posiciones'`` or ``'valor'``,
            or if a requested position name is unknown.
    """
    # Position codes the user is looking for.
    wanted_positions = set()
    for position in chatGPT_answer['posiciones']:
        key = position.strip().casefold()
        if key not in _GENERAL_POSITIONS:
            raise KeyError(position)
        wanted_positions.update(_GENERAL_POSITIONS[key])

    # Maximum amount in euros the user wants to spend on the player.
    max_value_eur = chatGPT_answer['valor']

    df = pd.read_csv('data/players_22.csv')

    # Each row stores its positions as "ST, CF"; a missing value becomes an
    # empty string so it simply matches nothing instead of crashing.
    player_positions = df['player_positions'].fillna('').str.split(', ')
    position_mask = pd.Series(
        [not wanted_positions.isdisjoint(codes) for codes in player_positions],
        index=df.index,
        dtype=bool,
    )
    # Comparisons against NaN are False, so players without a value are dropped.
    value_mask = df['value_eur'] <= max_value_eur
    selected = position_mask & value_mask

    # Copy so the column assignment below never writes into a view of ``df``.
    df_recomendations = df.loc[selected, _RESULT_COLUMNS].copy()
    df_recomendations['player_positions'] = player_positions[selected]
    return df_recomendations.to_dict(orient='records')