-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeature_tool.py
34 lines (30 loc) · 1.47 KB
/
feature_tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Usage example for the featuretools library
import featuretools as ft
import pandas as pd
# The script works with three tables (entities) taken from the demo data.
data = ft.demo.load_mock_customer()  # load the demo data

# One customer can have many sessions; this table carries customer_id.
customers_df = data["customers"]
print(customers_df)

# One session can cover many transactions; this table carries session_id
# and customer_id.
sessions_df = data["sessions"]
print(sessions_df.sample(5))

# This table carries transaction_id and session_id.
transactions_df = data["transactions"]
print(transactions_df.head(5))

# Describe the three entities in one dictionary: each value is a tuple of
# (dataframe, id column, ...). The id column is required; the optional third
# item is presumably the time-index column -- confirm against the
# featuretools docs.
entities = {
    "customers": (customers_df, "customer_id"),
    "sessions": (sessions_df, "session_id", "session_start"),
    "transactions": (
        transactions_df,
        "transaction_id",
        "transaction_time",
    ),
}

# Declare how the entities relate to each other. Whenever two entities are in
# a one-to-many (parent/child) relationship, they are matched on a key column:
# (parent entity, parent key, child entity, child key).
relationships = [
    ("sessions", "session_id", "transactions", "session_id"),
    ("customers", "customer_id", "sessions", "customer_id"),
]

# Print dataframes in full from here on: every row ...
pd.set_option("display.max_rows", None)
# ... and every column.
pd.set_option("display.max_columns", None)

# Run Deep Feature Synthesis. Change target_entity to obtain features that
# describe a different entity.
# NOTE(review): the `entities` / `target_entity` keyword names look like the
# pre-1.0 featuretools API -- confirm against the installed version.
feature_matrix_customers, features_defs = ft.dfs(
    entities=entities,
    relationships=relationships,
    target_entity="customers",
    agg_primitives=["sum", "median"],
    trans_primitives=["add_numeric", "day"],
    max_depth=2,
)

# The resulting columns are the features that describe each customer.
print(feature_matrix_customers.columns)
print(feature_matrix_customers)