-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtrain.conf
123 lines (94 loc) · 3.66 KB
/
train.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# task type; supports train and predict
task = train
# boosting type; supports gbdt for now. alias: boosting, boost
boosting_type = gbdt
# application type; supports the following applications:
#   regression                , regression task
#   binary                    , binary classification task
#   lambdarank                , LambdaRank task
#   constrained_cross_entropy , constrained optimization task (classification)
# alias: application, app
# selecting constrained_cross_entropy is what trains FairGBM
objective = constrained_cross_entropy
# eval metrics; multiple metrics are supported, delimited by ','. Supported metrics:
#   l1
#   l2             , default metric for regression
#   ndcg           , default metric for lambdarank
#   auc
#   binary_logloss , default metric for binary
#   binary_error
metric = binary_logloss,auc
# frequency of metric output
metric_freq = 1
# true to also output metrics on the training data, alias: training_metric, train_metric
is_training_metric = true
# number of bins for feature bucketing; 255 is a recommended setting: it saves memory and still gives good accuracy
max_bin = 255
# training data
# if a weight file exists, it should be named after the data file: "COMPAS.train.weight"
# alias: train_data, train
data = COMPAS.train
# validation data; multiple validation sets are supported, separated by ','
# if a weight file exists, it should be named after the data file: "COMPAS.test.weight"
# alias: valid, test, test_data
valid_data = COMPAS.test
# the first row of the data files holds the column names
has_header = true
# name of the label column
label_column = name:two_year_recid
# number of trees (iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds
num_trees = 200
# shrinkage rate, alias: shrinkage_rate
learning_rate = 0.1
# number of leaves in one tree, alias: num_leaf
num_leaves = 63
# type of tree learner; supports the following types:
#   serial  , single-machine version
#   feature , use feature-parallel training
#   data    , use data-parallel training
#   voting  , use voting-based parallel training
# alias: tree
tree_learner = serial
# number of threads for multi-threading. One thread will use each CPU. The default is the CPU count.
# (left commented out here, so the default applies)
# num_threads = 8
# feature sub-sampling: randomly selects 80% of the features to train on at each iteration
# alias: sub_feature
feature_fraction = 0.8
# bagging (data sub-sampling) frequency: bagging is performed every 5 iterations
bagging_freq = 5
# bagging fraction: randomly selects 80% of the data when bagging
# alias: sub_row
bagging_fraction = 0.8
# minimal number of data points in one leaf; use this to deal with over-fitting
# alias: min_data_per_leaf, min_data
min_data_in_leaf = 50
# max depth of each individual base learner
max_depth = 10
# minimal sum of Hessians in one leaf; use this to deal with over-fitting
min_sum_hessian_in_leaf = 5.0
# saves memory and gives faster speed for sparse features, alias: is_sparse
is_enable_sparse = true
# set this to true when the data is bigger than the memory size; otherwise false gives faster speed
# alias: two_round_loading, two_round
use_two_round_loading = false
# true to save the data to a binary file; the application will then auto-load the data from that binary file next time
# alias: is_save_binary, save_binary
is_save_binary_file = false
# output model file
output_model = FairGBM_model.txt
# output prediction file for the predict task (unused for this train task, hence commented out)
# output_result= prediction.txt
# fixed random state for reproducibility
# (results may still vary when running multi-threaded)
random_state = 42
# ** FairGBM-specific parameters **
# NOTE(review): the per-parameter descriptions below are inferred from the parameter names;
# confirm them against the FairGBM parameter documentation.
# proxy function used for the constraint (presumably a differentiable stand-in for the step-wise error rate)
constraint_stepwise_proxy = cross_entropy
# column that defines the groups the constraint is applied to (selected by column name)
constraint_group_column = name:race_Caucasian
# metric being constrained; fpr presumably stands for false positive rate
constraint_type = fpr
# tolerance for the fpr constraint; presumably 0 allows no slack between groups (TODO confirm)
constraint_fpr_tolerance = 0
# margin of the proxy function (TODO confirm exact meaning)
proxy_margin = 1
# score threshold; presumably the cut-off that turns predicted scores into class labels
score_threshold = 0.5
# learning rate of the Lagrangian; presumably the step size for the constraint multipliers
lagrangian_learning_rate = 200
# no global constraint in this example, as we're just aiming to maximize accuracy;
# the global-constraint settings below are therefore left commented out
# global_constraint_type = fpr,fnr
# global_score_threshold = 0.5
# global_target_fpr = 0.05
# global_target_fnr = 0.30