forked from dmitryikh/leaves
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlgensemble.go
96 lines (82 loc) · 2.65 KB
/
lgensemble.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
package leaves
import (
"github.com/dmitryikh/leaves/util"
)
// lgEnsemble is a LightGBM-style model: an ensemble of decision trees.
type lgEnsemble struct {
// Trees holds every tree of the ensemble. The prediction code addresses it
// as Trees[estimator*nRawOutputGroups+group], so the trees belonging to one
// estimator (one per raw output group) are stored next to each other.
Trees []lgTree
// MaxFeatureIdx is presumably the largest zero-based feature index used by
// the model; NFeatures derives the feature count from it (MaxFeatureIdx + 1).
MaxFeatureIdx int
// nRawOutputGroups is the number of raw output values written per sample by
// predictInner. NEstimators divides len(Trees) by it.
nRawOutputGroups int
// lgEnsemble is suitable for models coming from different packages
// (e.g. LightGBM gbrt and sklearn gbrt); name records the origin of the model.
name string
// averageOutput = true means that tree predictions are averaged (as in a
// random forest) instead of summed.
// NOTE: the original LightGBM implementation always divides the result by
// NEstimators() if average_output is set, whereas `leaves` divides by
// nEstimators, the adjusted number of trees actually used for the prediction.
averageOutput bool
}
// NEstimators returns the number of estimators in the ensemble: the total
// tree count divided by the number of raw output groups, since every
// estimator holds one tree per group.
//
// An ensemble whose nRawOutputGroups is not positive (for example the zero
// value of lgEnsemble) has no usable estimators, so 0 is returned instead of
// panicking with an integer division by zero.
func (e *lgEnsemble) NEstimators() int {
	if e.nRawOutputGroups <= 0 {
		return 0
	}
	return len(e.Trees) / e.nRawOutputGroups
}
// NRawOutputGroups returns the number of raw output groups, i.e. how many raw
// values the ensemble produces for a single sample.
func (e *lgEnsemble) NRawOutputGroups() int {
	return e.nRawOutputGroups
}
// NFeatures returns the number of features, derived from the largest feature
// index stored in MaxFeatureIdx.
//
// The count is MaxFeatureIdx + 1, so the index is zero-based: an index of 0
// denotes a model with exactly one feature and must yield 1, not 0. Only a
// negative index is treated as "no features".
func (e *lgEnsemble) NFeatures() int {
	if e.MaxFeatureIdx < 0 {
		return 0
	}
	return e.MaxFeatureIdx + 1
}
// NLeaves returns the leaf count of every tree used by the ensemble.
//
// Trees are stored estimator by estimator (all groups of estimator 0, then
// all groups of estimator 1, ...). The result is laid out the other way
// round: all estimators of group 0 first, then all estimators of group 1,
// and so on, i.e. result[group*NEstimators()+estimator].
func (e *lgEnsemble) NLeaves() []int {
	nEst := e.NEstimators()
	nGroups := e.NRawOutputGroups()

	counts := make([]int, nEst*nGroups)
	// src walks Trees in storage order; it always equals est*nGroups+group.
	src := 0
	for est := 0; est < nEst; est++ {
		for group := 0; group < nGroups; group++ {
			counts[group*nEst+est] = e.Trees[src].nLeaves()
			src++
		}
	}
	return counts
}
// Name returns the name of the ensemble, which identifies the origin of the
// model.
func (e *lgEnsemble) Name() string {
	return e.name
}
// predictInner computes the raw prediction for one sample and stores it in
// predictions[startIndex : startIndex+nRawOutputGroups].
//
// fvals is the feature vector passed to every tree. Only the first
// nEstimators estimators are evaluated. Each output group accumulates the
// outputs of its trees; when averageOutput is set every tree output is scaled
// by 1/nEstimators, which turns the sum into an average.
func (e *lgEnsemble) predictInner(fvals []float64, nEstimators int, predictions []float64, startIndex int) {
	nGroups := e.nRawOutputGroups

	// Start every output group from zero before accumulating.
	for group := 0; group < nGroups; group++ {
		predictions[startIndex+group] = 0.0
	}

	scale := 1.0
	if e.averageOutput {
		scale /= float64(nEstimators)
	}

	// treeIdx walks Trees in storage order; it always equals est*nGroups+group.
	treeIdx := 0
	for est := 0; est < nEstimators; est++ {
		for group := 0; group < nGroups; group++ {
			value, _ := e.Trees[treeIdx].predict(fvals)
			predictions[startIndex+group] += value * scale
			treeIdx++
		}
	}
}
// predictLeafIndicesInner stores, for one sample, the index of the leaf the
// sample reaches in each of the first nEstimators estimators' trees.
//
// The nRawOutputGroups*nEstimators results are written to predictions
// starting at startIndex, grouped by output group:
// predictions[startIndex+group*nEstimators+estimator].
func (e *lgEnsemble) predictLeafIndicesInner(fvals []float64, nEstimators int, predictions []float64, startIndex int) {
	nGroups := e.nRawOutputGroups

	total := nGroups * nEstimators
	for offset := 0; offset < total; offset++ {
		predictions[startIndex+offset] = 0.0
	}

	// treeIdx walks Trees in storage order; it always equals est*nGroups+group.
	treeIdx := 0
	for est := 0; est < nEstimators; est++ {
		for group := 0; group < nGroups; group++ {
			_, leaf := e.Trees[treeIdx].predict(fvals)
			// Leaf indices are stored as float64 so that every kind of
			// prediction result shares the same element type.
			predictions[startIndex+group*nEstimators+est] = float64(leaf)
			treeIdx++
		}
	}
}
// adjustNEstimators clamps the requested number of estimators to the number
// the ensemble actually holds. A non-positive request selects all estimators.
func (e *lgEnsemble) adjustNEstimators(nEstimators int) int {
	available := e.NEstimators()
	if nEstimators <= 0 {
		return available
	}
	return util.MinInt(nEstimators, available)
}
// resetFVals overwrites every element of fvals with 0.0 in place, so the
// slice can be reused as a clean feature buffer.
func (e *lgEnsemble) resetFVals(fvals []float64) {
	for idx := range fvals {
		fvals[idx] = 0.0
	}
}