forked from facebook/hhvm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregion-hot-cfg.cpp
288 lines (248 loc) · 10.2 KB
/
region-hot-cfg.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| [email protected] so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include <memory>
#include <algorithm>
#include "hphp/util/trace.h"
#include "hphp/runtime/vm/jit/normalized-instruction.h"
#include "hphp/runtime/vm/jit/prof-data.h"
#include "hphp/runtime/vm/jit/region-prune-arcs.h"
#include "hphp/runtime/vm/jit/region-selection.h"
#include "hphp/runtime/vm/jit/switch-profile.h"
#include "hphp/runtime/vm/jit/target-profile.h"
#include "hphp/runtime/vm/jit/trans-cfg.h"
/*
* This module supports the implementation of two region selectors: hotcfg and
* wholecfg. In hotcfg mode, it constructs a region that is a maximal CFG
* given the constraints for what is currently supported within a region and
* the JitPGOMinBlockCountPercent and JitPGOMinArcProbability runtime options
* (which can be used to prune cold/unlikely code). In wholecfg mode, these
* two runtime options are ignored and nothing is pruned based on profile
* counters.
*/
namespace HPHP { namespace jit {
TRACE_SET_MOD(pgo);
namespace {
const StaticString s_switchProfile("SwitchProfile");
//////////////////////////////////////////////////////////////////////
struct DFS {
DFS(const ProfData* p, const TransCFG& c, int32_t maxBCInstrs, bool inlining)
: m_profData(p)
, m_cfg(c)
, m_numBCInstrs(maxBCInstrs)
, m_inlining(inlining)
{}
RegionDescPtr formRegion(TransID head) {
m_region = std::make_shared<RegionDesc>();
if (RuntimeOption::EvalJitPGORegionSelector == "wholecfg") {
m_minBlockWeight = 0;
m_minArcProb = 0;
} else {
auto const minBlkPerc = RuntimeOption::EvalJitPGOMinBlockCountPercent;
m_minBlockWeight = minBlkPerc * m_cfg.weight(head) / 100.0;
m_minArcProb = RuntimeOption::EvalJitPGOMinArcProbability;
}
ITRACE(3, "formRegion: starting at head = {} (weight = {})\n"
" minBlockWeight = {:.2}\n"
" minArcProb = {:.2}\n",
head, m_cfg.weight(head), m_minBlockWeight, m_minArcProb);
Trace::Indent indent;
visit(head);
for (auto& arc : m_arcs) {
m_region->addArc(arc.src, arc.dst);
}
return m_region;
}
private:
static constexpr int kMaxNonDefaultCases = 4;
static constexpr int kMinSwitchPercent = 75;
/*
* Look up profiling data for the Switch at the end of tid and decide which
* outgoing arcs, if any, to include in the region. Arcs that survive this
* function may still be trimmed by the other checks in visit().
*/
void trimSwitchArcs(TransID tid,
const RegionDesc& profRegion,
std::vector<TransCFG::Arc*>& arcs) {
ITRACE(5, "Analyzing Switch ending profTrans {}\n", tid);
Trace::Indent indent;
auto sk = profRegion.blocks().back()->last();
assert(sk.op() == OpSwitch);
TargetProfile<SwitchProfile> profile(tid,
TransKind::Optimize,
sk.offset(),
s_switchProfile.get());
assert(!profile.profiling());
if (!profile.optimizing()) {
// We don't have profile data for this Switch, most likely because it saw
// some weird input type during profiling.
arcs.clear();
return;
}
NormalizedInstruction ni{sk, sk.unit()};
std::vector<Offset> offsets;
for (auto off : ni.immVec.range32()) offsets.push_back(sk.offset() + off);
auto const data = sortedSwitchProfile(profile, offsets.size());
uint32_t totalHits = 0;
for (auto const& item : data) totalHits += item.count;
if (totalHits == 0) {
// This switch was never executed during profiling.
arcs.clear();
return;
}
// Allow arcs if the hottest kMaxNonDefaultCases account for at least
// kMinSwitchPercent % of total profiling hits.
uint32_t includedCases = 0;
uint32_t includedHits = 0;
std::unordered_set<SrcKey, SrcKey::Hasher> allowedSks;
for (auto const& item : data) {
// We always have bounds checks for the default, so it doesn't count
// against the case limit.
if (item.caseIdx == data.size() - 1) {
ITRACE(5, "Adding {} hits from default case @ {}\n",
item.count, offsets[item.caseIdx]);
includedHits += item.count;
allowedSks.insert(SrcKey{sk, offsets[item.caseIdx]});
continue;
}
if (includedCases == kMaxNonDefaultCases) {
if (includedHits * 100 / totalHits < kMinSwitchPercent) {
ITRACE(5, "Profile data not biased towards hot cases; bailing\n");
arcs.clear();
return;
}
break;
}
ITRACE(5, "Adding {} hits from case {} @ {}\n",
item.count, item.caseIdx, offsets[item.caseIdx]);
++includedCases;
includedHits += item.count;
allowedSks.insert(SrcKey{sk, offsets[item.caseIdx]});
}
ITRACE(5, "Including {} cases, representing {} / {} samples\n",
includedCases, includedHits, totalHits);
auto firstDead = std::remove_if(
begin(arcs), end(arcs), [&](const TransCFG::Arc* arc) {
auto const rec = m_profData->transRec(arc->dst());
const bool ok = allowedSks.count(rec->srcKey());
ITRACE(5, "Arc {} -> {} {}included\n",
arc->src(), arc->dst(), ok ? "" : "not ");
return !ok;
}
);
arcs.erase(firstDead, end(arcs));
}
void visit(TransID tid) {
auto rec = m_profData->transRec(tid);
auto tidRegion = rec->region();
auto tidInstrs = tidRegion->instrSize();
if (tidInstrs > m_numBCInstrs) {
ITRACE(5, "- visit: skipping {} due to region size\n", tid);
return;
}
// Skip tid if its weight is below the JitPGOMinBlockPercent
// percentage of the weight of the block where this region
// started.
auto tidWeight = m_cfg.weight(tid);
if (tidWeight < m_minBlockWeight) {
ITRACE(5, "- visit: skipping {} due to low weight ({})\n",
tid, tidWeight);
return;
}
if (!m_visited.insert(tid).second) return;
m_visiting.insert(tid);
m_numBCInstrs -= tidInstrs;
ITRACE(5, "- visit: adding {} ({})\n", tid, tidWeight);
auto const termSk = rec->lastSrcKey();
auto const termOp = termSk.op();
if (!breaksRegion(termSk)) {
auto srcBlockId = tidRegion->blocks().back().get()->id();
auto arcs = m_cfg.outArcs(tid);
// We have special profiling and logic to decide which arcs from a Switch
// are eligible for inclusion in the region.
if (termOp == OpSwitch) {
trimSwitchArcs(srcBlockId, *tidRegion, arcs);
}
for (auto const arc : arcs) {
auto dst = arc->dst();
// Skip if the probability of taking this arc is below the specified
// threshold.
if (arc->weight() < m_minArcProb * tidWeight) {
ITRACE(5, "- visit: skipping arc {} -> {} due to low probability "
"({:.2})\n", tid, dst, arc->weight() / (tidWeight + 0.001));
continue;
}
// Skip dst if we already generated a region starting at that SrcKey.
auto dstRec = m_profData->transRec(dst);
auto dstSK = dstRec->srcKey();
if (!m_inlining && m_profData->optimized(dstSK)) {
ITRACE(5, "- visit: skipping {} because SrcKey was already "
"optimize", showShort(dstSK));
continue;
}
always_assert(dst == dstRec->region()->entry()->id());
visit(dst);
// Record the arc if dstBlockId was included in the region. (Note that
// it may not be included in the region due to the
// EvalJitMaxRegionInstrs limit.)
if (m_visited.count(dst)) {
m_arcs.push_back({srcBlockId, dst});
}
}
}
// Now insert the region for tid in the front of m_region. We do
// this last so that the region ends up in (quasi-)topological order
// (it'll be in topological order for acyclic regions).
m_region->prepend(*tidRegion);
always_assert(m_numBCInstrs >= 0);
m_visiting.erase(tid);
}
private:
const ProfData* m_profData;
const TransCFG& m_cfg;
RegionDescPtr m_region;
int32_t m_numBCInstrs;
jit::hash_set<TransID> m_visiting;
jit::hash_set<TransID> m_visited;
jit::vector<RegionDesc::Arc> m_arcs;
double m_minBlockWeight;
double m_minArcProb;
bool m_inlining;
};
//////////////////////////////////////////////////////////////////////
}
RegionDescPtr selectHotCFG(HotTransContext& ctx) {
ITRACE(1, "selectHotCFG: starting with maxBCInstrs = {}\n", ctx.maxBCInstrs);
auto const region =
DFS(ctx.profData, *ctx.cfg, ctx.maxBCInstrs, ctx.inlining)
.formRegion(ctx.tid);
if (region->empty()) return nullptr;
ITRACE(3, "selectHotCFG: before region_prune_arcs:\n{}\n",
show(*region));
region_prune_arcs(*region, ctx.inputTypes);
ITRACE(3, "selectHotCFG: before chainRetransBlocks:\n{}\n",
show(*region));
region->chainRetransBlocks();
// Relax the region guards.
if (RuntimeOption::EvalRegionRelaxGuards) {
ITRACE(3, "selectHotCFG: before optimizeProfiledGuards:\n{}\n",
show(*region));
optimizeProfiledGuards(*region, *ctx.profData);
}
ITRACE(1, "selectHotCFG: final version after optimizeProfiledGuards:\n{}\n",
show(*region));
return region;
}
}}