forked from riscvarchive/riscv-gcc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreorg.c
4005 lines (3372 loc) · 129 KB
/
reorg.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* Perform instruction reorganizations for delay slot filling.
Copyright (C) 1992-2020 Free Software Foundation, Inc.
Contributed by Richard Kenner ([email protected]).
Hacked by Michael Tiemann ([email protected]).
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* Instruction reorganization pass.
This pass runs after register allocation and final jump
optimization. It should be the last pass to run before peephole.
It serves primarily to fill delay slots of insns, typically branch
and call insns. Other insns typically involve more complicated
interactions of data dependencies and resource constraints, and
are better handled by scheduling before register allocation (by the
function `schedule_insns').
The Branch Penalty is the number of extra cycles that are needed to
execute a branch insn. On an ideal machine, branches take a single
cycle, and the Branch Penalty is 0. Several RISC machines approach
branch delays differently:
The MIPS has a single branch delay slot. Most insns
(except other branches) can be used to fill this slot. When the
slot is filled, two insns execute in two cycles, reducing the
branch penalty to zero.
The SPARC always has a branch delay slot, but its effects can be
annulled when the branch is not taken. This means that failing to
find other sources of insns, we can hoist an insn from the branch
target that would only be safe to execute knowing that the branch
is taken.
The HP-PA always has a branch delay slot. For unconditional branches
its effects can be annulled when the branch is taken. The effects
of the delay slot in a conditional branch can be nullified for forward
taken branches, or for untaken backward branches. This means
we can hoist insns from the fall-through path for forward branches or
steal insns from the target of backward branches.
The TMS320C3x and C4x have three branch delay slots. When the three
slots are filled, the branch penalty is zero. Most insns can fill the
delay slots except jump insns.
Three techniques for filling delay slots have been implemented so far:
(1) `fill_simple_delay_slots' is the simplest, most efficient way
to fill delay slots. This pass first looks for insns which come
from before the branch and which are safe to execute after the
branch. Then it searches after the insn requiring delay slots or,
in the case of a branch, for insns that are after the point at
which the branch merges into the fallthrough code, if such a point
exists. When such insns are found, the branch penalty decreases
and no code expansion takes place.
(2) `fill_eager_delay_slots' is more complicated: it is used for
scheduling conditional jumps, or for scheduling jumps which cannot
be filled using (1). A machine need not have annulled jumps to use
this strategy, but it helps (by keeping more options open).
`fill_eager_delay_slots' tries to guess the direction the branch
will go; if it guesses right 100% of the time, it can reduce the
branch penalty as much as `fill_simple_delay_slots' does. If it
guesses wrong 100% of the time, it might as well schedule nops. When
`fill_eager_delay_slots' takes insns from the fall-through path of
the jump, usually there is no code expansion; when it takes insns
from the branch target, there is code expansion if it is not the
only way to reach that target.
(3) `relax_delay_slots' uses a set of rules to simplify code that
has been reorganized by (1) and (2). It finds cases where
conditional test can be eliminated, jumps can be threaded, extra
insns can be eliminated, etc. It is the job of (1) and (2) to do a
good job of scheduling locally; `relax_delay_slots' takes care of
making the various individual schedules work well together. It is
especially tuned to handle the control flow interactions of branch
insns. It does nothing for insns with delay slots that do not
branch.
On machines that use CC0, we are very conservative. We will not make
a copy of an insn involving CC0 since we want to maintain a 1-1
correspondence between the insn that sets and uses CC0. The insns are
allowed to be separated by placing an insn that sets CC0 (but not an insn
that uses CC0; we could do this, but it doesn't seem worthwhile) in a
delay slot. In that case, we point each insn at the other with REG_CC_USER
and REG_CC_SETTER notes. Note that these restrictions affect very few
machines because most RISC machines with delay slots will not use CC0
(the RT is the only known exception at this point). */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "insn-attr.h"
#include "resource.h"
#include "tree-pass.h"
/* First, some functions that were used before GCC got a control flow graph.
These functions are now only used here in reorg.c, and have therefore
been moved here to avoid inadvertent misuse elsewhere in the compiler. */
/* Return the last label to mark the same position as LABEL. Return LABEL
itself if it is null or any return rtx. */
static rtx
skip_consecutive_labels (rtx label_or_return)
{
rtx_insn *insn;
if (label_or_return && ANY_RETURN_P (label_or_return))
return label_or_return;
rtx_insn *label = as_a <rtx_insn *> (label_or_return);
/* __builtin_unreachable can create a CODE_LABEL followed by a BARRIER.
Since reaching the CODE_LABEL is undefined behavior, we can return
any code label and we're OK at runtime.
However, if we return a CODE_LABEL which leads to a shrinked wrapped
epilogue, but the path does not have a prologue, then we will trip
a sanity check in the dwarf2 cfi code which wants to verify that
the CFIs are all the same on the traces leading to the epilogue.
So we explicitly disallow looking through BARRIERS here. */
for (insn = label;
insn != 0 && !INSN_P (insn) && !BARRIER_P (insn);
insn = NEXT_INSN (insn))
if (LABEL_P (insn))
label = insn;
return label;
}
/* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
and REG_CC_USER notes so we can find it. */
static void
link_cc0_insns (rtx_insn *insn)
{
rtx user = next_nonnote_insn (insn);
if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
user = XVECEXP (PATTERN (user), 0, 0);
add_reg_note (user, REG_CC_SETTER, insn);
add_reg_note (insn, REG_CC_USER, user);
}
/* Insns which have delay slots that have not yet been filled. */
static struct obstack unfilled_slots_obstack;
static rtx *unfilled_firstobj;
/* Define macros to refer to the first and last slot containing unfilled
insns. These are used because the list may move and its address
should be recomputed at each use. */
#define unfilled_slots_base \
((rtx_insn **) obstack_base (&unfilled_slots_obstack))
#define unfilled_slots_next \
((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
/* Points to the label before the end of the function, or before a
return insn. */
static rtx_code_label *function_return_label;
/* Likewise for a simple_return. */
static rtx_code_label *function_simple_return_label;
/* Mapping between INSN_UID's and position in the code since INSN_UID's do
not always monotonically increase. */
static int *uid_to_ruid;
/* Highest valid index in `uid_to_ruid'. */
static int max_uid;
static int stop_search_p (rtx_insn *, int);
static int resource_conflicts_p (struct resources *, struct resources *);
static int insn_references_resource_p (rtx, struct resources *, bool);
static int insn_sets_resource_p (rtx, struct resources *, bool);
static rtx_code_label *find_end_label (rtx);
static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
int);
static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
static rtx_insn *delete_from_delay_slot (rtx_insn *);
static void delete_scheduled_jump (rtx_insn *);
static void note_delay_statistics (int, int);
static int get_jump_flags (const rtx_insn *, rtx);
static int mostly_true_jump (rtx);
static rtx get_branch_condition (const rtx_insn *, rtx);
static int condition_dominates_p (rtx, const rtx_insn *);
static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
const vec<rtx_insn *> &);
static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
vec<rtx_insn *> *,
struct resources *,
struct resources *,
struct resources *,
int, int *, int *,
rtx *);
static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
vec<rtx_insn *> *,
struct resources *,
struct resources *,
struct resources *,
int, int *, int *);
static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
static rtx_insn *redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
static int own_thread_p (rtx, rtx, int);
static void update_block (rtx_insn *, rtx_insn *);
static int reorg_redirect_jump (rtx_jump_insn *, rtx);
static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
static void fix_reg_dead_note (rtx_insn *, rtx);
static void update_reg_unused_notes (rtx_insn *, rtx);
static void fill_simple_delay_slots (int);
static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
int, int, int, int,
int *, vec<rtx_insn *> *);
static void fill_eager_delay_slots (void);
static void relax_delay_slots (rtx_insn *);
static void make_return_insns (rtx_insn *);
/* A wrapper around next_active_insn which takes care to return ret_rtx
unchanged. */
static rtx
first_active_target_insn (rtx insn)
{
if (ANY_RETURN_P (insn))
return insn;
return next_active_insn (as_a <rtx_insn *> (insn));
}
/* Return true iff INSN is a simplejump, or any kind of return insn. */
static bool
simplejump_or_return_p (rtx insn)
{
return (JUMP_P (insn)
&& (simplejump_p (as_a <rtx_insn *> (insn))
|| ANY_RETURN_P (PATTERN (insn))));
}
/* Return TRUE if this insn should stop the search for insn to fill delay
slots. LABELS_P indicates that labels should terminate the search.
In all cases, jumps terminate the search. */
static int
stop_search_p (rtx_insn *insn, int labels_p)
{
if (insn == 0)
return 1;
/* If the insn can throw an exception that is caught within the function,
it may effectively perform a jump from the viewpoint of the function.
Therefore act like for a jump. */
if (can_throw_internal (insn))
return 1;
switch (GET_CODE (insn))
{
case NOTE:
case CALL_INSN:
case DEBUG_INSN:
return 0;
case CODE_LABEL:
return labels_p;
case JUMP_INSN:
case BARRIER:
return 1;
case INSN:
/* OK unless it contains a delay slot or is an `asm' insn of some type.
We don't know anything about these. */
return (GET_CODE (PATTERN (insn)) == SEQUENCE
|| GET_CODE (PATTERN (insn)) == ASM_INPUT
|| asm_noperands (PATTERN (insn)) >= 0);
default:
gcc_unreachable ();
}
}
/* Return TRUE if any resources are marked in both RES1 and RES2 or if either
resource set contains a volatile memory reference. Otherwise, return FALSE. */
static int
resource_conflicts_p (struct resources *res1, struct resources *res2)
{
if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
|| res1->volatil || res2->volatil)
return 1;
return hard_reg_set_intersect_p (res1->regs, res2->regs);
}
/* Return TRUE if any resource marked in RES, a `struct resources', is
referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, return if the called
routine is using those resources.
We compute this by computing all the resources referenced by INSN and
seeing if this conflicts with RES. It might be faster to directly check
ourselves, and this is the way it used to work, but it means duplicating
a large block of complex code. */
static int
insn_references_resource_p (rtx insn, struct resources *res,
bool include_delayed_effects)
{
struct resources insn_res;
CLEAR_RESOURCE (&insn_res);
mark_referenced_resources (insn, &insn_res, include_delayed_effects);
return resource_conflicts_p (&insn_res, res);
}
/* Return TRUE if INSN modifies resources that are marked in RES.
INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
included. CC0 is only modified if it is explicitly set; see comments
in front of mark_set_resources for details. */
static int
insn_sets_resource_p (rtx insn, struct resources *res,
bool include_delayed_effects)
{
struct resources insn_sets;
CLEAR_RESOURCE (&insn_sets);
mark_set_resources (insn, &insn_sets, 0,
(include_delayed_effects
? MARK_SRC_DEST_CALL
: MARK_SRC_DEST));
return resource_conflicts_p (&insn_sets, res);
}
/* Find a label at the end of the function or before a RETURN. If there
is none, try to make one. If that fails, returns 0.
The property of such a label is that it is placed just before the
epilogue or a bare RETURN insn, so that another bare RETURN can be
turned into a jump to the label unconditionally. In particular, the
label cannot be placed before a RETURN insn with a filled delay slot.
??? There may be a problem with the current implementation. Suppose
we start with a bare RETURN insn and call find_end_label. It may set
function_return_label just before the RETURN. Suppose the machinery
is able to fill the delay slot of the RETURN insn afterwards. Then
function_return_label is no longer valid according to the property
described above and find_end_label will still return it unmodified.
Note that this is probably mitigated by the following observation:
once function_return_label is made, it is very likely the target of
a jump, so filling the delay slot of the RETURN will be much more
difficult.
KIND is either simple_return_rtx or ret_rtx, indicating which type of
return we're looking for. */
static rtx_code_label *
find_end_label (rtx kind)
{
rtx_insn *insn;
rtx_code_label **plabel;
if (kind == ret_rtx)
plabel = &function_return_label;
else
{
gcc_assert (kind == simple_return_rtx);
plabel = &function_simple_return_label;
}
/* If we found one previously, return it. */
if (*plabel)
return *plabel;
/* Otherwise, see if there is a label at the end of the function. If there
is, it must be that RETURN insns aren't needed, so that is our return
label and we don't have to do anything else. */
insn = get_last_insn ();
while (NOTE_P (insn)
|| (NONJUMP_INSN_P (insn)
&& (GET_CODE (PATTERN (insn)) == USE
|| GET_CODE (PATTERN (insn)) == CLOBBER)))
insn = PREV_INSN (insn);
/* When a target threads its epilogue we might already have a
suitable return insn. If so put a label before it for the
function_return_label. */
if (BARRIER_P (insn)
&& JUMP_P (PREV_INSN (insn))
&& PATTERN (PREV_INSN (insn)) == kind)
{
rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
rtx_code_label *label = gen_label_rtx ();
LABEL_NUSES (label) = 0;
/* Put the label before any USE insns that may precede the RETURN
insn. */
while (GET_CODE (temp) == USE)
temp = PREV_INSN (temp);
emit_label_after (label, temp);
*plabel = label;
}
else if (LABEL_P (insn))
*plabel = as_a <rtx_code_label *> (insn);
else
{
rtx_code_label *label = gen_label_rtx ();
LABEL_NUSES (label) = 0;
/* If the basic block reorder pass moves the return insn to
some other place try to locate it again and put our
function_return_label there. */
while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
insn = PREV_INSN (insn);
if (insn)
{
insn = PREV_INSN (insn);
/* Put the label before any USE insns that may precede the
RETURN insn. */
while (GET_CODE (insn) == USE)
insn = PREV_INSN (insn);
emit_label_after (label, insn);
}
else
{
if (targetm.have_epilogue () && ! targetm.have_return ())
/* The RETURN insn has its delay slot filled so we cannot
emit the label just before it. Since we already have
an epilogue and cannot emit a new RETURN, we cannot
emit the label at all. */
return NULL;
/* Otherwise, make a new label and emit a RETURN and BARRIER,
if needed. */
emit_label (label);
if (targetm.have_return ())
{
/* The return we make may have delay slots too. */
rtx_insn *pat = targetm.gen_return ();
rtx_insn *insn = emit_jump_insn (pat);
set_return_jump_label (insn);
emit_barrier ();
if (num_delay_slots (insn) > 0)
obstack_ptr_grow (&unfilled_slots_obstack, insn);
}
}
*plabel = label;
}
/* Show one additional use for this label so it won't go away until
we are done. */
++LABEL_NUSES (*plabel);
return *plabel;
}
/* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
the pattern of INSN with the SEQUENCE.
Returns the insn containing the SEQUENCE that replaces INSN. */
static rtx_insn *
emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
{
/* Allocate the rtvec to hold the insns and the SEQUENCE. */
rtvec seqv = rtvec_alloc (length + 1);
rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
rtx_insn *seq_insn = make_insn_raw (seq);
/* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
not have a location, but one of the delayed insns does, we pick up a
location from there later. */
INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
/* Unlink INSN from the insn chain, so that we can put it into
the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
rtx_insn *after = PREV_INSN (insn);
remove_insn (insn);
SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
/* Build our SEQUENCE and rebuild the insn chain. */
start_sequence ();
XVECEXP (seq, 0, 0) = emit_insn (insn);
unsigned int delay_insns = list.length ();
gcc_assert (delay_insns == (unsigned int) length);
for (unsigned int i = 0; i < delay_insns; i++)
{
rtx_insn *tem = list[i];
rtx note, next;
/* Show that this copy of the insn isn't deleted. */
tem->set_undeleted ();
/* Unlink insn from its original place, and re-emit it into
the sequence. */
SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
XVECEXP (seq, 0, i + 1) = emit_insn (tem);
/* SPARC assembler, for instance, emit warning when debug info is output
into the delay slot. */
if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
INSN_LOCATION (tem) = 0;
for (note = REG_NOTES (tem); note; note = next)
{
next = XEXP (note, 1);
switch (REG_NOTE_KIND (note))
{
case REG_DEAD:
/* Remove any REG_DEAD notes because we can't rely on them now
that the insn has been moved. */
remove_note (tem, note);
break;
case REG_LABEL_OPERAND:
case REG_LABEL_TARGET:
/* Keep the label reference count up to date. */
if (LABEL_P (XEXP (note, 0)))
LABEL_NUSES (XEXP (note, 0)) ++;
break;
default:
break;
}
}
}
end_sequence ();
/* Splice our SEQUENCE into the insn stream where INSN used to be. */
add_insn_after (seq_insn, after, NULL);
return seq_insn;
}
/* Add INSN to DELAY_LIST and return the head of the new list. The list must
be in the order in which the insns are to be executed. */
static void
add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
{
/* If INSN has its block number recorded, clear it since we may
be moving the insn to a new block. */
clear_hashed_info_for_insn (insn);
delay_list->safe_push (insn);
}
/* Delete INSN from the delay slot of the insn that it is in, which may
produce an insn with no delay slots. Return the new insn. */
static rtx_insn *
delete_from_delay_slot (rtx_insn *insn)
{
rtx_insn *trial, *seq_insn, *prev;
rtx_sequence *seq;
int i;
int had_barrier = 0;
/* We first must find the insn containing the SEQUENCE with INSN in its
delay slot. Do this by finding an insn, TRIAL, where
PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
for (trial = insn;
PREV_INSN (NEXT_INSN (trial)) == trial;
trial = NEXT_INSN (trial))
;
seq_insn = PREV_INSN (NEXT_INSN (trial));
seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
had_barrier = 1;
/* Create a delay list consisting of all the insns other than the one
we are deleting (unless we were the only one). */
auto_vec<rtx_insn *, 5> delay_list;
if (seq->len () > 2)
for (i = 1; i < seq->len (); i++)
if (seq->insn (i) != insn)
add_to_delay_list (seq->insn (i), &delay_list);
/* Delete the old SEQUENCE, re-emit the insn that used to have the delay
list, and rebuild the delay list if non-empty. */
prev = PREV_INSN (seq_insn);
trial = seq->insn (0);
delete_related_insns (seq_insn);
add_insn_after (trial, prev, NULL);
/* If there was a barrier after the old SEQUENCE, remit it. */
if (had_barrier)
emit_barrier_after (trial);
/* If there are any delay insns, remit them. Otherwise clear the
annul flag. */
if (!delay_list.is_empty ())
trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
else if (JUMP_P (trial))
INSN_ANNULLED_BRANCH_P (trial) = 0;
INSN_FROM_TARGET_P (insn) = 0;
/* Show we need to fill this insn again. */
obstack_ptr_grow (&unfilled_slots_obstack, trial);
return trial;
}
/* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
the insn that sets CC0 for it and delete it too. */
static void
delete_scheduled_jump (rtx_insn *insn)
{
/* Delete the insn that sets cc0 for us. On machines without cc0, we could
delete the insn that sets the condition code, but it is hard to find it.
Since this case is rare anyway, don't bother trying; there would likely
be other insns that became dead anyway, which we wouldn't know to
delete. */
if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, insn))
{
rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
/* If a reg-note was found, it points to an insn to set CC0. This
insn is in the delay list of some other insn. So delete it from
the delay list it was in. */
if (note)
{
if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
&& sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
}
else
{
/* The insn setting CC0 is our previous insn, but it may be in
a delay slot. It will be the last insn in the delay slot, if
it is. */
rtx_insn *trial = previous_insn (insn);
if (NOTE_P (trial))
trial = prev_nonnote_insn (trial);
if (sets_cc0_p (PATTERN (trial)) != 1
|| FIND_REG_INC_NOTE (trial, NULL_RTX))
return;
if (PREV_INSN (NEXT_INSN (trial)) == trial)
delete_related_insns (trial);
else
delete_from_delay_slot (trial);
}
}
delete_related_insns (insn);
}
/* Counters for delay-slot filling. */
#define NUM_REORG_FUNCTIONS 2
#define MAX_DELAY_HISTOGRAM 3
#define MAX_REORG_PASSES 2
static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
static int reorg_pass_number;
static void
note_delay_statistics (int slots_filled, int index)
{
num_insns_needing_delays[index][reorg_pass_number]++;
if (slots_filled > MAX_DELAY_HISTOGRAM)
slots_filled = MAX_DELAY_HISTOGRAM;
num_filled_delays[index][slots_filled][reorg_pass_number]++;
}
/* Optimize the following cases:
1. When a conditional branch skips over only one instruction,
use an annulling branch and put that insn in the delay slot.
Use either a branch that annuls when the condition if true or
invert the test with a branch that annuls when the condition is
false. This saves insns, since otherwise we must copy an insn
from the L1 target.
(orig) (skip) (otherwise)
Bcc.n L1 Bcc',a L1 Bcc,a L1'
insn insn insn2
L1: L1: L1:
insn2 insn2 insn2
insn3 insn3 L1':
insn3
2. When a conditional branch skips over only one instruction,
and after that, it unconditionally branches somewhere else,
perform the similar optimization. This saves executing the
second branch in the case where the inverted condition is true.
Bcc.n L1 Bcc',a L2
insn insn
L1: L1:
Bra L2 Bra L2
INSN is a JUMP_INSN.
This should be expanded to skip over N insns, where N is the number
of delay slots required. */
static void
optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
{
rtx_insn *trial = next_nonnote_insn (insn);
rtx_insn *next_trial = next_active_insn (trial);
int flags;
flags = get_jump_flags (insn, JUMP_LABEL (insn));
if (trial == 0
|| !NONJUMP_INSN_P (trial)
|| GET_CODE (PATTERN (trial)) == SEQUENCE
|| recog_memoized (trial) < 0
|| (! eligible_for_annul_false (insn, 0, trial, flags)
&& ! eligible_for_annul_true (insn, 0, trial, flags))
|| RTX_FRAME_RELATED_P (trial)
|| can_throw_internal (trial))
return;
/* There are two cases where we are just executing one insn (we assume
here that a branch requires only one insn; this should be generalized
at some point): Where the branch goes around a single insn or where
we have one insn followed by a branch to the same label we branch to.
In both of these cases, inverting the jump and annulling the delay
slot give the same effect in fewer insns. */
if (next_trial == next_active_insn (JUMP_LABEL_AS_INSN (insn))
|| (next_trial != 0
&& simplejump_or_return_p (next_trial)
&& JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
{
if (eligible_for_annul_false (insn, 0, trial, flags))
{
if (invert_jump (insn, JUMP_LABEL (insn), 1))
INSN_FROM_TARGET_P (trial) = 1;
else if (! eligible_for_annul_true (insn, 0, trial, flags))
return;
}
add_to_delay_list (trial, delay_list);
next_trial = next_active_insn (trial);
update_block (trial, trial);
delete_related_insns (trial);
/* Also, if we are targeting an unconditional
branch, thread our jump to the target of that branch. Don't
change this into a RETURN here, because it may not accept what
we have in the delay slot. We'll fix this up later. */
if (next_trial && simplejump_or_return_p (next_trial))
{
rtx target_label = JUMP_LABEL (next_trial);
if (ANY_RETURN_P (target_label))
target_label = find_end_label (target_label);
if (target_label)
{
/* Recompute the flags based on TARGET_LABEL since threading
the jump to TARGET_LABEL may change the direction of the
jump (which may change the circumstances in which the
delay slot is nullified). */
flags = get_jump_flags (insn, target_label);
if (eligible_for_annul_true (insn, 0, trial, flags))
reorg_redirect_jump (insn, target_label);
}
}
INSN_ANNULLED_BRANCH_P (insn) = 1;
}
}
/* Encode and return branch direction and prediction information for
INSN assuming it will jump to LABEL.
Non conditional branches return no direction information and
are predicted as very likely taken. */
static int
get_jump_flags (const rtx_insn *insn, rtx label)
{
int flags;
/* get_jump_flags can be passed any insn with delay slots, these may
be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
direction information, and only if they are conditional jumps.
If LABEL is a return, then there is no way to determine the branch
direction. */
if (JUMP_P (insn)
&& (condjump_p (insn) || condjump_in_parallel_p (insn))
&& !ANY_RETURN_P (label)
&& INSN_UID (insn) <= max_uid
&& INSN_UID (label) <= max_uid)
flags
= (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
? ATTR_FLAG_forward : ATTR_FLAG_backward;
/* No valid direction information. */
else
flags = 0;
return flags;
}
/* Return truth value of the statement that this branch
is mostly taken. If we think that the branch is extremely likely
to be taken, we return 2. If the branch is slightly more likely to be
taken, return 1. If the branch is slightly less likely to be taken,
return 0 and if the branch is highly unlikely to be taken, return -1. */
static int
mostly_true_jump (rtx jump_insn)
{
/* If branch probabilities are available, then use that number since it
always gives a correct answer. */
rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
if (note)
{
int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
.to_reg_br_prob_base ();
if (prob >= REG_BR_PROB_BASE * 9 / 10)
return 2;
else if (prob >= REG_BR_PROB_BASE / 2)
return 1;
else if (prob >= REG_BR_PROB_BASE / 10)
return 0;
else
return -1;
}
/* If there is no note, assume branches are not taken.
This should be rare. */
return 0;
}
/* Return the condition under which INSN will branch to TARGET. If TARGET
is zero, return the condition under which INSN will return. If INSN is
an unconditional branch, return const_true_rtx. If INSN isn't a simple
type of jump, or it doesn't go to TARGET, return 0. */
static rtx
get_branch_condition (const rtx_insn *insn, rtx target)
{
rtx pat = PATTERN (insn);
rtx src;
if (condjump_in_parallel_p (insn))
pat = XVECEXP (pat, 0, 0);
if (ANY_RETURN_P (pat) && pat == target)
return const_true_rtx;
if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
return 0;
src = SET_SRC (pat);
if (GET_CODE (src) == LABEL_REF && label_ref_label (src) == target)
return const_true_rtx;
else if (GET_CODE (src) == IF_THEN_ELSE
&& XEXP (src, 2) == pc_rtx
&& ((GET_CODE (XEXP (src, 1)) == LABEL_REF
&& label_ref_label (XEXP (src, 1)) == target)
|| (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
return XEXP (src, 0);
else if (GET_CODE (src) == IF_THEN_ELSE
&& XEXP (src, 1) == pc_rtx
&& ((GET_CODE (XEXP (src, 2)) == LABEL_REF
&& label_ref_label (XEXP (src, 2)) == target)
|| (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
{
enum rtx_code rev;
rev = reversed_comparison_code (XEXP (src, 0), insn);
if (rev != UNKNOWN)
return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
XEXP (XEXP (src, 0), 0),
XEXP (XEXP (src, 0), 1));
}
return 0;
}
/* Return nonzero if CONDITION is more strict than the condition of
INSN, i.e., if INSN will always branch if CONDITION is true. */
static int
condition_dominates_p (rtx condition, const rtx_insn *insn)
{
rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
enum rtx_code code = GET_CODE (condition);
enum rtx_code other_code;
if (rtx_equal_p (condition, other_condition)
|| other_condition == const_true_rtx)
return 1;
else if (condition == const_true_rtx || other_condition == 0)
return 0;
other_code = GET_CODE (other_condition);
if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
|| ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
|| ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
return 0;
return comparison_dominates_p (code, other_code);
}
/* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
any insns already in the delay slot of JUMP. */
static int
redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
{
int flags, i;
rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
/* Make sure all the delay slots of this jump would still
be valid after threading the jump. If they are still
valid, then return nonzero. */
flags = get_jump_flags (jump, newlabel);
for (i = 1; i < pat->len (); i++)
if (! (
#if ANNUL_IFFALSE_SLOTS
(INSN_ANNULLED_BRANCH_P (jump)
&& INSN_FROM_TARGET_P (pat->insn (i)))
? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
#endif
#if ANNUL_IFTRUE_SLOTS
(INSN_ANNULLED_BRANCH_P (jump)
&& ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
#endif
eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
break;
return (i == pat->len ());
}
/* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
any insns we wish to place in the delay slot of JUMP. */
static int
redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
const vec<rtx_insn *> &delay_list)
{
/* Make sure all the insns in DELAY_LIST would still be
valid after threading the jump. If they are still
valid, then return nonzero. */
int flags = get_jump_flags (jump, newlabel);
unsigned int delay_insns = delay_list.length ();
unsigned int i = 0;
for (; i < delay_insns; i++)
if (! (
#if ANNUL_IFFALSE_SLOTS
(INSN_ANNULLED_BRANCH_P (jump)
&& INSN_FROM_TARGET_P (delay_list[i]))
? eligible_for_annul_false (jump, i, delay_list[i], flags) :