forked from riscvarchive/riscv-gcc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinal.c
5035 lines (4255 loc) · 135 KB
/
final.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* Convert RTL to assembler code and output it, for GNU compiler.
Copyright (C) 1987-2020 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* This is the final pass of the compiler.
It looks at the rtl code for a function and outputs assembler code.
Call `final_start_function' to output the assembler code for function entry,
`final' to output assembler code for some RTL code,
`final_end_function' to output assembler code for function exit.
If a function is compiled in several pieces, each piece is
output separately with `final'.
Some optimizations are also done at this level.
Move instructions that were made unnecessary by good register allocation
are detected and omitted from the output. (Though most of these
are removed by the last jump pass.)
Instructions to set the condition codes are omitted when it can be
seen that the condition codes already had the desired values.
In some cases it is sufficient if the inherited condition codes
have related values, but this may require the following insn
(the one that tests the condition codes) to be modified.
The code for the function prologue and epilogue are generated
directly in assembler by the target functions function_prologue and
function_epilogue. Those instructions never exist as rtl. */
#include "config.h"
#define INCLUDE_ALGORITHM /* reverse */
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "insn-config.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "tree-pretty-print.h" /* for dump_function_header */
#include "varasm.h"
#include "insn-attr.h"
#include "conditions.h"
#include "flags.h"
#include "output.h"
#include "except.h"
#include "rtl-error.h"
#include "toplev.h" /* exact_log2, floor_log2 */
#include "reload.h"
#include "intl.h"
#include "cfgrtl.h"
#include "debug.h"
#include "tree-pass.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "stringpool.h"
#include "attribs.h"
#include "asan.h"
#include "rtl-iter.h"
#include "print-rtl.h"
#include "function-abi.h"
#ifdef XCOFF_DEBUGGING_INFO
#include "xcoffout.h" /* Needed for external data declarations. */
#endif
#include "dwarf2out.h"
#ifdef DBX_DEBUGGING_INFO
#include "dbxout.h"
#endif
/* Most ports that aren't using cc0 don't need to define CC_STATUS_INIT.
So define a null default for it to save conditionalization later. */
#ifndef CC_STATUS_INIT
#define CC_STATUS_INIT
#endif
/* Is the given character a logical line separator for the assembler? */
#ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
#endif
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
/* Bitflags used by final_scan_insn. */
#define SEEN_NOTE 1
#define SEEN_EMITTED 2
#define SEEN_NEXT_VIEW 4
/* Last insn processed by final_scan_insn. */
static rtx_insn *debug_insn;
rtx_insn *current_output_insn;
/* Line number of last NOTE. */
static int last_linenum;
/* Column number of last NOTE. */
static int last_columnnum;
/* Discriminator written to assembly. */
static int last_discriminator;
/* Discriminator to be written to assembly for current instruction.
Note: actual usage depends on loc_discriminator_kind setting. */
static int discriminator;
static inline int compute_discriminator (location_t loc);
/* Discriminator identifying current basic block among others sharing
the same locus. */
static int bb_discriminator;
/* Basic block discriminator for previous instruction. */
static int last_bb_discriminator;
/* Highest line number in current block. */
static int high_block_linenum;
/* Likewise for function. */
static int high_function_linenum;
/* Filename of last NOTE. */
static const char *last_filename;
/* Override filename, line and column number. */
static const char *override_filename;
static int override_linenum;
static int override_columnnum;
static int override_discriminator;
/* Whether to force emission of a line note before the next insn. */
static bool force_source_line = false;
extern const int length_unit_log; /* This is defined in insn-attrtab.c. */
/* Nonzero while outputting an `asm' with operands.
This means that inconsistencies are the user's fault, so don't die.
The precise value is the insn being output, to pass to error_for_asm. */
const rtx_insn *this_is_asm_operands;
/* Number of operands of this insn, for an `asm' with operands. */
static unsigned int insn_noperands;
/* Compare optimization flag. */
static rtx last_ignored_compare = 0;
/* Assign a unique number to each insn that is output.
This can be used to generate unique local labels. */
static int insn_counter = 0;
/* This variable contains machine-dependent flags (defined in tm.h)
set and examined by output routines
that describe how to interpret the condition codes properly. */
CC_STATUS cc_status;
/* During output of an insn, this contains a copy of cc_status
from before the insn. */
CC_STATUS cc_prev_status;
/* Number of unmatched NOTE_INSN_BLOCK_BEG notes we have seen. */
static int block_depth;
/* Nonzero if have enabled APP processing of our assembler output. */
static int app_on;
/* If we are outputting an insn sequence, this contains the sequence rtx.
Zero otherwise. */
rtx_sequence *final_sequence;
#ifdef ASSEMBLER_DIALECT
/* Number of the assembler dialect to use, starting at 0. */
static int dialect_number;
#endif
/* Nonnull if the insn currently being emitted was a COND_EXEC pattern. */
rtx current_insn_predicate;
/* True if printing into -fdump-final-insns= dump. */
bool final_insns_dump_p;
/* True if profile_function should be called, but hasn't been called yet. */
static bool need_profile_function;
static int asm_insn_count (rtx);
static void profile_function (FILE *);
static void profile_after_prologue (FILE *);
static bool notice_source_line (rtx_insn *, bool *);
static rtx walk_alter_subreg (rtx *, bool *);
static void output_asm_name (void);
static void output_alternate_entry_point (FILE *, rtx_insn *);
static tree get_mem_expr_from_op (rtx, int *);
static void output_asm_operand_names (rtx *, int *, int);
#ifdef LEAF_REGISTERS
static void leaf_renumber_regs (rtx_insn *);
#endif
#if HAVE_cc0
static int alter_cond (rtx);
#endif
static int align_fuzz (rtx, rtx, int, unsigned);
static void collect_fn_hard_reg_usage (void);
/* Initialize data in final at the beginning of a compilation. */
void
init_final (const char *filename ATTRIBUTE_UNUSED)
{
app_on = 0;
final_sequence = 0;
#ifdef ASSEMBLER_DIALECT
dialect_number = ASSEMBLER_DIALECT;
#endif
}
/* Default target function prologue and epilogue assembler output.
If not overridden for epilogue code, then the function body itself
contains return instructions wherever needed. */
void
default_function_pro_epilogue (FILE *)
{
}
void
default_function_switched_text_sections (FILE *file ATTRIBUTE_UNUSED,
tree decl ATTRIBUTE_UNUSED,
bool new_is_cold ATTRIBUTE_UNUSED)
{
}
/* Default target hook that outputs nothing to a stream. */
void
no_asm_to_stream (FILE *file ATTRIBUTE_UNUSED)
{
}
/* Enable APP processing of subsequent output.
Used before the output from an `asm' statement. */
void
app_enable (void)
{
if (! app_on)
{
fputs (ASM_APP_ON, asm_out_file);
app_on = 1;
}
}
/* Disable APP processing of subsequent output.
Called from varasm.c before most kinds of output. */
void
app_disable (void)
{
if (app_on)
{
fputs (ASM_APP_OFF, asm_out_file);
app_on = 0;
}
}
/* Return the number of slots filled in the current
delayed branch sequence (we don't count the insn needing the
delay slot). Zero if not in a delayed branch sequence. */
int
dbr_sequence_length (void)
{
if (final_sequence != 0)
return XVECLEN (final_sequence, 0) - 1;
else
return 0;
}
/* The next two pages contain routines used to compute the length of an insn
and to shorten branches. */
/* Arrays for insn lengths, and addresses. The latter is referenced by
`insn_current_length'. */
static int *insn_lengths;
vec<int> insn_addresses_;
/* Max uid for which the above arrays are valid. */
static int insn_lengths_max_uid;
/* Address of insn being processed. Used by `insn_current_length'. */
int insn_current_address;
/* Address of insn being processed in previous iteration. */
int insn_last_address;
/* known invariant alignment of insn being processed. */
int insn_current_align;
/* After shorten_branches, for any insn, uid_align[INSN_UID (insn)]
gives the next following alignment insn that increases the known
alignment, or NULL_RTX if there is no such insn.
For any alignment obtained this way, we can again index uid_align with
its uid to obtain the next following align that in turn increases the
alignment, till we reach NULL_RTX; the sequence obtained this way
for each insn we'll call the alignment chain of this insn in the following
comments. */
static rtx *uid_align;
static int *uid_shuid;
static vec<align_flags> label_align;
/* Indicate that branch shortening hasn't yet been done. */
void
init_insn_lengths (void)
{
if (uid_shuid)
{
free (uid_shuid);
uid_shuid = 0;
}
if (insn_lengths)
{
free (insn_lengths);
insn_lengths = 0;
insn_lengths_max_uid = 0;
}
if (HAVE_ATTR_length)
INSN_ADDRESSES_FREE ();
if (uid_align)
{
free (uid_align);
uid_align = 0;
}
}
/* Obtain the current length of an insn. If branch shortening has been done,
get its actual length. Otherwise, use FALLBACK_FN to calculate the
length. */
static int
get_attr_length_1 (rtx_insn *insn, int (*fallback_fn) (rtx_insn *))
{
rtx body;
int i;
int length = 0;
if (!HAVE_ATTR_length)
return 0;
if (insn_lengths_max_uid > INSN_UID (insn))
return insn_lengths[INSN_UID (insn)];
else
switch (GET_CODE (insn))
{
case NOTE:
case BARRIER:
case CODE_LABEL:
case DEBUG_INSN:
return 0;
case CALL_INSN:
case JUMP_INSN:
length = fallback_fn (insn);
break;
case INSN:
body = PATTERN (insn);
if (GET_CODE (body) == USE || GET_CODE (body) == CLOBBER)
return 0;
else if (GET_CODE (body) == ASM_INPUT || asm_noperands (body) >= 0)
length = asm_insn_count (body) * fallback_fn (insn);
else if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (body))
for (i = 0; i < seq->len (); i++)
length += get_attr_length_1 (seq->insn (i), fallback_fn);
else
length = fallback_fn (insn);
break;
default:
break;
}
#ifdef ADJUST_INSN_LENGTH
ADJUST_INSN_LENGTH (insn, length);
#endif
return length;
}
/* Obtain the current length of an insn. If branch shortening has been done,
get its actual length. Otherwise, get its maximum length. */
int
get_attr_length (rtx_insn *insn)
{
return get_attr_length_1 (insn, insn_default_length);
}
/* Obtain the current length of an insn. If branch shortening has been done,
get its actual length. Otherwise, get its minimum length. */
int
get_attr_min_length (rtx_insn *insn)
{
return get_attr_length_1 (insn, insn_min_length);
}
/* Code to handle alignment inside shorten_branches. */
/* Here is an explanation how the algorithm in align_fuzz can give
proper results:
Call a sequence of instructions beginning with alignment point X
and continuing until the next alignment point `block X'. When `X'
is used in an expression, it means the alignment value of the
alignment point.
Call the distance between the start of the first insn of block X, and
the end of the last insn of block X `IX', for the `inner size of X'.
This is clearly the sum of the instruction lengths.
Likewise with the next alignment-delimited block following X, which we
shall call block Y.
Call the distance between the start of the first insn of block X, and
the start of the first insn of block Y `OX', for the `outer size of X'.
The estimated padding is then OX - IX.
OX can be safely estimated as
if (X >= Y)
OX = round_up(IX, Y)
else
OX = round_up(IX, X) + Y - X
Clearly est(IX) >= real(IX), because that only depends on the
instruction lengths, and those being overestimated is a given.
Clearly round_up(foo, Z) >= round_up(bar, Z) if foo >= bar, so
we needn't worry about that when thinking about OX.
When X >= Y, the alignment provided by Y adds no uncertainty factor
for branch ranges starting before X, so we can just round what we have.
But when X < Y, we don't know anything about the, so to speak,
`middle bits', so we have to assume the worst when aligning up from an
address mod X to one mod Y, which is Y - X. */
#ifndef LABEL_ALIGN
#define LABEL_ALIGN(LABEL) align_labels
#endif
#ifndef LOOP_ALIGN
#define LOOP_ALIGN(LABEL) align_loops
#endif
#ifndef LABEL_ALIGN_AFTER_BARRIER
#define LABEL_ALIGN_AFTER_BARRIER(LABEL) 0
#endif
#ifndef JUMP_ALIGN
#define JUMP_ALIGN(LABEL) align_jumps
#endif
#ifndef ADDR_VEC_ALIGN
static int
final_addr_vec_align (rtx_jump_table_data *addr_vec)
{
int align = GET_MODE_SIZE (addr_vec->get_data_mode ());
if (align > BIGGEST_ALIGNMENT / BITS_PER_UNIT)
align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
return exact_log2 (align);
}
#define ADDR_VEC_ALIGN(ADDR_VEC) final_addr_vec_align (ADDR_VEC)
#endif
#ifndef INSN_LENGTH_ALIGNMENT
#define INSN_LENGTH_ALIGNMENT(INSN) length_unit_log
#endif
#define INSN_SHUID(INSN) (uid_shuid[INSN_UID (INSN)])
static int min_labelno, max_labelno;
#define LABEL_TO_ALIGNMENT(LABEL) \
(label_align[CODE_LABEL_NUMBER (LABEL) - min_labelno])
/* For the benefit of port specific code do this also as a function. */
align_flags
label_to_alignment (rtx label)
{
if (CODE_LABEL_NUMBER (label) <= max_labelno)
return LABEL_TO_ALIGNMENT (label);
return align_flags ();
}
/* The differences in addresses
between a branch and its target might grow or shrink depending on
the alignment the start insn of the range (the branch for a forward
branch or the label for a backward branch) starts out on; if these
differences are used naively, they can even oscillate infinitely.
We therefore want to compute a 'worst case' address difference that
is independent of the alignment the start insn of the range end
up on, and that is at least as large as the actual difference.
The function align_fuzz calculates the amount we have to add to the
naively computed difference, by traversing the part of the alignment
chain of the start insn of the range that is in front of the end insn
of the range, and considering for each alignment the maximum amount
that it might contribute to a size increase.
For casesi tables, we also want to know worst case minimum amounts of
address difference, in case a machine description wants to introduce
some common offset that is added to all offsets in a table.
For this purpose, align_fuzz with a growth argument of 0 computes the
appropriate adjustment. */
/* Compute the maximum delta by which the difference of the addresses of
START and END might grow / shrink due to a different address for start
which changes the size of alignment insns between START and END.
KNOWN_ALIGN_LOG is the alignment known for START.
GROWTH should be ~0 if the objective is to compute potential code size
increase, and 0 if the objective is to compute potential shrink.
The return value is undefined for any other value of GROWTH. */
static int
align_fuzz (rtx start, rtx end, int known_align_log, unsigned int growth)
{
int uid = INSN_UID (start);
rtx align_label;
int known_align = 1 << known_align_log;
int end_shuid = INSN_SHUID (end);
int fuzz = 0;
for (align_label = uid_align[uid]; align_label; align_label = uid_align[uid])
{
int align_addr, new_align;
uid = INSN_UID (align_label);
align_addr = INSN_ADDRESSES (uid) - insn_lengths[uid];
if (uid_shuid[uid] > end_shuid)
break;
align_flags alignment = LABEL_TO_ALIGNMENT (align_label);
new_align = 1 << alignment.levels[0].log;
if (new_align < known_align)
continue;
fuzz += (-align_addr ^ growth) & (new_align - known_align);
known_align = new_align;
}
return fuzz;
}
/* Compute a worst-case reference address of a branch so that it
can be safely used in the presence of aligned labels. Since the
size of the branch itself is unknown, the size of the branch is
not included in the range. I.e. for a forward branch, the reference
address is the end address of the branch as known from the previous
branch shortening pass, minus a value to account for possible size
increase due to alignment. For a backward branch, it is the start
address of the branch as known from the current pass, plus a value
to account for possible size increase due to alignment.
NB.: Therefore, the maximum offset allowed for backward branches needs
to exclude the branch size. */
int
insn_current_reference_address (rtx_insn *branch)
{
rtx dest;
int seq_uid;
if (! INSN_ADDRESSES_SET_P ())
return 0;
rtx_insn *seq = NEXT_INSN (PREV_INSN (branch));
seq_uid = INSN_UID (seq);
if (!jump_to_label_p (branch))
/* This can happen for example on the PA; the objective is to know the
offset to address something in front of the start of the function.
Thus, we can treat it like a backward branch.
We assume here that FUNCTION_BOUNDARY / BITS_PER_UNIT is larger than
any alignment we'd encounter, so we skip the call to align_fuzz. */
return insn_current_address;
dest = JUMP_LABEL (branch);
/* BRANCH has no proper alignment chain set, so use SEQ.
BRANCH also has no INSN_SHUID. */
if (INSN_SHUID (seq) < INSN_SHUID (dest))
{
/* Forward branch. */
return (insn_last_address + insn_lengths[seq_uid]
- align_fuzz (seq, dest, length_unit_log, ~0));
}
else
{
/* Backward branch. */
return (insn_current_address
+ align_fuzz (dest, seq, length_unit_log, ~0));
}
}
/* Compute branch alignments based on CFG profile. */
unsigned int
compute_alignments (void)
{
basic_block bb;
align_flags max_alignment;
label_align.truncate (0);
max_labelno = max_label_num ();
min_labelno = get_first_label_num ();
label_align.safe_grow_cleared (max_labelno - min_labelno + 1);
/* If not optimizing or optimizing for size, don't assign any alignments. */
if (! optimize || optimize_function_for_size_p (cfun))
return 0;
if (dump_file)
{
dump_reg_info (dump_file);
dump_flow_info (dump_file, TDF_DETAILS);
flow_loops_dump (dump_file, NULL, 1);
}
loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
profile_count count_threshold = cfun->cfg->count_max.apply_scale
(1, param_align_threshold);
if (dump_file)
{
fprintf (dump_file, "count_max: ");
cfun->cfg->count_max.dump (dump_file);
fprintf (dump_file, "\n");
}
FOR_EACH_BB_FN (bb, cfun)
{
rtx_insn *label = BB_HEAD (bb);
bool has_fallthru = 0;
edge e;
edge_iterator ei;
if (!LABEL_P (label)
|| optimize_bb_for_size_p (bb))
{
if (dump_file)
fprintf (dump_file,
"BB %4i loop %2i loop_depth %2i skipped.\n",
bb->index,
bb->loop_father->num,
bb_loop_depth (bb));
continue;
}
max_alignment = LABEL_ALIGN (label);
profile_count fallthru_count = profile_count::zero ();
profile_count branch_count = profile_count::zero ();
FOR_EACH_EDGE (e, ei, bb->preds)
{
if (e->flags & EDGE_FALLTHRU)
has_fallthru = 1, fallthru_count += e->count ();
else
branch_count += e->count ();
}
if (dump_file)
{
fprintf (dump_file, "BB %4i loop %2i loop_depth"
" %2i fall ",
bb->index, bb->loop_father->num,
bb_loop_depth (bb));
fallthru_count.dump (dump_file);
fprintf (dump_file, " branch ");
branch_count.dump (dump_file);
if (!bb->loop_father->inner && bb->loop_father->num)
fprintf (dump_file, " inner_loop");
if (bb->loop_father->header == bb)
fprintf (dump_file, " loop_header");
fprintf (dump_file, "\n");
}
if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
continue;
/* There are two purposes to align block with no fallthru incoming edge:
1) to avoid fetch stalls when branch destination is near cache boundary
2) to improve cache efficiency in case the previous block is not executed
(so it does not need to be in the cache).
We to catch first case, we align frequently executed blocks.
To catch the second, we align blocks that are executed more frequently
than the predecessor and the predecessor is likely to not be executed
when function is called. */
if (!has_fallthru
&& (branch_count > count_threshold
|| (bb->count > bb->prev_bb->count.apply_scale (10, 1)
&& (bb->prev_bb->count
<= ENTRY_BLOCK_PTR_FOR_FN (cfun)
->count.apply_scale (1, 2)))))
{
align_flags alignment = JUMP_ALIGN (label);
if (dump_file)
fprintf (dump_file, " jump alignment added.\n");
max_alignment = align_flags::max (max_alignment, alignment);
}
/* In case block is frequent and reached mostly by non-fallthru edge,
align it. It is most likely a first block of loop. */
if (has_fallthru
&& !(single_succ_p (bb)
&& single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
&& optimize_bb_for_speed_p (bb)
&& branch_count + fallthru_count > count_threshold
&& (branch_count
> fallthru_count.apply_scale
(param_align_loop_iterations, 1)))
{
align_flags alignment = LOOP_ALIGN (label);
if (dump_file)
fprintf (dump_file, " internal loop alignment added.\n");
max_alignment = align_flags::max (max_alignment, alignment);
}
LABEL_TO_ALIGNMENT (label) = max_alignment;
}
loop_optimizer_finalize ();
free_dominance_info (CDI_DOMINATORS);
return 0;
}
/* Grow the LABEL_ALIGN array after new labels are created. */
static void
grow_label_align (void)
{
int old = max_labelno;
int n_labels;
int n_old_labels;
max_labelno = max_label_num ();
n_labels = max_labelno - min_labelno + 1;
n_old_labels = old - min_labelno + 1;
label_align.safe_grow_cleared (n_labels);
/* Range of labels grows monotonically in the function. Failing here
means that the initialization of array got lost. */
gcc_assert (n_old_labels <= n_labels);
}
/* Update the already computed alignment information. LABEL_PAIRS is a vector
made up of pairs of labels for which the alignment information of the first
element will be copied from that of the second element. */
void
update_alignments (vec<rtx> &label_pairs)
{
unsigned int i = 0;
rtx iter, label = NULL_RTX;
if (max_labelno != max_label_num ())
grow_label_align ();
FOR_EACH_VEC_ELT (label_pairs, i, iter)
if (i & 1)
LABEL_TO_ALIGNMENT (label) = LABEL_TO_ALIGNMENT (iter);
else
label = iter;
}
namespace {
const pass_data pass_data_compute_alignments =
{
RTL_PASS, /* type */
"alignments", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
};
class pass_compute_alignments : public rtl_opt_pass
{
public:
pass_compute_alignments (gcc::context *ctxt)
: rtl_opt_pass (pass_data_compute_alignments, ctxt)
{}
/* opt_pass methods: */
virtual unsigned int execute (function *) { return compute_alignments (); }
}; // class pass_compute_alignments
} // anon namespace
rtl_opt_pass *
make_pass_compute_alignments (gcc::context *ctxt)
{
return new pass_compute_alignments (ctxt);
}
/* Make a pass over all insns and compute their actual lengths by shortening
any branches of variable length if possible. */
/* shorten_branches might be called multiple times: for example, the SH
port splits out-of-range conditional branches in MACHINE_DEPENDENT_REORG.
In order to do this, it needs proper length information, which it obtains
by calling shorten_branches. This cannot be collapsed with
shorten_branches itself into a single pass unless we also want to integrate
reorg.c, since the branch splitting exposes new instructions with delay
slots. */
void
shorten_branches (rtx_insn *first)
{
rtx_insn *insn;
int max_uid;
int i;
rtx_insn *seq;
int something_changed = 1;
char *varying_length;
rtx body;
int uid;
rtx align_tab[MAX_CODE_ALIGN + 1];
/* Compute maximum UID and allocate label_align / uid_shuid. */
max_uid = get_max_uid ();
/* Free uid_shuid before reallocating it. */
free (uid_shuid);
uid_shuid = XNEWVEC (int, max_uid);
if (max_labelno != max_label_num ())
grow_label_align ();
/* Initialize label_align and set up uid_shuid to be strictly
monotonically rising with insn order. */
/* We use alignment here to keep track of the maximum alignment we want to
impose on the next CODE_LABEL (or the current one if we are processing
the CODE_LABEL itself). */
align_flags max_alignment;
for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
{
INSN_SHUID (insn) = i++;
if (INSN_P (insn))
continue;
if (rtx_code_label *label = dyn_cast <rtx_code_label *> (insn))
{
/* Merge in alignments computed by compute_alignments. */
align_flags alignment = LABEL_TO_ALIGNMENT (label);
max_alignment = align_flags::max (max_alignment, alignment);
rtx_jump_table_data *table = jump_table_for_label (label);
if (!table)
{
align_flags alignment = LABEL_ALIGN (label);
max_alignment = align_flags::max (max_alignment, alignment);
}
/* ADDR_VECs only take room if read-only data goes into the text
section. */
if ((JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section == text_section)
&& table)
{
align_flags alignment = align_flags (ADDR_VEC_ALIGN (table));
max_alignment = align_flags::max (max_alignment, alignment);
}
LABEL_TO_ALIGNMENT (label) = max_alignment;
max_alignment = align_flags ();
}
else if (BARRIER_P (insn))
{
rtx_insn *label;
for (label = insn; label && ! INSN_P (label);
label = NEXT_INSN (label))
if (LABEL_P (label))
{
align_flags alignment
= align_flags (LABEL_ALIGN_AFTER_BARRIER (insn));
max_alignment = align_flags::max (max_alignment, alignment);
break;
}
}
}
if (!HAVE_ATTR_length)
return;
/* Allocate the rest of the arrays. */
insn_lengths = XNEWVEC (int, max_uid);
insn_lengths_max_uid = max_uid;
/* Syntax errors can lead to labels being outside of the main insn stream.
Initialize insn_addresses, so that we get reproducible results. */
INSN_ADDRESSES_ALLOC (max_uid);
varying_length = XCNEWVEC (char, max_uid);
/* Initialize uid_align. We scan instructions
from end to start, and keep in align_tab[n] the last seen insn
that does an alignment of at least n+1, i.e. the successor
in the alignment chain for an insn that does / has a known
alignment of n. */
uid_align = XCNEWVEC (rtx, max_uid);
for (i = MAX_CODE_ALIGN + 1; --i >= 0;)
align_tab[i] = NULL_RTX;
seq = get_last_insn ();
for (; seq; seq = PREV_INSN (seq))
{
int uid = INSN_UID (seq);
int log;
log = (LABEL_P (seq) ? LABEL_TO_ALIGNMENT (seq).levels[0].log : 0);
uid_align[uid] = align_tab[0];
if (log)
{
/* Found an alignment label. */
gcc_checking_assert (log < MAX_CODE_ALIGN + 1);
uid_align[uid] = align_tab[log];
for (i = log - 1; i >= 0; i--)
align_tab[i] = seq;
}
}
/* When optimizing, we start assuming minimum length, and keep increasing
lengths as we find the need for this, till nothing changes.
When not optimizing, we start assuming maximum lengths, and
do a single pass to update the lengths. */
bool increasing = optimize != 0;
#ifdef CASE_VECTOR_SHORTEN_MODE
if (optimize)
{
/* Look for ADDR_DIFF_VECs, and initialize their minimum and maximum
label fields. */
int min_shuid = INSN_SHUID (get_insns ()) - 1;
int max_shuid = INSN_SHUID (get_last_insn ()) + 1;
int rel;
for (insn = first; insn != 0; insn = NEXT_INSN (insn))
{
rtx min_lab = NULL_RTX, max_lab = NULL_RTX, pat;
int len, i, min, max, insn_shuid;
int min_align;
addr_diff_vec_flags flags;
if (! JUMP_TABLE_DATA_P (insn)
|| GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
continue;
pat = PATTERN (insn);
len = XVECLEN (pat, 1);
gcc_assert (len > 0);
min_align = MAX_CODE_ALIGN;
for (min = max_shuid, max = min_shuid, i = len - 1; i >= 0; i--)
{
rtx lab = XEXP (XVECEXP (pat, 1, i), 0);
int shuid = INSN_SHUID (lab);
if (shuid < min)
{
min = shuid;
min_lab = lab;