@@ -351,7 +351,7 @@ aarch64*-*-*)
c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
d_target_objs="aarch64-d.o"
- extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch-bti-insert.o aarch64-cc-fusion.o aarch64-early-ra.o aarch64-ldp-fusion.o"
+ extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o aarch-bti-insert.o aarch64-cc-fusion.o aarch64-early-ra.o aarch64-ldp-fusion.o"
target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.h \$(srcdir)/config/aarch64/aarch64-builtins.cc \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc"
target_has_targetm_common=yes
;;
@@ -23,7 +23,6 @@ INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering);
INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation);
INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm);
INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_late_track_speculation);
-INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance);
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti);
INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion);
INSERT_PASS_BEFORE (pass_early_remat, 1, pass_ldp_fusion);
@@ -1084,7 +1084,6 @@ rtl_opt_pass *make_pass_aarch64_early_ra (gcc::context *);
rtl_opt_pass *make_pass_fma_steering (gcc::context *);
rtl_opt_pass *make_pass_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_late_track_speculation (gcc::context *);
-rtl_opt_pass *make_pass_tag_collision_avoidance (gcc::context *);
rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt);
rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt);
rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt);
@@ -36,8 +36,6 @@ AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
are not considered cheap. */
AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
-AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS)
-
AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS)
AARCH64_EXTRA_TUNING_OPTION ("use_new_vector_costs", USE_NEW_VECTOR_COSTS)
deleted file mode 100644
@@ -1,888 +0,0 @@
-/* Tag Collision Avoidance pass for Falkor.
- Copyright (C) 2018-2024 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
- <http://www.gnu.org/licenses/>. */
-
-#define IN_TARGET_CODE 1
-
-#include "config.h"
-#define INCLUDE_LIST
-#include "system.h"
-#include "coretypes.h"
-#include "backend.h"
-#include "target.h"
-#include "rtl.h"
-#include "tree.h"
-#include "tree-pass.h"
-#include "aarch64-protos.h"
-#include "hash-map.h"
-#include "cfgloop.h"
-#include "cfgrtl.h"
-#include "rtl-iter.h"
-#include "df.h"
-#include "memmodel.h"
-#include "optabs.h"
-#include "regs.h"
-#include "recog.h"
-#include "function-abi.h"
-#include "regrename.h"
-#include "print-rtl.h"
-
-/* The Falkor hardware prefetching system uses the encoding of the registers
- and offsets of loads to decide which of the multiple hardware prefetchers to
- assign the load to. This has the positive effect of accelerating prefetches
- when all related loads with uniform strides are assigned to the same
- prefetcher unit. The down side is that because of the way the assignment
- works, multiple unrelated loads may end up on the same prefetch unit, thus
- causing the unit to bounce between different sets of addresses and never
- train correctly. The point of this pass is to avoid such collisions so that
- unrelated loads are spread out to different prefetchers. It also makes a
- rudimentary attempt to ensure that related loads with the same tags don't
- get moved out unnecessarily.
-
- Perhaps a future enhancement would be to make a more concerted attempt to
- get related loads under the same tag. See the memcpy/memset implementation
- for falkor in glibc to understand the kind of impact this can have on
- falkor.
-
- The assignment of loads is based on a tag that is computed from the encoding
- of the first destination register (the only destination in case of LDR), the
- base register and the offset (either the register or the immediate value, as
- encoded in the instruction). This is what the 14 bit tag looks like:
-
- |<- 6 bits ->|<- 4b ->|<- 4b ->|
- --------------------------------
- | OFFSET | SRC | DST |
- --------------------------------
-
- For all cases, the SRC and DST are the 4 LSB of the encoding of the register
- in the instruction. Offset computation is more involved and is as follows:
-
- - For register offset addressing: 4 LSB of the offset register with the MSB
- of the 6 bits set to 1.
-
- - For immediate offset: 4 LSB of the encoded immediate offset. The encoding
- depends on the width of the load and is expressed as multiples of the
- width.
-
- - For loads with update: 4 LSB of the offset. The encoding here is the
- exact number by which the base is offset and incremented.
-
- Based on the above it is clear that registers 0 and 16 will result in
- collisions, 1 and 17 and so on. This pass detects such collisions within a
- def/use chain of the source register in a loop and tries to resolve the
- collision by renaming one of the destination registers. */
-
-/* Get the destination part of the tag. */
-#define TAG_GET_DEST(__tag) ((__tag) & 0xf)
-
-/* Get the tag with the destination part updated. */
-#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | (__dest & 0xf))
-
-#define MAX_PREFETCH_STRIDE 2048
-
-/* The instruction information structure. This is used to cache information
- about the INSN that we derive when traversing through all of the insns in
- loops. */
-class tag_insn_info
-{
-public:
- rtx_insn *insn;
- rtx dest;
- rtx base;
- rtx offset;
- bool writeback;
- bool ldp;
-
- tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
- : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
- {}
-
- /* Compute the tag based on BASE, DEST and OFFSET of the load. */
- unsigned tag ()
- {
- unsigned int_offset = 0;
- rtx offset = this->offset;
- unsigned dest = REGNO (this->dest);
- unsigned base = REGNO (this->base);
- machine_mode dest_mode = GET_MODE (this->dest);
-
- /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
- destination mode is constant here. */
- unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();
-
- /* For loads of larger than 16 bytes, the DEST part of the tag is 0. */
- if ((dest_mode_size << this->ldp) > 16)
- dest = 0;
-
- if (offset && REG_P (offset))
- int_offset = (1 << 5) | REGNO (offset);
- else if (offset && CONST_INT_P (offset))
- {
- int_offset = INTVAL (offset);
- int_offset /= dest_mode_size;
- if (!this->writeback)
- int_offset >>= 2;
- }
- return ((dest & 0xf)
- | ((base & 0xf) << 4)
- | ((int_offset & 0x3f) << 8));
- }
-};
-
-/* Hash map to traverse and process instructions with colliding tags. */
-typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;
-
-/* Vector of instructions with colliding tags. */
-typedef auto_vec <tag_insn_info *> insn_info_list_t;
-
-/* Pair of instruction information and unavailable register set to pass to
- CHECK_COLLIDING_TAGS. */
-typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;
-
-
-/* Callback to free all tag_insn_info objects. */
-bool
-free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
- void *arg ATTRIBUTE_UNUSED)
-{
- while (v->length () > 0)
- delete v->pop ();
-
- return true;
-}
-
-
-/* Add all aliases of the register to the unavailable register set. REG is the
- smallest register number that can then be used to reference its aliases.
- UNAVAILABLE is the hard register set to add the ignored register numbers to
- and MODE is the mode in which the registers would have been used. */
-static void
-ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg)
-{
- add_to_hard_reg_set (unavailable, mode, reg);
- add_to_hard_reg_set (unavailable, mode, reg + 16);
- add_to_hard_reg_set (unavailable, mode, reg + 32);
- add_to_hard_reg_set (unavailable, mode, reg + 48);
-}
-
-
-/* Callback to check which destination registers are unavailable to us for
- renaming because of the base and offset colliding. This is a callback that
- gets called for every name value pair (T, V) in the TAG_MAP. The ARG is an
- std::pair of the tag_insn_info of the original insn and the hard register
- set UNAVAILABLE that is used to record hard register numbers that cannot be
- used for the renaming. This always returns true since we want to traverse
- through the entire TAG_MAP. */
-bool
-check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
-{
- HARD_REG_SET *unavailable = arg->second;
- unsigned orig_tag = arg->first->tag ();
- unsigned tag = INTVAL (t);
- machine_mode mode = GET_MODE (arg->first->dest);
-
- /* Can't collide with emptiness. */
- if (v.length () == 0)
- return true;
-
- /* Drop all aliased destination registers that result in the same
- tag. It is not necessary to drop all of them but we do anyway
- because it is quicker than checking ranges. */
- if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
- ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));
-
- return true;
-}
-
-
-/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid for
- renaming. INSN_INFO is the original insn, TAG_MAP is the map of the list of
- insns indexed by their tags, HEAD is the def/use chain head of the
- destination register of the original insn. The routine returns the super
- class of register classes that may be used during the renaming. */
-static enum reg_class
-init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
- HARD_REG_SET *unavailable)
-{
- unsigned dest = head->regno;
- enum reg_class super_class = NO_REGS;
- machine_mode mode = GET_MODE (insn_info->dest);
-
- CLEAR_HARD_REG_SET (*unavailable);
-
- for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
- {
- if (DEBUG_INSN_P (tmp->insn))
- continue;
-
- *unavailable |= ~reg_class_contents[tmp->cl];
- super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
- }
-
- for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
- if (fixed_regs[i] || global_regs[i])
- add_to_hard_reg_set (unavailable, mode, i);
-
- arg_pair_t arg = arg_pair_t (insn_info, unavailable);
-
- /* Exclude all registers that would lead to collisions with other loads. */
- tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);
-
- /* Finally, also ignore all aliases of the current reg. */
- ignore_all_aliases (unavailable, mode, dest & 0xf);
-
- return super_class;
-}
-
-
-/* Find a suitable and available register and rename the chain of occurrences
- of the register defined in the def/use chain headed by HEAD in which INSN
- exists. CUR_TAG, TAGS and TAG_MAP are used to determine which registers are
- unavailable due to a potential collision due to the rename. The routine
- returns the register number in case of a successful rename or -1 to indicate
- failure. */
-static int
-rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
-{
- unsigned dest_regno = head->regno;
-
- if (head->cannot_rename || head->renamed)
- return -1;
-
- HARD_REG_SET unavailable;
-
- enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
- &unavailable);
-
- unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
- dest_regno, false);
-
- /* Attempt to rename as long as regrename doesn't just throw the same
- register at us. */
- if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
- INSN_UID (insn_info->insn), dest_regno, new_regno);
-
- return new_regno;
- }
-
- return -1;
-}
-
-
-/* Return true if REGNO is not safe to rename. */
-static bool
-unsafe_rename_p (unsigned regno)
-{
- /* Avoid renaming registers used for argument passing and return value. In
- future we could be a little less conservative and walk through the basic
- blocks to see if there are any call or syscall sites. */
- if (regno <= R8_REGNUM
- || (regno >= V0_REGNUM && regno < V8_REGNUM))
- return true;
-
- /* Don't attempt to rename registers that may have specific meanings. */
- switch (regno)
- {
- case LR_REGNUM:
- case HARD_FRAME_POINTER_REGNUM:
- case FRAME_POINTER_REGNUM:
- case STACK_POINTER_REGNUM:
- return true;
- }
-
- return false;
-}
-
-
-/* Go through the def/use chains for the register and find the chain for this
- insn to rename. The function returns the hard register number in case of a
- successful rename and -1 otherwise. */
-static int
-rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
-{
- struct du_chain *chain = NULL;
- du_head_p head = NULL;
- int i;
-
- unsigned dest_regno = REGNO (insn_info->dest);
-
- if (unsafe_rename_p (dest_regno))
- return -1;
-
- /* Search the chain where this instruction is (one of) the root. */
- rtx_insn *insn = insn_info->insn;
- operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;
-
- for (i = 0; i < dest_op_info->n_chains; i++)
- {
- /* The register tracked by this chain does not match the
- destination register of insn. */
- if (dest_op_info->heads[i]->regno != dest_regno)
- continue;
-
- head = dest_op_info->heads[i];
- /* The chain was merged in another, find the new head. */
- if (!head->first)
- head = regrename_chain_from_id (head->id);
-
- for (chain = head->first; chain; chain = chain->next_use)
- /* Found the insn in the chain, so try renaming the register in this
- chain. */
- if (chain->insn == insn)
- return rename_chain (insn_info, tag_map, head);
- }
-
- return -1;
-}
-
-
-/* Flag to track if the map has changed. */
-static bool map_changed = false;
-
-/* The actual reallocation logic. For each vector of collisions V, try to
- resolve the collision by attempting to rename the destination register of
- all but one of the loads. This is a callback that is invoked for each
- name-value pair (T, V) in TAG_MAP. The function returns true whenever it
- returns unchanged and false otherwise to halt traversal. */
-bool
-avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
-{
- /* We need at least two loads to cause a tag collision, return unchanged. */
- if (v->length () < 2)
- return true;
-
- tag_insn_info *vec_start = v->pop ();
- tag_insn_info *insn_info = vec_start;
-
- /* Try to rename at least one register to reduce the collision. If we
- iterate all the way through, we end up dropping one of the loads from the
- list. This is fine because we want at most one element to ensure that a
- subsequent rename attempt does not end up worsening the collision. */
- do
- {
- int new_regno;
-
- if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
- {
- rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));
-
- tag_map->get_or_insert (new_tag).safe_push (insn_info);
- df_set_regs_ever_live (new_regno, true);
- map_changed = true;
- return false;
- }
-
- v->safe_insert (0, insn_info);
- insn_info = v->pop ();
- }
- while (insn_info != vec_start);
-
- if (dump_file)
- fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
- INSN_UID (insn_info->insn));
-
- /* Drop the last element and move on to the next tag. */
- delete insn_info;
- return true;
-}
-
-
-/* For each set of collisions, attempt to rename the registers or insert a move
- to avoid the collision. We repeatedly traverse through TAG_MAP using
- AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a
- full traversal results in no change in the map. */
-static void
-avoid_collisions (tag_map_t &tag_map)
-{
- do
- {
- map_changed = false;
- tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map);
- }
- while (map_changed);
-}
-
-
-
-/* Find the use def chain in which INSN exists and then see if there is a
- definition inside the loop and outside it. We use this as a simple
- approximation to determine whether the base register is an IV. The basic
- idea is to find INSN in the use-def chains for its base register and find
- all definitions that reach it. Of all these definitions, there should be at
- least one definition that is a simple addition of a constant value, either
- as a binary operation or a pre or post update.
-
- The function returns true if the base register is estimated to be an IV. */
-static bool
-iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
-{
- df_ref ause;
- unsigned regno = REGNO (reg);
-
- /* Ignore loads from the stack. */
- if (regno == SP_REGNUM)
- return false;
-
- for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
- {
- if (!DF_REF_INSN_INFO (ause)
- || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
- continue;
-
- if (insn != DF_REF_INSN (ause))
- continue;
-
- struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
- df_ref def_rec;
-
- FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
- {
- rtx_insn *insn = DF_REF_INSN (def_rec);
- basic_block bb = BLOCK_FOR_INSN (insn);
-
- if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
- && bb->loop_father == loop)
- {
- if (recog_memoized (insn) < 0)
- continue;
-
- rtx pat = PATTERN (insn);
-
- /* Prefetch or clobber; unlikely to be a constant stride. The
- falkor software prefetcher tuning is pretty conservative, so
- its presence indicates that the access pattern is probably
- strided but most likely with an unknown stride size or a
- stride size that is quite large. */
- if (GET_CODE (pat) != SET)
- continue;
-
- rtx x = SET_SRC (pat);
- if (GET_CODE (x) == ZERO_EXTRACT
- || GET_CODE (x) == ZERO_EXTEND
- || GET_CODE (x) == SIGN_EXTEND)
- x = XEXP (x, 0);
-
- /* Loading the value from memory; unlikely to be a constant
- stride. */
- if (MEM_P (x))
- continue;
-
- /* An increment or decrement by a constant MODE_SIZE amount or
- the result of a binary expression is likely to be an IV. */
- if (GET_CODE (x) == POST_INC
- || GET_CODE (x) == POST_DEC
- || GET_CODE (x) == PRE_INC
- || GET_CODE (x) == PRE_DEC)
- return true;
- else if (BINARY_P (x)
- && (CONST_INT_P (XEXP (x, 0))
- || CONST_INT_P (XEXP (x, 1))))
- {
- rtx stride = (CONST_INT_P (XEXP (x, 0))
- ? XEXP (x, 0) : XEXP (x, 1));
-
- /* Don't bother with very long strides because the prefetcher
- is unable to train on them anyway. */
- if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
- return true;
- }
- }
- }
- return false;
- }
- return false;
-}
-
-
-/* Return true if SRC is a strided load in the LOOP, false otherwise.
- If it is a strided load, set the BASE and OFFSET. Also, if this is
- a pre/post increment load, set PRE_POST to true. */
-static bool
-valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
- rtx *base, rtx *offset, bool load_pair)
-{
- subrtx_var_iterator::array_type array;
- rtx x = NULL_RTX;
-
- FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
- if (MEM_P (*iter))
- {
- x = *iter;
- break;
- }
-
- if (!x)
- return false;
-
- struct aarch64_address_info addr;
- machine_mode mode = GET_MODE (x);
-
- if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
- return false;
-
- if (addr.type != ADDRESS_REG_IMM
- && addr.type != ADDRESS_REG_WB
- && addr.type != ADDRESS_REG_REG
- && addr.type != ADDRESS_REG_UXTW
- && addr.type != ADDRESS_REG_SXTW)
- return false;
-
- unsigned regno = REGNO (addr.base);
- if (global_regs[regno] || fixed_regs[regno])
- return false;
-
- if (addr.type == ADDRESS_REG_WB)
- {
- unsigned code = GET_CODE (XEXP (x, 0));
-
- *pre_post = true;
- *base = addr.base;
-
- if (code == PRE_MODIFY || code == POST_MODIFY)
- *offset = addr.offset;
- else
- {
- /*Writeback is only supported for fixed-width modes. */
- unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();
-
- /* For post-incremented load pairs we would increment the base twice
- over, so make that adjustment. */
- if (load_pair && (code == POST_INC || code == POST_DEC))
- int_offset *= 2;
-
- *offset = GEN_INT (int_offset);
- }
- return true;
- }
- else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
- {
- /* Check if the load is strided. */
- if (!iv_p (insn, addr.base, loop))
- return false;
-
- *base = addr.base;
- *offset = addr.offset;
- return true;
- }
-
- return false;
-}
-
-
-/* Return true if INSN is a strided load in LOOP. If it is a strided load, set
- the DEST, BASE and OFFSET. Also, if this is a pre/post increment load, set
- PRE_POST to true.
-
- The routine does checks on the destination of the insn and depends on
- STRIDED_LOAD_P to check the source and fill in the BASE and OFFSET. */
-static bool
-get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
- rtx *offset, bool *pre_post, bool *ldp)
-{
- if (!INSN_P (insn) || recog_memoized (insn) < 0)
- return false;
-
- rtx pat = PATTERN (insn);
- unsigned code = GET_CODE (pat);
- bool load_pair = (code == PARALLEL);
-
- /* For a load pair we need only the first base and destination
- registers. We however need to ensure that our pre/post increment
- offset is doubled; we do that in STRIDED_LOAD_P. */
- if (load_pair)
- {
- pat = XVECEXP (pat, 0, 0);
- code = GET_CODE (pat);
- }
-
- if (code != SET)
- return false;
-
- rtx dest_rtx = SET_DEST (pat);
-
- if (!REG_P (dest_rtx))
- return false;
-
- unsigned regno = REGNO (dest_rtx);
- machine_mode mode = GET_MODE (dest_rtx);
- machine_mode inner_mode = GET_MODE_INNER (mode);
-
- /* Falkor does not support SVE vectors. */
- if (!GET_MODE_SIZE (mode).is_constant ())
- return false;
-
- /* Ignore vector struct or lane loads. */
- if (GET_MODE_SIZE (mode).to_constant ()
- != GET_MODE_SIZE (inner_mode).to_constant ())
- return false;
-
- /* The largest width we want to bother with is a load of a pair of
- quad-words. */
- if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
- > GET_MODE_SIZE (OImode))
- return false;
-
- /* Ignore loads into the stack pointer because it is unlikely to be a
- stream. */
- if (regno == SP_REGNUM)
- return false;
-
- if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
- load_pair))
- {
- *dest = dest_rtx;
- *ldp = load_pair;
-
- return true;
- }
-
- return false;
-}
-
-
-/* Return whether INSN and CAND are in the same def/use chain. */
-static bool
-in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
-{
- struct du_chain *chain = NULL;
- du_head_p head = NULL;
- int i;
-
- /* Search the chain where this instruction is (one of) the root. */
- operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;
-
- for (i = 0; i < op_info->n_chains; i++)
- {
- /* The register tracked by this chain does not match the
- dest register of insn. */
- if (op_info->heads[i]->regno != regno)
- continue;
-
- head = op_info->heads[i];
- /* The chain was merged in another, find the new head. */
- if (!head->first)
- head = regrename_chain_from_id (head->id);
-
- bool found_insn = false, found_cand = false;
-
- for (chain = head->first; chain; chain = chain->next_use)
- {
- rtx *loc = &SET_DEST (PATTERN (chain->insn));
-
- if (chain->loc != loc)
- continue;
-
- if (chain->insn == insn)
- found_insn = true;
-
- if (chain->insn == cand)
- found_cand = true;
-
- if (found_insn && found_cand)
- return true;
- }
- }
-
- return false;
-}
-
-
-/* Callback function to traverse the tag map and drop loads that have the same
- destination and are in the same chain of occurrence. Routine always returns
- true to allow traversal through all of TAG_MAP. */
-bool
-single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
- void *arg ATTRIBUTE_UNUSED)
-{
- for (int i = v->length () - 1; i>= 1; i--)
- {
- tag_insn_info *insn_info = (*v)[i];
-
- for (int j = v->length () - 2; j >= 0; j--)
- {
- /* Filter out destinations in the same chain. */
- if (in_same_chain (insn_info->insn, (*v)[j]->insn,
- REGNO (insn_info->dest)))
- {
- v->ordered_remove (j);
- i = v->length ();
- break;
- }
- }
- }
-
- return true;
-}
-
-
-/* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn
- list INSN_INFO for tag T. */
-bool
-dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
- void *unused ATTRIBUTE_UNUSED)
-{
- gcc_assert (dump_file);
- fprintf (dump_file, "Tag 0x" HOST_WIDE_INT_PRINT_HEX_PURE " ::\n", INTVAL (t));
-
- for (unsigned i = 0; i < insn_info.length (); i++)
- dump_insn_slim (dump_file, insn_info[i]->insn);
-
- fprintf (dump_file, "\n");
-
- return true;
-}
-
-
-/* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware
- prefetcher memory tags. */
-static void
-record_loads (tag_map_t &tag_map, struct loop *loop)
-{
- rtx_insn *insn;
- basic_block *body, bb;
-
- body = get_loop_body (loop);
-
- for (unsigned i = 0; i < loop->num_nodes; i++)
- {
- bb = body[i];
- FOR_BB_INSNS (bb, insn)
- {
- rtx base = NULL_RTX;
- rtx dest = NULL_RTX;
- rtx offset = NULL_RTX;
- bool writeback = false;
- bool ldp = false;
-
- if (!INSN_P (insn) || DEBUG_INSN_P (insn))
- continue;
-
- if (get_load_info (insn, loop, &dest, &base, &offset, &writeback,
- &ldp))
- {
- tag_insn_info *i = new tag_insn_info (insn, dest, base, offset,
- writeback, ldp);
- rtx tag = GEN_INT (i->tag ());
- tag_map.get_or_insert (tag).safe_push (i);
- }
- }
- }
-
- if (dump_file)
- {
- fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num);
- tag_map.traverse <void *, dump_insn_list> (NULL);
- }
-
- /* Try to reduce the dataset before launching into the rename attempt. Drop
- destinations in the same collision chain that appear in the same def/use
- chain, all as defs. These chains will move together in a rename so
- there's no point in keeping both in there. */
- tag_map.traverse <void *, single_dest_per_chain> (NULL);
-}
-
-
-/* Tag collision avoidance pass for Falkor. The pass runs in two phases for
- each loop; the first phase collects all loads that we consider as
- interesting for renaming into a tag-indexed map of lists. The second phase
- renames the destination register of the loads in an attempt to spread out
- the loads into different tags. */
-void
-execute_tag_collision_avoidance ()
-{
- df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
- df_chain_add_problem (DF_UD_CHAIN);
- df_compute_regs_ever_live (true);
- df_note_add_problem ();
- df_analyze ();
- df_set_flags (DF_DEFER_INSN_RESCAN);
-
- regrename_init (true);
- regrename_analyze (NULL);
-
- compute_bb_for_insn ();
- calculate_dominance_info (CDI_DOMINATORS);
- loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
-
- for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
- {
- tag_map_t tag_map (512);
-
- record_loads (tag_map, loop);
- avoid_collisions (tag_map);
- if (dump_file)
- {
- fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
- tag_map.traverse <void *, dump_insn_list> (NULL);
- }
- tag_map.traverse <void *, free_insn_info> (NULL);
- }
-
- loop_optimizer_finalize ();
- free_dominance_info (CDI_DOMINATORS);
- regrename_finish ();
-}
-
-
-const pass_data pass_data_tag_collision_avoidance =
-{
- RTL_PASS, /* type */
- "tag_collision_avoidance", /* name */
- OPTGROUP_NONE, /* optinfo_flags */
- TV_NONE, /* tv_id */
- 0, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- TODO_df_finish, /* todo_flags_finish */
-};
-
-
-class pass_tag_collision_avoidance : public rtl_opt_pass
-{
-public:
- pass_tag_collision_avoidance (gcc::context *ctxt)
- : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
- {}
-
- /* opt_pass methods: */
- virtual bool gate (function *)
- {
- return ((aarch64_tune_params.extra_tuning_flags
- & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
- && optimize >= 2);
- }
-
- virtual unsigned int execute (function *)
- {
- execute_tag_collision_avoidance ();
- return 0;
- }
-
-}; // class pass_tag_collision_avoidance
-
-
-/* Create a new pass instance. */
-rtl_opt_pass *
-make_pass_tag_collision_avoidance (gcc::context *ctxt)
-{
- return new pass_tag_collision_avoidance (ctxt);
-}
@@ -169,16 +169,6 @@ aarch64-speculation.o: $(srcdir)/config/aarch64/aarch64-speculation.cc \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_SPPFLAGS) $(INCLUDES) \
$(srcdir)/config/aarch64/aarch64-speculation.cc
-falkor-tag-collision-avoidance.o: \
- $(srcdir)/config/aarch64/falkor-tag-collision-avoidance.cc \
- $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(REGS_H) insn-config.h $(RTL_BASE_H) \
- dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \
- output.h hash-map.h $(DF_H) $(OBSTACK_H) $(TARGET_H) $(RTL_H) \
- $(CONTEXT_H) $(TREE_PASS_H) regrename.h \
- $(srcdir)/config/aarch64/aarch64-protos.h
- $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
- $(srcdir)/config/aarch64/falkor-tag-collision-avoidance.cc
-
aarch-bti-insert.o: $(srcdir)/config/arm/aarch-bti-insert.cc \
$(CONFIG_H) $(SYSTEM_H) $(TM_H) $(REGS_H) insn-config.h $(RTL_BASE_H) \
dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \
@@ -128,7 +128,7 @@ static const struct tune_params qdf24xx_tunings =
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&qdf24xx_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */