@@ -138,6 +138,198 @@ struct alt_base
poly_int64 offset;
};
+// Virtual base class for load/store walkers used in alias analysis.
+struct alias_walker
+{
+ virtual bool conflict_p (int &budget) const = 0;
+ virtual insn_info *insn () const = 0;
+ virtual bool valid () const = 0;
+ virtual void advance () = 0;
+};
+
+// Forward declarations of helpers used by the aarch64_pair_fusion class.
+bool ldp_operand_mode_ok_p (machine_mode mode);
+rtx aarch64_destructure_load_pair (rtx regs[2], rtx pattern);
+rtx aarch64_destructure_store_pair (rtx regs[2], rtx pattern);
+rtx aarch64_gen_writeback_pair (rtx wb_effect, rtx pair_mem, rtx regs[2],
+ bool load_p);
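+// The kinds of writeback handling queried via the
+// handle_writeback_opportunities hook: WRITEBACK_PAIR_P asks whether an
+// existing pair may be promoted to a writeback form, while WRITEBACK asks
+// whether writeback (auto-increment) accesses should be considered at all.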
+enum class writeback {
+ WRITEBACK_PAIR_P,
+ WRITEBACK
+};
+
+struct pair_fusion {
+
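+ // Initialise dominance info and build the RTL-SSA representation that
+ // the pass operates on.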
+ pair_fusion ()
+ {
+ calculate_dominance_info (CDI_DOMINATORS);
+ df_analyze ();
+ crtl->ssa = new rtl_ssa::function_info (cfun);
+ }
+ // Return true if the register operand (given the mem's machine mode and
+ // whether the access is a load) uses the FP/SIMD registers rather than
+ // the GPRs.
+ virtual bool fpsimd_op_p (rtx, machine_mode, bool)
+ {
+ return false;
+ }
+ // Return true if MODE is a suitable operand mode for forming a pair.
+ virtual bool pair_operand_mode_ok_p (machine_mode mode) = 0;
+ // Return true if REG_OP is a suitable register operand for a paired
+ // access of mode MEM_MODE, where LOAD_P is true for loads.
+ virtual bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
+ machine_mode mem_mode) = 0;
+ // Return the limit on the number of alias checks to perform.
+ virtual int pair_mem_alias_check_limit () = 0;
+ // Return true if writeback opportunities of kind WBACK should be handled.
+ virtual bool handle_writeback_opportunities (enum writeback wback) = 0;
+ // Return true if the lower mem FIRST_MEM of a candidate pair is acceptable
+ // under the target's load/store pair policy model, given LOAD_P and MODE.
+ virtual bool pair_mem_ok_with_policy (rtx first_mem, bool load_p,
+ machine_mode mode) = 0;
+ // Generate and return the pattern for a paired access. PATS holds the
+ // patterns of the two individual accesses, WRITEBACK is the writeback
+ // effect (if any) and LOAD_P is true for a load pair.
+ virtual rtx gen_mem_pair (rtx *pats, rtx writeback,
+ bool load_p) = 0;
+ // Return true if INSN (with pattern PAT) is an existing pair that should
+ // be promoted to a writeback variant. On success, LOAD_P is set to
+ // indicate whether the pair is a load pair.
+ virtual bool pair_mem_promote_writeback_p (insn_info *, rtx, bool &)
+ {
+ return false;
+ }
+ // Return true if we track loads.
+ virtual bool track_loads_p ()
+ {
+ return true;
+ }
+ // Return true if we track stores.
+ virtual bool track_stores_p ()
+ {
+ return true;
+ }
+ // Return true if the offset OFF is out of range for a paired access.
+ virtual bool pair_mem_out_of_range_p (HOST_WIDE_INT off) = 0;
+ // Destructure the pair pattern RTI, storing the register operands in REGS
+ // and returning the mem. LOAD_P is true for a load pair.
+ virtual rtx gen_destructure_pair (rtx regs[2], rtx rti, bool load_p) = 0;
+ // Generate a writeback pair from writeback effect WB_EFFECT, the pair mem
+ // MEM and register operands REGS, with LOAD_P indicating a load pair.
+ virtual rtx gen_writeback_pair (rtx wb_effect, rtx mem,
+ rtx regs[2], bool load_p) = 0;
+ // Return true if OFFSET is suitably aligned for a paired access, i.e.
+ // a multiple of ACCESS_SIZE.
+ virtual bool pair_offset_alignment_ok_p (poly_int64 offset,
+ unsigned access_size) = 0;
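+ // Target-independent machinery, implemented in terms of the hooks above.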
+ void ldp_fusion_bb (bb_info *bb);
+ insn_info *find_trailing_add (insn_info *insns[2],
+ const insn_range_info &pair_range,
+ int initial_writeback,
+ rtx *writeback_effect,
+ def_info **add_def,
+ def_info *base_def,
+ poly_int64 initial_offset,
+ unsigned access_size);
+ int get_viable_bases (insn_info *insns[2],
+ vec<base_cand> &base_cands,
+ rtx cand_mems[2],
+ unsigned access_size,
+ bool reversed);
+ void do_alias_analysis (insn_info *alias_hazards[4],
+ alias_walker *walkers[4],
+ bool load_p);
+ void run ();
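+ // Commit any pending RTL-SSA changes and tear down the state built up
+ // by the constructor.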
+ ~pair_fusion ()
+ {
+ if (crtl->ssa->perform_pending_updates ())
+ cleanup_cfg (0);
+
+ free_dominance_info (CDI_DOMINATORS);
+
+ delete crtl->ssa;
+ crtl->ssa = nullptr;
+ }
+};
+
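+// Run the pass over each basic block, looking for candidate accesses to
+// fuse, unless the target tracks neither loads nor stores.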
+void
+pair_fusion::run ()
+{
+ if (!track_loads_p () && !track_stores_p ())
+ return;
+
+ for (auto bb : crtl->ssa->bbs ())
+ ldp_fusion_bb (bb);
+}
+
+struct aarch64_pair_fusion : public pair_fusion
+{
+ bool fpsimd_op_p (rtx reg_op, machine_mode mem_mode,
+ bool load_p) override final
+ {
+ return reload_completed
+ ? (REG_P (reg_op) && FP_REGNUM_P (REGNO (reg_op)))
+ : (GET_MODE_CLASS (mem_mode) != MODE_INT
+ && (load_p || !aarch64_const_zero_rtx_p (reg_op)));
+ }
+ bool pair_mem_promote_writeback_p (insn_info *insn, rtx pat, bool &load_p);
+ bool pair_mem_ok_with_policy (rtx first_mem, bool load_p,
+ machine_mode mode)
+ {
+ return aarch64_mem_ok_with_ldpstp_policy_model (first_mem,
+ load_p,
+ mode);
+ }
+ bool pair_operand_mode_ok_p (machine_mode mode)
+ {
+ return ldp_operand_mode_ok_p (mode);
+ }
+ rtx gen_mem_pair (rtx *pats, rtx writeback, bool load_p);
+ bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
+ machine_mode mem_mode)
+ {
+ return (load_p
+ ? aarch64_ldp_reg_operand (reg_op, mem_mode)
+ : aarch64_stp_reg_operand (reg_op, mem_mode));
+ }
+ int pair_mem_alias_check_limit ()
+ {
+ return aarch64_ldp_alias_check_limit;
+ }
+ bool handle_writeback_opportunities (enum writeback wback)
+ {
+ if (wback == writeback::WRITEBACK_PAIR_P)
+ return aarch64_ldp_writeback > 1;
+ else
+ return aarch64_ldp_writeback;
+ }
+ bool track_loads_p ()
+ {
+ return
+ aarch64_tune_params.ldp_policy_model != AARCH64_LDP_STP_POLICY_NEVER;
+ }
+ bool track_stores_p ()
+ {
+ return
+ aarch64_tune_params.stp_policy_model != AARCH64_LDP_STP_POLICY_NEVER;
+ }
+ bool pair_mem_out_of_range_p (HOST_WIDE_INT off)
+ {
+ return (off < LDP_MIN_IMM || off > LDP_MAX_IMM);
+ }
+ rtx gen_writeback_pair (rtx wb_effect, rtx mem, rtx regs[2], bool load_p)
+ {
+ return aarch64_gen_writeback_pair (wb_effect, mem, regs, load_p);
+ }
+ rtx gen_destructure_pair (rtx regs[2], rtx rti, bool load_p);
+ bool pair_offset_alignment_ok_p (poly_int64 offset,
+ unsigned access_size)
+ {
+ return multiple_p (offset, access_size);
+ }
+};
+
// State used by the pass for a given basic block.
struct ldp_bb_info
{
@@ -160,8 +352,11 @@ struct ldp_bb_info
static const size_t obstack_alignment = sizeof (void *);
bb_info *m_bb;
+ pair_fusion *m_pass;
- ldp_bb_info (bb_info *bb) : m_bb (bb), m_emitted_tombstone (false)
+ ldp_bb_info (bb_info *bb, pair_fusion *pass)
+   : m_bb (bb), m_pass (pass), m_emitted_tombstone (false)
{
obstack_specify_allocation (&m_obstack, OBSTACK_CHUNK_SIZE,
obstack_alignment, obstack_chunk_alloc,
@@ -177,10 +372,28 @@ struct ldp_bb_info
bitmap_obstack_release (&m_bitmap_obstack);
}
}
-
inline void track_access (insn_info *, bool load, rtx mem);
inline void transform ();
inline void cleanup_tombstones ();
+ inline void merge_pairs (insn_list_t &, insn_list_t &,
+ bool load_p, unsigned access_size);
+ inline void transform_for_base (int load_size, access_group &group);
+
+ inline bool try_fuse_pair (bool load_p, unsigned access_size,
+ insn_info *i1, insn_info *i2);
+
+ inline bool fuse_pair (bool load_p, unsigned access_size,
+ int writeback,
+ insn_info *i1, insn_info *i2,
+ base_cand &base,
+ const insn_range_info &move_range);
+
+ inline void track_tombstone (int uid);
+
+ inline bool track_via_mem_expr (insn_info *, rtx mem, lfs_fields lfs);
+
+ template<typename Map>
+ void traverse_base_map (Map &map);
private:
obstack m_obstack;
@@ -191,27 +404,60 @@ private:
bool m_emitted_tombstone;
inline splay_tree_node<access_record *> *node_alloc (access_record *);
+};
- template<typename Map>
- inline void traverse_base_map (Map &map);
- inline void transform_for_base (int load_size, access_group &group);
-
- inline void merge_pairs (insn_list_t &, insn_list_t &,
- bool load_p, unsigned access_size);
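+// Return true if INSN is an existing pair (which appears as a two-element
+// PARALLEL pattern PAT) that should be promoted to use writeback
+// addressing, setting LOAD_P according to whether it is a load pair.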
+bool
+aarch64_pair_fusion::pair_mem_promote_writeback_p (insn_info *insn, rtx pat,
+ bool &load_p)
+{
+ if (reload_completed
+ && aarch64_ldp_writeback > 1
+ && GET_CODE (pat) == PARALLEL
+ && XVECLEN (pat, 0) == 2)
+ {
+ auto rti = insn->rtl ();
+ const auto attr = get_attr_ldpstp (rti);
+ if (attr == LDPSTP_NONE)
+ return false;
- inline bool try_fuse_pair (bool load_p, unsigned access_size,
- insn_info *i1, insn_info *i2);
+ load_p = (attr == LDPSTP_LDP);
+ gcc_checking_assert (load_p || attr == LDPSTP_STP);
+ return true;
+ }
+ return false;
+}
- inline bool fuse_pair (bool load_p, unsigned access_size,
- int writeback,
- insn_info *i1, insn_info *i2,
- base_cand &base,
- const insn_range_info &move_range);
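+// Unpack the pair insn pattern RTI into REGS and return the mem,
+// dispatching on LOAD_P to the load- or store-pair destructuring helper.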
+rtx
+aarch64_pair_fusion::gen_destructure_pair (rtx regs[2], rtx rti, bool load_p)
+{
+ if (load_p)
+ return aarch64_destructure_load_pair (regs, rti);
+ else
+ return aarch64_destructure_store_pair (regs, rti);
+}
- inline void track_tombstone (int uid);
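+// Emit the RTL pattern for a paired access. PATS gives the patterns of the
+// two individual accesses; if WRITEBACK is non-null, the writeback effect is
+// combined with both accesses into a single PARALLEL.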
+rtx
+aarch64_pair_fusion::gen_mem_pair (rtx *pats,
+ rtx writeback,
+ bool load_p)
+{
- inline bool track_via_mem_expr (insn_info *, rtx mem, lfs_fields lfs);
-};
+ if (writeback)
+ {
+ auto patvec = gen_rtvec (3, writeback, pats[0], pats[1]);
+ return gen_rtx_PARALLEL (VOIDmode, patvec);
+ }
+ else if (load_p)
+ return aarch64_gen_load_pair (XEXP (pats[0], 0),
+ XEXP (pats[1], 0),
+ XEXP (pats[0], 1));
+ else
+ return aarch64_gen_store_pair (XEXP (pats[0], 0),
+ XEXP (pats[0], 1),
+ XEXP (pats[1], 1));
+}
splay_tree_node<access_record *> *
ldp_bb_info::node_alloc (access_record *access)
@@ -312,7 +558,7 @@ any_post_modify_p (rtx x)
// Return true if we should consider forming ldp/stp insns from memory
// accesses with operand mode MODE at this stage in compilation.
-static bool
+bool
ldp_operand_mode_ok_p (machine_mode mode)
{
const bool allow_qregs
@@ -412,9 +658,10 @@ ldp_bb_info::track_via_mem_expr (insn_info *insn, rtx mem, lfs_fields lfs)
const machine_mode mem_mode = GET_MODE (mem);
const HOST_WIDE_INT mem_size = GET_MODE_SIZE (mem_mode).to_constant ();
- // Punt on misaligned offsets. LDP/STP instructions require offsets to be a
- // multiple of the access size, and we believe that misaligned offsets on
- // MEM_EXPR bases are likely to lead to misaligned offsets w.r.t. RTL bases.
+ // Punt on misaligned offsets. Paired memory access instructions require
+ // offsets to be a multiple of the access size, and we believe that
+ // misaligned offsets on MEM_EXPR bases are likely to lead to misaligned
+ // offsets w.r.t. RTL bases.
if (!multiple_p (offset, mem_size))
return false;
@@ -438,10 +685,10 @@ ldp_bb_info::track_via_mem_expr (insn_info *insn, rtx mem, lfs_fields lfs)
}
// Main function to begin pair discovery. Given a memory access INSN,
-// determine whether it could be a candidate for fusing into an ldp/stp,
-// and if so, track it in the appropriate data structure for this basic
-// block. LOAD_P is true if the access is a load, and MEM is the mem
-// rtx that occurs in INSN.
+// determine whether it could be a candidate for fusing into a paired
+// access, and if so, track it in the appropriate data structure for
+// this basic block. LOAD_P is true if the access is a load, and MEM
+// is the mem rtx that occurs in INSN.
void
ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
{
@@ -449,35 +696,26 @@ ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
if (MEM_VOLATILE_P (mem))
return;
- // Ignore writeback accesses if the param says to do so.
- if (!aarch64_ldp_writeback
+ // Ignore writeback accesses if the hook says to do so.
+ if (!m_pass->handle_writeback_opportunities (writeback::WRITEBACK)
&& GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
return;
const machine_mode mem_mode = GET_MODE (mem);
- if (!ldp_operand_mode_ok_p (mem_mode))
+ if (!m_pass->pair_operand_mode_ok_p (mem_mode))
return;
rtx reg_op = XEXP (PATTERN (insn->rtl ()), !load_p);
- // Ignore the access if the register operand isn't suitable for ldp/stp.
- if (load_p
- ? !aarch64_ldp_reg_operand (reg_op, mem_mode)
- : !aarch64_stp_reg_operand (reg_op, mem_mode))
+ if (!m_pass->pair_reg_operand_ok_p (load_p, reg_op, mem_mode))
return;
-
// We want to segregate FP/SIMD accesses from GPR accesses.
//
// Before RA, we use the modes, noting that stores of constant zero
// operands use GPRs (even in non-integer modes). After RA, we use
// the hard register numbers.
- const bool fpsimd_op_p
- = reload_completed
- ? (REG_P (reg_op) && FP_REGNUM_P (REGNO (reg_op)))
- : (GET_MODE_CLASS (mem_mode) != MODE_INT
- && (load_p || !aarch64_const_zero_rtx_p (reg_op)));
-
- // Note ldp_operand_mode_ok_p already rejected VL modes.
+ const bool fpsimd_op_p = m_pass->fpsimd_op_p (reg_op, mem_mode, load_p);
+ // Note pair_operand_mode_ok_p already rejected VL modes.
const HOST_WIDE_INT mem_size = GET_MODE_SIZE (mem_mode).to_constant ();
const lfs_fields lfs = { load_p, fpsimd_op_p, mem_size };
@@ -506,8 +744,8 @@ ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
// elimination offset pre-RA, we should postpone forming pairs on such
// accesses until after RA.
//
- // As it stands, addresses with offsets in range for LDR but not
- // in range for LDP/STP are currently reloaded inefficiently,
+ // As it stands, addresses in range for an individual load/store but not
+ // for a paired access are currently reloaded inefficiently,
// ending up with a separate base register for each pair.
//
// In theory LRA should make use of
@@ -519,8 +757,8 @@ ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
// that calls targetm.legitimize_address_displacement.
//
// So for now, it's better to punt when we can't be sure that the
- // offset is in range for LDP/STP. Out-of-range cases can then be
- // handled after RA by the out-of-range LDP/STP peepholes. Eventually, it
+ // offset is in range for a paired access. Out-of-range cases can then be
+ // handled after RA by the out-of-range pair peepholes. Eventually, it
// would be nice to handle known out-of-range opportunities in the
// pass itself (for stack accesses, this would be in the post-RA pass).
if (!reload_completed
@@ -573,8 +811,8 @@ ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
gcc_unreachable (); // Base defs should be unique.
}
- // Punt on misaligned offsets. LDP/STP require offsets to be a multiple of
- // the access size.
+ // Punt on misaligned offsets. Paired memory accesses require offsets
+ // to be a multiple of the access size.
if (!multiple_p (mem_off, mem_size))
return;
@@ -1207,8 +1445,8 @@ extract_writebacks (bool load_p, rtx pats[2], int changed)
// base register. If there is one, we choose the first such update after
// PAIR_DST that is still in the same BB as our pair. We return the new def in
// *ADD_DEF and the resulting writeback effect in *WRITEBACK_EFFECT.
-static insn_info *
-find_trailing_add (insn_info *insns[2],
+insn_info *
+pair_fusion::find_trailing_add (insn_info *insns[2],
const insn_range_info &pair_range,
int initial_writeback,
rtx *writeback_effect,
@@ -1286,7 +1524,7 @@ find_trailing_add (insn_info *insns[2],
off_hwi /= access_size;
- if (off_hwi < LDP_MIN_IMM || off_hwi > LDP_MAX_IMM)
+ if (pair_mem_out_of_range_p (off_hwi))
return nullptr;
auto dump_prefix = [&]()
@@ -1800,7 +2038,7 @@ ldp_bb_info::fuse_pair (bool load_p,
{
if (dump_file)
fprintf (dump_file,
- " ldp: i%d has wb but subsequent i%d has non-wb "
+ " load pair: i%d has wb but subsequent i%d has non-wb "
"update of base (r%d), dropping wb\n",
insns[0]->uid (), insns[1]->uid (), base_regno);
gcc_assert (writeback_effect);
@@ -1823,7 +2061,7 @@ ldp_bb_info::fuse_pair (bool load_p,
}
// If either of the original insns had writeback, but the resulting pair insn
- // does not (can happen e.g. in the ldp edge case above, or if the writeback
+ // does not (can happen e.g. in the load pair edge case above, or if the writeback
// effects cancel out), then drop the def(s) of the base register as
// appropriate.
//
@@ -1842,7 +2080,7 @@ ldp_bb_info::fuse_pair (bool load_p,
// update of the base register and try and fold it in to make this into a
// writeback pair.
insn_info *trailing_add = nullptr;
- if (aarch64_ldp_writeback > 1
+ if (m_pass->handle_writeback_opportunities (writeback::WRITEBACK_PAIR_P)
&& !writeback_effect
&& (!load_p || (!refers_to_regno_p (base_regno, base_regno + 1,
XEXP (pats[0], 0), nullptr)
@@ -1850,7 +2088,7 @@ ldp_bb_info::fuse_pair (bool load_p,
XEXP (pats[1], 0), nullptr))))
{
def_info *add_def;
- trailing_add = find_trailing_add (insns, move_range, writeback,
+ trailing_add = m_pass->find_trailing_add (insns, move_range, writeback,
&writeback_effect,
&add_def, base.def, offsets[0],
access_size);
@@ -1863,14 +2101,14 @@ ldp_bb_info::fuse_pair (bool load_p,
}
// Now that we know what base mem we're going to use, check if it's OK
- // with the ldp/stp policy.
+ // with the pair mem policy.
rtx first_mem = XEXP (pats[0], load_p);
- if (!aarch64_mem_ok_with_ldpstp_policy_model (first_mem,
- load_p,
- GET_MODE (first_mem)))
+ if (!m_pass->pair_mem_ok_with_policy (first_mem,
+ load_p,
+ GET_MODE (first_mem)))
{
if (dump_file)
- fprintf (dump_file, "punting on pair (%d,%d), ldp/stp policy says no\n",
+ fprintf (dump_file, "punting on pair (%d,%d), pair mem policy says no\n",
i1->uid (), i2->uid ());
return false;
}
@@ -1878,21 +2116,10 @@ ldp_bb_info::fuse_pair (bool load_p,
rtx reg_notes = combine_reg_notes (first, second, load_p);
rtx pair_pat;
- if (writeback_effect)
- {
- auto patvec = gen_rtvec (3, writeback_effect, pats[0], pats[1]);
- pair_pat = gen_rtx_PARALLEL (VOIDmode, patvec);
- }
- else if (load_p)
- pair_pat = aarch64_gen_load_pair (XEXP (pats[0], 0),
- XEXP (pats[1], 0),
- XEXP (pats[0], 1));
- else
- pair_pat = aarch64_gen_store_pair (XEXP (pats[0], 0),
- XEXP (pats[0], 1),
- XEXP (pats[1], 1));
+ pair_pat = m_pass->gen_mem_pair (pats, writeback_effect, load_p);
insn_change *pair_change = nullptr;
auto set_pair_pat = [pair_pat,reg_notes](insn_change *change) {
rtx_insn *rti = change->insn ()->rtl ();
validate_unshare_change (rti, &PATTERN (rti), pair_pat, true);
@@ -2133,15 +2360,6 @@ load_modified_by_store_p (insn_info *load,
return false;
}
-// Virtual base class for load/store walkers used in alias analysis.
-struct alias_walker
-{
- virtual bool conflict_p (int &budget) const = 0;
- virtual insn_info *insn () const = 0;
- virtual bool valid () const = 0;
- virtual void advance () = 0;
-};
-
// Implement some common functionality used by both store_walker
// and load_walker.
template<bool reverse>
@@ -2259,13 +2477,13 @@ public:
//
// We try to maintain the invariant that if a walker becomes invalid, we
// set its pointer to null.
-static void
-do_alias_analysis (insn_info *alias_hazards[4],
+void
+pair_fusion::do_alias_analysis (insn_info *alias_hazards[4],
alias_walker *walkers[4],
bool load_p)
{
const int n_walkers = 2 + (2 * !load_p);
- int budget = aarch64_ldp_alias_check_limit;
+ int budget = pair_mem_alias_check_limit ();
auto next_walker = [walkers,n_walkers](int current) -> int {
for (int j = 1; j <= n_walkers; j++)
@@ -2350,8 +2568,8 @@ do_alias_analysis (insn_info *alias_hazards[4],
//
// Returns an integer where bit (1 << i) is set if INSNS[i] uses writeback
// addressing.
-static int
-get_viable_bases (insn_info *insns[2],
+int
+pair_fusion::get_viable_bases (insn_info *insns[2],
vec<base_cand> &base_cands,
rtx cand_mems[2],
unsigned access_size,
@@ -2397,7 +2615,7 @@ get_viable_bases (insn_info *insns[2],
if (!is_lower)
base_off--;
- if (base_off < LDP_MIN_IMM || base_off > LDP_MAX_IMM)
+ if (pair_mem_out_of_range_p (base_off))
continue;
use_info *use = find_access (insns[i]->uses (), REGNO (base));
@@ -2454,7 +2672,7 @@ get_viable_bases (insn_info *insns[2],
}
// Given two adjacent memory accesses of the same size, I1 and I2, try
-// and see if we can merge them into a ldp or stp.
+// and see if we can merge them into a single paired access.
//
// ACCESS_SIZE gives the (common) size of a single access, LOAD_P is true
// if the accesses are both loads, otherwise they are both stores.
@@ -2494,7 +2712,7 @@ ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
{
if (dump_file)
fprintf (dump_file,
- "punting on ldp due to reg conflcits (%d,%d)\n",
+ "punting on pair mem load due to reg conflcits (%d,%d)\n",
insns[0]->uid (), insns[1]->uid ());
return false;
}
@@ -2512,7 +2730,7 @@ ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
auto_vec<base_cand, 2> base_cands (2);
- int writeback = get_viable_bases (insns, base_cands, cand_mems,
+ int writeback = m_pass->get_viable_bases (insns, base_cands, cand_mems,
access_size, reversed);
if (base_cands.is_empty ())
{
@@ -2641,7 +2859,7 @@ ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
walkers[1] = &backward_store_walker;
if (load_p && (mem_defs[0] || mem_defs[1]))
- do_alias_analysis (alias_hazards, walkers, load_p);
+ m_pass->do_alias_analysis (alias_hazards, walkers, load_p);
else
{
// We want to find any loads hanging off the first store.
@@ -2650,7 +2868,7 @@ ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
load_walker<true> backward_load_walker (mem_defs[1], insns[1], insns[0]);
walkers[2] = &forward_load_walker;
walkers[3] = &backward_load_walker;
- do_alias_analysis (alias_hazards, walkers, load_p);
+ m_pass->do_alias_analysis (alias_hazards, walkers, load_p);
// Now consolidate hazards back down.
if (alias_hazards[2]
&& (!alias_hazards[0] || (*alias_hazards[2] < *alias_hazards[0])))
@@ -2891,7 +3109,7 @@ ldp_bb_info::merge_pairs (insn_list_t &left_list,
// merge_pairs.
void
ldp_bb_info::transform_for_base (int encoded_lfs,
- access_group &group)
+ access_group &group)
{
const auto lfs = decode_lfs (encoded_lfs);
const unsigned access_size = lfs.size;
@@ -2903,7 +3121,9 @@ ldp_bb_info::transform_for_base (int encoded_lfs,
{
if (skip_next)
skip_next = false;
- else if (known_eq (access.offset, prev_access->offset + access_size))
+ else if (m_pass->pair_offset_alignment_ok_p (prev_access->offset,
+ access_size)
+ && known_eq (access.offset, prev_access->offset + access_size))
{
merge_pairs (prev_access->cand_insns,
access.cand_insns,
@@ -2964,29 +3184,9 @@ ldp_bb_info::transform ()
traverse_base_map (def_map);
}
-static void
-ldp_fusion_init ()
-{
- calculate_dominance_info (CDI_DOMINATORS);
- df_analyze ();
- crtl->ssa = new rtl_ssa::function_info (cfun);
-}
-
-static void
-ldp_fusion_destroy ()
-{
- if (crtl->ssa->perform_pending_updates ())
- cleanup_cfg (0);
-
- free_dominance_info (CDI_DOMINATORS);
-
- delete crtl->ssa;
- crtl->ssa = nullptr;
-}
-
// Given a load pair insn in PATTERN, unpack the insn, storing
// the registers in REGS and returning the mem.
-static rtx
+rtx
aarch64_destructure_load_pair (rtx regs[2], rtx pattern)
{
rtx mem = NULL_RTX;
@@ -3012,7 +3212,7 @@ aarch64_destructure_load_pair (rtx regs[2], rtx pattern)
// Given a store pair insn in PATTERN, unpack the insn, storing
// the register operands in REGS, and returning the mem.
-static rtx
+rtx
aarch64_destructure_store_pair (rtx regs[2], rtx pattern)
{
rtx mem = XEXP (pattern, 0);
@@ -3030,7 +3230,7 @@ aarch64_destructure_store_pair (rtx regs[2], rtx pattern)
//
// This is used when promoting existing non-writeback pairs to writeback
// variants.
-static rtx
+rtx
aarch64_gen_writeback_pair (rtx wb_effect, rtx pair_mem, rtx regs[2],
bool load_p)
{
@@ -3068,22 +3268,13 @@ aarch64_gen_writeback_pair (rtx wb_effect, rtx pair_mem, rtx regs[2],
// the base register which we can fold in to make this pair use
// a writeback addressing mode.
static void
-try_promote_writeback (insn_info *insn)
+try_promote_writeback (insn_info *insn, bool load_p, pair_fusion *pass)
{
- auto rti = insn->rtl ();
- const auto attr = get_attr_ldpstp (rti);
- if (attr == LDPSTP_NONE)
- return;
-
- bool load_p = (attr == LDPSTP_LDP);
- gcc_checking_assert (load_p || attr == LDPSTP_STP);
-
rtx regs[2];
rtx mem = NULL_RTX;
- if (load_p)
- mem = aarch64_destructure_load_pair (regs, PATTERN (rti));
- else
- mem = aarch64_destructure_store_pair (regs, PATTERN (rti));
+
+ mem = pass->gen_destructure_pair (regs, PATTERN (insn->rtl ()), load_p);
+
gcc_checking_assert (MEM_P (mem));
poly_int64 offset;
@@ -3120,9 +3311,10 @@ try_promote_writeback (insn_info *insn)
def_info *add_def;
const insn_range_info pair_range (insn);
insn_info *insns[2] = { nullptr, insn };
- insn_info *trailing_add = find_trailing_add (insns, pair_range, 0, &wb_effect,
- &add_def, base_def, offset,
- access_size);
+ insn_info *trailing_add
+ = pass->find_trailing_add (insns, pair_range, 0, &wb_effect,
+ &add_def, base_def, offset,
+ access_size);
if (!trailing_add)
return;
@@ -3132,8 +3324,9 @@ try_promote_writeback (insn_info *insn)
insn_change del_change (trailing_add, insn_change::DELETE);
insn_change *changes[] = { &pair_change, &del_change };
- rtx pair_pat = aarch64_gen_writeback_pair (wb_effect, mem, regs, load_p);
- validate_unshare_change (rti, &PATTERN (rti), pair_pat, true);
+ rtx pair_pat = pass->gen_writeback_pair (wb_effect, mem, regs, load_p);
+ rtx_insn *rti = insn->rtl ();
+ validate_unshare_change (rti, &PATTERN (rti), pair_pat, true);
// The pair must gain the def of the base register from the add.
pair_change.new_defs = insert_access (attempt,
@@ -3167,14 +3360,12 @@ try_promote_writeback (insn_info *insn)
// for load/store candidates. If running after RA, also try and promote
// non-writeback pairs to use writeback addressing. Then try to fuse
// candidates into pairs.
-void ldp_fusion_bb (bb_info *bb)
+void pair_fusion::ldp_fusion_bb (bb_info *bb)
{
- const bool track_loads
- = aarch64_tune_params.ldp_policy_model != AARCH64_LDP_STP_POLICY_NEVER;
- const bool track_stores
- = aarch64_tune_params.stp_policy_model != AARCH64_LDP_STP_POLICY_NEVER;
+ const bool track_loads = track_loads_p ();
+ const bool track_stores = track_stores_p ();
- ldp_bb_info bb_state (bb);
+ ldp_bb_info bb_state (bb, this);
for (auto insn : bb->nondebug_insns ())
{
@@ -3184,11 +3375,9 @@ void ldp_fusion_bb (bb_info *bb)
continue;
rtx pat = PATTERN (rti);
- if (reload_completed
- && aarch64_ldp_writeback > 1
- && GET_CODE (pat) == PARALLEL
- && XVECLEN (pat, 0) == 2)
- try_promote_writeback (insn);
+ bool load_p;
+ if (pair_mem_promote_writeback_p (insn, pat, load_p))
+ try_promote_writeback (insn, load_p, this);
if (GET_CODE (pat) != SET)
continue;
@@ -3205,12 +3394,8 @@ void ldp_fusion_bb (bb_info *bb)
void ldp_fusion ()
{
- ldp_fusion_init ();
-
- for (auto bb : crtl->ssa->bbs ())
- ldp_fusion_bb (bb);
-
- ldp_fusion_destroy ();
+ aarch64_pair_fusion pass;
+ pass.run ();
}
namespace {
@@ -3242,14 +3427,6 @@ public:
if (!optimize || optimize_debug)
return false;
- // If the tuning policy says never to form ldps or stps, don't run
- // the pass.
- if ((aarch64_tune_params.ldp_policy_model
- == AARCH64_LDP_STP_POLICY_NEVER)
- && (aarch64_tune_params.stp_policy_model
- == AARCH64_LDP_STP_POLICY_NEVER))
- return false;
-
if (reload_completed)
return flag_aarch64_late_ldp_fusion;
else