[6/7] arm: elide some cases where the AES erratum workaround is not required.
Commit Message
Some common cases where the AES erratum workaround is not required are
64- or 128-bit loads from memory, moves of a 128-bit value from core
registers, and loads of a 128-bit constant from a literal pool.  The
loads may also be misaligned or generated via a Neon intrinsic
function.
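
As an illustration (my own sketch, not part of the patch), the Neon
intrinsic case looks like the following: the input to vaeseq_u8 comes
straight from a full-width 128-bit vld1q_u8 load, so when the erratum
workaround (fix_aes_erratum_1742098) is enabled the protecting vmov
that would otherwise be emitted before the AESE instruction can be
elided.

    /* Hypothetical example, not from the patch.  The operand of the AES
       instruction is produced by a 128-bit load, so it is already safe.  */
    #include <arm_neon.h>

    uint8x16_t
    aes_round (const uint8_t *in, uint8x16_t key)
    {
      uint8x16_t state = vld1q_u8 (in);   /* full-width 128-bit load */
      return vaesmcq_u8 (vaeseq_u8 (state, key));
    }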
gcc/ChangeLog:
* config/arm/crypto.md (aes_op_protect): Allow moves from core
registers and from memory.
(aes_op_protect_misalign_load): New pattern.
(aes_op_protect_neon_vld1v16qi): New pattern.
---
gcc/config/arm/crypto.md | 55 ++++++++++++++++++++++++++++++++++------
1 file changed, 47 insertions(+), 8 deletions(-)
@@ -62,17 +62,56 @@ (define_insn "*crypto_<CRYPTO_AES:crypto_pattern>_insn"
[(set_attr "type" "<crypto_type>")]
)
-; Mitigate against AES erratum on Cortex-A57 and Cortex-A72 by performing
-; a 128-bit operation on an operand producer. This can be eliminated only
-; if we know that the operand was produced by a full-width operation.
-; V16QImode matches <crypto_mode> for the AES instructions.
+;; Mitigate against AES erratum on Cortex-A57 and Cortex-A72 by
+;; performing a 128-bit operation on an operand producer. This can be
+;; eliminated only if we know that the operand was produced by a
+;; full-width operation. V16QImode matches <crypto_mode> for the AES
+;; instructions. Handle some very common cases where the source is
+;; known to be safe (transfers from core registers and memory).
(define_insn "aes_op_protect"
- [(set (match_operand:V16QI 0 "register_operand" "=w")
- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")]
+ [(set (match_operand:V16QI 0 "register_operand" "=w,w,w")
+ (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "w,r,Uni")]
+ UNSPEC_AES_PROTECT))]
+ "TARGET_CRYPTO && fix_aes_erratum_1742098"
+ {
+ switch (which_alternative)
+ {
+ case 0: return "vmov\t%q0, %q1";
+ case 1: return "vmov\t%e0, %Q1, %R1 @ V16QI\;vmov\t%f0, %J1, %K1";
+ case 2: return output_move_neon (operands);
+ default: gcc_unreachable ();
+ }
+ }
+ [(set_attr "type" "neon_move_q,neon_from_gp_q,neon_load1_4reg")
+ (set_attr "length" "4,8,8")
+ (set_attr "arm_pool_range" "*,*,1020")
+ (set_attr "thumb2_pool_range" "*,*,1018")
+ (set_attr "neg_pool_range" "*,*,996")]
+)
+
+;; Another safe case is when a movmisalign load is used as the source.
+(define_insn "*aes_op_protect_misalign_load"
+ [(set (match_operand:V16QI 0 "s_register_operand" "=w")
+ (unspec:V16QI
+ [(unspec:V16QI
+ [(match_operand:V16QI 1 "neon_permissive_struct_operand" "Um")]
+ UNSPEC_MISALIGNED_ACCESS)]
UNSPEC_AES_PROTECT))]
"TARGET_CRYPTO && fix_aes_erratum_1742098"
- "vmov\\t%q0, %q1"
- [(set_attr "type" "neon_move_q")]
+ "vld1.8\t%{q0}, %A1"
+ [(set_attr "type" "neon_load1_1reg_q")]
+)
+
+;; Similarly for the vld1 intrinsic
+(define_insn "aes_op_protect_neon_vld1v16qi"
+ [(set (match_operand:V16QI 0 "s_register_operand" "=w")
+ (unspec:V16QI
+ [(unspec:V16QI [(match_operand:V16QI 1 "neon_struct_operand" "Um")]
+ UNSPEC_VLD1)]
+ UNSPEC_AES_PROTECT))]
+ "TARGET_NEON"
+ "vld1.8\t%h0, %A1"
+ [(set_attr "type" "neon_load1_1reg_q")]
)
;; An AESMC operation can feed directly into a subsequent AES