i386: Support APX NF and NDD for imul/mul
Checks
Commit Message
Add some missing APX NF and NDD support for imul and mul.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?
gcc/ChangeLog:
* config/i386/i386.md (*imulhi<mode>zu): Added APX
NF support.
(*imulhi<mode>zu<nf_name>): New define_insn.
(*mulsi3_1_zext<nf_name>): Ditto.
(*mul<mode><dwi>3_1<nf_name>): Ditto.
(*<u>mulqihi3_1<nf_name>): Ditto.
(*mul<mode>3_1<nf_name>): Added APX NDD support.
(*mulv<mode>4): Ditto.
(*mulvhi4): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add test for imul ndd.
---
gcc/config/i386/i386.md | 98 +++++++++++++------------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 8 ++
2 files changed, 61 insertions(+), 45 deletions(-)
"lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
@@ -200,3 +206,5 @@ FOO4 (uint64_t, rol, <<, >>, 1)
/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "ror(?:b|l|w|q)\[^\n\r]*1, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "rol(?:b|l|w|q)\[^\n\r]*1, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "imul(?:l|q)\[^\n\r]%(?:|r|e)(?:|s|d)i, %(?:r|e)(?:|s|d)i, %(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "imul(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)si, %(?:|r|e)ax" 3 } } */
--
2.31.1
Comments
On Mon, Jul 1, 2024 at 4:51 PM kong lingling <lingling.kong7@gmail.com> wrote:
>
> Add some missing APX NF and NDD support for imul and mul.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
>
> Ok for trunk?
Ok.
>
>
> gcc/ChangeLog:
>
> * config/i386/i386.md (*imulhi<mode>zu): Added APX
> NF support.
> (*imulhi<mode>zu<nf_name>): New define_insn.
> (*mulsi3_1_zext<nf_name>): Ditto.
> (*mul<mode><dwi>3_1<nf_name>): Ditto.
> (*<u>mulqihi3_1<nf_name>): Ditto.
> (*mul<mode>3_1<nf_name>): Added APX NDD support.
> (*mulv<mode>4): Ditto.
> (*mulvhi4): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/apx-ndd.c: Add test for imul ndd.
> ---
> gcc/config/i386/i386.md | 98 +++++++++++++------------
> gcc/testsuite/gcc.target/i386/apx-ndd.c | 8 ++
> 2 files changed, 61 insertions(+), 45 deletions(-)
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index fd48e764469..c1f29fee412 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -6488,8 +6488,8 @@
> (define_subst_attr "nf_nonf_x64_attr" "nf_subst" "noapx_nf" "x64")
>
> (define_subst "nf_subst"
> - [(set (match_operand:SWI 0)
> - (match_operand:SWI 1))]
> + [(set (match_operand:SWIDWI 0)
> + (match_operand:SWIDWI 1))]
> ""
> [(set (match_dup 0)
> (match_dup 1))
> @@ -10028,24 +10028,26 @@
> ;; On BDVER1, all HI MULs use DoublePath
>
> (define_insn "*mul<mode>3_1<nf_name>"
> - [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
> + [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r,r")
> (mult:SWIM248
> - (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
> - (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))]
> + (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0,r")
> + (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r,<m>r")))]
> "!(MEM_P (operands[1]) && MEM_P (operands[2]))
> && <nf_condition>"
> "@
> <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
> <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
> - <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}"
> + <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}
> + <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> [(set_attr "type" "imul")
> - (set_attr "prefix_0f" "0,0,1")
> + (set_attr "prefix_0f" "0,0,1,1")
> + (set_attr "isa" "*,*,*,apx_ndd")
> (set (attr "athlon_decode")
> (cond [(eq_attr "cpu" "athlon")
> (const_string "vector")
> (eq_attr "alternative" "1")
> (const_string "vector")
> - (and (eq_attr "alternative" "2")
> + (and (eq_attr "alternative" "2,3")
> (ior (match_test "<MODE>mode == HImode")
> (match_operand 1 "memory_operand")))
> (const_string "vector")]
> @@ -10063,33 +10065,34 @@
> (const_string "direct")))
> (set_attr "mode" "<MODE>")])
>
> -(define_insn "*imulhi<mode>zu"
> +(define_insn "*imulhi<mode>zu<nf_name>"
> [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
> (zero_extend:SWI48x
> (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm")
> - (match_operand:HI 2 "immediate_operand" "K,n"))))
> - (clobber (reg:CC FLAGS_REG))]
> - "TARGET_APX_ZU"
> + (match_operand:HI 2 "immediate_operand" "K,n"))))]
> + "TARGET_APX_ZU && <nf_condition>"
> "@
> - imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
> - imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
> + <nf_prefix>imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
> + <nf_prefix>imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
> [(set_attr "type" "imul")
> (set_attr "mode" "HI")])
>
> -(define_insn "*mulsi3_1_zext"
> - [(set (match_operand:DI 0 "register_operand" "=r,r,r")
> +(define_insn "*mulsi3_1_zext<nf_name>"
> + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
> (zero_extend:DI
> - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
> - (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr"))))
> - (clobber (reg:CC FLAGS_REG))]
> + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0,r")
> + (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr,BMr"))))]
> "TARGET_64BIT
> - && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> + && !(MEM_P (operands[1]) && MEM_P (operands[2]))
> + && <nf_condition>"
> "@
> - imul{l}\t{%2, %1, %k0|%k0, %1, %2}
> - imul{l}\t{%2, %1, %k0|%k0, %1, %2}
> - imul{l}\t{%2, %k0|%k0, %2}"
> + <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}
> + <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}
> + <nf_prefix>imul{l}\t{%2, %k0|%k0, %2}
> + <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}"
> [(set_attr "type" "imul")
> - (set_attr "prefix_0f" "0,0,1")
> + (set_attr "prefix_0f" "0,0,1,1")
> + (set_attr "isa" "*,*,*,apx_ndd")
> (set (attr "athlon_decode")
> (cond [(eq_attr "cpu" "athlon")
> (const_string "vector")
> @@ -10158,30 +10161,32 @@
> [(set (reg:CCO FLAGS_REG)
> (eq:CCO (mult:<DWI>
> (sign_extend:<DWI>
> - (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
> + (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0,r"))
> (sign_extend:<DWI>
> - (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr")))
> + (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr,mr")))
> (sign_extend:<DWI>
> (mult:SWI48 (match_dup 1) (match_dup 2)))))
> - (set (match_operand:SWI48 0 "register_operand" "=r,r")
> + (set (match_operand:SWI48 0 "register_operand" "=r,r,r")
> (mult:SWI48 (match_dup 1) (match_dup 2)))]
> "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
> "@
> imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
> - imul{<imodesuffix>}\t{%2, %0|%0, %2}"
> + imul{<imodesuffix>}\t{%2, %0|%0, %2}
> + imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> [(set_attr "type" "imul")
> - (set_attr "prefix_0f" "0,1")
> + (set_attr "prefix_0f" "0,1,1")
> + (set_attr "isa" "*,*,apx_ndd")
> (set (attr "athlon_decode")
> (cond [(eq_attr "cpu" "athlon")
> (const_string "vector")
> (eq_attr "alternative" "0")
> (const_string "vector")
> - (and (eq_attr "alternative" "1")
> + (and (eq_attr "alternative" "1,2")
> (match_operand 1 "memory_operand"))
> (const_string "vector")]
> (const_string "direct")))
> (set (attr "amdfam10_decode")
> - (cond [(and (eq_attr "alternative" "1")
> + (cond [(and (eq_attr "alternative" "1,2")
> (match_operand 1 "memory_operand"))
> (const_string "vector")]
> (const_string "direct")))
> @@ -10192,17 +10197,20 @@
> [(set (reg:CCO FLAGS_REG)
> (eq:CCO (mult:SI
> (sign_extend:SI
> - (match_operand:HI 1 "nonimmediate_operand" "%0"))
> + (match_operand:HI 1 "nonimmediate_operand" "%0,r"))
> (sign_extend:SI
> - (match_operand:HI 2 "nonimmediate_operand" "mr")))
> + (match_operand:HI 2 "nonimmediate_operand" "mr,mr")))
> (sign_extend:SI
> (mult:HI (match_dup 1) (match_dup 2)))))
> - (set (match_operand:HI 0 "register_operand" "=r")
> + (set (match_operand:HI 0 "register_operand" "=r,r")
> (mult:HI (match_dup 1) (match_dup 2)))]
> "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
> - "imul{w}\t{%2, %0|%0, %2}"
> + "@
> + imul{w}\t{%2, %0|%0, %2}
> + imul{w}\t{%2, %1, %0|%0, %1, %2}"
> [(set_attr "type" "imul")
> (set_attr "prefix_0f" "1")
> + (set_attr "isa" "*,apx_ndd")
> (set_attr "athlon_decode" "vector")
> (set_attr "amdfam10_decode" "direct")
> (set_attr "bdver1_decode" "double")
> @@ -10451,16 +10459,16 @@
> operands[5] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
> })
>
> -(define_insn "*mul<mode><dwi>3_1"
> +(define_insn "*mul<mode><dwi>3_1<nf_name>"
> [(set (match_operand:<DWI> 0 "register_operand" "=A")
> (mult:<DWI>
> (sign_extend:<DWI>
> (match_operand:DWIH 1 "register_operand" "%a"))
> (sign_extend:<DWI>
> - (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
> - (clobber (reg:CC FLAGS_REG))]
> - "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
> - "imul{<imodesuffix>}\t%2"
> + (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))]
> + "!(MEM_P (operands[1]) && MEM_P (operands[2]))
> + && <nf_condition>"
> + "<nf_prefix>imul{<imodesuffix>}\t%2"
> [(set_attr "type" "imul")
> (set_attr "length_immediate" "0")
> (set (attr "athlon_decode")
> @@ -10471,17 +10479,17 @@
> (set_attr "bdver1_decode" "direct")
> (set_attr "mode" "<MODE>")])
>
> -(define_insn "*<u>mulqihi3_1"
> +(define_insn "*<u>mulqihi3_1<nf_name>"
> [(set (match_operand:HI 0 "register_operand" "=a")
> (mult:HI
> (any_extend:HI
> (match_operand:QI 1 "register_operand" "%0"))
> (any_extend:HI
> - (match_operand:QI 2 "nonimmediate_operand" "qm"))))
> - (clobber (reg:CC FLAGS_REG))]
> + (match_operand:QI 2 "nonimmediate_operand" "qm"))))]
> "TARGET_QIMODE_MATH
> - && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> - "<sgnprefix>mul{b}\t%2"
> + && !(MEM_P (operands[1]) && MEM_P (operands[2]))
> + && <nf_condition>"
> + "<nf_prefix><sgnprefix>mul{b}\t%2"
> [(set_attr "type" "imul")
> (set_attr "length_immediate" "0")
> (set (attr "athlon_decode")
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> index 0ff4df0780c..6c88aff911a 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> @@ -170,6 +170,12 @@ FOO4 (uint16_t, rol, <<, >>, 1)
> FOO4 (uint32_t, rol, <<, >>, 1)
> FOO4 (uint64_t, rol, <<, >>, 1)
>
> +FOO1 (short, imul, *)
> +FOO1 (int, imul, *)
> +FOO1 (int64_t, imul, *)
> +FOO2 (short, imul, *)
> +FOO2 (int, imul, *)
> +FOO2 (int64_t, imul, *)
> /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
> /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
> /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
> @@ -200,3 +206,5 @@ FOO4 (uint64_t, rol, <<, >>, 1)
> /* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
> /* { dg-final { scan-assembler-times "ror(?:b|l|w|q)\[^\n\r]*1, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
> /* { dg-final { scan-assembler-times "rol(?:b|l|w|q)\[^\n\r]*1, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
> +/* { dg-final { scan-assembler-times "imul(?:l|q)\[^\n\r]%(?:|r|e)(?:|s|d)i, %(?:r|e)(?:|s|d)i, %(?:|r|e)ax" 3 } } */
> +/* { dg-final { scan-assembler-times "imul(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)si, %(?:|r|e)ax" 3 } } */
> --
> 2.31.1
@@ -6488,8 +6488,8 @@
(define_subst_attr "nf_nonf_x64_attr" "nf_subst" "noapx_nf" "x64")
(define_subst "nf_subst"
- [(set (match_operand:SWI 0)
- (match_operand:SWI 1))]
+ [(set (match_operand:SWIDWI 0)
+ (match_operand:SWIDWI 1))]
""
[(set (match_dup 0)
(match_dup 1))
@@ -10028,24 +10028,26 @@
;; On BDVER1, all HI MULs use DoublePath
(define_insn "*mul<mode>3_1<nf_name>"
- [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
+ [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r,r")
(mult:SWIM248
- (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
- (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))]
+ (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0,r")
+ (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r,<m>r")))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))
&& <nf_condition>"
"@
<nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
<nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}"
+ <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "imul")
- (set_attr "prefix_0f" "0,0,1")
+ (set_attr "prefix_0f" "0,0,1,1")
+ (set_attr "isa" "*,*,*,apx_ndd")
(set (attr "athlon_decode")
(cond [(eq_attr "cpu" "athlon")
(const_string "vector")
(eq_attr "alternative" "1")
(const_string "vector")
- (and (eq_attr "alternative" "2")
+ (and (eq_attr "alternative" "2,3")
(ior (match_test "<MODE>mode == HImode")
(match_operand 1 "memory_operand")))
(const_string "vector")]
@@ -10063,33 +10065,34 @@
(const_string "direct")))
(set_attr "mode" "<MODE>")])
-(define_insn "*imulhi<mode>zu"
+(define_insn "*imulhi<mode>zu<nf_name>"
[(set (match_operand:SWI48x 0 "register_operand" "=r,r")
(zero_extend:SWI48x
(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm")
- (match_operand:HI 2 "immediate_operand" "K,n"))))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_APX_ZU"
+ (match_operand:HI 2 "immediate_operand" "K,n"))))]
+ "TARGET_APX_ZU && <nf_condition>"
"@
- imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
- imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
+ <nf_prefix>imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
+ <nf_prefix>imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
[(set_attr "type" "imul")
(set_attr "mode" "HI")])
-(define_insn "*mulsi3_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+(define_insn "*mulsi3_1_zext<nf_name>"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
(zero_extend:DI
- (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
- (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr"))))
- (clobber (reg:CC FLAGS_REG))]
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0,r")
+ (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr,BMr"))))]
"TARGET_64BIT
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && <nf_condition>"
"@
- imul{l}\t{%2, %1, %k0|%k0, %1, %2}
- imul{l}\t{%2, %1, %k0|%k0, %1, %2}
- imul{l}\t{%2, %k0|%k0, %2}"
+ <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ <nf_prefix>imul{l}\t{%2, %k0|%k0, %2}
+ <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "imul")
- (set_attr "prefix_0f" "0,0,1")
+ (set_attr "prefix_0f" "0,0,1,1")
+ (set_attr "isa" "*,*,*,apx_ndd")
(set (attr "athlon_decode")
(cond [(eq_attr "cpu" "athlon")
(const_string "vector")
@@ -10158,30 +10161,32 @@
[(set (reg:CCO FLAGS_REG)
(eq:CCO (mult:<DWI>
(sign_extend:<DWI>
- (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0,r"))
(sign_extend:<DWI>
- (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr")))
+ (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr,mr")))
(sign_extend:<DWI>
(mult:SWI48 (match_dup 1) (match_dup 2)))))
- (set (match_operand:SWI48 0 "register_operand" "=r,r")
+ (set (match_operand:SWI48 0 "register_operand" "=r,r,r")
(mult:SWI48 (match_dup 1) (match_dup 2)))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- imul{<imodesuffix>}\t{%2, %0|%0, %2}"
+ imul{<imodesuffix>}\t{%2, %0|%0, %2}
+ imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "imul")
- (set_attr "prefix_0f" "0,1")
+ (set_attr "prefix_0f" "0,1,1")
+ (set_attr "isa" "*,*,apx_ndd")
(set (attr "athlon_decode")
(cond [(eq_attr "cpu" "athlon")
(const_string "vector")
(eq_attr "alternative" "0")
(const_string "vector")
- (and (eq_attr "alternative" "1")
+ (and (eq_attr "alternative" "1,2")
(match_operand 1 "memory_operand"))
(const_string "vector")]
(const_string "direct")))
(set (attr "amdfam10_decode")
- (cond [(and (eq_attr "alternative" "1")
+ (cond [(and (eq_attr "alternative" "1,2")
(match_operand 1 "memory_operand"))
(const_string "vector")]
(const_string "direct")))
@@ -10192,17 +10197,20 @@
[(set (reg:CCO FLAGS_REG)
(eq:CCO (mult:SI
(sign_extend:SI
- (match_operand:HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:HI 1 "nonimmediate_operand" "%0,r"))
(sign_extend:SI
- (match_operand:HI 2 "nonimmediate_operand" "mr")))
+ (match_operand:HI 2 "nonimmediate_operand" "mr,mr")))
(sign_extend:SI
(mult:HI (match_dup 1) (match_dup 2)))))
- (set (match_operand:HI 0 "register_operand" "=r")
+ (set (match_operand:HI 0 "register_operand" "=r,r")
(mult:HI (match_dup 1) (match_dup 2)))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{w}\t{%2, %0|%0, %2}"
+ "@
+ imul{w}\t{%2, %0|%0, %2}
+ imul{w}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "imul")
(set_attr "prefix_0f" "1")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "direct")
(set_attr "bdver1_decode" "double")
@@ -10451,16 +10459,16 @@
operands[5] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
})
-(define_insn "*mul<mode><dwi>3_1"
+(define_insn "*mul<mode><dwi>3_1<nf_name>"
[(set (match_operand:<DWI> 0 "register_operand" "=A")
(mult:<DWI>
(sign_extend:<DWI>
(match_operand:DWIH 1 "register_operand" "%a"))
(sign_extend:<DWI>
- (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
- (clobber (reg:CC FLAGS_REG))]
- "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{<imodesuffix>}\t%2"
+ (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && <nf_condition>"
+ "<nf_prefix>imul{<imodesuffix>}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
@@ -10471,17 +10479,17 @@
(set_attr "bdver1_decode" "direct")
(set_attr "mode" "<MODE>")])
-(define_insn "*<u>mulqihi3_1"
+(define_insn "*<u>mulqihi3_1<nf_name>"
[(set (match_operand:HI 0 "register_operand" "=a")
(mult:HI
(any_extend:HI
(match_operand:QI 1 "register_operand" "%0"))
(any_extend:HI
- (match_operand:QI 2 "nonimmediate_operand" "qm"))))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 2 "nonimmediate_operand" "qm"))))]
"TARGET_QIMODE_MATH
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "<sgnprefix>mul{b}\t%2"
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && <nf_condition>"
+ "<nf_prefix><sgnprefix>mul{b}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -170,6 +170,12 @@ FOO4 (uint16_t, rol, <<, >>, 1)
FOO4 (uint32_t, rol, <<, >>, 1)
FOO4 (uint64_t, rol, <<, >>, 1)
+FOO1 (short, imul, *)
+FOO1 (int, imul, *)
+FOO1 (int64_t, imul, *)
+FOO2 (short, imul, *)
+FOO2 (int, imul, *)
+FOO2 (int64_t, imul, *)
/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times