[v3] LoongArch: Add prefetch instructions.

Message ID 20221116021027.519897-1-chenglulu@loongson.cn
State Committed
Commit 3138db588a46d445876c0358df55fa3995c6f221
Headers
Series [v3] LoongArch: Add prefetch instructions. |

Commit Message

Lulu Cheng Nov. 16, 2022, 2:10 a.m. UTC
  v2 -> v3:
1. Remove preldx support.

---------------------------------------
Enable sw prefetching at -O3 and higher.

Co-Authored-By: xujiahao <xujiahao@loongson.cn>

gcc/ChangeLog:

	* config/loongarch/constraints.md (ZD): New constraint.
	* config/loongarch/loongarch-def.c: Initial number of parallel prefetch.
	* config/loongarch/loongarch-tune.h (struct loongarch_cache):
	Define number of parallel prefetch.
	* config/loongarch/loongarch.cc (loongarch_option_override_internal):
	Set up parameters to be used in prefetching algorithm.
	* config/loongarch/loongarch.md (prefetch): New template.
---
 gcc/config/loongarch/constraints.md   | 10 ++++++++++
 gcc/config/loongarch/loongarch-def.c  |  2 ++
 gcc/config/loongarch/loongarch-tune.h |  1 +
 gcc/config/loongarch/loongarch.cc     | 28 +++++++++++++++++++++++++++
 gcc/config/loongarch/loongarch.md     | 14 ++++++++++++++
 5 files changed, 55 insertions(+)
  

Comments

WANG Xuerui Nov. 16, 2022, 3:06 a.m. UTC | #1
On 2022/11/16 10:10, Lulu Cheng wrote:
> v2 -> v3:
> 1. Remove preldx support.
>
> ---------------------------------------
> Enable sw prefetching at -O3 and higher.
>
> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
>
> gcc/ChangeLog:
>
> 	* config/loongarch/constraints.md (ZD): New constraint.
> 	* config/loongarch/loongarch-def.c: Initial number of parallel prefetch.
> 	* config/loongarch/loongarch-tune.h (struct loongarch_cache):
> 	Define number of parallel prefetch.
> 	* config/loongarch/loongarch.cc (loongarch_option_override_internal):
> 	Set up parameters to be used in prefetching algorithm.
> 	* config/loongarch/loongarch.md (prefetch): New template.
> ---
>   gcc/config/loongarch/constraints.md   | 10 ++++++++++
>   gcc/config/loongarch/loongarch-def.c  |  2 ++
>   gcc/config/loongarch/loongarch-tune.h |  1 +
>   gcc/config/loongarch/loongarch.cc     | 28 +++++++++++++++++++++++++++
>   gcc/config/loongarch/loongarch.md     | 14 ++++++++++++++
>   5 files changed, 55 insertions(+)
>
> diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
> index 43cb7b5f0f5..46f7f63ae31 100644
> --- a/gcc/config/loongarch/constraints.md
> +++ b/gcc/config/loongarch/constraints.md
> @@ -86,6 +86,10 @@
>   ;;    "ZB"
>   ;;      "An address that is held in a general-purpose register.
>   ;;      The offset is zero"
> +;;    "ZD"
> +;;	"An address operand whose address is formed by a base register
> +;;	 and offset that is suitable for use in instructions with the same
> +;;	 addressing mode as @code{preld}."
>   ;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
>   ;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
>   
> @@ -190,3 +194,9 @@ (define_memory_constraint "ZB"
>     The offset is zero"
>     (and (match_code "mem")
>          (match_test "REG_P (XEXP (op, 0))")))
> +
> +(define_address_constraint "ZD"
> +  "An address operand whose address is formed by a base register
> +   and offset that is suitable for use in instructions with the same
> +   addressing mode as @code{preld}."
> +   (match_test "loongarch_12bit_offset_address_p (op, mode)"))

How is this different from the "m" constraint? AFAIK preld and ld share
the same addressing mode (i.e. base register + 12-bit signed immediate
offset).

> diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
> index cbf995d81b5..80ab10a52a8 100644
> --- a/gcc/config/loongarch/loongarch-def.c
> +++ b/gcc/config/loongarch/loongarch-def.c
> @@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
>         .l1d_line_size = 64,
>         .l1d_size = 64,
>         .l2d_size = 256,
> +      .simultaneous_prefetches = 4,
>     },
>     [CPU_LA464] = {
>         .l1d_line_size = 64,
>         .l1d_size = 64,
>         .l2d_size = 256,
> +      .simultaneous_prefetches = 4,
>     },
>   };
>   
> diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
> index 6f3530f5c02..8e3eb29472b 100644
> --- a/gcc/config/loongarch/loongarch-tune.h
> +++ b/gcc/config/loongarch/loongarch-tune.h
> @@ -45,6 +45,7 @@ struct loongarch_cache {
>       int l1d_line_size;  /* bytes */
>       int l1d_size;       /* KiB */
>       int l2d_size;       /* kiB */
> +    int simultaneous_prefetches; /* number of parallel prefetch */
nit: "prefetches" or "prefetch ops" or "int prefetch_width"?
>   };
>   
>   #endif /* LOONGARCH_TUNE_H */
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 8d5d8d965dd..8ee32c90573 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "context.h"
>   #include "builtins.h"
>   #include "rtl-iter.h"
> +#include "opts.h"
>   
>   /* This file should be included last.  */
>   #include "target-def.h"
> @@ -6100,6 +6101,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
>     if (loongarch_branch_cost == 0)
>       loongarch_branch_cost = loongarch_cost->branch_cost;
>   
> +  /* Set up parameters to be used in prefetching algorithm.  */
> +  int simultaneous_prefetches
> +    = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +		       param_simultaneous_prefetches,
> +		       simultaneous_prefetches);
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +		       param_l1_cache_line_size,
> +		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +		       param_l1_cache_size,
> +		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +		       param_l2_cache_size,
> +		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
> +
> +
> +  /* Enable sw prefetching at -O3 and higher.  */
> +  if (opts->x_flag_prefetch_loop_arrays < 0
> +      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
> +      && !opts->x_optimize_size)
> +    opts->x_flag_prefetch_loop_arrays = 1;
> +
>     if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
>       error ("%qs cannot be used for compiling a shared library",
>   	   "-mdirect-extern-access");
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 682ab961741..2fda5381904 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -3282,6 +3282,20 @@ (define_expand "untyped_call"
>   ;;  ....................
>   ;;
>   
> +(define_insn "prefetch"
> +  [(prefetch (match_operand 0 "address_operand" "ZD")
> +	     (match_operand 1 "const_int_operand" "n")
> +	     (match_operand 2 "const_int_operand" "n"))]
> +  ""
> +{
> +  switch (INTVAL (operands[1]))
> +  {
> +    case 0: return "preld\t0,%a0";
> +    case 1: return "preld\t8,%a0";
> +    default: gcc_unreachable ();
> +  }
> +})
> +
>   (define_insn "nop"
>     [(const_int 0)]
>     ""
  
Lulu Cheng Nov. 16, 2022, 3:19 a.m. UTC | #2
在 2022/11/16 上午11:06, WANG Xuerui 写道:
>
> On 2022/11/16 10:10, Lulu Cheng wrote:
>> v2 -> v3:
>> 1. Remove preldx support.
>>
>> ---------------------------------------
>> Enable sw prefetching at -O3 and higher.
>>
>> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
>>
>> gcc/ChangeLog:
>>
>>     * config/loongarch/constraints.md (ZD): New constraint.
>>     * config/loongarch/loongarch-def.c: Initial number of parallel 
>> prefetch.
>>     * config/loongarch/loongarch-tune.h (struct loongarch_cache):
>>     Define number of parallel prefetch.
>>     * config/loongarch/loongarch.cc 
>> (loongarch_option_override_internal):
>>     Set up parameters to be used in prefetching algorithm.
>>     * config/loongarch/loongarch.md (prefetch): New template.
>> ---
>>   gcc/config/loongarch/constraints.md   | 10 ++++++++++
>>   gcc/config/loongarch/loongarch-def.c  |  2 ++
>>   gcc/config/loongarch/loongarch-tune.h |  1 +
>>   gcc/config/loongarch/loongarch.cc     | 28 +++++++++++++++++++++++++++
>>   gcc/config/loongarch/loongarch.md     | 14 ++++++++++++++
>>   5 files changed, 55 insertions(+)
>>
>> diff --git a/gcc/config/loongarch/constraints.md 
>> b/gcc/config/loongarch/constraints.md
>> index 43cb7b5f0f5..46f7f63ae31 100644
>> --- a/gcc/config/loongarch/constraints.md
>> +++ b/gcc/config/loongarch/constraints.md
>> @@ -86,6 +86,10 @@
>>   ;;    "ZB"
>>   ;;      "An address that is held in a general-purpose register.
>>   ;;      The offset is zero"
>> +;;    "ZD"
>> +;;    "An address operand whose address is formed by a base register
>> +;;     and offset that is suitable for use in instructions with the 
>> same
>> +;;     addressing mode as @code{preld}."
>>   ;; "<" "Matches a pre-dec or post-dec operand." (Global 
>> non-architectural)
>>   ;; ">" "Matches a pre-inc or post-inc operand." (Global 
>> non-architectural)
>>   @@ -190,3 +194,9 @@ (define_memory_constraint "ZB"
>>     The offset is zero"
>>     (and (match_code "mem")
>>          (match_test "REG_P (XEXP (op, 0))")))
>> +
>> +(define_address_constraint "ZD"
>> +  "An address operand whose address is formed by a base register
>> +   and offset that is suitable for use in instructions with the same
>> +   addressing mode as @code{preld}."
>> +   (match_test "loongarch_12bit_offset_address_p (op, mode)"))
>
> How is this different with the "m" constraint? AFAIK preld and ld 
> share the same addressing mode (i.e. base register + 12-bit signed 
> immediate offset).
The "m" constraint is defined as follows:

(define_memory_constraint "m"
  (and (match_code "mem")
       (match_test "loongarch_12bit_offset_address_p (XEXP (op, 0), mode)")))

An operand matching this constraint must be a memory operand.

The "ZD" constraint, by contrast, matches an address operand.

I think (mem:mode (address operand)) = memory operand.


>
>> diff --git a/gcc/config/loongarch/loongarch-def.c 
>> b/gcc/config/loongarch/loongarch-def.c
>> index cbf995d81b5..80ab10a52a8 100644
>> --- a/gcc/config/loongarch/loongarch-def.c
>> +++ b/gcc/config/loongarch/loongarch-def.c
>> @@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
>>         .l1d_line_size = 64,
>>         .l1d_size = 64,
>>         .l2d_size = 256,
>> +      .simultaneous_prefetches = 4,
>>     },
>>     [CPU_LA464] = {
>>         .l1d_line_size = 64,
>>         .l1d_size = 64,
>>         .l2d_size = 256,
>> +      .simultaneous_prefetches = 4,
>>     },
>>   };
>>   diff --git a/gcc/config/loongarch/loongarch-tune.h 
>> b/gcc/config/loongarch/loongarch-tune.h
>> index 6f3530f5c02..8e3eb29472b 100644
>> --- a/gcc/config/loongarch/loongarch-tune.h
>> +++ b/gcc/config/loongarch/loongarch-tune.h
>> @@ -45,6 +45,7 @@ struct loongarch_cache {
>>       int l1d_line_size;  /* bytes */
>>       int l1d_size;       /* KiB */
>>       int l2d_size;       /* kiB */
>> +    int simultaneous_prefetches; /* number of parallel prefetch */
> nit: "prefetches" or "prefetch ops" or "int prefetch_width"?
>>   };
>>     #endif /* LOONGARCH_TUNE_H */
>> diff --git a/gcc/config/loongarch/loongarch.cc 
>> b/gcc/config/loongarch/loongarch.cc
>> index 8d5d8d965dd..8ee32c90573 100644
>> --- a/gcc/config/loongarch/loongarch.cc
>> +++ b/gcc/config/loongarch/loongarch.cc
>> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
>>   #include "context.h"
>>   #include "builtins.h"
>>   #include "rtl-iter.h"
>> +#include "opts.h"
>>     /* This file should be included last.  */
>>   #include "target-def.h"
>> @@ -6100,6 +6101,33 @@ loongarch_option_override_internal (struct 
>> gcc_options *opts)
>>     if (loongarch_branch_cost == 0)
>>       loongarch_branch_cost = loongarch_cost->branch_cost;
>>   +  /* Set up parameters to be used in prefetching algorithm. */
>> +  int simultaneous_prefetches
>> +    = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
>> +
>> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
>> +               param_simultaneous_prefetches,
>> +               simultaneous_prefetches);
>> +
>> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
>> +               param_l1_cache_line_size,
>> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
>> +
>> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
>> +               param_l1_cache_size,
>> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
>> +
>> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
>> +               param_l2_cache_size,
>> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
>> +
>> +
>> +  /* Enable sw prefetching at -O3 and higher.  */
>> +  if (opts->x_flag_prefetch_loop_arrays < 0
>> +      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
>> +      && !opts->x_optimize_size)
>> +    opts->x_flag_prefetch_loop_arrays = 1;
>> +
>>     if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
>>       error ("%qs cannot be used for compiling a shared library",
>>          "-mdirect-extern-access");
>> diff --git a/gcc/config/loongarch/loongarch.md 
>> b/gcc/config/loongarch/loongarch.md
>> index 682ab961741..2fda5381904 100644
>> --- a/gcc/config/loongarch/loongarch.md
>> +++ b/gcc/config/loongarch/loongarch.md
>> @@ -3282,6 +3282,20 @@ (define_expand "untyped_call"
>>   ;;  ....................
>>   ;;
>>   +(define_insn "prefetch"
>> +  [(prefetch (match_operand 0 "address_operand" "ZD")
>> +         (match_operand 1 "const_int_operand" "n")
>> +         (match_operand 2 "const_int_operand" "n"))]
>> +  ""
>> +{
>> +  switch (INTVAL (operands[1]))
>> +  {
>> +    case 0: return "preld\t0,%a0";
>> +    case 1: return "preld\t8,%a0";
>> +    default: gcc_unreachable ();
>> +  }
>> +})
>> +
>>   (define_insn "nop"
>>     [(const_int 0)]
>>     ""
  
Xi Ruoyao Nov. 16, 2022, 4:22 p.m. UTC | #3
On Wed, 2022-11-16 at 11:19 +0800, Lulu Cheng wrote:
> The "m" constraint is defined as follows:
>  (define_memory_constraint "m"
>    (and (match_code "mem")
>        (match_test "loongarch_12bit_offset_address_p (XEXP (op, 0),
> mode)")))
> This setting must be a memory operand.
> The "ZD" constraint matches an address operand.
> I think (mem:mode (address operand)) = memory operand.

Yes they are different.  I tried reusing "m" in my previous attempt to
add prefetch instruction but it didn't work.
  
Xi Ruoyao Nov. 17, 2022, 6:28 a.m. UTC | #4
LGTM.  A minor issue is that enabling -fprefetch-loop-arrays at -O3 is
not documented, but AArch64 and i386 already do this anyway.  We can
document this behavior later.

On Wed, 2022-11-16 at 10:10 +0800, Lulu Cheng wrote:
> v2 -> v3:
> 1. Remove preldx support.
> 
> ---------------------------------------
> Enable sw prefetching at -O3 and higher.
> 
> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
> 
> gcc/ChangeLog:
> 
>         * config/loongarch/constraints.md (ZD): New constraint.
>         * config/loongarch/loongarch-def.c: Initial number of parallel
> prefetch.
>         * config/loongarch/loongarch-tune.h (struct loongarch_cache):
>         Define number of parallel prefetch.
>         * config/loongarch/loongarch.cc
> (loongarch_option_override_internal):
>         Set up parameters to be used in prefetching algorithm.
>         * config/loongarch/loongarch.md (prefetch): New template.
> ---
>  gcc/config/loongarch/constraints.md   | 10 ++++++++++
>  gcc/config/loongarch/loongarch-def.c  |  2 ++
>  gcc/config/loongarch/loongarch-tune.h |  1 +
>  gcc/config/loongarch/loongarch.cc     | 28
> +++++++++++++++++++++++++++
>  gcc/config/loongarch/loongarch.md     | 14 ++++++++++++++
>  5 files changed, 55 insertions(+)
> 
> diff --git a/gcc/config/loongarch/constraints.md
> b/gcc/config/loongarch/constraints.md
> index 43cb7b5f0f5..46f7f63ae31 100644
> --- a/gcc/config/loongarch/constraints.md
> +++ b/gcc/config/loongarch/constraints.md
> @@ -86,6 +86,10 @@
>  ;;    "ZB"
>  ;;      "An address that is held in a general-purpose register.
>  ;;      The offset is zero"
> +;;    "ZD"
> +;;     "An address operand whose address is formed by a base register
> +;;      and offset that is suitable for use in instructions with the
> same
> +;;      addressing mode as @code{preld}."
>  ;; "<" "Matches a pre-dec or post-dec operand." (Global non-
> architectural)
>  ;; ">" "Matches a pre-inc or post-inc operand." (Global non-
> architectural)
>  
> @@ -190,3 +194,9 @@ (define_memory_constraint "ZB"
>    The offset is zero"
>    (and (match_code "mem")
>         (match_test "REG_P (XEXP (op, 0))")))
> +
> +(define_address_constraint "ZD"
> +  "An address operand whose address is formed by a base register
> +   and offset that is suitable for use in instructions with the same
> +   addressing mode as @code{preld}."
> +   (match_test "loongarch_12bit_offset_address_p (op, mode)"))
> diff --git a/gcc/config/loongarch/loongarch-def.c
> b/gcc/config/loongarch/loongarch-def.c
> index cbf995d81b5..80ab10a52a8 100644
> --- a/gcc/config/loongarch/loongarch-def.c
> +++ b/gcc/config/loongarch/loongarch-def.c
> @@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
>        .l1d_line_size = 64,
>        .l1d_size = 64,
>        .l2d_size = 256,
> +      .simultaneous_prefetches = 4,
>    },
>    [CPU_LA464] = {
>        .l1d_line_size = 64,
>        .l1d_size = 64,
>        .l2d_size = 256,
> +      .simultaneous_prefetches = 4,
>    },
>  };
>  
> diff --git a/gcc/config/loongarch/loongarch-tune.h
> b/gcc/config/loongarch/loongarch-tune.h
> index 6f3530f5c02..8e3eb29472b 100644
> --- a/gcc/config/loongarch/loongarch-tune.h
> +++ b/gcc/config/loongarch/loongarch-tune.h
> @@ -45,6 +45,7 @@ struct loongarch_cache {
>      int l1d_line_size;  /* bytes */
>      int l1d_size;       /* KiB */
>      int l2d_size;       /* kiB */
> +    int simultaneous_prefetches; /* number of parallel prefetch */
>  };
>  
>  #endif /* LOONGARCH_TUNE_H */
> diff --git a/gcc/config/loongarch/loongarch.cc
> b/gcc/config/loongarch/loongarch.cc
> index 8d5d8d965dd..8ee32c90573 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "context.h"
>  #include "builtins.h"
>  #include "rtl-iter.h"
> +#include "opts.h"
>  
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -6100,6 +6101,33 @@ loongarch_option_override_internal (struct
> gcc_options *opts)
>    if (loongarch_branch_cost == 0)
>      loongarch_branch_cost = loongarch_cost->branch_cost;
>  
> +  /* Set up parameters to be used in prefetching algorithm.  */
> +  int simultaneous_prefetches
> +    = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +                      param_simultaneous_prefetches,
> +                      simultaneous_prefetches);
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +                      param_l1_cache_line_size,
> +                     
> loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +                      param_l1_cache_size,
> +                     
> loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +                      param_l2_cache_size,
> +                     
> loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
> +
> +
> +  /* Enable sw prefetching at -O3 and higher.  */
> +  if (opts->x_flag_prefetch_loop_arrays < 0
> +      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
> +      && !opts->x_optimize_size)
> +    opts->x_flag_prefetch_loop_arrays = 1;
> +
>    if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
>      error ("%qs cannot be used for compiling a shared library",
>            "-mdirect-extern-access");
> diff --git a/gcc/config/loongarch/loongarch.md
> b/gcc/config/loongarch/loongarch.md
> index 682ab961741..2fda5381904 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -3282,6 +3282,20 @@ (define_expand "untyped_call"
>  ;;  ....................
>  ;;
>  
> +(define_insn "prefetch"
> +  [(prefetch (match_operand 0 "address_operand" "ZD")
> +            (match_operand 1 "const_int_operand" "n")
> +            (match_operand 2 "const_int_operand" "n"))]
> +  ""
> +{
> +  switch (INTVAL (operands[1]))
> +  {
> +    case 0: return "preld\t0,%a0";
> +    case 1: return "preld\t8,%a0";
> +    default: gcc_unreachable ();
> +  }
> +})
> +
>  (define_insn "nop"
>    [(const_int 0)]
>    ""
  
Lulu Cheng Nov. 23, 2022, 3:08 a.m. UTC | #5
Pushed r13-4259.

在 2022/11/16 10:10, Lulu Cheng 写道:
> v2 -> v3:
> 1. Remove preldx support.
>
> ---------------------------------------
> Enable sw prefetching at -O3 and higher.
>
> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
>
> gcc/ChangeLog:
>
> 	* config/loongarch/constraints.md (ZD): New constraint.
> 	* config/loongarch/loongarch-def.c: Initial number of parallel prefetch.
> 	* config/loongarch/loongarch-tune.h (struct loongarch_cache):
> 	Define number of parallel prefetch.
> 	* config/loongarch/loongarch.cc (loongarch_option_override_internal):
> 	Set up parameters to be used in prefetching algorithm.
> 	* config/loongarch/loongarch.md (prefetch): New template.
> ---
>   gcc/config/loongarch/constraints.md   | 10 ++++++++++
>   gcc/config/loongarch/loongarch-def.c  |  2 ++
>   gcc/config/loongarch/loongarch-tune.h |  1 +
>   gcc/config/loongarch/loongarch.cc     | 28 +++++++++++++++++++++++++++
>   gcc/config/loongarch/loongarch.md     | 14 ++++++++++++++
>   5 files changed, 55 insertions(+)
>
> diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
> index 43cb7b5f0f5..46f7f63ae31 100644
> --- a/gcc/config/loongarch/constraints.md
> +++ b/gcc/config/loongarch/constraints.md
> @@ -86,6 +86,10 @@
>   ;;    "ZB"
>   ;;      "An address that is held in a general-purpose register.
>   ;;      The offset is zero"
> +;;    "ZD"
> +;;	"An address operand whose address is formed by a base register
> +;;	 and offset that is suitable for use in instructions with the same
> +;;	 addressing mode as @code{preld}."
>   ;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
>   ;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
>   
> @@ -190,3 +194,9 @@ (define_memory_constraint "ZB"
>     The offset is zero"
>     (and (match_code "mem")
>          (match_test "REG_P (XEXP (op, 0))")))
> +
> +(define_address_constraint "ZD"
> +  "An address operand whose address is formed by a base register
> +   and offset that is suitable for use in instructions with the same
> +   addressing mode as @code{preld}."
> +   (match_test "loongarch_12bit_offset_address_p (op, mode)"))
> diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
> index cbf995d81b5..80ab10a52a8 100644
> --- a/gcc/config/loongarch/loongarch-def.c
> +++ b/gcc/config/loongarch/loongarch-def.c
> @@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
>         .l1d_line_size = 64,
>         .l1d_size = 64,
>         .l2d_size = 256,
> +      .simultaneous_prefetches = 4,
>     },
>     [CPU_LA464] = {
>         .l1d_line_size = 64,
>         .l1d_size = 64,
>         .l2d_size = 256,
> +      .simultaneous_prefetches = 4,
>     },
>   };
>   
> diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
> index 6f3530f5c02..8e3eb29472b 100644
> --- a/gcc/config/loongarch/loongarch-tune.h
> +++ b/gcc/config/loongarch/loongarch-tune.h
> @@ -45,6 +45,7 @@ struct loongarch_cache {
>       int l1d_line_size;  /* bytes */
>       int l1d_size;       /* KiB */
>       int l2d_size;       /* kiB */
> +    int simultaneous_prefetches; /* number of parallel prefetch */
>   };
>   
>   #endif /* LOONGARCH_TUNE_H */
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 8d5d8d965dd..8ee32c90573 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "context.h"
>   #include "builtins.h"
>   #include "rtl-iter.h"
> +#include "opts.h"
>   
>   /* This file should be included last.  */
>   #include "target-def.h"
> @@ -6100,6 +6101,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
>     if (loongarch_branch_cost == 0)
>       loongarch_branch_cost = loongarch_cost->branch_cost;
>   
> +  /* Set up parameters to be used in prefetching algorithm.  */
> +  int simultaneous_prefetches
> +    = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +		       param_simultaneous_prefetches,
> +		       simultaneous_prefetches);
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +		       param_l1_cache_line_size,
> +		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +		       param_l1_cache_size,
> +		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
> +
> +  SET_OPTION_IF_UNSET (opts, &global_options_set,
> +		       param_l2_cache_size,
> +		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
> +
> +
> +  /* Enable sw prefetching at -O3 and higher.  */
> +  if (opts->x_flag_prefetch_loop_arrays < 0
> +      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
> +      && !opts->x_optimize_size)
> +    opts->x_flag_prefetch_loop_arrays = 1;
> +
>     if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
>       error ("%qs cannot be used for compiling a shared library",
>   	   "-mdirect-extern-access");
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 682ab961741..2fda5381904 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -3282,6 +3282,20 @@ (define_expand "untyped_call"
>   ;;  ....................
>   ;;
>   
> +(define_insn "prefetch"
> +  [(prefetch (match_operand 0 "address_operand" "ZD")
> +	     (match_operand 1 "const_int_operand" "n")
> +	     (match_operand 2 "const_int_operand" "n"))]
> +  ""
> +{
> +  switch (INTVAL (operands[1]))
> +  {
> +    case 0: return "preld\t0,%a0";
> +    case 1: return "preld\t8,%a0";
> +    default: gcc_unreachable ();
> +  }
> +})
> +
>   (define_insn "nop"
>     [(const_int 0)]
>     ""
  

Patch

diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
index 43cb7b5f0f5..46f7f63ae31 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -86,6 +86,10 @@ 
 ;;    "ZB"
 ;;      "An address that is held in a general-purpose register.
 ;;      The offset is zero"
+;;    "ZD"
+;;	"An address operand whose address is formed by a base register
+;;	 and offset that is suitable for use in instructions with the same
+;;	 addressing mode as @code{preld}."
 ;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
 ;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
 
@@ -190,3 +194,9 @@  (define_memory_constraint "ZB"
   The offset is zero"
   (and (match_code "mem")
        (match_test "REG_P (XEXP (op, 0))")))
+
+(define_address_constraint "ZD"
+  "An address operand whose address is formed by a base register
+   and offset that is suitable for use in instructions with the same
+   addressing mode as @code{preld}."
+   (match_test "loongarch_12bit_offset_address_p (op, mode)"))
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
index cbf995d81b5..80ab10a52a8 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -62,11 +62,13 @@  loongarch_cpu_cache[N_TUNE_TYPES] = {
       .l1d_line_size = 64,
       .l1d_size = 64,
       .l2d_size = 256,
+      .simultaneous_prefetches = 4,
   },
   [CPU_LA464] = {
       .l1d_line_size = 64,
       .l1d_size = 64,
       .l2d_size = 256,
+      .simultaneous_prefetches = 4,
   },
 };
 
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
index 6f3530f5c02..8e3eb29472b 100644
--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
@@ -45,6 +45,7 @@  struct loongarch_cache {
     int l1d_line_size;  /* bytes */
     int l1d_size;       /* KiB */
     int l2d_size;       /* kiB */
+    int simultaneous_prefetches; /* number of parallel prefetch */
 };
 
 #endif /* LOONGARCH_TUNE_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 8d5d8d965dd..8ee32c90573 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -63,6 +63,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "context.h"
 #include "builtins.h"
 #include "rtl-iter.h"
+#include "opts.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -6100,6 +6101,33 @@  loongarch_option_override_internal (struct gcc_options *opts)
   if (loongarch_branch_cost == 0)
     loongarch_branch_cost = loongarch_cost->branch_cost;
 
+  /* Set up parameters to be used in prefetching algorithm.  */
+  int simultaneous_prefetches
+    = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_simultaneous_prefetches,
+		       simultaneous_prefetches);
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_l1_cache_line_size,
+		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_l1_cache_size,
+		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_l2_cache_size,
+		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
+
+
+  /* Enable sw prefetching at -O3 and higher.  */
+  if (opts->x_flag_prefetch_loop_arrays < 0
+      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
+      && !opts->x_optimize_size)
+    opts->x_flag_prefetch_loop_arrays = 1;
+
   if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
     error ("%qs cannot be used for compiling a shared library",
 	   "-mdirect-extern-access");
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 682ab961741..2fda5381904 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -3282,6 +3282,20 @@  (define_expand "untyped_call"
 ;;  ....................
 ;;
 
+(define_insn "prefetch"
+  [(prefetch (match_operand 0 "address_operand" "ZD")
+	     (match_operand 1 "const_int_operand" "n")
+	     (match_operand 2 "const_int_operand" "n"))]
+  ""
+{
+  switch (INTVAL (operands[1]))
+  {
+    case 0: return "preld\t0,%a0";
+    case 1: return "preld\t8,%a0";
+    default: gcc_unreachable ();
+  }
+})
+
 (define_insn "nop"
   [(const_int 0)]
   ""