[3/4,RISC-V] resolve confilct between zcmp multi push/pop and shrink-wrap-separate

Message ID 20230607055215.29332-4-gaofei@eswincomputing.com
State Superseded
Headers
Series support zcmp extention |

Commit Message

Fei Gao June 7, 2023, 5:52 a.m. UTC
  Disable zcmp multi push/pop if shrink-wrap-separate is active.

So in -Os that prefers smaller code size, by default shrink-wrap-separate
is disabled while zcmp multi push/pop is enabled.

And in -O2 and others that prefers speed, by default shrink-wrap-separate
is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
push/pop in this case, -fno-shrink-wrap-separate has to be explictly given.

The following TC shows the issues in -O2 before this patch with both
shrink-wrap-separate and zcmp multi push/pop active.
1. duplicated store of s regs.
2. cm.push pushes ra, s0-s11 in reverse order than what normal
   prologue does, causing stack corruption and failure to resotre s regs.

TC: zcmp_shrink_wrap_separate.c included in this patch.

output asm before this patch:
calc_func:
	cm.push	{ra, s0-s3}, -32
	...
	beq	a5,zero,.L2
	...
.L2:
	...
	sw	s1,20(sp) //issue here
	sw	s3,12(sp) //issue here
	...
	sw	s2,16(sp) //issue here

output asm after this patch:
calc_func:
	addi	sp,sp,-32
	sw	s0,24(sp)
	...
	beq	a5,zero,.L2
	...
.L2:
	...
	sw	s1,20(sp)
	sw	s3,12(sp)
	...
	sw	s2,16(sp)
gcc/ChangeLog:

        * config/riscv/riscv.cc
        (riscv_avoid_shrink_wrapping_separate): wrap the condition check in
        riscv_avoid_shrink_wrapping_separate.
        (riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
          is active.
        (riscv_get_separate_components): call riscv_avoid_shrink_wrapping_separate
        * shrink-wrap.cc (try_shrink_wrapping_separate): call
          use_shrink_wrapping_separate.
        (use_shrink_wrapping_separate):wrap the condition
          check in use_shrink_wrapping_separate 
        * shrink-wrap.h (use_shrink_wrapping_separate): add to extern

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/zcmp_shrink_wrap_separate.c: New test.
        * gcc.target/riscv/zcmp_shrink_wrap_separate2.c: New test.

Signed-off-by: Fei Gao <gaofei@eswincomputing.com>
Co-Authored-By: Zhangjin Liao <liaozhangjin@eswincomputing.com>
---
 gcc/config/riscv/riscv.cc                     | 19 +++-
 gcc/shrink-wrap.cc                            | 25 +++--
 gcc/shrink-wrap.h                             |  1 +
 .../riscv/zcmp_shrink_wrap_separate.c         | 97 +++++++++++++++++++
 .../riscv/zcmp_shrink_wrap_separate2.c        | 97 +++++++++++++++++++
 5 files changed, 228 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
  

Comments

Kito Cheng June 12, 2023, 3:17 p.m. UTC | #1
I would suggest breaking this patch into two parts: RISC-V part and
the rest part (shrink-wrap.h / shrink-wrap.cc).


On Wed, Jun 7, 2023 at 1:55 PM Fei Gao <gaofei@eswincomputing.com> wrote:
>
> Disable zcmp multi push/pop if shrink-wrap-separate is active.
>
> So in -Os that prefers smaller code size, by default shrink-wrap-separate
> is disabled while zcmp multi push/pop is enabled.
>
> And in -O2 and others that prefers speed, by default shrink-wrap-separate
> is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
> push/pop in this case, -fno-shrink-wrap-separate has to be explictly given.
>
> The following TC shows the issues in -O2 before this patch with both
> shrink-wrap-separate and zcmp multi push/pop active.
> 1. duplicated store of s regs.
> 2. cm.push pushes ra, s0-s11 in reverse order than what normal
>    prologue does, causing stack corruption and failure to resotre s regs.
>
> TC: zcmp_shrink_wrap_separate.c included in this patch.
>
> output asm before this patch:
> calc_func:
>         cm.push {ra, s0-s3}, -32
>         ...
>         beq     a5,zero,.L2
>         ...
> .L2:
>         ...
>         sw      s1,20(sp) //issue here
>         sw      s3,12(sp) //issue here
>         ...
>         sw      s2,16(sp) //issue here
>
> output asm after this patch:
> calc_func:
>         addi    sp,sp,-32
>         sw      s0,24(sp)
>         ...
>         beq     a5,zero,.L2
>         ...
> .L2:
>         ...
>         sw      s1,20(sp)
>         sw      s3,12(sp)
>         ...
>         sw      s2,16(sp)
> gcc/ChangeLog:
>
>         * config/riscv/riscv.cc
>         (riscv_avoid_shrink_wrapping_separate): wrap the condition check in
>         riscv_avoid_shrink_wrapping_separate.
>         (riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
>           is active.
>         (riscv_get_separate_components): call riscv_avoid_shrink_wrapping_separate
>         * shrink-wrap.cc (try_shrink_wrapping_separate): call
>           use_shrink_wrapping_separate.
>         (use_shrink_wrapping_separate):wrap the condition
>           check in use_shrink_wrapping_separate
>         * shrink-wrap.h (use_shrink_wrapping_separate): add to extern
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/zcmp_shrink_wrap_separate.c: New test.
>         * gcc.target/riscv/zcmp_shrink_wrap_separate2.c: New test.
>
> Signed-off-by: Fei Gao <gaofei@eswincomputing.com>
> Co-Authored-By: Zhangjin Liao <liaozhangjin@eswincomputing.com>
> ---
>  gcc/config/riscv/riscv.cc                     | 19 +++-
>  gcc/shrink-wrap.cc                            | 25 +++--
>  gcc/shrink-wrap.h                             |  1 +
>  .../riscv/zcmp_shrink_wrap_separate.c         | 97 +++++++++++++++++++
>  .../riscv/zcmp_shrink_wrap_separate2.c        | 97 +++++++++++++++++++
>  5 files changed, 228 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index f60c241a526..b505cdeca34 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "cfghooks.h"
>  #include "cfgloop.h"
>  #include "cfgrtl.h"
> +#include "shrink-wrap.h"
>  #include "sel-sched.h"
>  #include "fold-const.h"
>  #include "gimple-iterator.h"
> @@ -389,6 +390,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
>    false,                                       /* use_divmod_expansion */
>  };
>
> +static bool riscv_avoid_shrink_wrapping_separate ();
>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
>  static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
>
> @@ -4910,6 +4912,8 @@ riscv_avoid_multi_push(const struct riscv_frame_info *frame)
>        || cfun->machine->interrupt_handler_p
>        || cfun->machine->varargs_size != 0
>        || crtl->args.pretend_args_size != 0
> +      || (use_shrink_wrapping_separate ()
> +          && !riscv_avoid_shrink_wrapping_separate ())
>        || (frame->mask & ~ MULTI_PUSH_GPR_MASK))
>      return true;
>
> @@ -6077,6 +6081,17 @@ riscv_epilogue_uses (unsigned int regno)
>    return false;
>  }
>
> +static bool
> +riscv_avoid_shrink_wrapping_separate ()
> +{
> +  if (riscv_use_save_libcall (&cfun->machine->frame)
> +      || cfun->machine->interrupt_handler_p
> +      || !cfun->machine->frame.gp_sp_offset.is_constant ())
> +    return true;
> +
> +  return false;
> +}
> +
>  /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
>
>  static sbitmap
> @@ -6086,9 +6101,7 @@ riscv_get_separate_components (void)
>    sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
>    bitmap_clear (components);
>
> -  if (riscv_use_save_libcall (&cfun->machine->frame)
> -      || cfun->machine->interrupt_handler_p
> -      || !cfun->machine->frame.gp_sp_offset.is_constant ())
> +  if (riscv_avoid_shrink_wrapping_separate ())
>      return components;
>
>    offset = cfun->machine->frame.gp_sp_offset.to_constant ();
> diff --git a/gcc/shrink-wrap.cc b/gcc/shrink-wrap.cc
> index b8d7b557130..d534964321a 100644
> --- a/gcc/shrink-wrap.cc
> +++ b/gcc/shrink-wrap.cc
> @@ -1776,16 +1776,14 @@ insert_prologue_epilogue_for_components (sbitmap components)
>    commit_edge_insertions ();
>  }
>
> -/* The main entry point to this subpass.  FIRST_BB is where the prologue
> -   would be normally put.  */
> -void
> -try_shrink_wrapping_separate (basic_block first_bb)
> +bool
> +use_shrink_wrapping_separate (void)
>  {
>    if (!(SHRINK_WRAPPING_ENABLED
> -       && flag_shrink_wrap_separate
> -       && optimize_function_for_speed_p (cfun)
> -       && targetm.shrink_wrap.get_separate_components))
> -    return;
> +        && flag_shrink_wrap_separate
> +        && optimize_function_for_speed_p (cfun)
> +        && targetm.shrink_wrap.get_separate_components))
> +    return false;
>
>    /* We don't handle "strange" functions.  */
>    if (cfun->calls_alloca
> @@ -1794,6 +1792,17 @@ try_shrink_wrapping_separate (basic_block first_bb)
>        || crtl->calls_eh_return
>        || crtl->has_nonlocal_goto
>        || crtl->saves_all_registers)
> +    return false;
> +
> +  return true;
> +}
> +
> +/* The main entry point to this subpass.  FIRST_BB is where the prologue
> +   would be normally put.  */
> +void
> +try_shrink_wrapping_separate (basic_block first_bb)
> +{
> +  if (!use_shrink_wrapping_separate ())
>      return;
>
>    /* Ask the target what components there are.  If it returns NULL, don't
> diff --git a/gcc/shrink-wrap.h b/gcc/shrink-wrap.h
> index 161647711a3..82386c2b712 100644
> --- a/gcc/shrink-wrap.h
> +++ b/gcc/shrink-wrap.h
> @@ -26,6 +26,7 @@ along with GCC; see the file COPYING3.  If not see
>  extern bool requires_stack_frame_p (rtx_insn *, HARD_REG_SET, HARD_REG_SET);
>  extern void try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq);
>  extern void try_shrink_wrapping_separate (basic_block first_bb);
> +extern bool use_shrink_wrapping_separate (void);
>  #define SHRINK_WRAPPING_ENABLED \
>    (flag_shrink_wrap && targetm.have_simple_return ())
>
> diff --git a/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
> new file mode 100644
> index 00000000000..11f87aee607
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
> @@ -0,0 +1,97 @@
> +/* { dg-do compile } */
> +/* { dg-options " -O2 -march=rv32imaf_zca_zcmp -mabi=ilp32f" } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
> +
> +typedef struct MAT_PARAMS_S
> +{
> +    int     N;
> +    signed short *A;
> +    signed short *B;
> +    signed int *C;
> +} mat_params;
> +
> +typedef struct CORE_PORTABLE_S
> +{
> +    unsigned char portable_id;
> +} core_portable;
> +
> +typedef struct RESULTS_S
> +{
> +    /* inputs */
> +    signed short              seed1;       /* Initializing seed */
> +    signed short              seed2;       /* Initializing seed */
> +    signed short              seed3;       /* Initializing seed */
> +    void *              memblock[4]; /* Pointer to safe memory location */
> +    unsigned int              size;        /* Size of the data */
> +    unsigned int              iterations;  /* Number of iterations to execute */
> +    unsigned int              execs;       /* Bitmask of operations to execute */
> +    struct list_head_s *list;
> +    mat_params          mat;
> +    /* outputs */
> +    unsigned short crc;
> +    unsigned short crclist;
> +    unsigned short crcmatrix;
> +    unsigned short crcstate;
> +    signed short err;
> +    /* ultithread specific */
> +    core_portable port;
> +} core_results;
> +
> +extern signed short
> +core_bench_state(unsigned int, void *, signed short, signed short, signed short, unsigned short);
> +
> +extern signed short
> +core_bench_matrix(mat_params *, signed short, unsigned short);
> +
> +extern unsigned short
> +crcu16(signed short, unsigned short);
> +
> +signed short
> +calc_func(signed short *pdata, core_results *res)
> +{
> +    signed short data = *pdata;
> +    signed short retval;
> +    unsigned char  optype
> +        = (data >> 7)
> +          & 1;  /* bit 7 indicates if the function result has been cached */
> +    if (optype) /* if cached, use cache */
> +        return (data & 0x007f);
> +    else
> +    {                             /* otherwise calculate and cache the result */
> +        signed short flag = data & 0x7; /* bits 0-2 is type of function to perform */
> +        signed short dtype
> +            = ((data >> 3)
> +               & 0xf);       /* bits 3-6 is specific data for the operation */
> +        dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
> +        switch (flag)
> +        {
> +            case 0:
> +                if (dtype < 0x22) /* set min period for bit corruption */
> +                    dtype = 0x22;
> +                retval = core_bench_state(res->size,
> +                                          res->memblock[3],
> +                                          res->seed1,
> +                                          res->seed2,
> +                                          dtype,
> +                                          res->crc);
> +                if (res->crcstate == 0)
> +                    res->crcstate = retval;
> +                break;
> +            case 1:
> +                retval = core_bench_matrix(&(res->mat), dtype, res->crc);
> +                if (res->crcmatrix == 0)
> +                    res->crcmatrix = retval;
> +                break;
> +            default:
> +                retval = data;
> +                break;
> +        }
> +        res->crc = crcu16(retval, res->crc);
> +        retval &= 0x007f;
> +        *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
> +        return retval;
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-not "cm\.push" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
> new file mode 100644
> index 00000000000..ec7e9c39b5d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
> @@ -0,0 +1,97 @@
> +/* { dg-do compile } */
> +/* { dg-options " -O2 -fno-shrink-wrap-separate -march=rv32imaf_zca_zcmp -mabi=ilp32f" } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
> +
> +typedef struct MAT_PARAMS_S
> +{
> +    int     N;
> +    signed short *A;
> +    signed short *B;
> +    signed int *C;
> +} mat_params;
> +
> +typedef struct CORE_PORTABLE_S
> +{
> +    unsigned char portable_id;
> +} core_portable;
> +
> +typedef struct RESULTS_S
> +{
> +    /* inputs */
> +    signed short              seed1;       /* Initializing seed */
> +    signed short              seed2;       /* Initializing seed */
> +    signed short              seed3;       /* Initializing seed */
> +    void *              memblock[4]; /* Pointer to safe memory location */
> +    unsigned int              size;        /* Size of the data */
> +    unsigned int              iterations;  /* Number of iterations to execute */
> +    unsigned int              execs;       /* Bitmask of operations to execute */
> +    struct list_head_s *list;
> +    mat_params          mat;
> +    /* outputs */
> +    unsigned short crc;
> +    unsigned short crclist;
> +    unsigned short crcmatrix;
> +    unsigned short crcstate;
> +    signed short err;
> +    /* ultithread specific */
> +    core_portable port;
> +} core_results;
> +
> +extern signed short
> +core_bench_state(unsigned int, void *, signed short, signed short, signed short, unsigned short);
> +
> +extern signed short
> +core_bench_matrix(mat_params *, signed short, unsigned short);
> +
> +extern unsigned short
> +crcu16(signed short, unsigned short);
> +
> +signed short
> +calc_func(signed short *pdata, core_results *res)
> +{
> +    signed short data = *pdata;
> +    signed short retval;
> +    unsigned char  optype
> +        = (data >> 7)
> +          & 1;  /* bit 7 indicates if the function result has been cached */
> +    if (optype) /* if cached, use cache */
> +        return (data & 0x007f);
> +    else
> +    {                             /* otherwise calculate and cache the result */
> +        signed short flag = data & 0x7; /* bits 0-2 is type of function to perform */
> +        signed short dtype
> +            = ((data >> 3)
> +               & 0xf);       /* bits 3-6 is specific data for the operation */
> +        dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
> +        switch (flag)
> +        {
> +            case 0:
> +                if (dtype < 0x22) /* set min period for bit corruption */
> +                    dtype = 0x22;
> +                retval = core_bench_state(res->size,
> +                                          res->memblock[3],
> +                                          res->seed1,
> +                                          res->seed2,
> +                                          dtype,
> +                                          res->crc);
> +                if (res->crcstate == 0)
> +                    res->crcstate = retval;
> +                break;
> +            case 1:
> +                retval = core_bench_matrix(&(res->mat), dtype, res->crc);
> +                if (res->crcmatrix == 0)
> +                    res->crcmatrix = retval;
> +                break;
> +            default:
> +                retval = data;
> +                break;
> +        }
> +        res->crc = crcu16(retval, res->crc);
> +        retval &= 0x007f;
> +        *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
> +        return retval;
> +    }
> +}
> +
> +/* { dg-final { scan-assembler "cm\.push" } } */
> +
> --
> 2.17.1
>
  
Jeff Law June 12, 2023, 7:26 p.m. UTC | #2
On 6/6/23 23:52, Fei Gao wrote:
> Disable zcmp multi push/pop if shrink-wrap-separate is active.
> 
> So in -Os that prefers smaller code size, by default shrink-wrap-separate
> is disabled while zcmp multi push/pop is enabled.
> 
> And in -O2 and others that prefers speed, by default shrink-wrap-separate
> is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
> push/pop in this case, -fno-shrink-wrap-separate has to be explictly given.
> 
> The following TC shows the issues in -O2 before this patch with both
> shrink-wrap-separate and zcmp multi push/pop active.
> 1. duplicated store of s regs.
> 2. cm.push pushes ra, s0-s11 in reverse order than what normal
>     prologue does, causing stack corruption and failure to resotre s regs.
> 
> TC: zcmp_shrink_wrap_separate.c included in this patch.
> 
> output asm before this patch:
> calc_func:
> 	cm.push	{ra, s0-s3}, -32
> 	...
> 	beq	a5,zero,.L2
> 	...
> .L2:
> 	...
> 	sw	s1,20(sp) //issue here
> 	sw	s3,12(sp) //issue here
> 	...
> 	sw	s2,16(sp) //issue here
> 
> output asm after this patch:
> calc_func:
> 	addi	sp,sp,-32
> 	sw	s0,24(sp)
> 	...
> 	beq	a5,zero,.L2
> 	...
> .L2:
> 	...
> 	sw	s1,20(sp)
> 	sw	s3,12(sp)
> 	...
> 	sw	s2,16(sp)
> gcc/ChangeLog:
> 
>          * config/riscv/riscv.cc
>          (riscv_avoid_shrink_wrapping_separate): wrap the condition check in
>          riscv_avoid_shrink_wrapping_separate.
>          (riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
>            is active.
>          (riscv_get_separate_components): call riscv_avoid_shrink_wrapping_separate
>          * shrink-wrap.cc (try_shrink_wrapping_separate): call
>            use_shrink_wrapping_separate.
>          (use_shrink_wrapping_separate):wrap the condition
>            check in use_shrink_wrapping_separate
>          * shrink-wrap.h (use_shrink_wrapping_separate): add to extern
> 
> gcc/testsuite/ChangeLog:
> 
>          * gcc.target/riscv/zcmp_shrink_wrap_separate.c: New test.
>          * gcc.target/riscv/zcmp_shrink_wrap_separate2.c: New test.
I know Kito asked for this to be broken up into target dependent vs 
target independent changes, that's a good ask.

Can't we utilize the get_separate_components hook to accomplish what 
you're trying to do?  ie, put the logic to avoid shrink wrapping for 
this case within the existing risc-v hook?

jeff
  
Fei Gao June 13, 2023, 2:35 a.m. UTC | #3
On 2023-06-13 03:26  Jeff Law <jeffreyalaw@gmail.com> wrote:
>
>
>
>On 6/6/23 23:52, Fei Gao wrote:
>> Disable zcmp multi push/pop if shrink-wrap-separate is active.
>>
>> So in -Os that prefers smaller code size, by default shrink-wrap-separate
>> is disabled while zcmp multi push/pop is enabled.
>>
>> And in -O2 and others that prefers speed, by default shrink-wrap-separate
>> is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
>> push/pop in this case, -fno-shrink-wrap-separate has to be explictly given.
>>
>> The following TC shows the issues in -O2 before this patch with both
>> shrink-wrap-separate and zcmp multi push/pop active.
>> 1. duplicated store of s regs.
>> 2. cm.push pushes ra, s0-s11 in reverse order than what normal
>>     prologue does, causing stack corruption and failure to resotre s regs.
>>
>> TC: zcmp_shrink_wrap_separate.c included in this patch.
>>
>> output asm before this patch:
>> calc_func:
>> cm.push	{ra, s0-s3}, -32
>> ...
>> beq	a5,zero,.L2
>> ...
>> .L2:
>> ...
>> sw	s1,20(sp) //issue here
>> sw	s3,12(sp) //issue here
>> ...
>> sw	s2,16(sp) //issue here
>>
>> output asm after this patch:
>> calc_func:
>> addi	sp,sp,-32
>> sw	s0,24(sp)
>> ...
>> beq	a5,zero,.L2
>> ...
>> .L2:
>> ...
>> sw	s1,20(sp)
>> sw	s3,12(sp)
>> ...
>> sw	s2,16(sp)
>> gcc/ChangeLog:
>>
>>          * config/riscv/riscv.cc
>>          (riscv_avoid_shrink_wrapping_separate): wrap the condition check in
>>          riscv_avoid_shrink_wrapping_separate.
>>          (riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
>>            is active.
>>          (riscv_get_separate_components): call riscv_avoid_shrink_wrapping_separate
>>          * shrink-wrap.cc (try_shrink_wrapping_separate): call
>>            use_shrink_wrapping_separate.
>>          (use_shrink_wrapping_separate):wrap the condition
>>            check in use_shrink_wrapping_separate
>>          * shrink-wrap.h (use_shrink_wrapping_separate): add to extern
>>
>> gcc/testsuite/ChangeLog:
>>
>>          * gcc.target/riscv/zcmp_shrink_wrap_separate.c: New test.
>>          * gcc.target/riscv/zcmp_shrink_wrap_separate2.c: New test.
>I know Kito asked for this to be broken up into target dependent vs
>target independent changes, that's a good ask.
>
>Can't we utilize the get_separate_components hook to accomplish what
>you're trying to do?  ie, put the logic to avoid shrink wrapping for
>this case within the existing risc-v hook? 

Thank Jeff and Kito for your comments. 

My first try was to avoid shrink wrapping if zcmp is enabled.
But after discussion with Kito and Andrew Pinski, I realized it's better to disable
zcmp push and pops if shrink wrapping is active.
For detailed discussion, please check link below.
thread: [PATCH 1/2] [RISC-V] disable shrink-wrap-separate if zcmp enabled.
link: https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg307203.html

I will go ahead with Kito's advice if you're fine with the current solution.
Thanks.

BR, 
Fei


>
>jeff
  

Patch

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f60c241a526..b505cdeca34 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -64,6 +64,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "cfghooks.h"
 #include "cfgloop.h"
 #include "cfgrtl.h"
+#include "shrink-wrap.h"
 #include "sel-sched.h"
 #include "fold-const.h"
 #include "gimple-iterator.h"
@@ -389,6 +390,7 @@  static const struct riscv_tune_param optimize_size_tune_info = {
   false,					/* use_divmod_expansion */
 };
 
+static bool riscv_avoid_shrink_wrapping_separate ();
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
 static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
 
@@ -4910,6 +4912,8 @@  riscv_avoid_multi_push(const struct riscv_frame_info *frame)
       || cfun->machine->interrupt_handler_p
       || cfun->machine->varargs_size != 0
       || crtl->args.pretend_args_size != 0
+      || (use_shrink_wrapping_separate ()
+          && !riscv_avoid_shrink_wrapping_separate ())
       || (frame->mask & ~ MULTI_PUSH_GPR_MASK))
     return true;
 
@@ -6077,6 +6081,17 @@  riscv_epilogue_uses (unsigned int regno)
   return false;
 }
 
+static bool
+riscv_avoid_shrink_wrapping_separate ()
+{
+  if (riscv_use_save_libcall (&cfun->machine->frame)
+      || cfun->machine->interrupt_handler_p
+      || !cfun->machine->frame.gp_sp_offset.is_constant ())
+    return true;
+
+  return false;
+}
+
 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
 
 static sbitmap
@@ -6086,9 +6101,7 @@  riscv_get_separate_components (void)
   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   bitmap_clear (components);
 
-  if (riscv_use_save_libcall (&cfun->machine->frame)
-      || cfun->machine->interrupt_handler_p
-      || !cfun->machine->frame.gp_sp_offset.is_constant ())
+  if (riscv_avoid_shrink_wrapping_separate ())
     return components;
 
   offset = cfun->machine->frame.gp_sp_offset.to_constant ();
diff --git a/gcc/shrink-wrap.cc b/gcc/shrink-wrap.cc
index b8d7b557130..d534964321a 100644
--- a/gcc/shrink-wrap.cc
+++ b/gcc/shrink-wrap.cc
@@ -1776,16 +1776,14 @@  insert_prologue_epilogue_for_components (sbitmap components)
   commit_edge_insertions ();
 }
 
-/* The main entry point to this subpass.  FIRST_BB is where the prologue
-   would be normally put.  */
-void
-try_shrink_wrapping_separate (basic_block first_bb)
+bool
+use_shrink_wrapping_separate (void)
 {
   if (!(SHRINK_WRAPPING_ENABLED
-	&& flag_shrink_wrap_separate
-	&& optimize_function_for_speed_p (cfun)
-	&& targetm.shrink_wrap.get_separate_components))
-    return;
+        && flag_shrink_wrap_separate
+        && optimize_function_for_speed_p (cfun)
+        && targetm.shrink_wrap.get_separate_components))
+    return false;
 
   /* We don't handle "strange" functions.  */
   if (cfun->calls_alloca
@@ -1794,6 +1792,17 @@  try_shrink_wrapping_separate (basic_block first_bb)
       || crtl->calls_eh_return
       || crtl->has_nonlocal_goto
       || crtl->saves_all_registers)
+    return false;
+
+  return true;
+}
+
+/* The main entry point to this subpass.  FIRST_BB is where the prologue
+   would be normally put.  */
+void
+try_shrink_wrapping_separate (basic_block first_bb)
+{
+  if (!use_shrink_wrapping_separate ())
     return;
 
   /* Ask the target what components there are.  If it returns NULL, don't
diff --git a/gcc/shrink-wrap.h b/gcc/shrink-wrap.h
index 161647711a3..82386c2b712 100644
--- a/gcc/shrink-wrap.h
+++ b/gcc/shrink-wrap.h
@@ -26,6 +26,7 @@  along with GCC; see the file COPYING3.  If not see
 extern bool requires_stack_frame_p (rtx_insn *, HARD_REG_SET, HARD_REG_SET);
 extern void try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq);
 extern void try_shrink_wrapping_separate (basic_block first_bb);
+extern bool use_shrink_wrapping_separate (void);
 #define SHRINK_WRAPPING_ENABLED \
   (flag_shrink_wrap && targetm.have_simple_return ())
 
diff --git a/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
new file mode 100644
index 00000000000..11f87aee607
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
@@ -0,0 +1,97 @@ 
+/* { dg-do compile } */
+/* { dg-options " -O2 -march=rv32imaf_zca_zcmp -mabi=ilp32f" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
+
+typedef struct MAT_PARAMS_S
+{
+    int     N;
+    signed short *A;
+    signed short *B;
+    signed int *C;
+} mat_params;
+
+typedef struct CORE_PORTABLE_S
+{
+    unsigned char portable_id;
+} core_portable;
+
+typedef struct RESULTS_S
+{
+    /* inputs */
+    signed short              seed1;       /* Initializing seed */
+    signed short              seed2;       /* Initializing seed */
+    signed short              seed3;       /* Initializing seed */
+    void *              memblock[4]; /* Pointer to safe memory location */
+    unsigned int              size;        /* Size of the data */
+    unsigned int              iterations;  /* Number of iterations to execute */
+    unsigned int              execs;       /* Bitmask of operations to execute */
+    struct list_head_s *list;
+    mat_params          mat;
+    /* outputs */
+    unsigned short crc;
+    unsigned short crclist;
+    unsigned short crcmatrix;
+    unsigned short crcstate;
+    signed short err;
+    /* ultithread specific */
+    core_portable port;
+} core_results;
+
+extern signed short
+core_bench_state(unsigned int, void *, signed short, signed short, signed short, unsigned short);
+
+extern signed short
+core_bench_matrix(mat_params *, signed short, unsigned short);
+
+extern unsigned short
+crcu16(signed short, unsigned short);
+
+signed short
+calc_func(signed short *pdata, core_results *res)
+{
+    signed short data = *pdata;
+    signed short retval;
+    unsigned char  optype
+        = (data >> 7)
+          & 1;  /* bit 7 indicates if the function result has been cached */
+    if (optype) /* if cached, use cache */
+        return (data & 0x007f);
+    else
+    {                             /* otherwise calculate and cache the result */
+        signed short flag = data & 0x7; /* bits 0-2 is type of function to perform */
+        signed short dtype
+            = ((data >> 3)
+               & 0xf);       /* bits 3-6 is specific data for the operation */
+        dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
+        switch (flag)
+        {
+            case 0:
+                if (dtype < 0x22) /* set min period for bit corruption */
+                    dtype = 0x22;
+                retval = core_bench_state(res->size,
+                                          res->memblock[3],
+                                          res->seed1,
+                                          res->seed2,
+                                          dtype,
+                                          res->crc);
+                if (res->crcstate == 0)
+                    res->crcstate = retval;
+                break;
+            case 1:
+                retval = core_bench_matrix(&(res->mat), dtype, res->crc);
+                if (res->crcmatrix == 0)
+                    res->crcmatrix = retval;
+                break;
+            default:
+                retval = data;
+                break;
+        }
+        res->crc = crcu16(retval, res->crc);
+        retval &= 0x007f;
+        *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
+        return retval;
+    }
+}
+
+/* { dg-final { scan-assembler-not "cm\.push" } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
new file mode 100644
index 00000000000..ec7e9c39b5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
@@ -0,0 +1,97 @@ 
+/* { dg-do compile } */
+/* { dg-options " -O2 -fno-shrink-wrap-separate -march=rv32imaf_zca_zcmp -mabi=ilp32f" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
+
+typedef struct MAT_PARAMS_S
+{
+    int     N;
+    signed short *A;
+    signed short *B;
+    signed int *C;
+} mat_params;
+
+typedef struct CORE_PORTABLE_S
+{
+    unsigned char portable_id;
+} core_portable;
+
+typedef struct RESULTS_S
+{
+    /* inputs */
+    signed short              seed1;       /* Initializing seed */
+    signed short              seed2;       /* Initializing seed */
+    signed short              seed3;       /* Initializing seed */
+    void *              memblock[4]; /* Pointer to safe memory location */
+    unsigned int              size;        /* Size of the data */
+    unsigned int              iterations;  /* Number of iterations to execute */
+    unsigned int              execs;       /* Bitmask of operations to execute */
+    struct list_head_s *list;
+    mat_params          mat;
+    /* outputs */
+    unsigned short crc;
+    unsigned short crclist;
+    unsigned short crcmatrix;
+    unsigned short crcstate;
+    signed short err;
+    /* ultithread specific */
+    core_portable port;
+} core_results;
+
+extern signed short
+core_bench_state(unsigned int, void *, signed short, signed short, signed short, unsigned short);
+
+extern signed short
+core_bench_matrix(mat_params *, signed short, unsigned short);
+
+extern unsigned short
+crcu16(signed short, unsigned short);
+
+signed short
+calc_func(signed short *pdata, core_results *res)
+{
+    signed short data = *pdata;
+    signed short retval;
+    unsigned char  optype
+        = (data >> 7)
+          & 1;  /* bit 7 indicates if the function result has been cached */
+    if (optype) /* if cached, use cache */
+        return (data & 0x007f);
+    else
+    {                             /* otherwise calculate and cache the result */
+        signed short flag = data & 0x7; /* bits 0-2 is type of function to perform */
+        signed short dtype
+            = ((data >> 3)
+               & 0xf);       /* bits 3-6 is specific data for the operation */
+        dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
+        switch (flag)
+        {
+            case 0:
+                if (dtype < 0x22) /* set min period for bit corruption */
+                    dtype = 0x22;
+                retval = core_bench_state(res->size,
+                                          res->memblock[3],
+                                          res->seed1,
+                                          res->seed2,
+                                          dtype,
+                                          res->crc);
+                if (res->crcstate == 0)
+                    res->crcstate = retval;
+                break;
+            case 1:
+                retval = core_bench_matrix(&(res->mat), dtype, res->crc);
+                if (res->crcmatrix == 0)
+                    res->crcmatrix = retval;
+                break;
+            default:
+                retval = data;
+                break;
+        }
+        res->crc = crcu16(retval, res->crc);
+        retval &= 0x007f;
+        *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
+        return retval;
+    }
+}
+
+/* { dg-final { scan-assembler "cm\.push" } } */
+