[v2,6/6] gdb/aarch64: Detect vector length changes when debugging remotely

Message ID 20221126020452.1686509-7-thiago.bauermann@linaro.org
State New
Headers
Series gdbserver improvements for AArch64 SVE support |

Commit Message

Thiago Jung Bauermann Nov. 26, 2022, 2:04 a.m. UTC
  If the remote target provides an expedited VG register, use it to update
the thread's gdbarch. This allows debugging programs that change their SVE
vector length during runtime.

This is accomplished by implementing the thread_architecture method in
remote_target, which returns the gdbarch corresponding to the expedited
registers provided by the last stop reply.

To allow changing the architecture based on the expedited registers, add a
new gdbarch method to allow arch-specific code to make the adjustment.
---
 gdb/aarch64-tdep.c        | 26 +++++++++++++++++++++++-
 gdb/arch-utils.c          |  9 +++++++++
 gdb/arch-utils.h          |  5 +++++
 gdb/gdbarch-components.py | 16 +++++++++++++++
 gdb/gdbarch-gen.h         | 11 ++++++++++
 gdb/gdbarch.c             | 22 ++++++++++++++++++++
 gdb/remote.c              | 42 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 130 insertions(+), 1 deletion(-)
  

Comments

Luis Machado Nov. 28, 2022, 1:27 p.m. UTC | #1
On 11/26/22 02:04, Thiago Jung Bauermann wrote:
> If the remote target provides an expedited VG register, use it to update
> the thread's gdbarch. This allows debugging programs that change their SVE
> vector length during runtime.

debugging programs -> debugging programs remotely
> 
> This is accomplished by implementing the thread_architecture method in
> remote_target, which returns the gdbarch corresponding to the expedited
> registers provided by the last stop reply.
> 
> To allow changing the architecture based on the expedited registers, add a
> new gdbarch method to allow arch-specific code to make the adjustment.
> ---
>   gdb/aarch64-tdep.c        | 26 +++++++++++++++++++++++-
>   gdb/arch-utils.c          |  9 +++++++++
>   gdb/arch-utils.h          |  5 +++++
>   gdb/gdbarch-components.py | 16 +++++++++++++++
>   gdb/gdbarch-gen.h         | 11 ++++++++++
>   gdb/gdbarch.c             | 22 ++++++++++++++++++++
>   gdb/remote.c              | 42 +++++++++++++++++++++++++++++++++++++++
>   7 files changed, 130 insertions(+), 1 deletion(-)
> 
> diff --git a/gdb/aarch64-tdep.c b/gdb/aarch64-tdep.c
> index ffc128d91f60..764f693aee8d 100644
> --- a/gdb/aarch64-tdep.c
> +++ b/gdb/aarch64-tdep.c
> @@ -3486,7 +3486,8 @@ aarch64_cannot_store_register (struct gdbarch *gdbarch, int regnum)
>   	  || regnum == AARCH64_PAUTH_CMASK_REGNUM (tdep->pauth_reg_base));
>   }
>   
> -/* Helper function for the "thread_architecture" target_ops method.
> +/* Helper function for both the "update_architecture" gdbarch method and the
> +   "thread_architecture" target_ops method.
>   
>      Returns a new gdbarch that is equivalent to the given gdbarch, but with SVE
>      registers reflecting the given vq value.  */
> @@ -3521,6 +3522,28 @@ aarch64_update_gdbarch (struct gdbarch *gdbarch, uint64_t vq)
>     return gdbarch_find_by_info (info);
>   }
>   
> +/* Implement the "update_architecture" gdbarch method.  */
> +
> +static struct gdbarch *
> +aarch64_update_architecture (struct gdbarch *gdbarch,
> +			     const std::vector<cached_reg_t> &regcache)
> +{
> +  /* Look for the VG register.  */
> +  auto it = find_if (regcache.cbegin (), regcache.cend (),
> +		     [] (const cached_reg_t &reg) {
> +		       return reg.num == AARCH64_SVE_VG_REGNUM;
> +		     });
> +
> +  /* No VG register was provided.  Don't change gdbarch.  */
> +  if (it == regcache.cend ())
> +    return gdbarch;
> +
> +  ULONGEST vg = extract_unsigned_integer (it->data, 8,
> +					  gdbarch_byte_order (gdbarch));
> +
> +  return aarch64_update_gdbarch (gdbarch, sve_vq_from_vg (vg));
> +}
> +
>   /* Implement the stack_frame_destroyed_p gdbarch method.  */
>   
>   static int
> @@ -3742,6 +3765,7 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
>     set_tdesc_pseudo_register_reggroup_p (gdbarch,
>   					aarch64_pseudo_register_reggroup_p);
>     set_gdbarch_cannot_store_register (gdbarch, aarch64_cannot_store_register);
> +  set_gdbarch_update_architecture (gdbarch, aarch64_update_architecture);
>   
>     /* ABI */
>     set_gdbarch_short_bit (gdbarch, 16);
> diff --git a/gdb/arch-utils.c b/gdb/arch-utils.c
> index 7b84daf046e2..2315aacb4bbe 100644
> --- a/gdb/arch-utils.c
> +++ b/gdb/arch-utils.c
> @@ -1090,6 +1090,15 @@ default_get_return_buf_addr (struct type *val_type, frame_info_ptr cur_frame)
>     return 0;
>   }
>   
> +/* See arch-utils.h.  */
> +
> +struct gdbarch *
> +default_update_architecture (struct gdbarch *gdbarch,
> +			     const std::vector<cached_reg_t> &regcache)
> +{
> +  return gdbarch;
> +}
> +
>   /* Non-zero if we want to trace architecture code.  */
>   
>   #ifndef GDBARCH_DEBUG
> diff --git a/gdb/arch-utils.h b/gdb/arch-utils.h
> index 46018a6fbbb6..e68a91f6753d 100644
> --- a/gdb/arch-utils.h
> +++ b/gdb/arch-utils.h
> @@ -304,4 +304,9 @@ extern void default_read_core_file_mappings
>   /* Default implementation of gdbarch default_get_return_buf_addr method.  */
>   extern CORE_ADDR default_get_return_buf_addr (struct type *val_typegdbarch,
>   					      frame_info_ptr cur_frame);
> +
> +/* Default implementation of gdbarch update_architecture method.  */
> +extern struct gdbarch *
> +default_update_architecture (struct gdbarch *gdbarch,
> +			     const std::vector<cached_reg_t> &regcache);
>   #endif /* ARCH_UTILS_H */
> diff --git a/gdb/gdbarch-components.py b/gdb/gdbarch-components.py
> index 9b688998a7bd..fbb32c5e5853 100644
> --- a/gdb/gdbarch-components.py
> +++ b/gdb/gdbarch-components.py
> @@ -2698,3 +2698,19 @@ Read core file mappings
>       predefault="default_read_core_file_mappings",
>       invalid=False,
>   )
> +
> +Method(
> +    comment="""
> +An architecture may change based on the current contents of its registers.
> +For instance, the length of the vector registers in AArch64's Scalable Vector
> +Extension is given by the contents of the VL pseudo-register.

VL -> VG

> +
> +This method allows an architecture to provide a new gdbarch corresponding to
> +the registers in the given regcache.
> +""",
> +    type="struct gdbarch *",
> +    name="update_architecture",
> +    params=[("const std::vector<cached_reg_t> &", "regcache")],
> +    predefault="default_update_architecture",
> +    invalid=False,
> +)
> diff --git a/gdb/gdbarch-gen.h b/gdb/gdbarch-gen.h
> index a663316df166..a65699e7241f 100644
> --- a/gdb/gdbarch-gen.h
> +++ b/gdb/gdbarch-gen.h
> @@ -1649,3 +1649,14 @@ extern void set_gdbarch_get_pc_address_flags (struct gdbarch *gdbarch, gdbarch_g
>   typedef void (gdbarch_read_core_file_mappings_ftype) (struct gdbarch *gdbarch, struct bfd *cbfd, read_core_file_mappings_pre_loop_ftype pre_loop_cb, read_core_file_mappings_loop_ftype loop_cb);
>   extern void gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, read_core_file_mappings_pre_loop_ftype pre_loop_cb, read_core_file_mappings_loop_ftype loop_cb);
>   extern void set_gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, gdbarch_read_core_file_mappings_ftype *read_core_file_mappings);
> +
> +/* An architecture may change based on the current contents of its registers.
> +   For instance, the length of the vector registers in AArch64's Scalable Vector
> +   Extension is given by the contents of the VL pseudo-register.
> +
> +   This method allows an architecture to provide a new gdbarch corresponding to
> +   the registers in the given regcache. */
> +
> +typedef struct gdbarch * (gdbarch_update_architecture_ftype) (struct gdbarch *gdbarch, const std::vector<cached_reg_t> &regcache);
> +extern struct gdbarch * gdbarch_update_architecture (struct gdbarch *gdbarch, const std::vector<cached_reg_t> &regcache);
> +extern void set_gdbarch_update_architecture (struct gdbarch *gdbarch, gdbarch_update_architecture_ftype *update_architecture);
> diff --git a/gdb/gdbarch.c b/gdb/gdbarch.c
> index 3227e9458801..1b0a2c70db4c 100644
> --- a/gdb/gdbarch.c
> +++ b/gdb/gdbarch.c
> @@ -255,6 +255,7 @@ struct gdbarch
>     gdbarch_type_align_ftype *type_align = default_type_align;
>     gdbarch_get_pc_address_flags_ftype *get_pc_address_flags = default_get_pc_address_flags;
>     gdbarch_read_core_file_mappings_ftype *read_core_file_mappings = default_read_core_file_mappings;
> +  gdbarch_update_architecture_ftype *update_architecture = default_update_architecture;
>   };
>   
>   /* Create a new ``struct gdbarch'' based on information provided by
> @@ -514,6 +515,7 @@ verify_gdbarch (struct gdbarch *gdbarch)
>     /* Skip verify of type_align, invalid_p == 0 */
>     /* Skip verify of get_pc_address_flags, invalid_p == 0 */
>     /* Skip verify of read_core_file_mappings, invalid_p == 0 */
> +  /* Skip verify of update_architecture, invalid_p == 0 */
>     if (!log.empty ())
>       internal_error (_("verify_gdbarch: the following are invalid ...%s"),
>   		    log.c_str ());
> @@ -1352,6 +1354,9 @@ gdbarch_dump (struct gdbarch *gdbarch, struct ui_file *file)
>     gdb_printf (file,
>                         "gdbarch_dump: read_core_file_mappings = <%s>\n",
>                         host_address_to_string (gdbarch->read_core_file_mappings));
> +  gdb_printf (file,
> +                      "gdbarch_dump: update_architecture = <%s>\n",
> +                      host_address_to_string (gdbarch->update_architecture));
>     if (gdbarch->dump_tdep != NULL)
>       gdbarch->dump_tdep (gdbarch, file);
>   }
> @@ -5314,3 +5319,20 @@ set_gdbarch_read_core_file_mappings (struct gdbarch *gdbarch,
>   {
>     gdbarch->read_core_file_mappings = read_core_file_mappings;
>   }
> +
> +struct gdbarch *
> +gdbarch_update_architecture (struct gdbarch *gdbarch, const std::vector<cached_reg_t> &regcache)
> +{
> +  gdb_assert (gdbarch != NULL);
> +  gdb_assert (gdbarch->update_architecture != NULL);
> +  if (gdbarch_debug >= 2)
> +    gdb_printf (gdb_stdlog, "gdbarch_update_architecture called\n");
> +  return gdbarch->update_architecture (gdbarch, regcache);
> +}
> +
> +void
> +set_gdbarch_update_architecture (struct gdbarch *gdbarch,
> +                                 gdbarch_update_architecture_ftype update_architecture)
> +{
> +  gdbarch->update_architecture = update_architecture;
> +}
> diff --git a/gdb/remote.c b/gdb/remote.c
> index 5118ecd0a312..eb60ed51585b 100644
> --- a/gdb/remote.c
> +++ b/gdb/remote.c
> @@ -491,6 +491,8 @@ class remote_target : public process_stratum_target
>     gdb::byte_vector thread_info_to_thread_handle (struct thread_info *tp)
>   						 override;
>   
> +  struct gdbarch *thread_architecture (ptid_t ptid) override;
> +
>     void stop (ptid_t) override;
>   
>     void interrupt () override;
> @@ -1150,6 +1152,10 @@ struct remote_thread_info : public private_thread_info
>        to stop for a watchpoint.  */
>     CORE_ADDR watch_data_address = 0;
>   
> +  /* The architecture corresponding to the expedited registers returned
> +     in the last stop reply.  */
> +  struct gdbarch *expedited_arch = nullptr;
> +
>     /* Get the thread's resume state.  */
>     enum resume_state get_resume_state () const
>     {
> @@ -1168,6 +1174,8 @@ struct remote_thread_info : public private_thread_info
>       m_resume_state = resume_state::RESUMED_PENDING_VCONT;
>       m_resumed_pending_vcont_info.step = step;
>       m_resumed_pending_vcont_info.sig = sig;
> +
> +    expedited_arch = nullptr;
>     }
>   
>     /* Get the information this thread's pending vCont-resumption.
> @@ -1185,6 +1193,8 @@ struct remote_thread_info : public private_thread_info
>     void set_resumed ()
>     {
>       m_resume_state = resume_state::RESUMED;
> +
> +    expedited_arch = nullptr;
>     }
>   
>   private:
> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>         /* Expedited registers.  */
>         if (!stop_reply->regcache.empty ())
>   	{
> +	  /* If GDB already knows about this thread, we can give the
> +	     architecture-specific code a chance to update the gdbarch based on
> +	     the expedited registers.  */
> +	  if (find_thread_ptid (this, ptid) != nullptr)
> +	    {
> +	      stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
> +							      stop_reply->regcache);
> +
> +	      /* Save stop_reply->arch so that it can be returned by the
> +		 thread_architecture method.  */
> +	      remote_thread_info *remote_thr = get_remote_thread_info (this,
> +								       ptid);
> +	      remote_thr->expedited_arch = stop_reply->arch;
> +	    }
> +
>   	  struct regcache *regcache
>   	    = get_thread_arch_regcache (this, ptid, stop_reply->arch);
>   
> @@ -14382,6 +14407,23 @@ remote_target::thread_info_to_thread_handle (struct thread_info *tp)
>     return priv->thread_handle;
>   }
>   
> +struct gdbarch *
> +remote_target::thread_architecture (ptid_t ptid)
> +{
> +  thread_info *thr = find_thread_ptid (this, ptid);
> +  remote_thread_info *remote_thr = nullptr;
> +
> +  if (thr != nullptr)
> +    remote_thr = get_remote_thread_info (thr);
> +
> +  if (remote_thr == nullptr || remote_thr->expedited_arch == nullptr)
> +    /* The default thread_architecture implementation is the one from
> +       process_stratum_target.  */
> +    return process_stratum_target::thread_architecture(ptid);
> +
> +  return remote_thr->expedited_arch;
> +}
> +
>   bool
>   remote_target::can_async_p ()
>   {

Just to confirm, as I don't exactly remember how this went. Is the case where we switch threads during remote debugging covered in case we have threads with different vector lengths?

I seem to recall we'd call thread_architecture in those cases, but I don't know how the expedited register comes into play for this case. Do we get expedited registers for each thread that has stopped?

Otherwise LGTM.

Reviewed-by: Luis Machado <luis.machado@arm.com>
  
Simon Marchi Nov. 28, 2022, 4:36 p.m. UTC | #2
On 11/25/22 21:04, Thiago Jung Bauermann via Gdb-patches wrote:
> If the remote target provides an expedited VG register, use it to update
> the thread's gdbarch. This allows debugging programs that change their SVE
> vector length during runtime.
> 
> This is accomplished by implementing the thread_architecture method in
> remote_target, which returns the gdbarch corresponding to the expedited
> registers provided by the last stop reply.
> 
> To allow changing the architecture based on the expedited registers, add a
> new gdbarch method to allow arch-specific code to make the adjustment.

I get this when applying, can you address that?

Applying: gdb/aarch64: Detect vector length changes when debugging remotely
.git/rebase-apply/patch:164: indent with spaces.
                       "gdbarch_dump: update_architecture = <%s>\n",
.git/rebase-apply/patch:165: indent with spaces.
                       host_address_to_string (gdbarch->update_architecture));
.git/rebase-apply/patch:186: indent with spaces.
                                  gdbarch_update_architecture_ftype update_architecture)
warning: 3 lines add whitespace errors.

I think the idea makes sense.  Short of adding a packet for "get thread
architecture" or extending the stop reply format to add a note about a
thread having a special tdesc, I don't see another way to do this.  We
can't read the vq register using 'g', because to interpret the 'g'
result, we need to know the full architecture.  We wouldn't know for
sure the offset of vq in the reply.  By using expedited registers, we
just need to make sure vq's register number is stable.  I guess that is
the case?


> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>        /* Expedited registers.  */
>        if (!stop_reply->regcache.empty ())
>  	{
> +	  /* If GDB already knows about this thread, we can give the
> +	     architecture-specific code a chance to update the gdbarch based on
> +	     the expedited registers.  */
> +	  if (find_thread_ptid (this, ptid) != nullptr)
> +	    {
> +	      stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
> +							      stop_reply->regcache);
> +
> +	      /* Save stop_reply->arch so that it can be returned by the
> +		 thread_architecture method.  */
> +	      remote_thread_info *remote_thr = get_remote_thread_info (this,
> +								       ptid);
> +	      remote_thr->expedited_arch = stop_reply->arch;
> +	    }
> +
>  	  struct regcache *regcache
>  	    = get_thread_arch_regcache (this, ptid, stop_reply->arch);


What happens with threads that are not yet known?  Imagine the process
starts with SVE == X, the main threads spawns a worker thread, the
worker thread updates its SVE to Y, and the worker thread hits a
breakpoint.  This is the first time we hear about that worker thread.
If we don't do a "gdbarch_update_architecture" for it and save it
somewhere, we'll never set the gdbarch with SVE == Y, will we?

Simon
  
Luis Machado Nov. 30, 2022, 8:43 a.m. UTC | #3
On 11/26/22 02:04, Thiago Jung Bauermann wrote:
> If the remote target provides an expedited VG register, use it to update
> the thread's gdbarch. This allows debugging programs that change their SVE
> vector length during runtime.
> 
> This is accomplished by implementing the thread_architecture method in
> remote_target, which returns the gdbarch corresponding to the expedited
> registers provided by the last stop reply.
> 
> To allow changing the architecture based on the expedited registers, add a
> new gdbarch method to allow arch-specific code to make the adjustment.
> ---
>   gdb/aarch64-tdep.c        | 26 +++++++++++++++++++++++-
>   gdb/arch-utils.c          |  9 +++++++++
>   gdb/arch-utils.h          |  5 +++++
>   gdb/gdbarch-components.py | 16 +++++++++++++++
>   gdb/gdbarch-gen.h         | 11 ++++++++++
>   gdb/gdbarch.c             | 22 ++++++++++++++++++++
>   gdb/remote.c              | 42 +++++++++++++++++++++++++++++++++++++++
>   7 files changed, 130 insertions(+), 1 deletion(-)
> 
> diff --git a/gdb/aarch64-tdep.c b/gdb/aarch64-tdep.c
> index ffc128d91f60..764f693aee8d 100644
> --- a/gdb/aarch64-tdep.c
> +++ b/gdb/aarch64-tdep.c
> @@ -3486,7 +3486,8 @@ aarch64_cannot_store_register (struct gdbarch *gdbarch, int regnum)
>   	  || regnum == AARCH64_PAUTH_CMASK_REGNUM (tdep->pauth_reg_base));
>   }
>   
> -/* Helper function for the "thread_architecture" target_ops method.
> +/* Helper function for both the "update_architecture" gdbarch method and the
> +   "thread_architecture" target_ops method.
>   
>      Returns a new gdbarch that is equivalent to the given gdbarch, but with SVE
>      registers reflecting the given vq value.  */
> @@ -3521,6 +3522,28 @@ aarch64_update_gdbarch (struct gdbarch *gdbarch, uint64_t vq)
>     return gdbarch_find_by_info (info);
>   }
>   
> +/* Implement the "update_architecture" gdbarch method.  */
> +
> +static struct gdbarch *
> +aarch64_update_architecture (struct gdbarch *gdbarch,
> +			     const std::vector<cached_reg_t> &regcache)
> +{
> +  /* Look for the VG register.  */
> +  auto it = find_if (regcache.cbegin (), regcache.cend (),
> +		     [] (const cached_reg_t &reg) {
> +		       return reg.num == AARCH64_SVE_VG_REGNUM;
> +		     });
> +
> +  /* No VG register was provided.  Don't change gdbarch.  */
> +  if (it == regcache.cend ())
> +    return gdbarch;
> +
> +  ULONGEST vg = extract_unsigned_integer (it->data, 8,
> +					  gdbarch_byte_order (gdbarch));
> +
> +  return aarch64_update_gdbarch (gdbarch, sve_vq_from_vg (vg));
> +}
> +
>   /* Implement the stack_frame_destroyed_p gdbarch method.  */
>   
>   static int
> @@ -3742,6 +3765,7 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
>     set_tdesc_pseudo_register_reggroup_p (gdbarch,
>   					aarch64_pseudo_register_reggroup_p);
>     set_gdbarch_cannot_store_register (gdbarch, aarch64_cannot_store_register);
> +  set_gdbarch_update_architecture (gdbarch, aarch64_update_architecture);
>   
>     /* ABI */
>     set_gdbarch_short_bit (gdbarch, 16);
> diff --git a/gdb/arch-utils.c b/gdb/arch-utils.c
> index 7b84daf046e2..2315aacb4bbe 100644
> --- a/gdb/arch-utils.c
> +++ b/gdb/arch-utils.c
> @@ -1090,6 +1090,15 @@ default_get_return_buf_addr (struct type *val_type, frame_info_ptr cur_frame)
>     return 0;
>   }
>   
> +/* See arch-utils.h.  */
> +
> +struct gdbarch *
> +default_update_architecture (struct gdbarch *gdbarch,
> +			     const std::vector<cached_reg_t> &regcache)
> +{
> +  return gdbarch;
> +}
> +
>   /* Non-zero if we want to trace architecture code.  */
>   
>   #ifndef GDBARCH_DEBUG
> diff --git a/gdb/arch-utils.h b/gdb/arch-utils.h
> index 46018a6fbbb6..e68a91f6753d 100644
> --- a/gdb/arch-utils.h
> +++ b/gdb/arch-utils.h
> @@ -304,4 +304,9 @@ extern void default_read_core_file_mappings
>   /* Default implementation of gdbarch default_get_return_buf_addr method.  */
>   extern CORE_ADDR default_get_return_buf_addr (struct type *val_typegdbarch,
>   					      frame_info_ptr cur_frame);
> +
> +/* Default implementation of gdbarch update_architecture method.  */
> +extern struct gdbarch *
> +default_update_architecture (struct gdbarch *gdbarch,
> +			     const std::vector<cached_reg_t> &regcache);
>   #endif /* ARCH_UTILS_H */
> diff --git a/gdb/gdbarch-components.py b/gdb/gdbarch-components.py
> index 9b688998a7bd..fbb32c5e5853 100644
> --- a/gdb/gdbarch-components.py
> +++ b/gdb/gdbarch-components.py
> @@ -2698,3 +2698,19 @@ Read core file mappings
>       predefault="default_read_core_file_mappings",
>       invalid=False,
>   )
> +
> +Method(
> +    comment="""
> +An architecture may change based on the current contents of its registers.
> +For instance, the length of the vector registers in AArch64's Scalable Vector
> +Extension is given by the contents of the VL pseudo-register.
> +
> +This method allows an architecture to provide a new gdbarch corresponding to
> +the registers in the given regcache.
> +""",
> +    type="struct gdbarch *",
> +    name="update_architecture",
> +    params=[("const std::vector<cached_reg_t> &", "regcache")],
> +    predefault="default_update_architecture",
> +    invalid=False,
> +)
> diff --git a/gdb/gdbarch-gen.h b/gdb/gdbarch-gen.h
> index a663316df166..a65699e7241f 100644
> --- a/gdb/gdbarch-gen.h
> +++ b/gdb/gdbarch-gen.h
> @@ -1649,3 +1649,14 @@ extern void set_gdbarch_get_pc_address_flags (struct gdbarch *gdbarch, gdbarch_g
>   typedef void (gdbarch_read_core_file_mappings_ftype) (struct gdbarch *gdbarch, struct bfd *cbfd, read_core_file_mappings_pre_loop_ftype pre_loop_cb, read_core_file_mappings_loop_ftype loop_cb);
>   extern void gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, read_core_file_mappings_pre_loop_ftype pre_loop_cb, read_core_file_mappings_loop_ftype loop_cb);
>   extern void set_gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, gdbarch_read_core_file_mappings_ftype *read_core_file_mappings);
> +
> +/* An architecture may change based on the current contents of its registers.
> +   For instance, the length of the vector registers in AArch64's Scalable Vector
> +   Extension is given by the contents of the VL pseudo-register.
> +
> +   This method allows an architecture to provide a new gdbarch corresponding to
> +   the registers in the given regcache. */
> +
> +typedef struct gdbarch * (gdbarch_update_architecture_ftype) (struct gdbarch *gdbarch, const std::vector<cached_reg_t> &regcache);
> +extern struct gdbarch * gdbarch_update_architecture (struct gdbarch *gdbarch, const std::vector<cached_reg_t> &regcache);
> +extern void set_gdbarch_update_architecture (struct gdbarch *gdbarch, gdbarch_update_architecture_ftype *update_architecture);
> diff --git a/gdb/gdbarch.c b/gdb/gdbarch.c
> index 3227e9458801..1b0a2c70db4c 100644
> --- a/gdb/gdbarch.c
> +++ b/gdb/gdbarch.c
> @@ -255,6 +255,7 @@ struct gdbarch
>     gdbarch_type_align_ftype *type_align = default_type_align;
>     gdbarch_get_pc_address_flags_ftype *get_pc_address_flags = default_get_pc_address_flags;
>     gdbarch_read_core_file_mappings_ftype *read_core_file_mappings = default_read_core_file_mappings;
> +  gdbarch_update_architecture_ftype *update_architecture = default_update_architecture;
>   };
>   
>   /* Create a new ``struct gdbarch'' based on information provided by
> @@ -514,6 +515,7 @@ verify_gdbarch (struct gdbarch *gdbarch)
>     /* Skip verify of type_align, invalid_p == 0 */
>     /* Skip verify of get_pc_address_flags, invalid_p == 0 */
>     /* Skip verify of read_core_file_mappings, invalid_p == 0 */
> +  /* Skip verify of update_architecture, invalid_p == 0 */
>     if (!log.empty ())
>       internal_error (_("verify_gdbarch: the following are invalid ...%s"),
>   		    log.c_str ());
> @@ -1352,6 +1354,9 @@ gdbarch_dump (struct gdbarch *gdbarch, struct ui_file *file)
>     gdb_printf (file,
>                         "gdbarch_dump: read_core_file_mappings = <%s>\n",
>                         host_address_to_string (gdbarch->read_core_file_mappings));
> +  gdb_printf (file,
> +                      "gdbarch_dump: update_architecture = <%s>\n",
> +                      host_address_to_string (gdbarch->update_architecture));
>     if (gdbarch->dump_tdep != NULL)
>       gdbarch->dump_tdep (gdbarch, file);
>   }
> @@ -5314,3 +5319,20 @@ set_gdbarch_read_core_file_mappings (struct gdbarch *gdbarch,
>   {
>     gdbarch->read_core_file_mappings = read_core_file_mappings;
>   }
> +
> +struct gdbarch *
> +gdbarch_update_architecture (struct gdbarch *gdbarch, const std::vector<cached_reg_t> &regcache)
> +{
> +  gdb_assert (gdbarch != NULL);
> +  gdb_assert (gdbarch->update_architecture != NULL);
> +  if (gdbarch_debug >= 2)
> +    gdb_printf (gdb_stdlog, "gdbarch_update_architecture called\n");
> +  return gdbarch->update_architecture (gdbarch, regcache);
> +}
> +
> +void
> +set_gdbarch_update_architecture (struct gdbarch *gdbarch,
> +                                 gdbarch_update_architecture_ftype update_architecture)
> +{
> +  gdbarch->update_architecture = update_architecture;
> +}
> diff --git a/gdb/remote.c b/gdb/remote.c
> index 5118ecd0a312..eb60ed51585b 100644
> --- a/gdb/remote.c
> +++ b/gdb/remote.c
> @@ -491,6 +491,8 @@ class remote_target : public process_stratum_target
>     gdb::byte_vector thread_info_to_thread_handle (struct thread_info *tp)
>   						 override;
>   
> +  struct gdbarch *thread_architecture (ptid_t ptid) override;
> +
>     void stop (ptid_t) override;
>   
>     void interrupt () override;
> @@ -1150,6 +1152,10 @@ struct remote_thread_info : public private_thread_info
>        to stop for a watchpoint.  */
>     CORE_ADDR watch_data_address = 0;
>   
> +  /* The architecture corresponding to the expedited registers returned
> +     in the last stop reply.  */
> +  struct gdbarch *expedited_arch = nullptr;
> +
>     /* Get the thread's resume state.  */
>     enum resume_state get_resume_state () const
>     {
> @@ -1168,6 +1174,8 @@ struct remote_thread_info : public private_thread_info
>       m_resume_state = resume_state::RESUMED_PENDING_VCONT;
>       m_resumed_pending_vcont_info.step = step;
>       m_resumed_pending_vcont_info.sig = sig;
> +
> +    expedited_arch = nullptr;
>     }
>   
>     /* Get the information this thread's pending vCont-resumption.
> @@ -1185,6 +1193,8 @@ struct remote_thread_info : public private_thread_info
>     void set_resumed ()
>     {
>       m_resume_state = resume_state::RESUMED;
> +
> +    expedited_arch = nullptr;
>     }
>   
>   private:
> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>         /* Expedited registers.  */
>         if (!stop_reply->regcache.empty ())
>   	{
> +	  /* If GDB already knows about this thread, we can give the
> +	     architecture-specific code a chance to update the gdbarch based on
> +	     the expedited registers.  */
> +	  if (find_thread_ptid (this, ptid) != nullptr)
> +	    {
> +	      stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
> +							      stop_reply->regcache);
> +
> +	      /* Save stop_reply->arch so that it can be returned by the
> +		 thread_architecture method.  */
> +	      remote_thread_info *remote_thr = get_remote_thread_info (this,
> +								       ptid);
> +	      remote_thr->expedited_arch = stop_reply->arch;
> +	    }
> +
>   	  struct regcache *regcache
>   	    = get_thread_arch_regcache (this, ptid, stop_reply->arch);
>   
> @@ -14382,6 +14407,23 @@ remote_target::thread_info_to_thread_handle (struct thread_info *tp)
>     return priv->thread_handle;
>   }
>   
> +struct gdbarch *
> +remote_target::thread_architecture (ptid_t ptid)
> +{
> +  thread_info *thr = find_thread_ptid (this, ptid);
> +  remote_thread_info *remote_thr = nullptr;
> +
> +  if (thr != nullptr)
> +    remote_thr = get_remote_thread_info (thr);
> +
> +  if (remote_thr == nullptr || remote_thr->expedited_arch == nullptr)
> +    /* The default thread_architecture implementation is the one from
> +       process_stratum_target.  */
> +    return process_stratum_target::thread_architecture(ptid);
> +
> +  return remote_thr->expedited_arch;
> +}
> +
>   bool
>   remote_target::can_async_p ()
>   {

Just recalled this. One deficiency of the current SVE implementation is that it doesn't have a proper way to hide the Z register when they don't have any meaningful state (SVE not active, so we have fpsimd state).

Given the VL will always be reported as > 0, even if there is no active SVE state, gdb will always attempt to show Z registers. And those are quite verbose.

In this sense, the implementations of the the thread_architecture methods for both native aarch64 and remote differ. While native aarch64's implementation is capable of detecting the lack of active SVE state, the
remote implementation can't. If gdbserver decided to drop the Z registers mid-execution (before the SVE state is not active), there wouldn't be vg for gdb to make a decision.

For example, there isn't a way to return a value of 0 for vg, as vg == 0 would mean no SVE feature. If there isn't a SVE feature, vg wouldn't exist.

Looking ahead, it would be the same for SME with its SVL. If we have to show both Z and ZA contexts, it will be quite a verbose exchange if we're listing all the possible registers.

I wonder if there is a better way to handle this that would still allow us to hide these scalable registers when there is no active state.
  
Thiago Jung Bauermann Dec. 1, 2022, 1:15 a.m. UTC | #4
Luis Machado <luis.machado@arm.com> writes:

> On 11/26/22 02:04, Thiago Jung Bauermann wrote:
>> If the remote target provides an expedited VG register, use it to update
>> the thread's gdbarch. This allows debugging programs that change their SVE
>> vector length during runtime.
>
> debugging programs -> debugging programs remotely

Indeed. Fixed.

>> This is accomplished by implementing the thread_architecture method in
>> remote_target, which returns the gdbarch corresponding to the expedited
>> registers provided by the last stop reply.
>> To allow changing the architecture based on the expedited registers, add a
>> new gdbarch method to allow arch-specific code to make the adjustment.

<snip>

>> diff --git a/gdb/gdbarch-components.py b/gdb/gdbarch-components.py
>> index 9b688998a7bd..fbb32c5e5853 100644
>> --- a/gdb/gdbarch-components.py
>> +++ b/gdb/gdbarch-components.py
>> @@ -2698,3 +2698,19 @@ Read core file mappings
>>       predefault="default_read_core_file_mappings",
>>       invalid=False,
>>   )
>> +
>> +Method(
>> +    comment="""
>> +An architecture may change based on the current contents of its registers.
>> +For instance, the length of the vector registers in AArch64's Scalable Vector
>> +Extension is given by the contents of the VL pseudo-register.
>
> VL -> VG

Ok, changed.

>> +This method allows an architecture to provide a new gdbarch corresponding to
>> +the registers in the given regcache.
>> +""",
>> +    type="struct gdbarch *",
>> +    name="update_architecture",
>> +    params=[("const std::vector<cached_reg_t> &", "regcache")],
>> +    predefault="default_update_architecture",
>> +    invalid=False,
>> +)

<snip>

>>   @@ -14382,6 +14407,23 @@ remote_target::thread_info_to_thread_handle (struct
>> thread_info *tp)
>>     return priv->thread_handle;
>>   }
>>   +struct gdbarch *
>> +remote_target::thread_architecture (ptid_t ptid)
>> +{
>> +  thread_info *thr = find_thread_ptid (this, ptid);
>> +  remote_thread_info *remote_thr = nullptr;
>> +
>> +  if (thr != nullptr)
>> +    remote_thr = get_remote_thread_info (thr);
>> +
>> +  if (remote_thr == nullptr || remote_thr->expedited_arch == nullptr)
>> +    /* The default thread_architecture implementation is the one from
>> +       process_stratum_target.  */
>> +    return process_stratum_target::thread_architecture(ptid);
>> +
>> +  return remote_thr->expedited_arch;
>> +}
>> +
>>   bool
>>   remote_target::can_async_p ()
>>   {
>
> Just to confirm, as I don't exactly remember how this went. Is the case where we switch
> threads during remote debugging covered in case we have threads with different vector
> lengths?
>
> I seem to recall we'd call thread_architecture in those cases, but I don't know how the
> expedited register comes into play for this case. Do we get expedited registers for each
> thread that has stopped?

I thought this was fine because my limited understanding of the remote
protocol was that whenever a thread stops, a T stop reply is sent with
the expedited registers.

However, when I tried testing this scenario it didn't work: I get a
“Truncated register 51 in remote 'g' packet” error when the main thread
hits a breakpoint and I try to switch to another thread with a different
vector length. It turns out that gdbserver only sends the T stop reply
for the thread that hit the breakpoint.

Thanks for spotting this problem. I will add a testcase to exercise this
scenario and see if I can come up with a fix.

> Otherwise LGTM.
>
> Reviewed-by: Luis Machado <luis.machado@arm.com>

Thanks!
  
Thiago Jung Bauermann Dec. 1, 2022, 3:16 a.m. UTC | #5
Simon Marchi <simark@simark.ca> writes:

> On 11/25/22 21:04, Thiago Jung Bauermann via Gdb-patches wrote:
>> If the remote target provides an expedited VG register, use it to update
>> the thread's gdbarch. This allows debugging programs that change their SVE
>> vector length during runtime.
>> 
>> This is accomplished by implementing the thread_architecture method in
>> remote_target, which returns the gdbarch corresponding to the expedited
>> registers provided by the last stop reply.
>> 
>> To allow changing the architecture based on the expedited registers, add a
>> new gdbarch method to allow arch-specific code to make the adjustment.
>
> I get this when applying, can you address that?
>
> Applying: gdb/aarch64: Detect vector length changes when debugging remotely
> .git/rebase-apply/patch:164: indent with spaces.
>                        "gdbarch_dump: update_architecture = <%s>\n",
> .git/rebase-apply/patch:165: indent with spaces.
>                        host_address_to_string (gdbarch->update_architecture));
> .git/rebase-apply/patch:186: indent with spaces.
>                                   gdbarch_update_architecture_ftype update_architecture)
> warning: 3 lines add whitespace errors.

This is because gdbarch.py generates these lines in gdbarch.c with
spaces-only indentation. I will send a separate patch to fix it.

> I think the idea makes sense.  Short of adding a packet for "get thread
> architecture" or extending the stop reply format to add a note about a
> thread having a special tdesc, I don't see another way to do this.  We

I guess we could define such a packet, but this approach is indeed
simpler.

> can't read the vq register using 'g', because to interpret the 'g'
> result, we need to know the full architecture.  We wouldn't know for
> sure the offset of vq in the reply.  By using expedited registers, we
> just need to make sure vq's register number is stable.  I guess that is
> the case?

The register number is determined by the function
aarch64_create_target_description. If SVE is supported, VG will always
have the register number 85. If not, then AFAIU the register number will
be unused (since the PAuth, MTE and TLS features only have a few
registers). But it could potentially be used by another feature in the
future...

Do we have to reserve the number 85 for the VG pseudo-register?

>> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>>        /* Expedited registers.  */
>>        if (!stop_reply->regcache.empty ())
>>  	{
>> +	  /* If GDB already knows about this thread, we can give the
>> +	     architecture-specific code a chance to update the gdbarch based on
>> +	     the expedited registers.  */
>> +	  if (find_thread_ptid (this, ptid) != nullptr)
>> +	    {
>> +	      stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
>> +							      stop_reply->regcache);
>> +
>> +	      /* Save stop_reply->arch so that it can be returned by the
>> +		 thread_architecture method.  */
>> +	      remote_thread_info *remote_thr = get_remote_thread_info (this,
>> +								       ptid);
>> +	      remote_thr->expedited_arch = stop_reply->arch;
>> +	    }
>> +
>>  	  struct regcache *regcache
>>  	    = get_thread_arch_regcache (this, ptid, stop_reply->arch);
>
>
> What happens with threads that are not yet known?  Imagine the process
> starts with SVE == X, the main threads spawns a worker thread, the
> worker thread updates its SVE to Y, and the worker thread hits a
> breakpoint.  This is the first time we hear about that worker thread.

I tested this scenario and... it sort of works, but not quite. When the
unknown thread hits the breakpoint, gdbserver sends a T stop reply
packet with the new VG value in the expedited registers. It will be the
first time we hear about that worker thread, but at the same time we
will also learn about the value of its VG pseudo-register. So GDB
reports the breakpoint hit event and gets to the prompt as expected. You
can even print the new value of the VG pseudo-register, or any other
expedited register such as PC or SP.

It's only when you try to print the value of a non-expedited register
that you get an error indicating that GDB and gdbserver don't agree on
the size of the SVE registers (“Truncated register 51 in remote 'g'
packet”).

> If we don't do a "gdbarch_update_architecture" for it and save it
> somewhere, we'll never set the gdbarch with SVE == Y, will we?

We do that in remote_target::process_stop_reply, and I was under the
impression that it happened early enough for GDB to have the correct
gdbarch available when it needs it, but unfortunately that's not the
case.

I will add a testcase exercising this issue and see if I can find a
solution. Thank you for spotting it.
  
Luis Machado Dec. 1, 2022, 8:32 a.m. UTC | #6
On 12/1/22 03:16, Thiago Jung Bauermann wrote:
> 
> Simon Marchi <simark@simark.ca> writes:
> 
>> On 11/25/22 21:04, Thiago Jung Bauermann via Gdb-patches wrote:
>>> If the remote target provides an expedited VG register, use it to update
>>> the thread's gdbarch. This allows debugging programs that change their SVE
>>> vector length during runtime.
>>>
>>> This is accomplished by implementing the thread_architecture method in
>>> remote_target, which returns the gdbarch corresponding to the expedited
>>> registers provided by the last stop reply.
>>>
>>> To allow changing the architecture based on the expedited registers, add a
>>> new gdbarch method to allow arch-specific code to make the adjustment.
>>
>> I get this when applying, can you address that?
>>
>> Applying: gdb/aarch64: Detect vector length changes when debugging remotely
>> .git/rebase-apply/patch:164: indent with spaces.
>>                         "gdbarch_dump: update_architecture = <%s>\n",
>> .git/rebase-apply/patch:165: indent with spaces.
>>                         host_address_to_string (gdbarch->update_architecture));
>> .git/rebase-apply/patch:186: indent with spaces.
>>                                    gdbarch_update_architecture_ftype update_architecture)
>> warning: 3 lines add whitespace errors.
> 
> This is because gdbarch.py generates these lines in gdbarch.c with
> spaces-only indentation. I will send a separate patch to fix it.
> 
>> I think the idea makes sense.  Short of adding a packet for "get thread
>> architecture" or extending the stop reply format to add a note about a
>> thread having a special tdesc, I don't see another way to do this.  We
> 
> I guess we could define such a packet, but this approach is indeed
> simpler.
> 

Just a FYI, we now have a feature hash for determining the target features for aarch64. It is a 64-bit
number and doesn't rely on positioning of registers.

This situation with vg/SVE will also apply to svg/SME in the future.

>> can't read the vq register using 'g', because to interpret the 'g'
>> result, we need to know the full architecture.  We wouldn't know for
>> sure the offset of vq in the reply.  By using expedited registers, we
>> just need to make sure vq's register number is stable.  I guess that is
>> the case?
> 
> The register number is determined by the function
> aarch64_create_target_description. If SVE is supported, VG will always
> have the register number 85. If not, then AFAIU the register number will
> be unused (since the PAuth, MTE and TLS features only have a few
> registers). But it could potentially be used by another feature in the
> future...
> 
> Do we have to reserve the number 85 for the VG pseudo-register?
> 
>>> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>>>         /* Expedited registers.  */
>>>         if (!stop_reply->regcache.empty ())
>>>   	{
>>> +	  /* If GDB already knows about this thread, we can give the
>>> +	     architecture-specific code a chance to update the gdbarch based on
>>> +	     the expedited registers.  */
>>> +	  if (find_thread_ptid (this, ptid) != nullptr)
>>> +	    {
>>> +	      stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
>>> +							      stop_reply->regcache);
>>> +
>>> +	      /* Save stop_reply->arch so that it can be returned by the
>>> +		 thread_architecture method.  */
>>> +	      remote_thread_info *remote_thr = get_remote_thread_info (this,
>>> +								       ptid);
>>> +	      remote_thr->expedited_arch = stop_reply->arch;
>>> +	    }
>>> +
>>>   	  struct regcache *regcache
>>>   	    = get_thread_arch_regcache (this, ptid, stop_reply->arch);
>>
>>
>> What happens with threads that are not yet known?  Imagine the process
>> starts with SVE == X, the main threads spawns a worker thread, the
>> worker thread updates its SVE to Y, and the worker thread hits a
>> breakpoint.  This is the first time we hear about that worker thread.
> 
> I tested this scenario and... it sort of works, but not quite. When the
> unknown thread hits the breakpoint, gdbserver sends a T stop reply
> packet with the new VG value in the expedited registers. It will be the
> first time we hear about that worker thread, but at the same time we
> will also learn about the value of its VG pseudo-register. So GDB
> reports the breakpoint hit event and gets to the prompt as expected. You
> can even print the new value of the VG pseudo-register, or any other
> expedited register such as PC or SP.
> 
> It's only when you try to print the value of a non-expedited register
> that you get an error indicating that GDB and gdbserver don't agree on
> the size of the SVE registers (“Truncated register 51 in remote 'g'
> packet”).
> 
>> If we don't do a "gdbarch_update_architecture" for it and save it
>> somewhere, we'll never set the gdbarch with SVE == Y, will we?
> 
> We do that in remote_target::process_stop_reply, and I was under the
> impression that it happened early enough for GDB to have the correct
> gdbarch available when it needs it, but unfortunately that's not the
> case.
> 
> I will add a testcase exercising this issue and see if I can find a
> solution. Thank you for spotting it.
>
  
Simon Marchi Dec. 1, 2022, 4:16 p.m. UTC | #7
On 11/30/22 22:16, Thiago Jung Bauermann via Gdb-patches wrote:
> 
> Simon Marchi <simark@simark.ca> writes:
> 
>> On 11/25/22 21:04, Thiago Jung Bauermann via Gdb-patches wrote:
>>> If the remote target provides an expedited VG register, use it to update
>>> the thread's gdbarch. This allows debugging programs that change their SVE
>>> vector length during runtime.
>>>
>>> This is accomplished by implementing the thread_architecture method in
>>> remote_target, which returns the gdbarch corresponding to the expedited
>>> registers provided by the last stop reply.
>>>
>>> To allow changing the architecture based on the expedited registers, add a
>>> new gdbarch method to allow arch-specific code to make the adjustment.
>>
>> I get this when applying, can you address that?
>>
>> Applying: gdb/aarch64: Detect vector length changes when debugging remotely
>> .git/rebase-apply/patch:164: indent with spaces.
>>                        "gdbarch_dump: update_architecture = <%s>\n",
>> .git/rebase-apply/patch:165: indent with spaces.
>>                        host_address_to_string (gdbarch->update_architecture));
>> .git/rebase-apply/patch:186: indent with spaces.
>>                                   gdbarch_update_architecture_ftype update_architecture)
>> warning: 3 lines add whitespace errors.
> 
> This is because gdbarch.py generates these lines in gdbarch.c with
> spaces-only indentation. I will send a separate patch to fix it.

Ah, I did not realize it was in a generated file.  Well, if you can fix
the generator, that's nice, but otherwise not a big deal.

>> can't read the vq register using 'g', because to interpret the 'g'
>> result, we need to know the full architecture.  We wouldn't know for
>> sure the offset of vq in the reply.  By using expedited registers, we
>> just need to make sure vq's register number is stable.  I guess that is
>> the case?
> 
> The register number is determined by the function
> aarch64_create_target_description. If SVE is supported, VG will always
> have the register number 85. If not, then AFAIU the register number will
> be unused (since the PAuth, MTE and TLS features only have a few
> registers). But it could potentially be used by another feature in the
> future...
> 
> Do we have to reserve the number 85 for the VG pseudo-register?

Given that aarch64_update_architecture looks for register 85
(AARCH64_SVE_VG_REGNUM) in the expedited registers without prior
knowledge of whether there is really a VG register, I can imagine bad
things happening if another unrelated register is using number 85.

At this point, do we have access to the inferior's target
description?  Can we maybe check if register 85 is indeed VG, and bail
out otherwise?

> 
>>> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>>>        /* Expedited registers.  */
>>>        if (!stop_reply->regcache.empty ())
>>>  	{
>>> +	  /* If GDB already knows about this thread, we can give the
>>> +	     architecture-specific code a chance to update the gdbarch based on
>>> +	     the expedited registers.  */
>>> +	  if (find_thread_ptid (this, ptid) != nullptr)
>>> +	    {
>>> +	      stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
>>> +							      stop_reply->regcache);
>>> +
>>> +	      /* Save stop_reply->arch so that it can be returned by the
>>> +		 thread_architecture method.  */
>>> +	      remote_thread_info *remote_thr = get_remote_thread_info (this,
>>> +								       ptid);
>>> +	      remote_thr->expedited_arch = stop_reply->arch;
>>> +	    }
>>> +
>>>  	  struct regcache *regcache
>>>  	    = get_thread_arch_regcache (this, ptid, stop_reply->arch);
>>
>>
>> What happens with threads that are not yet known?  Imagine the process
>> starts with SVE == X, the main threads spawns a worker thread, the
>> worker thread updates its SVE to Y, and the worker thread hits a
>> breakpoint.  This is the first time we hear about that worker thread.
> 
> I tested this scenario and... it sort of works, but not quite. When the
> unknown thread hits the breakpoint, gdbserver sends a T stop reply
> packet with the new VG value in the expedited registers. It will be the
> first time we hear about that worker thread, but at the same time we
> will also learn about the value of its VG pseudo-register. So GDB
> reports the breakpoint hit event and gets to the prompt as expected. You
> can even print the new value of the VG pseudo-register, or any other
> expedited register such as PC or SP.
> 
> It's only when you try to print the value of a non-expedited register
> that you get an error indicating that GDB and gdbserver don't agree on
> the size of the SVE registers (“Truncated register 51 in remote 'g'
> packet”).
> 
>> If we don't do a "gdbarch_update_architecture" for it and save it
>> somewhere, we'll never set the gdbarch with SVE == Y, will we?
> 
> We do that in remote_target::process_stop_reply, and I was under the
> impression that it happened early enough for GDB to have the correct
> gdbarch available when it needs it, but unfortunately that's not the
> case.
> 
> I will add a testcase exercising this issue and see if I can find a
> solution. Thank you for spotting it.

I think that the threads (if new) are added just after that, in the
remote_notice_new_inferior call.  Maybe you can reorder things to do
remote_notice_new_inferior first and your code + the
get_thread_arch_regcache call after.

Simon
  
Thiago Jung Bauermann Dec. 5, 2022, 10:37 p.m. UTC | #8
Luis Machado <luis.machado@arm.com> writes:

> On 11/26/22 02:04, Thiago Jung Bauermann wrote:
>> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>>         /* Expedited registers.  */
>>         if (!stop_reply->regcache.empty ())
>>   	{
>> +	  /* If GDB already knows about this thread, we can give the
>> +	     architecture-specific code a chance to update the gdbarch based on
>> +	     the expedited registers.  */
>> +	  if (find_thread_ptid (this, ptid) != nullptr)
>> +	    {
>> +	      stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
>> +							      stop_reply->regcache);
>> +
>> +	      /* Save stop_reply->arch so that it can be returned by the
>> +		 thread_architecture method.  */
>> +	      remote_thread_info *remote_thr = get_remote_thread_info (this,
>> +								       ptid);
>> +	      remote_thr->expedited_arch = stop_reply->arch;
>> +	    }
>> +
>>   	  struct regcache *regcache
>>   	    = get_thread_arch_regcache (this, ptid, stop_reply->arch);
>>   @@ -14382,6 +14407,23 @@ remote_target::thread_info_to_thread_handle (struct
>> thread_info *tp)
>>     return priv->thread_handle;
>>   }
>>   +struct gdbarch *
>> +remote_target::thread_architecture (ptid_t ptid)
>> +{
>> +  thread_info *thr = find_thread_ptid (this, ptid);
>> +  remote_thread_info *remote_thr = nullptr;
>> +
>> +  if (thr != nullptr)
>> +    remote_thr = get_remote_thread_info (thr);
>> +
>> +  if (remote_thr == nullptr || remote_thr->expedited_arch == nullptr)
>> +    /* The default thread_architecture implementation is the one from
>> +       process_stratum_target.  */
>> +    return process_stratum_target::thread_architecture(ptid);
>> +
>> +  return remote_thr->expedited_arch;
>> +}
>> +
>>   bool
>>   remote_target::can_async_p ()
>>   {
>
> Just recalled this. One deficiency of the current SVE implementation
> is that it doesn't have a proper way to hide the Z register when they
> don't have any meaningful state (SVE not active, so we have fpsimd
> state).

I see the SVE_HEADER_FLAG_SVE being checked/set in
aarch64_linux_supply_sve_regset and aarch64_linux_collect_sve_regset
(though in the latter it is set unconditionally), and also HAS_SVE_STATE
being used in aarch64_sve_regs_copy_to_reg_buf and
aarch64_sve_regs_copy_from_reg_buf.

But I wasn't able to find similar logic regarding hiding the Z
registers. Do you have any pointers?

> Given the VL will always be reported as > 0, even if there is no
> active SVE state, gdb will always attempt to show Z registers. And
> those are quite verbose.

The VG pseudo-register is defined with an int type, so we could return a
negative value to indicate that the Z registers are unused (and the
module of the value would still be the vector granule). Would that
be ok?

> In this sense, the implementations of the the thread_architecture
> methods for both native aarch64 and remote differ. While native
> aarch64's implementation is capable of detecting the lack of active
> SVE state, the remote implementation can't. If gdbserver decided to
> drop the Z registers mid-execution (before the SVE state is not
> active), there wouldn't be vg for gdb to make a decision.

Looking at aarch64_linux_nat_target::thread_architecture my impression
is that its result depends only on aarch64_sve_get_vq, so I don't see
how it's different from the remote implementation.

> I wonder if there is a better way to handle this that would still
> allow us to hide these scalable registers when there is no active
> state.

I guess it depends on whether you would consider using negative VG
values as better. :-)
  
Luis Machado Dec. 7, 2022, 5:05 p.m. UTC | #9
On 12/5/22 22:37, Thiago Jung Bauermann wrote:
> 
> Luis Machado <luis.machado@arm.com> writes:
> 
>> On 11/26/22 02:04, Thiago Jung Bauermann wrote:
>>> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>>>          /* Expedited registers.  */
>>>          if (!stop_reply->regcache.empty ())
>>>    	{
>>> +	  /* If GDB already knows about this thread, we can give the
>>> +	     architecture-specific code a chance to update the gdbarch based on
>>> +	     the expedited registers.  */
>>> +	  if (find_thread_ptid (this, ptid) != nullptr)
>>> +	    {
>>> +	      stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
>>> +							      stop_reply->regcache);
>>> +
>>> +	      /* Save stop_reply->arch so that it can be returned by the
>>> +		 thread_architecture method.  */
>>> +	      remote_thread_info *remote_thr = get_remote_thread_info (this,
>>> +								       ptid);
>>> +	      remote_thr->expedited_arch = stop_reply->arch;
>>> +	    }
>>> +
>>>    	  struct regcache *regcache
>>>    	    = get_thread_arch_regcache (this, ptid, stop_reply->arch);
>>>    @@ -14382,6 +14407,23 @@ remote_target::thread_info_to_thread_handle (struct
>>> thread_info *tp)
>>>      return priv->thread_handle;
>>>    }
>>>    +struct gdbarch *
>>> +remote_target::thread_architecture (ptid_t ptid)
>>> +{
>>> +  thread_info *thr = find_thread_ptid (this, ptid);
>>> +  remote_thread_info *remote_thr = nullptr;
>>> +
>>> +  if (thr != nullptr)
>>> +    remote_thr = get_remote_thread_info (thr);
>>> +
>>> +  if (remote_thr == nullptr || remote_thr->expedited_arch == nullptr)
>>> +    /* The default thread_architecture implementation is the one from
>>> +       process_stratum_target.  */
>>> +    return process_stratum_target::thread_architecture(ptid);
>>> +
>>> +  return remote_thr->expedited_arch;
>>> +}
>>> +
>>>    bool
>>>    remote_target::can_async_p ()
>>>    {
>>
>> Just recalled this. One deficiency of the current SVE implementation
>> is that it doesn't have a proper way to hide the Z register when they
>> don't have any meaningful state (SVE not active, so we have fpsimd
>> state).
> 
> I see the SVE_HEADER_FLAG_SVE being checked/set in
> aarch64_linux_supply_sve_regset and aarch64_linux_collect_sve_regset
> (though in the latter it is set unconditionally), and also HAS_SVE_STATE
> being used in aarch64_sve_regs_copy_to_reg_buf and
> aarch64_sve_regs_copy_from_reg_buf.
> 
> But I wasn't able to find similar logic regarding hiding the Z
> registers. Do you have any pointers?
> 

There isn't logic to do that at present. This is something I'm pondering for SME, and would possibly apply to
SVE as well. But it may be more complicated than useful.

>> Given the VL will always be reported as > 0, even if there is no
>> active SVE state, gdb will always attempt to show Z registers. And
>> those are quite verbose.
> 
> The VG pseudo-register is defined with an int type, so we could return a
> negative value to indicate that the Z registers are unused (and the
> module of the value would still be the vector granule). Would that
> be ok?

I think a value of 0 would be fine for this purpose but...

> 
>> In this sense, the implementations of the the thread_architecture
>> methods for both native aarch64 and remote differ. While native
>> aarch64's implementation is capable of detecting the lack of active
>> SVE state, the remote implementation can't. If gdbserver decided to
>> drop the Z registers mid-execution (before the SVE state is not
>> active), there wouldn't be vg for gdb to make a decision.
> 
> Looking at aarch64_linux_nat_target::thread_architecture my impression
> is that its result depends only on aarch64_sve_get_vq, so I don't see
> how it's different from the remote implementation.

... Right, but for native debugging we actually call ptrace to extract the vector length. That is completely independent from
the XML target description or the available register sets.

In the case of remote debugging, the expedited registers are returned based on an existing register description.

Take, for example, the following:

- SVE state is enabled and we have the vg register.
- Program updates the SVE vector length and, as a result, we no longer have an active SVE state (reverted to FPSIMD state)
- GDB drops the sve feature from its target description.
- Program executes a sve instruction and SVE state is made active.
- gdbserver tries to tell gdb what the value of vg is, but gdb doesn't know anything about vg at this point, because it has dropped the sve feature previously.

I think this is the downside of using expedited registers to communicate the architecture changes. For this case, returning a full XML would make it easy for gdb to
figure things out.

But, going back to hiding sve/sme registers when they're not in use, I'm not yet sure that is something we should pursue.

I'd say if you manage to get things working with the expedited registers (for the problematic cases Simon and I pointed out), that's fine.

> 
>> I wonder if there is a better way to handle this that would still
>> allow us to hide these scalable registers when there is no active
>> state.
> 
> I guess it depends on whether you would consider using negative VG
> values as better. :-)
>
  
Simon Marchi Dec. 7, 2022, 7:25 p.m. UTC | #10
On 12/7/22 12:05, Luis Machado via Gdb-patches wrote:
> On 12/5/22 22:37, Thiago Jung Bauermann wrote:
>>
>> Luis Machado <luis.machado@arm.com> writes:
>>
>>> On 11/26/22 02:04, Thiago Jung Bauermann wrote:
>>>> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>>>>          /* Expedited registers.  */
>>>>          if (!stop_reply->regcache.empty ())
>>>>        {
>>>> +      /* If GDB already knows about this thread, we can give the
>>>> +         architecture-specific code a chance to update the gdbarch based on
>>>> +         the expedited registers.  */
>>>> +      if (find_thread_ptid (this, ptid) != nullptr)
>>>> +        {
>>>> +          stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
>>>> +                                  stop_reply->regcache);
>>>> +
>>>> +          /* Save stop_reply->arch so that it can be returned by the
>>>> +         thread_architecture method.  */
>>>> +          remote_thread_info *remote_thr = get_remote_thread_info (this,
>>>> +                                       ptid);
>>>> +          remote_thr->expedited_arch = stop_reply->arch;
>>>> +        }
>>>> +
>>>>          struct regcache *regcache
>>>>            = get_thread_arch_regcache (this, ptid, stop_reply->arch);
>>>>    @@ -14382,6 +14407,23 @@ remote_target::thread_info_to_thread_handle (struct
>>>> thread_info *tp)
>>>>      return priv->thread_handle;
>>>>    }
>>>>    +struct gdbarch *
>>>> +remote_target::thread_architecture (ptid_t ptid)
>>>> +{
>>>> +  thread_info *thr = find_thread_ptid (this, ptid);
>>>> +  remote_thread_info *remote_thr = nullptr;
>>>> +
>>>> +  if (thr != nullptr)
>>>> +    remote_thr = get_remote_thread_info (thr);
>>>> +
>>>> +  if (remote_thr == nullptr || remote_thr->expedited_arch == nullptr)
>>>> +    /* The default thread_architecture implementation is the one from
>>>> +       process_stratum_target.  */
>>>> +    return process_stratum_target::thread_architecture(ptid);
>>>> +
>>>> +  return remote_thr->expedited_arch;
>>>> +}
>>>> +
>>>>    bool
>>>>    remote_target::can_async_p ()
>>>>    {
>>>
>>> Just recalled this. One deficiency of the current SVE implementation
>>> is that it doesn't have a proper way to hide the Z register when they
>>> don't have any meaningful state (SVE not active, so we have fpsimd
>>> state).
>>
>> I see the SVE_HEADER_FLAG_SVE being checked/set in
>> aarch64_linux_supply_sve_regset and aarch64_linux_collect_sve_regset
>> (though in the latter it is set unconditionally), and also HAS_SVE_STATE
>> being used in aarch64_sve_regs_copy_to_reg_buf and
>> aarch64_sve_regs_copy_from_reg_buf.
>>
>> But I wasn't able to find similar logic regarding hiding the Z
>> registers. Do you have any pointers?
>>
> 
> There isn't logic to do that at present. This is something I'm pondering for SME, and would possibly apply to
> SVE as well. But it may be more complicated than useful.
> 
>>> Given the VL will always be reported as > 0, even if there is no
>>> active SVE state, gdb will always attempt to show Z registers. And
>>> those are quite verbose.
>>
>> The VG pseudo-register is defined with an int type, so we could return a
>> negative value to indicate that the Z registers are unused (and the
>> module of the value would still be the vector granule). Would that
>> be ok?
> 
> I think a value of 0 would be fine for this purpose but...
> 
>>
>>> In this sense, the implementations of the the thread_architecture
>>> methods for both native aarch64 and remote differ. While native
>>> aarch64's implementation is capable of detecting the lack of active
>>> SVE state, the remote implementation can't. If gdbserver decided to
>>> drop the Z registers mid-execution (before the SVE state is not
>>> active), there wouldn't be vg for gdb to make a decision.
>>
>> Looking at aarch64_linux_nat_target::thread_architecture my impression
>> is that its result depends only on aarch64_sve_get_vq, so I don't see
>> how it's different from the remote implementation.
> 
> ... Right, but for native debugging we actually call ptrace to extract the vector length. That is completely independent from
> the XML target description or the available register sets.
> 
> In the case of remote debugging, the expedited registers are returned based on an existing register description.
> 
> Take, for example, the following:
> 
> - SVE state is enabled and we have the vg register.
> - Program updates the SVE vector length and, as a result, we no longer have an active SVE state (reverted to FPSIMD state)
> - GDB drops the sve feature from its target description.
> - Program executes a sve instruction and SVE state is made active.
> - gdbserver tries to tell gdb what the value of vg is, but gdb doesn't know anything about vg at this point, because it has dropped the sve feature previously.
> 
> I think this is the downside of using expedited registers to communicate the architecture changes. For this case, returning a full XML would make it easy for gdb to
> figure things out.
> 
> But, going back to hiding sve/sme registers when they're not in use, I'm not yet sure that is something we should pursue.
> 
> I'd say if you manage to get things working with the expedited registers (for the problematic cases Simon and I pointed out), that's fine.

Reading this, I thought of another potential problem with using
expedited registers to communicate the architectures change.  When using
the non-stop version of the remote protocol, we will have a stop reply
for each thread that stops, so that's fine, we can infer each thread's
tdesc.  But with the all-stop variant of the protocol, you only get a
stop reply for one thread, for a given stop.

For instance, imagine you have two threads, thread 1 hits a breakpoint.
GDB gets a stop reply for thread 1, and thread 2 is just considered stop
because we're using the all-stop remote protocol.  If we need to read
thread 2's registers, we would need to know its tdesc.  Its vg may have
changed since last time, or it might be the first time we learned about
it.  In that case, I don't think we have a way out of adding some mean
for gdbserver to tell gdb which tdesc applies to that thread.

If it's the case, that we need to extend the RSP to allow fetching
per-thread tdesc, here's the idea I had in mind for a while, to avoid
adding too much overhead.  Stop replies and the qXfer:threads:read reply
could include a per-thread tdesc identifier.  That tdesc identifier
would be something short, either an incrementing number, or some kind of
hash of the tdesc.  It would be something opaque chosen by the stub.  A
new packet would be introduced to allow GDB to request the XML given
that ID.  On the GDB side, GDB would request the XML when encountering a
tdesc ID it doesn't know about, and then cache the result.  The
additional overhead would be:

 - fetching each XML tdesc once, that seems inevitable (we just don't
   want to fetch the same XML tdesc over and over)
 - a few characters more in stop replies and qXfer:threads:read replies

I believe this could be implemented in a backwards compatible way
without even adding a new qSupported feature.  XML is extensible by
nature.  And stop replies are too.  The T stop reply doc says:

  Otherwise, GDB should ignore this ‘n:r’ pair and go on to the next;
  this allows us to extend the protocol in the future.

So in theory we could add a `tdesc-id:1234` field without breaking old
GDBs.  However, those old GDBs would break when trying to read registers
of threads with unexpected SVE lengths.

Simon
  
Luis Machado Dec. 7, 2022, 11:01 p.m. UTC | #11
On 12/7/22 19:25, Simon Marchi wrote:
> On 12/7/22 12:05, Luis Machado via Gdb-patches wrote:
>> On 12/5/22 22:37, Thiago Jung Bauermann wrote:
>>>
>>> Luis Machado <luis.machado@arm.com> writes:
>>>
>>>> On 11/26/22 02:04, Thiago Jung Bauermann wrote:
>>>>> @@ -8068,6 +8078,21 @@ remote_target::process_stop_reply (struct stop_reply *stop_reply,
>>>>>           /* Expedited registers.  */
>>>>>           if (!stop_reply->regcache.empty ())
>>>>>         {
>>>>> +      /* If GDB already knows about this thread, we can give the
>>>>> +         architecture-specific code a chance to update the gdbarch based on
>>>>> +         the expedited registers.  */
>>>>> +      if (find_thread_ptid (this, ptid) != nullptr)
>>>>> +        {
>>>>> +          stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
>>>>> +                                  stop_reply->regcache);
>>>>> +
>>>>> +          /* Save stop_reply->arch so that it can be returned by the
>>>>> +         thread_architecture method.  */
>>>>> +          remote_thread_info *remote_thr = get_remote_thread_info (this,
>>>>> +                                       ptid);
>>>>> +          remote_thr->expedited_arch = stop_reply->arch;
>>>>> +        }
>>>>> +
>>>>>           struct regcache *regcache
>>>>>             = get_thread_arch_regcache (this, ptid, stop_reply->arch);
>>>>>     @@ -14382,6 +14407,23 @@ remote_target::thread_info_to_thread_handle (struct
>>>>> thread_info *tp)
>>>>>       return priv->thread_handle;
>>>>>     }
>>>>>     +struct gdbarch *
>>>>> +remote_target::thread_architecture (ptid_t ptid)
>>>>> +{
>>>>> +  thread_info *thr = find_thread_ptid (this, ptid);
>>>>> +  remote_thread_info *remote_thr = nullptr;
>>>>> +
>>>>> +  if (thr != nullptr)
>>>>> +    remote_thr = get_remote_thread_info (thr);
>>>>> +
>>>>> +  if (remote_thr == nullptr || remote_thr->expedited_arch == nullptr)
>>>>> +    /* The default thread_architecture implementation is the one from
>>>>> +       process_stratum_target.  */
>>>>> +    return process_stratum_target::thread_architecture(ptid);
>>>>> +
>>>>> +  return remote_thr->expedited_arch;
>>>>> +}
>>>>> +
>>>>>     bool
>>>>>     remote_target::can_async_p ()
>>>>>     {
>>>>
>>>> Just recalled this. One deficiency of the current SVE implementation
>>>> is that it doesn't have a proper way to hide the Z register when they
>>>> don't have any meaningful state (SVE not active, so we have fpsimd
>>>> state).
>>>
>>> I see the SVE_HEADER_FLAG_SVE being checked/set in
>>> aarch64_linux_supply_sve_regset and aarch64_linux_collect_sve_regset
>>> (though in the latter it is set unconditionally), and also HAS_SVE_STATE
>>> being used in aarch64_sve_regs_copy_to_reg_buf and
>>> aarch64_sve_regs_copy_from_reg_buf.
>>>
>>> But I wasn't able to find similar logic regarding hiding the Z
>>> registers. Do you have any pointers?
>>>
>>
>> There isn't logic to do that at present. This is something I'm pondering for SME, and would possibly apply to
>> SVE as well. But it may be more complicated than useful.
>>
>>>> Given the VL will always be reported as > 0, even if there is no
>>>> active SVE state, gdb will always attempt to show Z registers. And
>>>> those are quite verbose.
>>>
>>> The VG pseudo-register is defined with an int type, so we could return a
>>> negative value to indicate that the Z registers are unused (and the
>>> module of the value would still be the vector granule). Would that
>>> be ok?
>>
>> I think a value of 0 would be fine for this purpose but...
>>
>>>
>>>> In this sense, the implementations of the the thread_architecture
>>>> methods for both native aarch64 and remote differ. While native
>>>> aarch64's implementation is capable of detecting the lack of active
>>>> SVE state, the remote implementation can't. If gdbserver decided to
>>>> drop the Z registers mid-execution (before the SVE state is not
>>>> active), there wouldn't be vg for gdb to make a decision.
>>>
>>> Looking at aarch64_linux_nat_target::thread_architecture my impression
>>> is that its result depends only on aarch64_sve_get_vq, so I don't see
>>> how it's different from the remote implementation.
>>
>> ... Right, but for native debugging we actually call ptrace to extract the vector length. That is completely independent from
>> the XML target description or the available register sets.
>>
>> In the case of remote debugging, the expedited registers are returned based on an existing register description.
>>
>> Take, for example, the following:
>>
>> - SVE state is enabled and we have the vg register.
>> - Program updates the SVE vector length and, as a result, we no longer have an active SVE state (reverted to FPSIMD state)
>> - GDB drops the sve feature from its target description.
>> - Program executes a sve instruction and SVE state is made active.
>> - gdbserver tries to tell gdb what the value of vg is, but gdb doesn't know anything about vg at this point, because it has dropped the sve feature previously.
>>
>> I think this is the downside of using expedited registers to communicate the architecture changes. For this case, returning a full XML would make it easy for gdb to
>> figure things out.
>>
>> But, going back to hiding sve/sme registers when they're not in use, I'm not yet sure that is something we should pursue.
>>
>> I'd say if you manage to get things working with the expedited registers (for the problematic cases Simon and I pointed out), that's fine.
> 
> Reading this, I thought of another potential problem with using
> expedited registers to communicate the architectures change.  When using
> the non-stop version of the remote protocol, we will have a stop reply
> for each thread that stops, so that's fine, we can infer each thread's
> tdesc.  But with the all-stop variant of the protocol, you only get a
> stop reply for one thread, for a given stop.

I wasn't sure about this case, so I mentioned it in the first reply to patch 6/6. Isn't it possible to send back
information about all the threads that have stopped in all-stop mode?

> 
> For instance, imagine you have two threads, thread 1 hits a breakpoint.
> GDB gets a stop reply for thread 1, and thread 2 is just considered stop
> because we're using the all-stop remote protocol.  If we need to read
> thread 2's registers, we would need to know its tdesc.  Its vg may have
> changed since last time, or it might be the first time we learned about
> it.  In that case, I don't think we have a way out of adding some mean
> for gdbserver to tell gdb which tdesc applies to that thread.
> 
> If it's the case, that we need to extend the RSP to allow fetching
> per-thread tdesc, here's the idea I had in mind for a while, to avoid
> adding too much overhead.  Stop replies and the qXfer:threads:read reply
> could include a per-thread tdesc identifier.  That tdesc identifier
> would be something short, either an incrementing number, or some kind of
> hash of the tdesc.  It would be something opaque chosen by the stub.  A
> new packet would be introduced to allow GDB to request the XML given
> that ID.  On the GDB side, GDB would request the XML when encountering a
> tdesc ID it doesn't know about, and then cache the result.  The
> additional overhead would be:

The aarch64 port uses feature hashes for the target descriptions. We could leverage that, but we would need to
keep the hash stable so older gdb's that don't know about certain features don't break/get confused.

> 
>   - fetching each XML tdesc once, that seems inevitable (we just don't
>     want to fetch the same XML tdesc over and over)
>   - a few characters more in stop replies and qXfer:threads:read replies
> 
> I believe this could be implemented in a backwards compatible way
> without even adding a new qSupported feature.  XML is extensible by
> nature.  And stop replies are too.  The T stop reply doc says:
> 
>    Otherwise, GDB should ignore this ‘n:r’ pair and go on to the next;
>    this allows us to extend the protocol in the future.
> 
> So in theory we could add a `tdesc-id:1234` field without breaking old
> GDBs.  However, those old GDBs would break when trying to read registers
> of threads with unexpected SVE lengths.

It sounds like a good approach to me, and slightly better than using the expedited registers.

It would be nice if we could minimize the amount of tdesc-id entries for all the threads, if they're mostly the same.

The most obvious case is having hundreds of threads that all have the same tdesc-id. It wouldn't be nice to duplicate all that data.

Originally we discussed a packet to ask the remote if the target description had changed. If nothing's changed, it is a quick reply.

I suppose we could have something similar to that here as well, to optimize things, that is, don't return data if nothing's changed.
> 
> Simon
  
Thiago Jung Bauermann Dec. 9, 2022, 2:20 a.m. UTC | #12
Luis Machado <luis.machado@arm.com> writes:

> On 12/7/22 19:25, Simon Marchi wrote:
>> Reading this, I thought of another potential problem with using
>> expedited registers to communicate the architectures change.  When using
>> the non-stop version of the remote protocol, we will have a stop reply
>> for each thread that stops, so that's fine, we can infer each thread's
>> tdesc.  But with the all-stop variant of the protocol, you only get a
>> stop reply for one thread, for a given stop.
>
> I wasn't sure about this case, so I mentioned it in the first reply to
> patch 6/6. Isn't it possible to send back information about all the
> threads that have stopped in all-stop mode?

I think it could be done by adding the extra information in the XML of
the qXfer:threads:read reply as Simon suggested here.

I thought about adding the expedited registers for each thread to fix
this case, but as long as we are making changes to the remote protocol,
Simon's idea is cleaner and more general.

>> For instance, imagine you have two threads, thread 1 hits a breakpoint.
>> GDB gets a stop reply for thread 1, and thread 2 is just considered stop
>> because we're using the all-stop remote protocol.  If we need to read
>> thread 2's registers, we would need to know its tdesc.  Its vg may have
>> changed since last time, or it might be the first time we learned about
>> it.  In that case, I don't think we have a way out of adding some mean
>> for gdbserver to tell gdb which tdesc applies to that thread.
>> If it's the case, that we need to extend the RSP to allow fetching
>> per-thread tdesc, here's the idea I had in mind for a while, to avoid
>> adding too much overhead.  Stop replies and the qXfer:threads:read reply
>> could include a per-thread tdesc identifier.  That tdesc identifier
>> would be something short, either an incrementing number, or some kind of
>> hash of the tdesc.  It would be something opaque chosen by the stub.  A
>> new packet would be introduced to allow GDB to request the XML given
>> that ID.  On the GDB side, GDB would request the XML when encountering a
>> tdesc ID it doesn't know about, and then cache the result.  The
>> additional overhead would be:
>
> The aarch64 port uses feature hashes for the target descriptions. We
> could leverage that, but we would need to keep the hash stable so
> older gdb's that don't know about certain features don't break/get
> confused.

IIUC the tdesc ids are defined by gdbserver and are opaque to GDB, and
also they are only meaningful within a single connection, so they don't
need to be stable.

Because of that, I think small integers are preferable to hashes because
they are shorter so it's less data to push through the wire.

>>   - fetching each XML tdesc once, that seems inevitable (we just don't
>>     want to fetch the same XML tdesc over and over)
>>   - a few characters more in stop replies and qXfer:threads:read replies
>> I believe this could be implemented in a backwards compatible way
>> without even adding a new qSupported feature.  XML is extensible by
>> nature.  And stop replies are too.  The T stop reply doc says:
>>    Otherwise, GDB should ignore this ‘n:r’ pair and go on to the next;
>>    this allows us to extend the protocol in the future.
>> So in theory we could add a `tdesc-id:1234` field without breaking old
>> GDBs.  However, those old GDBs would break when trying to read registers
>> of threads with unexpected SVE lengths.
>
> It sounds like a good approach to me, and slightly better than using
> the expedited registers.

I agree. Thanks Simon for this idea and for detailing it! I'll take a
stab at implementing it for v3.

> It would be nice if we could minimize the amount of tdesc-id entries
> for all the threads, if they're mostly the same.
>
> The most obvious case is having hundreds of threads that all have the
> same tdesc-id. It wouldn't be nice to duplicate all that data.

gdbserver can assign the same id for identical tdescs, so I think this
will happen naturally.

> Originally we discussed a packet to ask the remote if the target
> description had changed. If nothing's changed, it is a quick reply.
>
> I suppose we could have something similar to that here as well, to
> optimize things, that is, don't return data if nothing's changed.

Yes, if it happens that GDB needs to fetch registers without having
received a tdesc-updating T stop reply or qXfer:threads:read reply then
this is a good solution.
  

Patch

diff --git a/gdb/aarch64-tdep.c b/gdb/aarch64-tdep.c
index ffc128d91f60..764f693aee8d 100644
--- a/gdb/aarch64-tdep.c
+++ b/gdb/aarch64-tdep.c
@@ -3486,7 +3486,8 @@  aarch64_cannot_store_register (struct gdbarch *gdbarch, int regnum)
 	  || regnum == AARCH64_PAUTH_CMASK_REGNUM (tdep->pauth_reg_base));
 }
 
-/* Helper function for the "thread_architecture" target_ops method.
+/* Helper function for both the "update_architecture" gdbarch method and the
+   "thread_architecture" target_ops method.
 
    Returns a new gdbarch that is equivalent to the given gdbarch, but with SVE
    registers reflecting the given vq value.  */
@@ -3521,6 +3522,28 @@  aarch64_update_gdbarch (struct gdbarch *gdbarch, uint64_t vq)
   return gdbarch_find_by_info (info);
 }
 
+/* Implement the "update_architecture" gdbarch method.  */
+
+static struct gdbarch *
+aarch64_update_architecture (struct gdbarch *gdbarch,
+			     const std::vector<cached_reg_t> &regcache)
+{
+  /* Look for the VG register.  */
+  auto it = find_if (regcache.cbegin (), regcache.cend (),
+		     [] (const cached_reg_t &reg) {
+		       return reg.num == AARCH64_SVE_VG_REGNUM;
+		     });
+
+  /* No VG register was provided.  Don't change gdbarch.  */
+  if (it == regcache.cend ())
+    return gdbarch;
+
+  ULONGEST vg = extract_unsigned_integer (it->data, 8,
+					  gdbarch_byte_order (gdbarch));
+
+  return aarch64_update_gdbarch (gdbarch, sve_vq_from_vg (vg));
+}
+
 /* Implement the stack_frame_destroyed_p gdbarch method.  */
 
 static int
@@ -3742,6 +3765,7 @@  aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
   set_tdesc_pseudo_register_reggroup_p (gdbarch,
 					aarch64_pseudo_register_reggroup_p);
   set_gdbarch_cannot_store_register (gdbarch, aarch64_cannot_store_register);
+  set_gdbarch_update_architecture (gdbarch, aarch64_update_architecture);
 
   /* ABI */
   set_gdbarch_short_bit (gdbarch, 16);
diff --git a/gdb/arch-utils.c b/gdb/arch-utils.c
index 7b84daf046e2..2315aacb4bbe 100644
--- a/gdb/arch-utils.c
+++ b/gdb/arch-utils.c
@@ -1090,6 +1090,15 @@  default_get_return_buf_addr (struct type *val_type, frame_info_ptr cur_frame)
   return 0;
 }
 
+/* See arch-utils.h.  */
+
+struct gdbarch *
+default_update_architecture (struct gdbarch *gdbarch,
+			     const std::vector<cached_reg_t> &regcache)
+{
+  return gdbarch;
+}
+
 /* Non-zero if we want to trace architecture code.  */
 
 #ifndef GDBARCH_DEBUG
diff --git a/gdb/arch-utils.h b/gdb/arch-utils.h
index 46018a6fbbb6..e68a91f6753d 100644
--- a/gdb/arch-utils.h
+++ b/gdb/arch-utils.h
@@ -304,4 +304,9 @@  extern void default_read_core_file_mappings
 /* Default implementation of gdbarch default_get_return_buf_addr method.  */
 extern CORE_ADDR default_get_return_buf_addr (struct type *val_typegdbarch,
 					      frame_info_ptr cur_frame);
+
+/* Default implementation of gdbarch update_architecture method.  */
+extern struct gdbarch *
+default_update_architecture (struct gdbarch *gdbarch,
+			     const std::vector<cached_reg_t> &regcache);
 #endif /* ARCH_UTILS_H */
diff --git a/gdb/gdbarch-components.py b/gdb/gdbarch-components.py
index 9b688998a7bd..fbb32c5e5853 100644
--- a/gdb/gdbarch-components.py
+++ b/gdb/gdbarch-components.py
@@ -2698,3 +2698,19 @@  Read core file mappings
     predefault="default_read_core_file_mappings",
     invalid=False,
 )
+
+Method(
+    comment="""
+An architecture may change based on the current contents of its registers.
+For instance, the length of the vector registers in AArch64's Scalable Vector
+Extension is given by the contents of the VL pseudo-register.
+
+This method allows an architecture to provide a new gdbarch corresponding to
+the registers in the given regcache.
+""",
+    type="struct gdbarch *",
+    name="update_architecture",
+    params=[("const std::vector<cached_reg_t> &", "regcache")],
+    predefault="default_update_architecture",
+    invalid=False,
+)
diff --git a/gdb/gdbarch-gen.h b/gdb/gdbarch-gen.h
index a663316df166..a65699e7241f 100644
--- a/gdb/gdbarch-gen.h
+++ b/gdb/gdbarch-gen.h
@@ -1649,3 +1649,14 @@  extern void set_gdbarch_get_pc_address_flags (struct gdbarch *gdbarch, gdbarch_g
 typedef void (gdbarch_read_core_file_mappings_ftype) (struct gdbarch *gdbarch, struct bfd *cbfd, read_core_file_mappings_pre_loop_ftype pre_loop_cb, read_core_file_mappings_loop_ftype loop_cb);
 extern void gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, read_core_file_mappings_pre_loop_ftype pre_loop_cb, read_core_file_mappings_loop_ftype loop_cb);
 extern void set_gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, gdbarch_read_core_file_mappings_ftype *read_core_file_mappings);
+
+/* An architecture may change based on the current contents of its registers.
+   For instance, the length of the vector registers in AArch64's Scalable Vector
+   Extension is given by the contents of the VL pseudo-register.
+
+   This method allows an architecture to provide a new gdbarch corresponding to
+   the registers in the given regcache. */
+
+typedef struct gdbarch * (gdbarch_update_architecture_ftype) (struct gdbarch *gdbarch, const std::vector<cached_reg_t> &regcache);
+extern struct gdbarch * gdbarch_update_architecture (struct gdbarch *gdbarch, const std::vector<cached_reg_t> &regcache);
+extern void set_gdbarch_update_architecture (struct gdbarch *gdbarch, gdbarch_update_architecture_ftype *update_architecture);
diff --git a/gdb/gdbarch.c b/gdb/gdbarch.c
index 3227e9458801..1b0a2c70db4c 100644
--- a/gdb/gdbarch.c
+++ b/gdb/gdbarch.c
@@ -255,6 +255,7 @@  struct gdbarch
   gdbarch_type_align_ftype *type_align = default_type_align;
   gdbarch_get_pc_address_flags_ftype *get_pc_address_flags = default_get_pc_address_flags;
   gdbarch_read_core_file_mappings_ftype *read_core_file_mappings = default_read_core_file_mappings;
+  gdbarch_update_architecture_ftype *update_architecture = default_update_architecture;
 };
 
 /* Create a new ``struct gdbarch'' based on information provided by
@@ -514,6 +515,7 @@  verify_gdbarch (struct gdbarch *gdbarch)
   /* Skip verify of type_align, invalid_p == 0 */
   /* Skip verify of get_pc_address_flags, invalid_p == 0 */
   /* Skip verify of read_core_file_mappings, invalid_p == 0 */
+  /* Skip verify of update_architecture, invalid_p == 0 */
   if (!log.empty ())
     internal_error (_("verify_gdbarch: the following are invalid ...%s"),
 		    log.c_str ());
@@ -1352,6 +1354,9 @@  gdbarch_dump (struct gdbarch *gdbarch, struct ui_file *file)
   gdb_printf (file,
                       "gdbarch_dump: read_core_file_mappings = <%s>\n",
                       host_address_to_string (gdbarch->read_core_file_mappings));
+  gdb_printf (file,
+                      "gdbarch_dump: update_architecture = <%s>\n",
+                      host_address_to_string (gdbarch->update_architecture));
   if (gdbarch->dump_tdep != NULL)
     gdbarch->dump_tdep (gdbarch, file);
 }
@@ -5314,3 +5319,20 @@  set_gdbarch_read_core_file_mappings (struct gdbarch *gdbarch,
 {
   gdbarch->read_core_file_mappings = read_core_file_mappings;
 }
+
+struct gdbarch *
+gdbarch_update_architecture (struct gdbarch *gdbarch, const std::vector<cached_reg_t> &regcache)
+{
+  gdb_assert (gdbarch != NULL);
+  gdb_assert (gdbarch->update_architecture != NULL);
+  if (gdbarch_debug >= 2)
+    gdb_printf (gdb_stdlog, "gdbarch_update_architecture called\n");
+  return gdbarch->update_architecture (gdbarch, regcache);
+}
+
+void
+set_gdbarch_update_architecture (struct gdbarch *gdbarch,
+                                 gdbarch_update_architecture_ftype update_architecture)
+{
+  gdbarch->update_architecture = update_architecture;
+}
diff --git a/gdb/remote.c b/gdb/remote.c
index 5118ecd0a312..eb60ed51585b 100644
--- a/gdb/remote.c
+++ b/gdb/remote.c
@@ -491,6 +491,8 @@  class remote_target : public process_stratum_target
   gdb::byte_vector thread_info_to_thread_handle (struct thread_info *tp)
 						 override;
 
+  struct gdbarch *thread_architecture (ptid_t ptid) override;
+
   void stop (ptid_t) override;
 
   void interrupt () override;
@@ -1150,6 +1152,10 @@  struct remote_thread_info : public private_thread_info
      to stop for a watchpoint.  */
   CORE_ADDR watch_data_address = 0;
 
+  /* The architecture corresponding to the expedited registers returned
+     in the last stop reply.  */
+  struct gdbarch *expedited_arch = nullptr;
+
   /* Get the thread's resume state.  */
   enum resume_state get_resume_state () const
   {
@@ -1168,6 +1174,8 @@  struct remote_thread_info : public private_thread_info
     m_resume_state = resume_state::RESUMED_PENDING_VCONT;
     m_resumed_pending_vcont_info.step = step;
     m_resumed_pending_vcont_info.sig = sig;
+
+    expedited_arch = nullptr;
   }
 
   /* Get the information this thread's pending vCont-resumption.
@@ -1185,6 +1193,8 @@  struct remote_thread_info : public private_thread_info
   void set_resumed ()
   {
     m_resume_state = resume_state::RESUMED;
+
+    expedited_arch = nullptr;
   }
 
 private:
@@ -8068,6 +8078,21 @@  remote_target::process_stop_reply (struct stop_reply *stop_reply,
       /* Expedited registers.  */
       if (!stop_reply->regcache.empty ())
 	{
+	  /* If GDB already knows about this thread, we can give the
+	     architecture-specific code a chance to update the gdbarch based on
+	     the expedited registers.  */
+	  if (find_thread_ptid (this, ptid) != nullptr)
+	    {
+	      stop_reply->arch = gdbarch_update_architecture (stop_reply->arch,
+							      stop_reply->regcache);
+
+	      /* Save stop_reply->arch so that it can be returned by the
+		 thread_architecture method.  */
+	      remote_thread_info *remote_thr = get_remote_thread_info (this,
+								       ptid);
+	      remote_thr->expedited_arch = stop_reply->arch;
+	    }
+
 	  struct regcache *regcache
 	    = get_thread_arch_regcache (this, ptid, stop_reply->arch);
 
@@ -14382,6 +14407,23 @@  remote_target::thread_info_to_thread_handle (struct thread_info *tp)
   return priv->thread_handle;
 }
 
+struct gdbarch *
+remote_target::thread_architecture (ptid_t ptid)
+{
+  thread_info *thr = find_thread_ptid (this, ptid);
+  remote_thread_info *remote_thr = nullptr;
+
+  if (thr != nullptr)
+    remote_thr = get_remote_thread_info (thr);
+
+  if (remote_thr == nullptr || remote_thr->expedited_arch == nullptr)
+    /* The default thread_architecture implementation is the one from
+       process_stratum_target.  */
+    return process_stratum_target::thread_architecture(ptid);
+
+  return remote_thr->expedited_arch;
+}
+
 bool
 remote_target::can_async_p ()
 {