diff mbox

[v3,14/18] Fast tracepoint support for ARM on Linux

Message ID 1467726030-13020-15-git-send-email-antoine.tremblay@ericsson.com
State New
Headers show

Commit Message

Antoine Tremblay July 5, 2016, 1:40 p.m. UTC
This patch enables fast tracepoints for ARM on Linux.

There are some limitations:

 * The tracepoint insertion will fail if the distance from the
   instruction to the jump pad is big.

 * As of this patch, the implementation will reject any tracepoint
   placed on an instruction that might be PC-relative.  Later patches
   implement relocation for some selected instructions.

 * It is only possible to place fast tracepoints on 4-byte
   instructions, which also limits the valid locations in Thumb mode.

 * As of this patch, JIT compilation of condition evaluation is not
   implemented.  It is added in a later patch.

Henrik Wallin  <henrik.wallin@windriver.com>
Simon Marchi  <simon.marchi@ericsson.com>
Antoine Tremblay  <antoine.tremblay@ericsson.com>

gdb/ChangeLog:

	* arm-tdep.c (arm_fast_tracepoint_valid_at): New function.
	(arm_gdbarch_init): Initialize gdbarch_fast_tracepoint_valid_at.

gdb/gdbserver/ChangeLog:

	* Makefile.in (SFILES): Add arm-insn-reloc.c.
	(arm-core-ipa.o): New rule.
	(arm-with-neon-ipa.o): Likewise.
	(arm-with-vfpv2-ipa.o): Likewise.
	(arm-with-vfpv3-ipa.o): Likewise.
	(linux-arm-ipa.o): Likewise.
	(arm-get-next-pcs.o:): Likewise.
	(arm-insn-reloc.o): Likewise.
	* configure.srv (arm*-*-linux*): Add arm-insn-reloc.o, arm-core-ipa.o,
	arm-with-neon-ipa.o, arm-with-vfpv2-ipa.o, arm-with-vfpv3-ipa.o and
	linux-arm-ipa.o.
	* linux-aarch32-low.c (enum arm_breakpoint_kinds): Move to...
	* linux-aarch32-low.h (enum arm_breakpoint_kinds): ...here.
	* linux-arm-ipa.c: New file.
	* linux-arm-low.c: Include inttypes.h, arch/arm-insn-utils.h,
	arch/arm-insn-emit.h, arch/arm-insn-reloc.h, ax.h.
	(enum arm_linux_tdesc): New enum.
	(append_inferior_memory): New function.
	(append_inferior_memory_32): Likewise.
	(append_inferior_memory_16): Likewise.
	(struct arm_insn_reloc_data): New struct.
	(arm_reloc_others): New function.
	(arm_reloc_alu_imm): Likewise.
	(arm_reloc_alu_reg): Likewise.
	(arm_reloc_alu_shifted_reg): Likewise.
	(arm_reloc_b_bl_blx): Likewise.
	(arm_reloc_block_xfer): Likewise.
	(arm_reloc_bx_blx_reg): Likewise.
	(arm_reloc_copro_load_store): Likewise.
	(arm_reloc_extra_ld_st): Likewise.
	(arm_reloc_ldr_str_ldrb_strb): Likewise.
	(arm_reloc_preload): Likewise.
	(arm_reloc_preload_reg): Likewise.
	(arm_reloc_svc): Likewise.
	(arm_reloc_undef): Likewise.
	(arm_reloc_unpred): Likewise.
	(struct arm_insn_reloc_visitor): New struct.
	(copy_instruction_arm): New function.
	(thumb32_reloc_others): Likewise.
	(thumb32_reloc_alu_imm): Likewise.
	(thumb32_reloc_b_bl_blx): Likewise.
	(thumb32_reloc_block_xfer): Likewise.
	(thumb32_reloc_copro_load_store): Likewise.
	(thumb32_reloc_load_literal): Likewise.
	(thumb32_reloc_load_reg_imm): Likewise.
	(thumb32_reloc_pc_relative_32bit): Likewise.
	(thumb32_reloc_preload): Likewise.
	(thumb32_reloc_undef): Likewise.
	(thumb32_reloc_table_branch): Likewise.
	(struct thumb_32bit_insn_reloc_visitor): New struct.
	(copy_instruction_thumb32): New function.
	(arm_get_thread_area): Likewise.
	(arm_get_min_fast_tracepoint_insn_len): Likewise.
	(arm_install_fast_tracepoint_jump_pad_arm): Likewise.
	(arm_install_fast_tracepoint_jump_pad_thumb2): Likewise.
	(arm_get_ipa_tdesc_idx): Likewise.
	(struct linux_target_ops) <get_thread_area>: Initialize.
	<install_fast_tracepoint_jump_pad>: Likewise.
	<get_min_fast_tracepoint_insn_len>: Likewise.
	<supports_hardware_single_step>: Likewise.
	<get_ipa_tdesc_idx>: Likewise.
	* tracepoint.c (get_ipa_tdesc_idx): New function.
	* tracepoint.h (get_ipa_tdesc_idx): New function declaration.

gdb/testsuite/ChangeLog:

	* lib/trace-support.exp (gdb_target_supports_fast_trace): Add
	aarch32 targets.
---
 gdb/arm-tdep.c                      |  45 ++
 gdb/gdbserver/Makefile.in           |  21 +-
 gdb/gdbserver/configure.srv         |   5 +
 gdb/gdbserver/linux-aarch32-low.c   |   8 -
 gdb/gdbserver/linux-aarch32-low.h   |   8 +
 gdb/gdbserver/linux-arm-ipa.c       | 217 +++++++++
 gdb/gdbserver/linux-arm-low.c       | 917 +++++++++++++++++++++++++++++++++++-
 gdb/gdbserver/tracepoint.c          |   7 +
 gdb/gdbserver/tracepoint.h          |   1 +
 gdb/testsuite/lib/trace-support.exp |   3 +-
 10 files changed, 1218 insertions(+), 14 deletions(-)
 create mode 100644 gdb/gdbserver/linux-arm-ipa.c
diff mbox

Patch

diff --git a/gdb/arm-tdep.c b/gdb/arm-tdep.c
index 4a4826a..2a83b82 100644
--- a/gdb/arm-tdep.c
+++ b/gdb/arm-tdep.c
@@ -8180,6 +8180,49 @@  arm_code_of_frame_writable (struct gdbarch *gdbarch, struct frame_info *frame)
     return 1;
 }
 
+static int
+arm_fast_tracepoint_valid_at (struct gdbarch *gdbarch,
+			      CORE_ADDR addr, char **msg)
+{
+  /* A branch instruction used for fast tracepoint takes 4 bytes.
+     (A 2 bytes branch instruction only gets us 4k away,
+     so will not be enough.)
+
+     target gdbserver will validate that the relative branch
+     distance will fit in the instructions.
+     (16M for Thumb, 32M for ARM)
+
+     We only allow to replace one instruction. (4 bytes)
+     Replacing 2 instructions is not safe. Consider
+     the case where code wants to jump to the 2nd instruction - it
+     will jump into the middle of a branch instruction.   */
+
+  if (arm_pc_is_thumb (gdbarch, addr))
+    {
+      uint16_t insn
+	= read_memory_unsigned_integer (addr, 2,
+					gdbarch_byte_order_for_code (gdbarch));
+
+      if (thumb_insn_size (insn) == 2)
+	{
+	  if (msg)
+	    *msg = xstrprintf (_ ("; instruction is only 2 bytes long, "
+				  "need 4 bytes for the jump"));
+	  return 0;
+	}
+    }
+  else
+    {
+      /* In ARM mode, all instructions are 4 bytes long, so there is
+         no restriction related to instruction length.  */
+    }
+
+  if (msg)
+    *msg = NULL;
+
+  return 1;
+}
+
 
 /* Initialize the current architecture based on INFO.  If possible,
    re-use an architecture from ARCHES, which is a list of
@@ -8784,6 +8827,8 @@  arm_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
     user_reg_add (gdbarch, arm_register_aliases[i].name,
 		  value_of_arm_user_reg, &arm_register_aliases[i].regnum);
 
+  set_gdbarch_fast_tracepoint_valid_at (gdbarch, arm_fast_tracepoint_valid_at);
+
   return gdbarch;
 }
 
diff --git a/gdb/gdbserver/Makefile.in b/gdb/gdbserver/Makefile.in
index 271f804..217a324 100644
--- a/gdb/gdbserver/Makefile.in
+++ b/gdb/gdbserver/Makefile.in
@@ -186,7 +186,8 @@  SFILES=	$(srcdir)/gdbreplay.c $(srcdir)/inferiors.c $(srcdir)/dll.c \
 	$(srcdir)/common/fileio.c $(srcdir)/nat/linux-namespaces.c \
 	$(srcdir)/arch/arm.c $(srcdir)/common/common-regcache.c \
 	$(srcdir)/arch/arm-linux.c $(srcdir)/arch/arm-get-next-pcs.c \
-	$(srcdir)/arch/arm-insn-emit.c
+	$(srcdir)/arch/arm-insn-emit.c \
+	$(srcdir)/arch/arm-insn-reloc.c
 
 DEPFILES = @GDBSERVER_DEPFILES@
 
@@ -648,6 +649,21 @@  rsp-low-ipa.o: ../common/rsp-low.c
 errors-ipa.o: ../common/errors.c
 	$(IPAGENT_COMPILE) $<
 	$(POSTCOMPILE)
+arm-core-ipa.o: reg-arm.c
+	$(IPAGENT_COMPILE) $<
+	$(POSTCOMPILE)
+arm-with-neon-ipa.o: arm-with-neon.c
+	$(IPAGENT_COMPILE) $<
+	$(POSTCOMPILE)
+arm-with-vfpv2-ipa.o: arm-with-vfpv2.c
+	$(IPAGENT_COMPILE) $<
+	$(POSTCOMPILE)
+arm-with-vfpv3-ipa.o: arm-with-vfpv3.c
+	$(IPAGENT_COMPILE) $<
+	$(POSTCOMPILE)
+linux-arm-ipa.o: linux-arm-ipa.c
+	$(IPAGENT_COMPILE) $<
+	$(POSTCOMPILE)
 
 ax.o: ax.c
 	$(COMPILE) $(WARN_CFLAGS_NO_FORMAT) $<
@@ -731,6 +747,9 @@  arm-get-next-pcs.o: ../arch/arm-get-next-pcs.c
 arm-insn-emit.o: ../arch/arm-insn-emit.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
+arm-insn-reloc.o: ../arch/arm-insn-reloc.c
+	$(COMPILE) $<
+	$(POSTCOMPILE)
 
 # Native object files rules from ../nat
 
diff --git a/gdb/gdbserver/configure.srv b/gdb/gdbserver/configure.srv
index 34d4be8..0da536f 100644
--- a/gdb/gdbserver/configure.srv
+++ b/gdb/gdbserver/configure.srv
@@ -78,6 +78,7 @@  case "${target}" in
 			srv_tgtobj="${srv_tgtobj} arm-get-next-pcs.o"
 			srv_tgtobj="${srv_tgtobj} arm-insn-emit.o"
 			srv_tgtobj="${srv_tgtobj} arm-insn-utils.o"
+			srv_tgtobj="${srv_tgtobj} arm-insn-reloc.o"
 			srv_xmlfiles="arm-with-iwmmxt.xml"
 			srv_xmlfiles="${srv_xmlfiles} arm-with-vfpv2.xml"
 			srv_xmlfiles="${srv_xmlfiles} arm-with-vfpv3.xml"
@@ -89,6 +90,10 @@  case "${target}" in
 			srv_linux_usrregs=yes
 			srv_linux_regsets=yes
 			srv_linux_thread_db=yes
+			ipa_obj="arm-core-ipa.o arm-with-neon-ipa.o"
+			ipa_obj="${ipa_obj} arm-with-vfpv2-ipa.o"
+			ipa_obj="${ipa_obj} arm-with-vfpv3-ipa.o"
+			ipa_obj="${ipa_obj} linux-arm-ipa.o"
 			;;
   arm*-*-mingw32ce*)	srv_regobj=reg-arm.o
 			srv_tgtobj="win32-low.o win32-arm-low.o"
diff --git a/gdb/gdbserver/linux-aarch32-low.c b/gdb/gdbserver/linux-aarch32-low.c
index e6971d5..ffc7ace 100644
--- a/gdb/gdbserver/linux-aarch32-low.c
+++ b/gdb/gdbserver/linux-aarch32-low.c
@@ -215,14 +215,6 @@  arm_breakpoint_at (CORE_ADDR where)
   return 0;
 }
 
-/* Enum describing the different kinds of breakpoints.  */
-enum arm_breakpoint_kinds
-{
-   ARM_BP_KIND_THUMB = 2,
-   ARM_BP_KIND_THUMB2 = 3,
-   ARM_BP_KIND_ARM = 4,
-};
-
 /* Implementation of linux_target_ops method "breakpoint_kind_from_pc".
 
    Determine the type and size of breakpoint to insert at PCPTR.  Uses the
diff --git a/gdb/gdbserver/linux-aarch32-low.h b/gdb/gdbserver/linux-aarch32-low.h
index 434a523..ac14ef0 100644
--- a/gdb/gdbserver/linux-aarch32-low.h
+++ b/gdb/gdbserver/linux-aarch32-low.h
@@ -15,6 +15,14 @@ 
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
+/* Enum describing the different kinds of breakpoints.  */
+enum arm_breakpoint_kinds
+{
+  ARM_BP_KIND_THUMB = 2,
+  ARM_BP_KIND_THUMB2 = 3,
+  ARM_BP_KIND_ARM = 4,
+};
+
 extern struct regs_info regs_info_aarch32;
 
 void arm_fill_gregset (struct regcache *regcache, void *buf);
diff --git a/gdb/gdbserver/linux-arm-ipa.c b/gdb/gdbserver/linux-arm-ipa.c
new file mode 100644
index 0000000..d684b78
--- /dev/null
+++ b/gdb/gdbserver/linux-arm-ipa.c
@@ -0,0 +1,217 @@ 
+/* GNU/Linux/arm specific low level interface, for the in-process
+   agent library for GDB.
+
+   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "server.h"
+#include <stdint.h>
+#include <sys/mman.h>
+#include "tracepoint.h"
+#include <sys/auxv.h>
+
+/* ARM GNU/Linux HWCAP values.  These are defined in
+   <asm/elf.h> in current kernels.  */
+#define HWCAP_VFP       64
+#define HWCAP_IWMMXT    512
+#define HWCAP_NEON      4096
+#define HWCAP_VFPv3     8192
+#define HWCAP_VFPv3D16  16384
+
+/* Target description indexes for the IPA.  */
+enum arm_linux_tdesc
+  {
+    ARM_TDESC_ARM = 0,
+    ARM_TDESC_ARM_WITH_VFPV2 = 1,
+    ARM_TDESC_ARM_WITH_VFPV3 = 2,
+    ARM_TDESC_ARM_WITH_NEON = 3,
+  };
+
+/* Defined in auto-generated file reg-arm.c.  */
+void init_registers_arm (void);
+extern const struct target_desc *tdesc_arm;
+
+void init_registers_arm_with_vfpv2 (void);
+extern const struct target_desc *tdesc_arm_with_vfpv2;
+
+void init_registers_arm_with_vfpv3 (void);
+extern const struct target_desc *tdesc_arm_with_vfpv3;
+
+void init_registers_arm_with_neon (void);
+extern const struct target_desc *tdesc_arm_with_neon;
+
+/* 32 bits GPR registers.  */
+#define GPR_SIZE 4
+/* 64 bits FPR registers.  */
+#define FPR_SIZE 8
+
+/* Byte offsets into the collected register block (fully parenthesized).  */
+#define FT_CR_PC	0
+#define FT_CR_CPSR	(1 * GPR_SIZE)
+#define FT_CR_LR	(15 * GPR_SIZE)
+#define FT_CR_GPR_0	(2 * GPR_SIZE)
+#define FT_CR_FPR_0	(FT_CR_LR + GPR_SIZE)
+#define FT_CR_GPR(n)	(FT_CR_GPR_0 + ((n) * GPR_SIZE))
+#define FT_CR_FPR(n)	(FT_CR_FPR_0 + ((n) * FPR_SIZE))
+#define FT_CR_UNAVAIL	(-1)
+
+/* Mapping between registers collected by the jump pad and GDB's register
+   array layout used by regcache for arm core registers.
+
+   See linux-arm-low.c (arm_install_fast_tracepoint_jump_pad) for
+   more details.  */
+
+static const int arm_core_ft_collect_regmap[] = {
+  FT_CR_GPR (0),  FT_CR_GPR (1),  FT_CR_GPR (2), FT_CR_GPR (3), FT_CR_GPR (4),
+  FT_CR_GPR (5),  FT_CR_GPR (6),  FT_CR_GPR (7), FT_CR_GPR (8), FT_CR_GPR (9),
+  FT_CR_GPR (10), FT_CR_GPR (11), FT_CR_GPR (12),
+  /* SP is calculated rather than collected.  */
+  FT_CR_UNAVAIL,
+  FT_CR_LR, FT_CR_PC,
+  /* Legacy FPA Registers. 16 to 24.  */
+  FT_CR_UNAVAIL, FT_CR_UNAVAIL, FT_CR_UNAVAIL, FT_CR_UNAVAIL, FT_CR_UNAVAIL,
+  FT_CR_UNAVAIL, FT_CR_UNAVAIL, FT_CR_UNAVAIL, FT_CR_UNAVAIL,
+  FT_CR_CPSR,
+};
+
+/* Mapping for VFPv2 registers.  */
+static const int arm_vfpv2_ft_collect_regmap[] = {
+  FT_CR_FPR (0),  FT_CR_FPR (1),  FT_CR_FPR (2),  FT_CR_FPR (3), FT_CR_FPR (4),
+  FT_CR_FPR (5),  FT_CR_FPR (6),  FT_CR_FPR (7),  FT_CR_FPR (8), FT_CR_FPR (9),
+  FT_CR_FPR (10), FT_CR_FPR (11), FT_CR_FPR (12), FT_CR_FPR (13),
+  FT_CR_FPR (14), FT_CR_FPR (15),
+};
+
+/* Mapping for VFPv3 registers.  */
+static const int arm_vfpv3_ft_collect_regmap[] = {
+  FT_CR_FPR (0),  FT_CR_FPR (1),  FT_CR_FPR (2),  FT_CR_FPR (3),  FT_CR_FPR (4),
+  FT_CR_FPR (5),  FT_CR_FPR (6),  FT_CR_FPR (7),  FT_CR_FPR (8),  FT_CR_FPR (9),
+  FT_CR_FPR (10), FT_CR_FPR (11), FT_CR_FPR (12), FT_CR_FPR (13),
+  FT_CR_FPR (14), FT_CR_FPR (15), FT_CR_FPR (16), FT_CR_FPR (17),
+  FT_CR_FPR (18), FT_CR_FPR (19), FT_CR_FPR (20), FT_CR_FPR (21),
+  FT_CR_FPR (22), FT_CR_FPR (23), FT_CR_FPR (24), FT_CR_FPR (25),
+  FT_CR_FPR (26), FT_CR_FPR (27), FT_CR_FPR (28), FT_CR_FPR (29),
+  FT_CR_FPR (30), FT_CR_FPR (31),
+};
+
+#define ARM_CORE_NUM_FT_COLLECT_REGS \
+  (sizeof(arm_core_ft_collect_regmap) / sizeof(arm_core_ft_collect_regmap[0]))
+
+#define ARM_VFPV2_NUM_FT_COLLECT_REGS \
+  (sizeof(arm_vfpv2_ft_collect_regmap) / sizeof(arm_vfpv2_ft_collect_regmap[0]))
+
+#define ARM_VFPV3_NUM_FT_COLLECT_REGS \
+  (sizeof(arm_vfpv3_ft_collect_regmap) / sizeof(arm_vfpv3_ft_collect_regmap[0]))
+
+void
+supply_fast_tracepoint_registers (struct regcache *regcache,
+				  const unsigned char *buf)
+{
+  int i;
+  uint32_t val = 0;
+  /* Number of extension registers collected.  */
+  int num_ext_regs = 0;
+
+  for (i = 0; i < ARM_CORE_NUM_FT_COLLECT_REGS; i++)
+    {
+      int index = arm_core_ft_collect_regmap[i];
+      if (index != FT_CR_UNAVAIL)
+	supply_register (regcache, i,
+			 (char *) buf + arm_core_ft_collect_regmap[i]);
+    }
+  if (get_ipa_tdesc (get_ipa_tdesc_idx ()) == tdesc_arm_with_neon
+      || get_ipa_tdesc (get_ipa_tdesc_idx ()) == tdesc_arm_with_vfpv3)
+    {
+      num_ext_regs = ARM_VFPV3_NUM_FT_COLLECT_REGS;
+
+      for (i = 0; i < ARM_VFPV3_NUM_FT_COLLECT_REGS; i++)
+	supply_register (regcache, i + ARM_CORE_NUM_FT_COLLECT_REGS,
+			 (char *) buf + arm_vfpv3_ft_collect_regmap[i]);
+    }
+  else if (get_ipa_tdesc (get_ipa_tdesc_idx ()) == tdesc_arm_with_vfpv2)
+    {
+      num_ext_regs = ARM_VFPV2_NUM_FT_COLLECT_REGS;
+
+      for (i = 0; i < ARM_VFPV2_NUM_FT_COLLECT_REGS; i++)
+	supply_register (regcache, i + ARM_CORE_NUM_FT_COLLECT_REGS,
+			 (char *) buf + arm_vfpv2_ft_collect_regmap[i]);
+    }
+
+  /* SP calculation from stack layout.  */
+  val = (uint32_t) buf + 16 * 4 + num_ext_regs * 8;
+  supply_register (regcache, 13, &val);
+}
+
+ULONGEST
+get_raw_reg (const unsigned char *raw_regs, int regnum)
+{
+  /* Used for JIT conditions.  */
+  return 0;
+}
+
+const char *gdbserver_xmltarget;
+
+const struct target_desc *
+get_ipa_tdesc (int idx)
+{
+  switch (idx)
+    {
+    case ARM_TDESC_ARM:
+      return tdesc_arm;
+    case ARM_TDESC_ARM_WITH_NEON:
+      return tdesc_arm_with_neon;
+    case ARM_TDESC_ARM_WITH_VFPV2:
+      return tdesc_arm_with_vfpv2;
+    case ARM_TDESC_ARM_WITH_VFPV3:
+      return tdesc_arm_with_vfpv3;
+    default:
+      internal_error (__FILE__, __LINE__,
+		      "unknown ipa tdesc index: %d", idx);
+      return tdesc_arm;
+    }
+}
+
+/* Map SIZE bytes of RWX scratch memory at a low address for the jump pad.
+   Returns the mapping, or NULL if no probed address could be claimed.  */
+void *
+alloc_jump_pad_buffer (size_t size)
+{
+  uintptr_t addr;
+
+  /* Probe multiples of SIZE upwards; ADDR wrapping to 0 ends the search.  */
+  for (addr = size; addr != 0; addr += size)
+    {
+      void *res = mmap ((void *) addr, size,
+			PROT_READ | PROT_WRITE | PROT_EXEC,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+      if (res == (void *) addr)
+	return res;
+      if (res != MAP_FAILED)
+	munmap (res, size);
+    }
+  return NULL;
+}
+
+void
+initialize_low_tracepoint (void)
+{
+  /* Initialize the Linux target descriptions.  */
+  init_registers_arm ();
+  init_registers_arm_with_vfpv2 ();
+  init_registers_arm_with_vfpv3 ();
+  init_registers_arm_with_neon ();
+}
diff --git a/gdb/gdbserver/linux-arm-low.c b/gdb/gdbserver/linux-arm-low.c
index c927ad8..5294f00 100644
--- a/gdb/gdbserver/linux-arm-low.c
+++ b/gdb/gdbserver/linux-arm-low.c
@@ -17,10 +17,14 @@ 
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 #include "server.h"
+#include <inttypes.h>
 #include "linux-low.h"
 #include "arch/arm.h"
 #include "arch/arm-linux.h"
 #include "arch/arm-get-next-pcs.h"
+#include "arch/arm-insn-utils.h"
+#include "arch/arm-insn-emit.h"
+#include "arch/arm-insn-reloc.h"
 #include "linux-aarch32-low.h"
 
 #include <sys/uio.h>
@@ -34,6 +38,7 @@ 
 #include <sys/syscall.h>
 
 #include "tracepoint.h"
+#include "ax.h"
 
 /* Defined in auto-generated files.  */
 void init_registers_arm (void);
@@ -67,6 +72,15 @@  extern const struct target_desc *tdesc_arm_with_vfpv3;
 #define PTRACE_SETHBPREGS 30
 #endif
 
+/* Target description indexes for the IPA.  */
+enum arm_linux_tdesc
+  {
+    ARM_TDESC_ARM = 0,
+    ARM_TDESC_ARM_WITH_VFPV2 = 1,
+    ARM_TDESC_ARM_WITH_VFPV3 = 2,
+    ARM_TDESC_ARM_WITH_NEON = 3,
+  };
+
 /* Information describing the hardware breakpoint capabilities.  */
 static struct
 {
@@ -1042,6 +1056,900 @@  arm_supports_tracepoints (void)
   return 1;
 }
 
+static int
+append_inferior_memory (CORE_ADDR *to, size_t len, const unsigned char *buf)
+{
+  if (write_inferior_memory (*to, buf, len) != 0)
+    return 1;
+
+  *to += len;
+
+  return 0;
+}
+
+static int
+append_inferior_memory_32 (CORE_ADDR *to, uint32_t val)
+{
+  return append_inferior_memory (to, 4, (unsigned char *) &val);
+}
+
+static int
+append_inferior_memory_16 (CORE_ADDR *to, uint16_t val)
+{
+  return append_inferior_memory (to, 2, (unsigned char *) &val);
+}
+
+struct arm_insn_reloc_data
+{
+  union
+  {
+    uint32_t arm;
+    uint16_t thumb32[2];
+  } insns;
+
+  /* Error message to return to the client in case the relocation fails.  */
+  const char *err;
+};
+
+static int
+arm_reloc_others (uint32_t insn, const char *iname,
+		  struct arm_insn_reloc_data *data)
+{
+  data->insns.arm = insn;
+
+  return 0;
+}
+
+static int
+arm_reloc_alu_imm (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_alu_reg (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_alu_shifted_reg (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_b_bl_blx (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_block_xfer (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_bx_blx_reg (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_copro_load_store (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_extra_ld_st (uint32_t insn, struct arm_insn_reloc_data *data,
+		       int unprivileged)
+{
+  return 1;
+}
+
+static int
+arm_reloc_ldr_str_ldrb_strb (uint32_t insn, struct arm_insn_reloc_data *data,
+			     int load, int size, int usermode)
+{
+  return 1;
+}
+
+static int
+arm_reloc_preload (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_preload_reg (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_svc (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_undef (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+arm_reloc_unpred (uint32_t insn, struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+struct arm_insn_reloc_visitor arm_insn_reloc_visitor = {
+  arm_reloc_alu_imm,
+  arm_reloc_alu_reg,
+  arm_reloc_alu_shifted_reg,
+  arm_reloc_b_bl_blx,
+  arm_reloc_block_xfer,
+  arm_reloc_bx_blx_reg,
+  arm_reloc_copro_load_store,
+  arm_reloc_extra_ld_st,
+  arm_reloc_ldr_str_ldrb_strb,
+  arm_reloc_others,
+  arm_reloc_preload,
+  arm_reloc_preload_reg,
+  arm_reloc_svc,
+  arm_reloc_undef,
+  arm_reloc_unpred,
+};
+
+/* Relocate the ARM instruction read from FROM and append it at *TO, which
+   is advanced.  Returns 0 on success; otherwise 1 with *ERR set.  */
+static int
+copy_instruction_arm (CORE_ADDR *to, CORE_ADDR from, const char **err)
+{
+  uint32_t insn;
+  struct arm_insn_reloc_data data;
+
+  if (read_inferior_memory (from, (unsigned char *) &insn, sizeof (insn)) != 0)
+    {
+      *err = "Error reading memory while relocating instruction.";
+      return 1;
+    }
+
+  /* Generic default; the relocation callbacks may override it.  */
+  data.err = "Error relocating instruction.";
+  if (arm_relocate_insn (insn, &arm_insn_reloc_visitor, &data) != 0)
+    {
+      *err = data.err;
+      return 1;
+    }
+
+  /* A failed write must not be reported as a successful relocation.  */
+  if (append_inferior_memory_32 (to, data.insns.arm) == 0)
+    return 0;
+  *err = "Error writing memory while relocating instruction.";
+  return 1;
+}
+
+static int
+thumb32_reloc_others (uint16_t insn1, uint16_t insn2, const char *iname,
+		      struct arm_insn_reloc_data *data)
+{
+  data->insns.thumb32[0] = insn1;
+  data->insns.thumb32[1] = insn2;
+
+  return 0;
+}
+
+static int
+thumb32_reloc_alu_imm (uint16_t insn1, uint16_t insn2,
+		       struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+thumb32_reloc_b_bl_blx (uint16_t insn1, uint16_t insn2,
+			struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+thumb32_reloc_block_xfer (uint16_t insn1, uint16_t insn2,
+			  struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+thumb32_reloc_copro_load_store (uint16_t insn1, uint16_t insn2,
+				struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+thumb32_reloc_load_literal (uint16_t insn1, uint16_t insn2,
+			    struct arm_insn_reloc_data *data, int size)
+{
+  return 1;
+}
+
+static int
+thumb32_reloc_load_reg_imm (uint16_t insn1, uint16_t insn2,
+			    struct arm_insn_reloc_data *data, int writeback,
+			    int immed)
+{
+  return 1;
+}
+
+static int
+thumb32_reloc_pc_relative_32bit (uint16_t insn1, uint16_t insn2,
+				 struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+thumb32_reloc_preload (uint16_t insn1, uint16_t insn2,
+		       struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+thumb32_reloc_undef (uint16_t insn1, uint16_t insn2,
+		     struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+static int
+thumb32_reloc_table_branch (uint16_t insn1, uint16_t insn2,
+			    struct arm_insn_reloc_data *data)
+{
+  return 1;
+}
+
+
+struct thumb_32bit_insn_reloc_visitor thumb_32bit_insns_reloc_visitor = {
+  thumb32_reloc_alu_imm,
+  thumb32_reloc_b_bl_blx,
+  thumb32_reloc_block_xfer,
+  thumb32_reloc_copro_load_store,
+  thumb32_reloc_load_literal,
+  thumb32_reloc_load_reg_imm,
+  thumb32_reloc_others,
+  thumb32_reloc_pc_relative_32bit,
+  thumb32_reloc_preload,
+  thumb32_reloc_undef,
+  thumb32_reloc_table_branch,
+};
+
+/* Relocate the 32-bit Thumb instruction read from FROM and append it at
+   *TO, which is advanced by each halfword written.  Returns 0 on
+   success; otherwise 1 with *ERR set to a static message.  */
+static int
+copy_instruction_thumb32 (CORE_ADDR *to, CORE_ADDR from, const char **err)
+{
+  uint16_t insn1, insn2;
+  struct arm_insn_reloc_data data;
+
+  /* The two halfwords are fetched separately.  */
+  if (read_inferior_memory (from, (unsigned char *) &insn1, sizeof (insn1))
+      != 0
+      || read_inferior_memory (from + sizeof (insn1),
+			       (unsigned char *) &insn2, sizeof (insn2)) != 0)
+    {
+      *err = "Error reading memory while relocating instruction.";
+      return 1;
+    }
+
+  /* Generic default; the relocation callbacks may override it.  */
+  data.err = "Error relocating instruction.";
+  if (thumb_32bit_relocate_insn (insn1, insn2,
+				 &thumb_32bit_insns_reloc_visitor, &data) != 0)
+    {
+      *err = data.err;
+      return 1;
+    }
+
+  /* A failed write must not be reported as a successful relocation.  */
+  if (append_inferior_memory_16 (to, data.insns.thumb32[0]) != 0
+      || append_inferior_memory_16 (to, data.insns.thumb32[1]) != 0)
+    {
+      *err = "Error writing memory while relocating instruction.";
+      return 1;
+    }
+
+  return 0;
+}
+
+static int
+arm_get_thread_area (int lwpid, CORE_ADDR *addr)
+{
+  uint32_t val;
+
+  if (ptrace (PTRACE_GET_THREAD_AREA, lwpid, NULL, &val) != 0)
+    return -1;
+
+  *addr = val;
+  return 0;
+}
+
+static int
+arm_get_min_fast_tracepoint_insn_len (void)
+{
+  return 4;
+}
+
+/* Core register numbers used in fast tracepoints code.  */
+static const int r0 = 0;
+static const int r1 = 1;
+static const int r2 = 2;
+static const int r3 = 3;
+static const int r4 = 4;
+static const int r5 = 5;
+static const int sp = 13;
+static const int lr = 14;
+
+/* Build the ARM-mode jump pad for fast tracepoint TP at *JUMP_ENTRY: save
+   registers, call COLLECTOR under the LOCKADDR spin lock, restore, run the
+   relocated instruction and branch back to TP->address + 4.  Returns 0 on
+   success (advancing *JUMP_ENTRY), or 1 with an "E..." message in ERR.  */
+static int
+arm_install_fast_tracepoint_jump_pad_arm (struct tracepoint *tp,
+					  CORE_ADDR collector,
+					  CORE_ADDR lockaddr,
+					  CORE_ADDR *jump_entry,
+					  CORE_ADDR *trampoline,
+					  ULONGEST *trampoline_size,
+					  unsigned char *jjump_pad_insn,
+					  ULONGEST *jjump_pad_insn_size,
+					  char *err)
+{
+  unsigned char buf[0x100];
+  CORE_ADDR buildaddr = *jump_entry;
+  const struct target_desc *tdesc = current_process ()->tdesc;
+  const uint32_t kuser_get_tls = 0xffff0fe0;
+  uint32_t *ptr = (uint32_t *) buf;
+  const char *copy_insn_err;
+
+  /* Push VFP registers if available.  */
+  if (tdesc == tdesc_arm_with_neon || tdesc == tdesc_arm_with_vfpv3)
+    {
+      /* vpush {d0-d15} */
+      ptr += arm_emit_arm_vpush (ptr, INST_AL, 0, 16);
+
+      /* vpush {d16-d31} */
+      ptr += arm_emit_arm_vpush (ptr, INST_AL, 16, 16);
+    }
+  else if (tdesc == tdesc_arm_with_vfpv2)
+    /* vpush {d0-d15} */
+    ptr += arm_emit_arm_vpush (ptr, INST_AL, 0, 16);
+
+  /* Function prologue, push common registers on the stack.
+     push { r0-r12,lr }  */
+  ptr
+    += arm_emit_arm_push_list (ptr, INST_AL,
+			       encode_register_list (0, 13, ENCODE (1, 1, 14)));
+
+  /* Push current processor state register (CPSR) on the stack.  */
+  ptr += arm_emit_arm_mrs (ptr, INST_AL, r0);
+
+  /* push r0 */
+  ptr += arm_emit_arm_push_one (ptr, INST_AL, r0);
+
+  /* Push replaced instruction address on the stack.  */
+  ptr += arm_emit_arm_mov_32 (ptr, r0, (uint32_t) tp->address);
+  /* push r0 (orig pc)  */
+  ptr += arm_emit_arm_push_one (ptr, INST_AL, r0);
+
+  /* Save current stack pointer for the REGS parameters of the gdb_collect
+     call later. */
+  ptr += arm_emit_arm_mov (ptr, INST_AL, r1, register_operand (sp));
+
+  /* Push current thread's local storage location on the stack.  */
+  ptr += arm_emit_arm_mov_32 (ptr, r0, kuser_get_tls);
+  ptr += arm_emit_arm_blx (ptr, INST_AL, register_operand (r0));
+  /* push r0 (tls)  */
+  ptr += arm_emit_arm_push_one (ptr, INST_AL, r0);
+
+  /* Push obj_addr_on_target on the stack.  */
+  ptr += arm_emit_arm_mov_32 (ptr, r0, (uint32_t) tp->obj_addr_on_target);
+  /* push r0 (tpoint:arg1)  */
+  ptr += arm_emit_arm_push_one (ptr, INST_AL, r0);
+
+  /* Move collector function address to r2.  */
+  ptr += arm_emit_arm_mov_32 (ptr, r2, (uint32_t) collector);
+
+  /* Move lock address to r4.  */
+  ptr += arm_emit_arm_mov_32 (ptr, r4, (uint32_t) lockaddr);
+
+  /*
+   * At this point, the stack looks like:
+   *           bottom
+   * +-------------------------------------------------+
+   * |  saved lr                                       |
+   * |  saved r12                                      |
+   * |  ...                                            |
+   * |  saved r0                                       |
+   * |  saved cpsr                                     |
+   * |  tp->address                                    | <- r1
+   * |  tls  (collecting_t.thread_area)                |
+   * |  tp->obj_addr_on_target  (collecting_t.tpoint)  | <- r5
+   * +-------------------------------------------------+
+   *            top
+   */
+
+  /* Save current sp value, so we can restore it after the call to
+     gdb_collect.  */
+  /* mov r5, sp  */
+  ptr += arm_emit_arm_mov (ptr, INST_AL, r5, register_operand (sp));
+
+  /* Spin lock on lockaddr (r4 contains address of lock) */
+
+  /* This is a full memory barrier.
+     1: dmb sy (memory barrier)  */
+  ptr += arm_emit_arm_dmb (ptr);
+  /* Load lock value in r3
+     2: ldrex r3, [r4]  */
+  ptr += arm_emit_arm_ldrex (ptr, INST_AL, r3, r4);
+
+  /* Is it already locked?
+     cmp r3, #0  */
+  ptr += arm_emit_arm_cmp (ptr, INST_AL, r3, immediate_operand (0));
+
+  /* If so, start over.
+     bne 3  */
+  ptr += arm_emit_arm_b (ptr, INST_NE, arm_arm_branch_adjusted_offset (16));
+
+  /* If not, write a value (our saved stack pointer in r5) to the location.
+     strex lr, r5, [r4]  */
+  ptr += arm_emit_arm_strex (ptr, INST_AL, lr, r5, r4);
+
+  /* Did the write succeed?
+     cmp lr, #0  */
+  ptr += arm_emit_arm_cmp (ptr, INST_AL, lr, immediate_operand (0));
+
+  /* If not, start over.
+     bne 2  */
+  ptr += arm_emit_arm_b (ptr, INST_NE, arm_arm_branch_adjusted_offset (-20));
+
+  /* A full memory barrier again.  */
+  ptr += arm_emit_arm_dmb (ptr);
+  /* bne 1  */
+  ptr += arm_emit_arm_b (ptr, INST_NE, arm_arm_branch_adjusted_offset (-32));
+
+  /* Round the stack to a multiple of 8 (section 5.2.1.2)
+     bic r3, r5, 7  */
+  ptr += arm_emit_arm_bic (ptr, INST_AL, r3, r5, immediate_operand (7));
+
+  /* mov sp, r3  */
+  ptr += arm_emit_arm_mov (ptr, INST_AL, sp, register_operand (r3));
+
+  /* Call collector (obj_addr_on_target, regs);
+	  r2 -^      r0 -^             r1 -^  */
+  ptr += arm_emit_arm_blx (ptr, INST_AL, register_operand (r2));
+
+  /* Restore sp to pre-call/rounding value.
+     mov sp, r5  */
+  ptr += arm_emit_arm_mov (ptr, INST_AL, sp, register_operand (r5));
+
+  /* Unlock the spin lock (by writing 0 to it).  */
+  ptr += arm_emit_arm_mov (ptr, INST_AL, r3, immediate_operand (0));
+
+  /* str r3, [r4]  */
+  ptr += arm_emit_arm_str (ptr, INST_AL, r3, r4,
+			   memory_operand (offset_memory_operand (0)));
+
+  /* Pop everything that was saved. */
+
+  /* tpoint, tls, tpaddr
+     add sp, sp, #12  */
+  ptr += arm_emit_arm_add (ptr, INST_AL, 0, sp, sp, immediate_operand (12));
+
+  /* cpsr
+     pop r0  */
+  ptr += arm_emit_arm_pop_one (ptr, INST_AL, r0);
+
+  /* msr cpsr,r0  */
+  ptr += arm_emit_arm_msr (ptr, INST_AL, r0);
+
+  /* r0-r12 and lr
+     pop { r0-r12,lr }  */
+  ptr += arm_emit_arm_pop_list (ptr, INST_AL,
+				encode_register_list (0, 13,
+						      ENCODE (1, 1, 14)));
+
+  /* Pop VFP registers.  */
+  if (tdesc == tdesc_arm_with_neon || tdesc == tdesc_arm_with_vfpv3)
+    {
+      /* vpop {d16-d31} */
+      ptr += arm_emit_arm_vpop (ptr, INST_AL, 16, 16);
+
+      /* vpop {d0-d15} */
+      ptr += arm_emit_arm_vpop (ptr, INST_AL, 0, 16);
+    }
+  else if (tdesc == tdesc_arm_with_vfpv2)
+    /* vpop {d0-d15} */
+    ptr += arm_emit_arm_vpop (ptr, INST_AL, 0, 16);
+
+  append_inferior_memory (&buildaddr, (unsigned char *) ptr - buf, buf);
+
+  /* Record the start and, once it is written, the end of the relocated insn.  */
+  tp->adjusted_insn_addr = buildaddr;
+  if (copy_instruction_arm (&buildaddr, tp->address, &copy_insn_err) != 0)
+    {
+      sprintf (err, "E%s", copy_insn_err);
+      return 1;
+    }
+  tp->adjusted_insn_addr_end = buildaddr;
+
+  /* Possible improvement: make this branch (B <mem location>) non-relative:
+       push {r0,r1}; movw r0, #<mem location>; movt r0, #<mem location>;
+       str r0, [sp, #4]; pop {r0,pc}  */
+  if (!arm_arm_is_reachable (buildaddr, tp->address + 4))
+    {
+      sprintf (err,
+	       "EJump back from jump pad too far from tracepoint "
+	       "(offset 0x%" PRIx32 " cannot be encoded in 24 bits).",
+	       arm_arm_branch_relative_distance (buildaddr, tp->address + 4));
+      return 1;
+    }
+  /* b <tp_addr + 4>  */
+  arm_emit_arm_b ((uint32_t *) buf, INST_AL,
+		  arm_arm_branch_relative_distance (buildaddr,
+						    tp->address + 4));
+  append_inferior_memory (&buildaddr, 4, buf);
+
+  /* write tp instr.  */
+  if (!arm_arm_is_reachable (tp->address, *jump_entry))
+    {
+      sprintf (err,
+	       "EJump pad too far from tracepoint "
+	       "(offset 0x%" PRIx32 " cannot be encoded in 24 bits).",
+	       arm_arm_branch_relative_distance (tp->address, *jump_entry));
+      return 1;
+    }
+
+  arm_emit_arm_b ((uint32_t *) jjump_pad_insn, INST_AL,
+		  arm_arm_branch_relative_distance (tp->address, *jump_entry));
+
+  *jjump_pad_insn_size = 4;
+  *jump_entry = buildaddr;
+
+  return 0;
+}
+
+/* Build the fast tracepoint jump pad for tracepoint TP, which is placed
+   on a 32-bit Thumb-2 instruction.
+
+   The pad is assembled in a local buffer and written to the inferior
+   starting at *JUMP_ENTRY.  It saves the VFP registers (when the target
+   description has them), r0-r12, lr, cpsr and the tracepoint address,
+   takes the spin lock at LOCKADDR, calls COLLECTOR with
+   TP->obj_addr_on_target and a pointer to the saved registers, releases
+   the lock and restores the saved state.  A copy of the instruction at
+   TP->address and a branch back to TP->address + 4 are appended after
+   it; TP->adjusted_insn_addr and TP->adjusted_insn_addr_end delimit
+   that copy.
+
+   On success, return 0, store in JJUMP_PAD_INSN the 4-byte branch from
+   TP->address to the pad (its size in *JJUMP_PAD_INSN_SIZE) and advance
+   *JUMP_ENTRY past everything that was written.  On failure, return 1
+   with a message starting with 'E' in ERR.  TRAMPOLINE and
+   TRAMPOLINE_SIZE are not used.  */
+
+static int
+arm_install_fast_tracepoint_jump_pad_thumb2 (struct tracepoint *tp,
+					     CORE_ADDR collector,
+					     CORE_ADDR lockaddr,
+					     CORE_ADDR *jump_entry,
+					     CORE_ADDR *trampoline,
+					     ULONGEST *trampoline_size,
+					     unsigned char *jjump_pad_insn,
+					     ULONGEST *jjump_pad_insn_size,
+					     char *err)
+{
+  unsigned char buf[0x100];
+  CORE_ADDR buildaddr = *jump_entry;
+  const struct target_desc *tdesc = current_process ()->tdesc;
+  /* Address called below to obtain the thread's TLS pointer in r0.  */
+  const uint32_t kuser_get_tls = 0xffff0fe0;
+  uint16_t *ptr = (uint16_t *) buf;
+  const char *copy_insn_err;
+
+  /* Push VFP registers if available.  */
+  if (tdesc == tdesc_arm_with_neon || tdesc == tdesc_arm_with_vfpv3)
+    {
+      /* vpush {d0-d15} */
+      ptr += arm_emit_thumb_vpush (ptr, 0, 16);
+      /* vpush {d16-d31} */
+      ptr += arm_emit_thumb_vpush (ptr, 16, 16);
+    }
+  else if (tdesc == tdesc_arm_with_vfpv2)
+    /* vpush {d0-d15} */
+    ptr += arm_emit_thumb_vpush (ptr, 0, 16);
+
+  /* Function prologue, push common registers on the stack.
+     push { r0-r12,lr }  */
+  ptr += arm_emit_thumb_push_list (ptr, encode_register_list (0, 13, 0), 1);
+
+  /* Push current processor state register (CPSR) on the stack.
+     mrs r0, cpsr  */
+  ptr += arm_emit_thumb_mrs (ptr, r0);
+
+  /* push r0  */
+  ptr += arm_emit_thumb_push_one (ptr, ENCODE (1, 1, 0), 0);
+
+  /* Push replaced instruction address on the stack.  */
+  ptr += arm_emit_thumb_mov_32 (ptr, r0, (uint32_t) tp->address);
+
+  /* push r0 (orig pc)  */
+  ptr += arm_emit_thumb_push_one (ptr, ENCODE (1, 1, 0), 0);
+
+  /* Save current stack pointer for the REGS parameters of the gdb_collect
+     call later.
+     mov r1, sp (regs:arg2)  */
+  ptr += arm_emit_thumb_mov (ptr, r1, register_operand (sp));
+
+  /* Push current thread's local storage location on the stack.  The
+     value is obtained by calling the helper at KUSER_GET_TLS.  */
+  ptr += arm_emit_thumb_mov_32 (ptr, r0, kuser_get_tls);
+  ptr += arm_emit_thumb_blx (ptr, register_operand (r0));
+  /* push r0 (tls)  */
+  ptr += arm_emit_thumb_push_one (ptr, ENCODE (1, 1, 0), 0);
+
+  /* Push obj_addr_on_target on the stack.  r0 is left holding it, as
+     the first argument of the collector call below.  */
+  ptr += arm_emit_thumb_mov_32 (ptr, r0,
+				(uint32_t) tp->obj_addr_on_target);
+  /* push r0 (tpoint:arg1)  */
+  ptr += arm_emit_thumb_push_one (ptr, ENCODE (1, 1, 0), 0);
+
+  /* Move collector function address to r2.  */
+  ptr += arm_emit_thumb_mov_32 (ptr, r2, (uint32_t) collector);
+
+  /* Move lock address to r4.  */
+  ptr += arm_emit_thumb_mov_32 (ptr, r4, (uint32_t) lockaddr);
+
+  /*
+   * At this point, the stack looks like:
+   *           bottom
+   * +-------------------------------------------------+
+   * |  saved lr                                       |
+   * |  saved r12                                      |
+   * |  ...                                            |
+   * |  saved r0                                       |
+   * |  saved cpsr                                     |
+   * |  tp->address                                    | <- r1
+   * |  tls  (collecting_t.thread_area)                |
+   * |  tp->obj_addr_on_target  (collecting_t.tpoint)  | <- r5
+   * +-------------------------------------------------+
+   *            top
+   */
+
+  /* Save current sp value, so we can restore it after the call to
+     gdb_collect.
+     mov r5, sp  */
+  ptr += arm_emit_thumb_mov (ptr, r5, register_operand (sp));
+
+  /* Spin lock on lockaddr (r4 contains address of lock).  The numbers
+     1, 2 and 3 in the comments below are the branch targets of the
+     loop.  */
+
+  /* This is a full memory barrier.
+     1: dmb sy  */
+  ptr += arm_emit_thumb_dmb (ptr);
+
+  /* Load lock value in r3
+     2: ldrex   r3, [r4]  */
+  ptr += arm_emit_thumb_ldrex (ptr, r3, r4, immediate_operand (0));
+
+  /* Is it already locked?
+     cmp r3, #0  */
+  ptr += arm_emit_thumb_cmp (ptr, r3, immediate_operand (0));
+
+  /* If so, start over.  This goes through 3, whose conditional branch
+     sends us back to 1.
+     bne.n   3  */
+  ptr += arm_emit_thumb_b (ptr, INST_NE,
+			   arm_thumb_branch_adjusted_offset (12));
+
+  /* If not, write a value (our saved stack pointer in r5) to the location.
+     strex   r14, r5, [r4]  */
+  ptr += arm_emit_thumb_strex (ptr, lr, r5, r4, immediate_operand (0));
+
+  /* Did the write succeed?
+     cmp.w   r14, #0  */
+  ptr += arm_emit_thumb_cmpw (ptr, lr, immediate_operand (0));
+
+  /* If not, start over.
+     bne.n  2  */
+  ptr += arm_emit_thumb_b (ptr, INST_NE,
+			   arm_thumb_branch_adjusted_offset (-16));
+
+  /* A full memory barrier again.
+     3: dmb  sy  */
+  ptr += arm_emit_thumb_dmb (ptr);
+
+  /* Taken only when we got here because the lock was already held.
+     bne.n  1  */
+  ptr += arm_emit_thumb_b (ptr, INST_NE,
+			   arm_thumb_branch_adjusted_offset (-26));
+
+  /* Round the stack to a multiple of 8 (AAPCS section 5.2.1.2)
+     bic r3, r5, 7  */
+  ptr += arm_emit_thumb_bic (ptr, r3, r5, immediate_operand (7));
+
+  /* mov sp, r3  */
+  ptr += arm_emit_thumb_mov (ptr, sp, register_operand (r3));
+
+  /* Call collector (obj_addr_on_target, regs);
+	  r2 -^      r0 -^             r1 -^  */
+  ptr += arm_emit_thumb_blx (ptr, register_operand (r2));
+
+  /* Restore sp to pre-call/rounding value.
+     mov sp, r5  */
+  ptr += arm_emit_thumb_mov (ptr, sp, register_operand (r5));
+
+  /* Unlock the spin lock (by writing 0 to it).
+     movw r3, #0  */
+  ptr += arm_emit_thumb_movw (ptr, r3, immediate_operand (0));
+
+  /* str r3, [r4]  */
+  ptr += arm_emit_thumb_str (ptr, r3, r4, immediate_operand (0));
+
+  /* Pop everything that was saved.  */
+
+  /* tpoint, tls, tpaddr
+     add sp, sp, #12  */
+  ptr += arm_emit_thumb_add_sp (ptr, immediate_operand (12));
+
+  /* For cpsr.
+     pop r0  */
+  ptr += arm_emit_thumb_pop (ptr, ENCODE (1, 1, 0), 0);
+
+  /* msr cpsr, r0  */
+  ptr += arm_emit_thumb_msr (ptr, r0);
+
+  /* r0-r12 and lr
+     pop { r0-r12,lr }  */
+  ptr += arm_emit_thumb_popw_list (ptr, encode_register_list (0, 13, 0), 0, 1);
+
+  /* Pop VFP registers, in the reverse order of the pushes above.  */
+  if (tdesc == tdesc_arm_with_neon || tdesc == tdesc_arm_with_vfpv3)
+    {
+      /* vpop {d16-d31} */
+      ptr += arm_emit_thumb_vpop (ptr, 16, 16);
+      /* vpop {d0-d15} */
+      ptr += arm_emit_thumb_vpop (ptr, 0, 16);
+    }
+  else if (tdesc == tdesc_arm_with_vfpv2)
+    {
+      /* vpop {d0-d15} */
+      ptr += arm_emit_thumb_vpop (ptr, 0, 16);
+    }
+
+  /* Write what has been assembled so far.  The length is the distance in
+     bytes between the emit cursor and the start of BUF; both point into
+     BUF, so plain pointer subtraction gives it without converting
+     pointers to integers.  */
+  append_inferior_memory (&buildaddr, (unsigned char *) ptr - buf, buf);
+
+  /* Copy the instruction the tracepoint is placed on right after the
+     collection code, and record where the copy starts and ends.  */
+  tp->adjusted_insn_addr = buildaddr;
+  if (copy_instruction_thumb32 (&buildaddr, tp->address, &copy_insn_err) != 0)
+    {
+      sprintf (err, "E%s", copy_insn_err);
+      return 1;
+    }
+  tp->adjusted_insn_addr_end = buildaddr;
+
+  /* Possible improvements:
+     This branch can be made non-relative:
+     B <mem location>:
+     push	   {r0,r1}
+     movw	   r0, #<mem location>
+     movt	   r0, #<mem location>
+     str	   r0, [sp, #4]
+     pop	   {r0,pc}  */
+  if (!arm_thumb_is_reachable (buildaddr, tp->address + 4))
+    {
+      sprintf (err,
+	       "EJump back from jump pad too far from tracepoint "
+	       "(offset 0x%" PRIx32 " cannot be encoded in 23 bits).",
+	       arm_thumb_branch_relative_distance (buildaddr, tp->address + 4));
+      return 1;
+    }
+
+  /* b.w <tp_addr + 4>
+     BUF is reused here; its previous contents have already been
+     written to the inferior.  */
+  arm_emit_thumb_bw ((uint16_t *) buf,
+		     arm_thumb_branch_relative_distance (buildaddr,
+							 tp->address + 4));
+  append_inferior_memory (&buildaddr, 4, buf);
+
+  /* Write tp instr: the branch from the tracepoint address to the start
+     of the jump pad.  */
+  if (!arm_thumb_is_reachable (tp->address, *jump_entry))
+    {
+      sprintf (err,
+	       "EJump pad too far from tracepoint "
+	       "(offset 0x%" PRIx32 " cannot be encoded in 23 bits).",
+	       arm_thumb_branch_relative_distance (tp->address, *jump_entry));
+      return 1;
+    }
+
+  arm_emit_thumb_bw ((uint16_t *) jjump_pad_insn,
+		     arm_thumb_branch_relative_distance (tp->address,
+							 *jump_entry));
+  *jjump_pad_insn_size = 4;
+  *jump_entry = buildaddr;
+
+  return 0;
+}
+
+/* Implementation of the linux_target_ops method
+   "install_fast_tracepoint_jump_pad".
+
+   Hand the work over to the ARM or the Thumb-2 jump pad builder
+   according to TP->kind, and return what the builder returns.  Any
+   other kind is refused with an error message in ERR.  A
+   NOT_SUPPORTED_ERROR exception raised by a builder is turned into a
+   failure (return value 1) with the exception message copied to ERR;
+   every other error is propagated to the caller.  */
+
+static int
+arm_install_fast_tracepoint_jump_pad (struct tracepoint *tp,
+				      CORE_ADDR collector,
+				      CORE_ADDR lockaddr,
+				      CORE_ADDR *jump_entry,
+				      CORE_ADDR *trampoline,
+				      ULONGEST *trampoline_size,
+				      unsigned char *jjump_pad_insn,
+				      ULONGEST *jjump_pad_insn_size,
+				      char *err)
+{
+  const int is_arm = tp->kind == ARM_BP_KIND_ARM;
+  int status = 1;
+
+  /* Only 4-byte instructions can hold the jump to the pad.  */
+  if (!is_arm && tp->kind != ARM_BP_KIND_THUMB2)
+    {
+      strcpy (err,
+	      "ECan't put a fast tracepoint jump on a two-bytes Thumb "
+	      "instruction.");
+      return 1;
+    }
+
+  TRY
+    {
+      if (is_arm)
+	status = arm_install_fast_tracepoint_jump_pad_arm
+	  (tp, collector, lockaddr, jump_entry, trampoline, trampoline_size,
+	   jjump_pad_insn, jjump_pad_insn_size, err);
+      else
+	status = arm_install_fast_tracepoint_jump_pad_thumb2
+	  (tp, collector, lockaddr, jump_entry, trampoline, trampoline_size,
+	   jjump_pad_insn, jjump_pad_insn_size, err);
+    }
+  CATCH (ex, RETURN_MASK_ERROR)
+    {
+      /* Anything but "not supported" is not ours to handle.  */
+      if (ex.error != NOT_SUPPORTED_ERROR)
+	throw_exception (ex);
+
+      strcpy (err, ex.message);
+      return 1;
+    }
+  END_CATCH
+
+  return status;
+}
+
+/* Implementation of the linux_target_ops method "get_ipa_tdesc_idx".
+
+   Translate the target description of the current process into the
+   matching ARM_TDESC_* value.  0 is returned when the description is
+   none of the four tested here.  */
+
+static int
+arm_get_ipa_tdesc_idx (void)
+{
+  const struct target_desc *current = current_process ()->tdesc;
+  int idx = 0;
+
+  if (current == tdesc_arm)
+    idx = ARM_TDESC_ARM;
+  else if (current == tdesc_arm_with_vfpv2)
+    idx = ARM_TDESC_ARM_WITH_VFPV2;
+  else if (current == tdesc_arm_with_vfpv3)
+    idx = ARM_TDESC_ARM_WITH_VFPV3;
+  else if (current == tdesc_arm_with_neon)
+    idx = ARM_TDESC_ARM_WITH_NEON;
+
+  return idx;
+}
+
 struct linux_target_ops the_low_target = {
   arm_arch_setup,
   arm_regs_info,
@@ -1069,14 +1977,15 @@  struct linux_target_ops the_low_target = {
   arm_prepare_to_resume,
   NULL, /* process_qsupported */
   arm_supports_tracepoints,
-  NULL, /* get_thread_area */
-  NULL, /* install_fast_tracepoint_jump_pad */
-  NULL, /* emit_ops */
-  NULL, /* get_min_fast_tracepoint_insn_len */
+  arm_get_thread_area, /* get_thread_area */
+  arm_install_fast_tracepoint_jump_pad, /* install_fast_tracepoint_jump_pad */
+  NULL, /* emit_ops */
+  arm_get_min_fast_tracepoint_insn_len, /* get_min_fast_tracepoint_insn_len */
   NULL, /* supports_range_stepping */
   arm_breakpoint_kind_from_current_state,
   arm_supports_hardware_single_step,
   arm_get_syscall_trapinfo,
+  arm_get_ipa_tdesc_idx,
 };
 
 void
diff --git a/gdb/gdbserver/tracepoint.c b/gdb/gdbserver/tracepoint.c
index 87211e9..95933dd 100644
--- a/gdb/gdbserver/tracepoint.c
+++ b/gdb/gdbserver/tracepoint.c
@@ -4658,6 +4658,13 @@  collect_data_at_step (struct tracepoint_hit_ctx *ctx,
 EXTERN_C_PUSH
 IP_AGENT_EXPORT_VAR int ipa_tdesc_idx;
 EXTERN_C_POP
+
+/* Return the current target description index, i.e. the value of the
+   exported ipa_tdesc_idx variable.  */
+int
+get_ipa_tdesc_idx (void)
+{
+  return ipa_tdesc_idx;
+}
 #endif
 
 static struct regcache *
diff --git a/gdb/gdbserver/tracepoint.h b/gdb/gdbserver/tracepoint.h
index c45a03c..cdd315b 100644
--- a/gdb/gdbserver/tracepoint.h
+++ b/gdb/gdbserver/tracepoint.h
@@ -125,6 +125,7 @@  int handle_tracepoint_bkpts (struct thread_info *tinfo, CORE_ADDR stop_pc);
 #ifdef IN_PROCESS_AGENT
 void initialize_low_tracepoint (void);
 const struct target_desc *get_ipa_tdesc (int idx);
+int get_ipa_tdesc_idx (void);
 void supply_fast_tracepoint_registers (struct regcache *regcache,
 				       const unsigned char *regs);
 void supply_static_tracepoint_registers (struct regcache *regcache,
diff --git a/gdb/testsuite/lib/trace-support.exp b/gdb/testsuite/lib/trace-support.exp
index 9996315..e9926c9 100644
--- a/gdb/testsuite/lib/trace-support.exp
+++ b/gdb/testsuite/lib/trace-support.exp
@@ -95,7 +95,8 @@  proc gdb_target_supports_trace { } {
 proc gdb_target_supports_fast_trace { } {
     if {[istarget "x86_64-*-*"]
 	|| [istarget "i\[34567\]86-*-*"]
-	|| [is_aarch64_target]} {
+	|| [is_aarch64_target]
+	|| [is_aarch32_target]} {
 	return 1
     }