diff mbox

Power8 fusion of addis,addi

Message ID 20140601141822.GF6679@bubble.grove.modra.org
State New
Headers show

Commit Message

Alan Modra June 1, 2014, 2:18 p.m. UTC
Power8 fuses addis,addi and addis,ld sequences when the target register
of the addis is the same as that of the addi/ld.  Thus
    addis r12,r2,xxx@ha
    addi r12,r12,xxx@l / ld r12,xxx@l(r12)
is faster than
    addis r11,r2,xxx@ha
    addi r12,r11,xxx@l / ld r12,xxx@l(r11)
So use the form that allows fusion in plt call and branch stubs.

This change requires an update to the set of linkage stubs recognised
by gdb.  OK to commit the gdb change?

bfd/
	* elf64-ppc.c (ADDIS_R12_R2): Define.
	(build_plt_stub): Support fusion on ELFv2 stub.
	(ppc_build_one_stub): Likewise for plt branch stubs.
gold/
	* powerpc.cc (addis_12_2): Define.
	(Stub_table::do_write): Support fusion on ELFv2 stubs.
ld/testsuite/
	* ld-powerpc/elfv2exe.d: Update for changed plt call stubs.
gdb/
	* ppc64-tdep.c (ppc64_standard_linkage8): New.
	(ppc64_skip_trampoline_code): Recognise ELFv2 stub supporting fusion.

Comments

Joel Brobecker June 2, 2014, 2:59 p.m. UTC | #1
Hi Alan,

> gdb/
> 	* ppc64-tdep.c (ppc64_standard_linkage8): New.
> 	(ppc64_skip_trampoline_code): Recognise ELFv2 stub supporting fusion.

OK for the GDB part.

Thanks! :)
diff mbox

Patch

diff --git a/gdb/ppc64-tdep.c b/gdb/ppc64-tdep.c
index cbbbedc..8acd754 100644
--- a/gdb/ppc64-tdep.c
+++ b/gdb/ppc64-tdep.c
@@ -303,6 +303,29 @@  static struct ppc_insn_pattern ppc64_standard_linkage7[] =
     { 0, 0, 0 }
   };
 
+/* ELFv2 PLT call stub to access PLT entries more than +/- 32k from r2,
+   supporting fusion.  */
+
+static struct ppc_insn_pattern ppc64_standard_linkage8[] =
+  {
+    /* std r2, 24(r1) <optional> */
+    { -1, insn_ds (62, 2, 1, 24, 0), 1 },
+
+    /* addis r12, r2, <any> */
+    { insn_d (-1, -1, -1, 0), insn_d (15, 12, 2, 0), 0 },
+
+    /* ld r12, <any>(r12) */
+    { insn_ds (-1, -1, -1, 0, -1), insn_ds (58, 12, 12, 0, 0), 0 },
+
+    /* mtctr r12 */
+    { insn_xfx (-1, -1, -1, -1), insn_xfx (31, 12, 9, 467), 0 },
+
+    /* bctr */
+    { -1, 0x4e800420, 0 },
+
+    { 0, 0, 0 }
+  };
+
 /* When the dynamic linker is doing lazy symbol resolution, the first
    call to a function in another object will go like this:
 
@@ -437,10 +460,14 @@  ppc64_skip_trampoline_code (struct frame_info *frame, CORE_ADDR pc)
 				    ARRAY_SIZE (ppc64_standard_linkage4))),
 			  MAX (MAX (ARRAY_SIZE (ppc64_standard_linkage5),
 				    ARRAY_SIZE (ppc64_standard_linkage6)),
-			       ARRAY_SIZE (ppc64_standard_linkage7))) - 1];
+			       MAX (ARRAY_SIZE (ppc64_standard_linkage7),
+				    ARRAY_SIZE (ppc64_standard_linkage8))))
+		     - 1];
   CORE_ADDR target;
 
-  if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage7, insns))
+  if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage8, insns))
+    pc = ppc64_standard_linkage4_target (frame, pc, insns);
+  else if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage7, insns))
     pc = ppc64_standard_linkage3_target (frame, pc, insns);
   else if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage6, insns))
     pc = ppc64_standard_linkage4_target (frame, pc, insns);
diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c
index fdca0c6..e7e2e39 100644
--- a/bfd/elf64-ppc.c
+++ b/bfd/elf64-ppc.c
@@ -173,6 +173,7 @@  static bfd_vma opd_entry_value
 
 #define LD_R2_0R1	0xe8410000	/* ld    %r2,0(%r1)      */
 
+#define ADDIS_R12_R2	0x3d820000	/* addis %r12,%r2,xxx@ha     */
 #define ADDIS_R12_R12	0x3d8c0000	/* addis %r12,%r12,xxx@ha */
 #define LD_R12_0R12	0xe98c0000	/* ld    %r12,xxx@l(%r12) */
 
@@ -10246,8 +10247,16 @@  build_plt_stub (struct ppc_link_hash_table *htab,
       if (ALWAYS_EMIT_R2SAVE
 	  || stub_entry->stub_type == ppc_stub_plt_call_r2save)
 	bfd_put_32 (obfd, STD_R2_0R1 + STK_TOC (htab), p),	p += 4;
-      bfd_put_32 (obfd, ADDIS_R11_R2 | PPC_HA (offset), p),	p += 4;
-      bfd_put_32 (obfd, LD_R12_0R11 | PPC_LO (offset), p),	p += 4;
+      if (plt_load_toc)
+	{
+	  bfd_put_32 (obfd, ADDIS_R11_R2 | PPC_HA (offset), p),	p += 4;
+	  bfd_put_32 (obfd, LD_R12_0R11 | PPC_LO (offset), p),	p += 4;
+	}
+      else
+	{
+	  bfd_put_32 (obfd, ADDIS_R12_R2 | PPC_HA (offset), p),	p += 4;
+	  bfd_put_32 (obfd, LD_R12_0R12 | PPC_LO (offset), p),	p += 4;
+	}
       if (plt_load_toc
 	  && PPC_HA (offset + 8 + 8 * plt_static_chain) != PPC_HA (offset))
 	{
@@ -10668,10 +10677,10 @@  ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
 	    {
 	      size = 16;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  ADDIS_R11_R2 | PPC_HA (off), loc);
+			  ADDIS_R12_R2 | PPC_HA (off), loc);
 	      loc += 4;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  LD_R12_0R11 | PPC_LO (off), loc);
+			  LD_R12_0R12 | PPC_LO (off), loc);
 	    }
 	  else
 	    {
@@ -10697,10 +10706,10 @@  ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
 	    {
 	      size += 4;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  ADDIS_R11_R2 | PPC_HA (off), loc);
+			  ADDIS_R12_R2 | PPC_HA (off), loc);
 	      loc += 4;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  LD_R12_0R11 | PPC_LO (off), loc);
+			  LD_R12_0R12 | PPC_LO (off), loc);
 	    }
 	  else
 	    bfd_put_32 (htab->params->stub_bfd, LD_R12_0R2 | PPC_LO (off), loc);
diff --git a/gold/powerpc.cc b/gold/powerpc.cc
index e59c319..263b623 100644
--- a/gold/powerpc.cc
+++ b/gold/powerpc.cc
@@ -3067,6 +3067,7 @@  static const uint32_t addis_3_13	= 0x3c6d0000;
 static const uint32_t addis_11_2	= 0x3d620000;
 static const uint32_t addis_11_11	= 0x3d6b0000;
 static const uint32_t addis_11_30	= 0x3d7e0000;
+static const uint32_t addis_12_2	= 0x3d820000;
 static const uint32_t addis_12_12	= 0x3d8c0000;
 static const uint32_t b			= 0x48000000;
 static const uint32_t bcl_20_31		= 0x429f0005;
@@ -4200,10 +4201,20 @@  Stub_table<size, big_endian>::do_write(Output_file* of)
 		{
 		  write_insn<big_endian>(p, std_2_1 + this->targ_->stk_toc());
 		  p += 4;
-		  write_insn<big_endian>(p, addis_11_2 + ha(off));
-		  p += 4;
-		  write_insn<big_endian>(p, ld_12_11 + l(off));
-		  p += 4;
+		  if (plt_load_toc)
+		    {
+		      write_insn<big_endian>(p, addis_11_2 + ha(off));
+		      p += 4;
+		      write_insn<big_endian>(p, ld_12_11 + l(off));
+		      p += 4;
+		    }
+		  else
+		    {
+		      write_insn<big_endian>(p, addis_12_2 + ha(off));
+		      p += 4;
+		      write_insn<big_endian>(p, ld_12_12 + l(off));
+		      p += 4;
+		    }
 		  if (plt_load_toc
 		      && ha(off + 8 + 8 * static_chain) != ha(off))
 		    {
@@ -4302,8 +4313,8 @@  Stub_table<size, big_endian>::do_write(Output_file* of)
 		}
 	      else
 		{
-		  write_insn<big_endian>(p, addis_11_2 + ha(brltoff)),	p += 4;
-		  write_insn<big_endian>(p, ld_12_11 + l(brltoff)),	p += 4;
+		  write_insn<big_endian>(p, addis_12_2 + ha(brltoff)),	p += 4;
+		  write_insn<big_endian>(p, ld_12_12 + l(brltoff)),	p += 4;
 		}
 	      write_insn<big_endian>(p, mtctr_12),			p += 4;
 	      write_insn<big_endian>(p, bctr);
diff --git a/ld/testsuite/ld-powerpc/elfv2exe.d b/ld/testsuite/ld-powerpc/elfv2exe.d
index 7ff9d38..9ea816c 100644
--- a/ld/testsuite/ld-powerpc/elfv2exe.d
+++ b/ld/testsuite/ld-powerpc/elfv2exe.d
@@ -8,14 +8,14 @@ 
 Disassembly of section \.text:
 
 0+100000c0 <.*\.plt_branch\.f2>:
-.*:	(ff ff 62 3d|3d 62 ff ff) 	addis   r11,r2,-1
-.*:	(f0 7f 8b e9|e9 8b 7f f0) 	ld      r12,32752\(r11\)
+.*:	(ff ff 82 3d|3d 82 ff ff) 	addis   r12,r2,-1
+.*:	(f0 7f 8c e9|e9 8c 7f f0) 	ld      r12,32752\(r12\)
 .*:	(a6 03 89 7d|7d 89 03 a6) 	mtctr   r12
 .*:	(20 04 80 4e|4e 80 04 20) 	bctr
 
 0+100000d0 <.*\.plt_branch\.f4>:
-.*:	(ff ff 62 3d|3d 62 ff ff) 	addis   r11,r2,-1
-.*:	(f8 7f 8b e9|e9 8b 7f f8) 	ld      r12,32760\(r11\)
+.*:	(ff ff 82 3d|3d 82 ff ff) 	addis   r12,r2,-1
+.*:	(f8 7f 8c e9|e9 8c 7f f8) 	ld      r12,32760\(r12\)
 .*:	(a6 03 89 7d|7d 89 03 a6) 	mtctr   r12
 .*:	(20 04 80 4e|4e 80 04 20) 	bctr