From patchwork Sun Jun 1 14:18:22 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Modra X-Patchwork-Id: 1224 Received: (qmail 17205 invoked by alias); 1 Jun 2014 14:18:33 -0000 Mailing-List: contact gdb-patches-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: gdb-patches-owner@sourceware.org Delivered-To: mailing list gdb-patches@sourceware.org Received: (qmail 17184 invoked by uid 89); 1 Jun 2014 14:18:31 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.2 required=5.0 tests=AWL, BAYES_00, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-Spam-User: qpsmtpd, 2 recipients X-HELO: mail-pa0-f52.google.com Received: from mail-pa0-f52.google.com (HELO mail-pa0-f52.google.com) (209.85.220.52) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Sun, 01 Jun 2014 14:18:29 +0000 Received: by mail-pa0-f52.google.com with SMTP id bj1so3015923pad.39 for ; Sun, 01 Jun 2014 07:18:27 -0700 (PDT) X-Received: by 10.66.142.73 with SMTP id ru9mr33252882pab.41.1401632307774; Sun, 01 Jun 2014 07:18:27 -0700 (PDT) Received: from bubble.grove.modra.org ([101.166.26.37]) by mx.google.com with ESMTPSA id xz7sm49411676pac.3.2014.06.01.07.18.25 for (version=TLSv1.1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); Sun, 01 Jun 2014 07:18:27 -0700 (PDT) Received: by bubble.grove.modra.org (Postfix, from userid 1000) id 867B6EA0098; Sun, 1 Jun 2014 23:48:22 +0930 (CST) Date: Sun, 1 Jun 2014 23:48:22 +0930 From: Alan Modra To: binutils@sourceware.org, gdb-patches@sourceware.org Subject: Power8 fusion of addis,addi Message-ID: <20140601141822.GF6679@bubble.grove.modra.org> Mail-Followup-To: binutils@sourceware.org, gdb-patches@sourceware.org MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) 
Power8 fuses addis,addi and addis,ld sequences when the target of the addis is the same as that of the addi/ld. Thus the sequence "addis r12,r2,xxx@ha" followed by "addi r12,r12,xxx@l" (or "ld r12,xxx@l(r12)") is faster than "addis r11,r2,xxx@ha" followed by "addi r12,r11,xxx@l" (or "ld r12,xxx@l(r11)"). So use the form that allows fusion in plt call and branch stubs. This change requires an update to the set of linkage stubs recognised by gdb. OK to commit the gdb change? bfd/ * elf64-ppc.c (ADDIS_R12_R2): Define. (build_plt_stub): Support fusion on ELFv2 stub. (ppc_build_one_stub): Likewise for plt branch stubs. gold/ * powerpc.cc (addis_12_2): Define. (Stub_table::do_write): Support fusion on ELFv2 stubs. ld/testsuite/ * ld-powerpc/elfv2exe.d: Update for changed plt call stubs. gdb/ * ppc64-tdep.c (ppc64_standard_linkage8): New. (ppc64_skip_trampoline_code): Recognise ELFv2 stub supporting fusion. diff --git a/gdb/ppc64-tdep.c b/gdb/ppc64-tdep.c index cbbbedc..8acd754 100644 --- a/gdb/ppc64-tdep.c +++ b/gdb/ppc64-tdep.c @@ -303,6 +303,29 @@ static struct ppc_insn_pattern ppc64_standard_linkage7[] = { 0, 0, 0 } }; +/* ELFv2 PLT call stub to access PLT entries more than +/- 32k from r2, + supporting fusion. 
*/ + +static struct ppc_insn_pattern ppc64_standard_linkage8[] = + { + /* std r2, 24(r1) */ + { -1, insn_ds (62, 2, 1, 24, 0), 1 }, + + /* addis r12, r2, <any> */ + { insn_d (-1, -1, -1, 0), insn_d (15, 12, 2, 0), 0 }, + + /* ld r12, <any> (r12) */ + { insn_ds (-1, -1, -1, 0, -1), insn_ds (58, 12, 12, 0, 0), 0 }, + + /* mtctr r12 */ + { insn_xfx (-1, -1, -1, -1), insn_xfx (31, 12, 9, 467), 0 }, + + /* bctr */ + { -1, 0x4e800420, 0 }, + + { 0, 0, 0 } + }; + /* When the dynamic linker is doing lazy symbol resolution, the first call to a function in another object will go like this: @@ -437,10 +460,14 @@ ppc64_skip_trampoline_code (struct frame_info *frame, CORE_ADDR pc) ARRAY_SIZE (ppc64_standard_linkage4))), MAX (MAX (ARRAY_SIZE (ppc64_standard_linkage5), ARRAY_SIZE (ppc64_standard_linkage6)), - ARRAY_SIZE (ppc64_standard_linkage7))) - 1]; + MAX (ARRAY_SIZE (ppc64_standard_linkage7), + ARRAY_SIZE (ppc64_standard_linkage8)))) + - 1]; CORE_ADDR target; - if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage7, insns)) + if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage8, insns)) + pc = ppc64_standard_linkage4_target (frame, pc, insns); + else if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage7, insns)) pc = ppc64_standard_linkage3_target (frame, pc, insns); else if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage6, insns)) pc = ppc64_standard_linkage4_target (frame, pc, insns); diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c index fdca0c6..e7e2e39 100644 --- a/bfd/elf64-ppc.c +++ b/bfd/elf64-ppc.c @@ -173,6 +173,7 @@ static bfd_vma opd_entry_value #define LD_R2_0R1 0xe8410000 /* ld %r2,0(%r1) */ +#define ADDIS_R12_R2 0x3d820000 /* addis %r12,%r2,xxx@ha */ #define ADDIS_R12_R12 0x3d8c0000 /* addis %r12,%r12,xxx@ha */ #define LD_R12_0R12 0xe98c0000 /* ld %r12,xxx@l(%r12) */ @@ -10246,8 +10247,16 @@ build_plt_stub (struct ppc_link_hash_table *htab, if (ALWAYS_EMIT_R2SAVE || stub_entry->stub_type == ppc_stub_plt_call_r2save) bfd_put_32 
(obfd, STD_R2_0R1 + STK_TOC (htab), p), p += 4; - bfd_put_32 (obfd, ADDIS_R11_R2 | PPC_HA (offset), p), p += 4; - bfd_put_32 (obfd, LD_R12_0R11 | PPC_LO (offset), p), p += 4; + if (plt_load_toc) + { + bfd_put_32 (obfd, ADDIS_R11_R2 | PPC_HA (offset), p), p += 4; + bfd_put_32 (obfd, LD_R12_0R11 | PPC_LO (offset), p), p += 4; + } + else + { + bfd_put_32 (obfd, ADDIS_R12_R2 | PPC_HA (offset), p), p += 4; + bfd_put_32 (obfd, LD_R12_0R12 | PPC_LO (offset), p), p += 4; + } if (plt_load_toc && PPC_HA (offset + 8 + 8 * plt_static_chain) != PPC_HA (offset)) { @@ -10668,10 +10677,10 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg) { size = 16; bfd_put_32 (htab->params->stub_bfd, - ADDIS_R11_R2 | PPC_HA (off), loc); + ADDIS_R12_R2 | PPC_HA (off), loc); loc += 4; bfd_put_32 (htab->params->stub_bfd, - LD_R12_0R11 | PPC_LO (off), loc); + LD_R12_0R12 | PPC_LO (off), loc); } else { @@ -10697,10 +10706,10 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg) { size += 4; bfd_put_32 (htab->params->stub_bfd, - ADDIS_R11_R2 | PPC_HA (off), loc); + ADDIS_R12_R2 | PPC_HA (off), loc); loc += 4; bfd_put_32 (htab->params->stub_bfd, - LD_R12_0R11 | PPC_LO (off), loc); + LD_R12_0R12 | PPC_LO (off), loc); } else bfd_put_32 (htab->params->stub_bfd, LD_R12_0R2 | PPC_LO (off), loc); diff --git a/gold/powerpc.cc b/gold/powerpc.cc index e59c319..263b623 100644 --- a/gold/powerpc.cc +++ b/gold/powerpc.cc @@ -3067,6 +3067,7 @@ static const uint32_t addis_3_13 = 0x3c6d0000; static const uint32_t addis_11_2 = 0x3d620000; static const uint32_t addis_11_11 = 0x3d6b0000; static const uint32_t addis_11_30 = 0x3d7e0000; +static const uint32_t addis_12_2 = 0x3d820000; static const uint32_t addis_12_12 = 0x3d8c0000; static const uint32_t b = 0x48000000; static const uint32_t bcl_20_31 = 0x429f0005; @@ -4200,10 +4201,20 @@ Stub_table::do_write(Output_file* of) { write_insn(p, std_2_1 + this->targ_->stk_toc()); p += 4; - write_insn(p, addis_11_2 + ha(off)); - p += 4; - 
write_insn(p, ld_12_11 + l(off)); - p += 4; + if (plt_load_toc) + { + write_insn(p, addis_11_2 + ha(off)); + p += 4; + write_insn(p, ld_12_11 + l(off)); + p += 4; + } + else + { + write_insn(p, addis_12_2 + ha(off)); + p += 4; + write_insn(p, ld_12_12 + l(off)); + p += 4; + } if (plt_load_toc && ha(off + 8 + 8 * static_chain) != ha(off)) { @@ -4302,8 +4313,8 @@ Stub_table::do_write(Output_file* of) } else { - write_insn(p, addis_11_2 + ha(brltoff)), p += 4; - write_insn(p, ld_12_11 + l(brltoff)), p += 4; + write_insn(p, addis_12_2 + ha(brltoff)), p += 4; + write_insn(p, ld_12_12 + l(brltoff)), p += 4; } write_insn(p, mtctr_12), p += 4; write_insn(p, bctr); diff --git a/ld/testsuite/ld-powerpc/elfv2exe.d b/ld/testsuite/ld-powerpc/elfv2exe.d index 7ff9d38..9ea816c 100644 --- a/ld/testsuite/ld-powerpc/elfv2exe.d +++ b/ld/testsuite/ld-powerpc/elfv2exe.d @@ -8,14 +8,14 @@ Disassembly of section \.text: 0+100000c0 <.*\.plt_branch\.f2>: -.*: (ff ff 62 3d|3d 62 ff ff) addis r11,r2,-1 -.*: (f0 7f 8b e9|e9 8b 7f f0) ld r12,32752\(r11\) +.*: (ff ff 82 3d|3d 82 ff ff) addis r12,r2,-1 +.*: (f0 7f 8c e9|e9 8c 7f f0) ld r12,32752\(r12\) .*: (a6 03 89 7d|7d 89 03 a6) mtctr r12 .*: (20 04 80 4e|4e 80 04 20) bctr 0+100000d0 <.*\.plt_branch\.f4>: -.*: (ff ff 62 3d|3d 62 ff ff) addis r11,r2,-1 -.*: (f8 7f 8b e9|e9 8b 7f f8) ld r12,32760\(r11\) +.*: (ff ff 82 3d|3d 82 ff ff) addis r12,r2,-1 +.*: (f8 7f 8c e9|e9 8c 7f f8) ld r12,32760\(r12\) .*: (a6 03 89 7d|7d 89 03 a6) mtctr r12 .*: (20 04 80 4e|4e 80 04 20) bctr