From patchwork Tue Mar 8 15:16:15 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: "Andre Vieira (lists)" X-Patchwork-Id: 51792 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 36E00385C412 for ; Tue, 8 Mar 2022 15:16:55 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 36E00385C412 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1646752615; bh=12Blo/UGPN4/udxm/tuF8oe/xOPE3meTyW34KQzlXUE=; h=Date:To:Subject:List-Id:List-Unsubscribe:List-Archive:List-Post: List-Help:List-Subscribe:From:Reply-To:Cc:From; b=YbpfvHI//Nd38cWD1aK5EfaQ9bXrF3J4igdOErvmkmFsKH0ujKSBxf0nMQLf2mHoy WGRXutnBofTtlPCKV0ttAMVAQFtEKgXAsudpitGiuUw06LfXyz06xHvEK9jL9vS3xI HAAXaqnojpaS3+7hnHJaesIFZf8JKhFeIct7NPxA= X-Original-To: gcc-patches@gcc.gnu.org Delivered-To: gcc-patches@gcc.gnu.org Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by sourceware.org (Postfix) with ESMTP id 962BF385C401 for ; Tue, 8 Mar 2022 15:16:12 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 962BF385C401 Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 0FCD5139F; Tue, 8 Mar 2022 07:16:12 -0800 (PST) Received: from [10.57.21.215] (unknown [10.57.21.215]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 7712B3F793; Tue, 8 Mar 2022 07:16:11 -0800 (PST) Message-ID: <1f472c95-12bc-bf0d-fa83-385405cc195a@arm.com> Date: Tue, 8 Mar 2022 15:16:15 +0000 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Thunderbird/91.6.1 Content-Language: en-US To: "gcc-patches@gcc.gnu.org" Subject: [aarch64] update reg-costs to include predicate move costs X-Spam-Status: No, score=-11.5 required=5.0 tests=BAYES_00, BODY_8BITS, 
GIT_PATCH_0, KAM_DMARC_STATUS, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: "Andre Vieira \(lists\) via Gcc-patches" From: "Andre Vieira (lists)" Reply-To: "Andre Vieira \(lists\)" Cc: Richard Sandiford Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org Sender: "Gcc-patches" Hi, This patch adds predicate move costs to several SVE-enabled cores. 2022-02-25  Tamar Christina                         Andre Vieira gcc/ChangeLog:         * config/aarch64/aarch64-protos.h (struct cpu_regmove_cost): Add PR2PR member.         * config/aarch64/aarch64.cc (aarch64_register_move_cost): Use PR2PR costs when moving a predicate.         (generic_regmove_cost, cortexa57_regmove_cost, cortexa53_regmove_cost, exynosm1_regmove_cost, thunderx_regmove_cost, xgene1_regmove_cost, qdf24xx_regmove_cost, thunderx2t99_regmove_cost, thunderx3t110_regmove_cost, tsv110_regmove_cost, a64fx_regmove_cost): Add PR2PR entry.         (neoversen1_regmove_cost): New.         (neoversen1_tunings): Use neoversen1_regmove_cost. 
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index d0e78d6a559a7c310b7f8c7877081a0e2baf6a05..f2fde35c6eb4989af8736db8fad004171c160282 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -192,6 +192,7 @@ struct cpu_regmove_cost const int GP2FP; const int FP2GP; const int FP2FP; + const int PR2PR; }; struct simd_vec_cost diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index dbeaaf484dbc070ae3fcc08530ec9bd20b8ab651..9a94f3a30b0f1acc3c9b8a0e3d703e60780d0cbc 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -526,7 +526,8 @@ static const struct cpu_regmove_cost generic_regmove_cost = their cost higher than memmov_cost. */ 5, /* GP2FP */ 5, /* FP2GP */ - 2 /* FP2FP */ + 2, /* FP2FP */ + 2 /* PR2PR. */ }; static const struct cpu_regmove_cost cortexa57_regmove_cost = @@ -536,7 +537,8 @@ static const struct cpu_regmove_cost cortexa57_regmove_cost = their cost higher than memmov_cost. */ 5, /* GP2FP */ 5, /* FP2GP */ - 2 /* FP2FP */ + 2, /* FP2FP */ + 2 /* PR2PR. */ }; static const struct cpu_regmove_cost cortexa53_regmove_cost = @@ -546,7 +548,8 @@ static const struct cpu_regmove_cost cortexa53_regmove_cost = their cost higher than memmov_cost. */ 5, /* GP2FP */ 5, /* FP2GP */ - 2 /* FP2FP */ + 2, /* FP2FP */ + 2 /* PR2PR. */ }; static const struct cpu_regmove_cost exynosm1_regmove_cost = @@ -556,7 +559,8 @@ static const struct cpu_regmove_cost exynosm1_regmove_cost = their cost higher than memmov_cost (actual, 4 and 9). */ 9, /* GP2FP */ 9, /* FP2GP */ - 1 /* FP2FP */ + 1, /* FP2FP */ + 1 /* PR2PR. */ }; static const struct cpu_regmove_cost thunderx_regmove_cost = @@ -564,7 +568,8 @@ static const struct cpu_regmove_cost thunderx_regmove_cost = 2, /* GP2GP */ 2, /* GP2FP */ 6, /* FP2GP */ - 4 /* FP2FP */ + 4, /* FP2FP */ + 4 /* PR2PR. 
*/ }; static const struct cpu_regmove_cost xgene1_regmove_cost = @@ -574,7 +579,8 @@ static const struct cpu_regmove_cost xgene1_regmove_cost = their cost higher than memmov_cost. */ 8, /* GP2FP */ 8, /* FP2GP */ - 2 /* FP2FP */ + 2, /* FP2FP */ + 2 /* PR2PR. */ }; static const struct cpu_regmove_cost qdf24xx_regmove_cost = @@ -583,7 +589,8 @@ static const struct cpu_regmove_cost qdf24xx_regmove_cost = /* Avoid the use of int<->fp moves for spilling. */ 6, /* GP2FP */ 6, /* FP2GP */ - 4 /* FP2FP */ + 4, /* FP2FP */ + 4 /* PR2PR. */ }; static const struct cpu_regmove_cost thunderx2t99_regmove_cost = @@ -593,6 +600,7 @@ static const struct cpu_regmove_cost thunderx2t99_regmove_cost = 5, /* GP2FP */ 6, /* FP2GP */ 3, /* FP2FP */ + 3 /* PR2PR. */ }; static const struct cpu_regmove_cost thunderx3t110_regmove_cost = @@ -601,7 +609,8 @@ static const struct cpu_regmove_cost thunderx3t110_regmove_cost = /* Avoid the use of int<->fp moves for spilling. */ 4, /* GP2FP */ 5, /* FP2GP */ - 4 /* FP2FP */ + 4, /* FP2FP */ + 4 /* PR2PR. */ }; static const struct cpu_regmove_cost tsv110_regmove_cost = @@ -611,7 +620,8 @@ static const struct cpu_regmove_cost tsv110_regmove_cost = their cost higher than memmov_cost. */ 2, /* GP2FP */ 3, /* FP2GP */ - 2 /* FP2FP */ + 2, /* FP2FP */ + 2 /* PR2PR. */ }; static const struct cpu_regmove_cost a64fx_regmove_cost = @@ -621,7 +631,19 @@ static const struct cpu_regmove_cost a64fx_regmove_cost = their cost higher than memmov_cost. */ 5, /* GP2FP */ 7, /* FP2GP */ - 2 /* FP2FP */ + 2, /* FP2FP */ + 2 /* PR2PR. */ +}; + +static const struct cpu_regmove_cost neoversen1_regmove_cost = +{ + 1, /* GP2GP */ + /* Spilling to int<->fp instead of memory is recommended so set + realistic costs compared to memmov_cost. */ + 3, /* GP2FP */ + 2, /* FP2GP */ + 2, /* FP2FP */ + 1 /* PR2PR. */ }; /* Generic costs for Advanced SIMD vector operations. 
*/ @@ -1698,7 +1720,7 @@ static const struct tune_params neoversen1_tunings = { &cortexa76_extra_costs, &generic_addrcost_table, - &generic_regmove_cost, + &neoversen1_regmove_cost, &cortexa57_vector_cost, &generic_branch_cost, &generic_approx_modes, @@ -14438,6 +14460,11 @@ aarch64_register_move_cost (machine_mode mode, || (to == GENERAL_REGS && from == STACK_REG)) return regmove_cost->GP2GP; + /* Predicate to predicate moves are usually very cheap so cost them + separately. */ + if (from == PR_REGS && to == PR_REGS) + return regmove_cost->PR2PR; + /* To/From the stack register, we move via the gprs. */ if (to == STACK_REG || from == STACK_REG) return aarch64_register_move_cost (mode, from, GENERAL_REGS)