From patchwork Thu Jan 6 19:04:50 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Uros Bizjak X-Patchwork-Id: 49665 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 29B053858031 for ; Thu, 6 Jan 2022 19:05:31 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 29B053858031 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1641495931; bh=8B9F/kiIW4vqFc4YFcTJv+M/xWu3PsF/biZf46BDApE=; h=Date:Subject:To:List-Id:List-Unsubscribe:List-Archive:List-Post: List-Help:List-Subscribe:From:Reply-To:From; b=qXmS0wdTDF6bz0ivRwE9j8Q7PjuWCdoVokMtanbXi5lDve2CfcT0nzZblaI/UxQxH PhI7qO4zLm9K58SmtvskWE9nkclKuX4yR/5ztW2xLTBOQd9+4QtCbT3zR6RU1Ezh75 xdSZYGRAmDMYY+VsVPgn9nH4/Xu3KTk6XWuf5faE= X-Original-To: gcc-patches@gcc.gnu.org Delivered-To: gcc-patches@gcc.gnu.org Received: from mail-qv1-xf2c.google.com (mail-qv1-xf2c.google.com [IPv6:2607:f8b0:4864:20::f2c]) by sourceware.org (Postfix) with ESMTPS id 6E4C43858408 for ; Thu, 6 Jan 2022 19:05:02 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 6E4C43858408 Received: by mail-qv1-xf2c.google.com with SMTP id q4so3242870qvh.9 for ; Thu, 06 Jan 2022 11:05:02 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:mime-version:from:date:message-id:subject:to; bh=8B9F/kiIW4vqFc4YFcTJv+M/xWu3PsF/biZf46BDApE=; b=ycb82FFOuJxebRb47x4iurRqorYeOEjVJ8VKjFhnh7kZ/MF58AAFVutn5R9752ohFH +lmY6J+5U5a39StV3XONbjcUio5z4vHykggVIR1hckjKICQEke2LXJJEFhcZsJ2oMfLc r5svBG0QxBMSZ9wNDOlXuFUEELZKrQB3iDSFHpbTaBByZCgmsOpdNeQccxvbVrXY4ICN OGGyJA4R0cmDll+fBK2qQLzXu3Y4SbLi6Dwt0M3iSsIZm0PunGcEaOKwtrbqZvY3wETE o36Qkto+OGNBQvgXcNnQTOAIH1/yCtiW6TQB465BJNxMT3Wpqm7zYih55IlDUVKycHnr KWig== X-Gm-Message-State: 
AOAM531dedw0hiIYqyNLjfrm3Ve4d5RRMHZtuEqG63zqtj/6XQprzRdR FmQ/P7JgBYbxuWkHvlNVvL2rtJVIhBBvy4qox5OelCC97G3mKg== X-Google-Smtp-Source: ABdhPJySgxQtM4j9UV0hbyLlFs03JZ1tshPSlC0XaLG8pmrpASKc6GtU14DgV0qngB/HHFWhIjcXUI4NNpZ2JCDiZLM= X-Received: by 2002:a05:6214:d61:: with SMTP id 1mr55593988qvs.125.1641495901463; Thu, 06 Jan 2022 11:05:01 -0800 (PST) MIME-Version: 1.0 Date: Thu, 6 Jan 2022 20:04:50 +0100 Message-ID: Subject: [PATCH] i386: Improve HImode interunit moves To: "gcc-patches@gcc.gnu.org" X-Spam-Status: No, score=-7.8 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Uros Bizjak via Gcc-patches From: Uros Bizjak Reply-To: Uros Bizjak Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org Sender: "Gcc-patches" Currently, the compiler moves HImode values between GPR and XMM registers with: %vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0} %vpextrw\t{$0, %1, %k0|%k0, %1, 0} but it could use slightly faster and shorter: %vmovd\t{%k1, %0|%0, %k1} %vmovd\t{%1, %k0|%k0, %1} 2022-01-06 Uroš Bizjak gcc/ChangeLog: * config/i386/i386.c (ix86_output_ssemov) <MODE_DI>: Add %q modifier for operands in general registers. <MODE_SI>: Add %k modifier for operands in general registers. * config/i386/i386.md (*movhi_internal): Change type attribute of xmm-gpr interunit alternatives 9,10 to ssemov and mode attribute to SImode for non-avx512fp16 targets. (*movhf_internal): Ditto for xmm-gpr interunit alternatives 6,8. * config/i386/mmx.md (*movv2qi_internal): Ditto for xmm-gpr interunit alternatives 8,9. 
gcc/testsuite/ChangeLog: * gcc.target/i386/pr102811-2.c (dg-final): Update scan-assembler-times directives. * gcc.target/i386/sse2-float16-2.c (dg-final): Update scan-assembler directives. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Pushed to master. Uros. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 1a964fe00f4..aeb7db5a5e3 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -5535,15 +5535,30 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands) case MODE_DI: /* Handle broken assemblers that require movd instead of movq. */ - if (!HAVE_AS_IX86_INTERUNIT_MOVQ - && (GENERAL_REG_P (operands[0]) - || GENERAL_REG_P (operands[1]))) - return "%vmovd\t{%1, %0|%0, %1}"; + if (GENERAL_REG_P (operands[0])) + { + if (HAVE_AS_IX86_INTERUNIT_MOVQ) + return "%vmovq\t{%1, %q0|%q0, %1}"; + else + return "%vmovd\t{%1, %q0|%q0, %1}"; + } + else if (GENERAL_REG_P (operands[1])) + { + if (HAVE_AS_IX86_INTERUNIT_MOVQ) + return "%vmovq\t{%q1, %0|%0, %q1}"; + else + return "%vmovd\t{%q1, %0|%0, %q1}"; + } else return "%vmovq\t{%1, %0|%0, %1}"; case MODE_SI: - return "%vmovd\t{%1, %0|%0, %1}"; + if (GENERAL_REG_P (operands[0])) + return "%vmovd\t{%1, %k0|%k0, %1}"; + else if (GENERAL_REG_P (operands[1])) + return "%vmovd\t{%k1, %0|%0, %k1}"; + else + return "%vmovd\t{%1, %0|%0, %1}"; case MODE_HI: if (GENERAL_REG_P (operands[0])) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9b424a3935b..376df1d51d1 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2580,13 +2580,9 @@ return standard_sse_constant_opcode (insn, operands); if (SSE_REG_P (operands[0])) - return MEM_P (operands[1]) - ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" - : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; + return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"; else - return MEM_P (operands[0]) - ? 
"%vpextrw\t{$0, %1, %0|%0, %1, 0}" - : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; + return "%vpextrw\t{$0, %1, %0|%0, %1, 0}"; case TYPE_MSKLOG: if (operands[1] == const0_rtx) @@ -2614,13 +2610,13 @@ (const_string "mskmov") (eq_attr "alternative" "8") (const_string "msklog") - (eq_attr "alternative" "9,10,13,14") + (eq_attr "alternative" "13,14") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "ssemov") (const_string "sselog1")) (eq_attr "alternative" "11") (const_string "sselog1") - (eq_attr "alternative" "12") + (eq_attr "alternative" "9,10,12") (const_string "ssemov") (match_test "optimize_function_for_size_p (cfun)") (const_string "imov") @@ -2644,7 +2640,11 @@ ] (const_string "orig"))) (set (attr "mode") - (cond [(eq_attr "alternative" "9,10,13,14") + (cond [(eq_attr "alternative" "9,10") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "SI")) + (eq_attr "alternative" "13,14") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "HI") (const_string "TI")) @@ -3876,13 +3876,9 @@ return standard_sse_constant_opcode (insn, operands); if (SSE_REG_P (operands[0])) - return MEM_P (operands[1]) - ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" - : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; + return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"; else - return MEM_P (operands[0]) - ? 
"%vpextrw\t{$0, %1, %0|%0, %1, 0}" - : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; + return "%vpextrw\t{$0, %1, %0|%0, %1, 0}"; default: if (get_attr_mode (insn) == MODE_SI) @@ -3901,9 +3897,9 @@ (set (attr "type") (cond [(eq_attr "alternative" "4") (const_string "sselog1") - (eq_attr "alternative" "5") + (eq_attr "alternative" "5,6,8") (const_string "ssemov") - (eq_attr "alternative" "6,7,8,9") + (eq_attr "alternative" "7,9") (if_then_else (match_test ("TARGET_AVX512FP16")) (const_string "ssemov") @@ -3930,7 +3926,12 @@ (set (attr "mode") (cond [(eq_attr "alternative" "4") (const_string "V4SF") - (eq_attr "alternative" "6,7,8,9") + (eq_attr "alternative" "6,8") + (if_then_else + (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "SI")) + (eq_attr "alternative" "7,9") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "HI") diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index a409bb7c6c6..8e0a6490b7b 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -389,13 +389,9 @@ return standard_sse_constant_opcode (insn, operands); if (SSE_REG_P (operands[0])) - return MEM_P (operands[1]) - ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" - : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; + return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"; else - return MEM_P (operands[0]) - ? 
"%vpextrw\t{$0, %1, %0|%0, %1, 0}" - : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; + return "%vpextrw\t{$0, %1, %0|%0, %1, 0}"; case TYPE_SSEMOV: return ix86_output_ssemov (insn, operands); @@ -412,13 +408,13 @@ ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "6,7,8,9") + (cond [(eq_attr "alternative" "6,7") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "ssemov") (const_string "sselog1")) (eq_attr "alternative" "4") (const_string "sselog1") - (eq_attr "alternative" "5") + (eq_attr "alternative" "5,8,9") (const_string "ssemov") (match_test "optimize_function_for_size_p (cfun)") (const_string "imov") @@ -440,10 +436,14 @@ ] (const_string "orig"))) (set (attr "mode") - (cond [(eq_attr "alternative" "6,7,8,9") + (cond [(eq_attr "alternative" "6,7") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "HI") (const_string "TI")) + (eq_attr "alternative" "8,9") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "SI")) (eq_attr "alternative" "4") (cond [(match_test "TARGET_AVX") (const_string "TI") diff --git a/gcc/testsuite/gcc.target/i386/pr102811-2.c b/gcc/testsuite/gcc.target/i386/pr102811-2.c index e511c665ae8..97bc9b14e7f 100644 --- a/gcc/testsuite/gcc.target/i386/pr102811-2.c +++ b/gcc/testsuite/gcc.target/i386/pr102811-2.c @@ -1,7 +1,6 @@ /* { dg-do compile { target { ! 
ia32 } } } */ /* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ -/* { dg-final { scan-assembler-times "pextrw" 1 } } */ -/* { dg-final { scan-assembler-times "pinsrw" 1 } } */ +/* { dg-final { scan-assembler-times "vmovd" 2 } } */ /* { dg-final { scan-assembler-not "\\\(%rsp\\\)"} } */ short test (_Float16 a) { diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c index 3da7683fc31..25e17231c1a 100644 --- a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c @@ -13,4 +13,5 @@ foo (union flt x) return x.flt; } -/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} } } */ +/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} { target ia32 } } } */ +/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} { target { ! ia32 } } } } */