From patchwork Sat Oct 15 00:20:56 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Noah Goldstein X-Patchwork-Id: 58887 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 7C026385E442 for ; Sat, 15 Oct 2022 00:21:31 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 7C026385E442 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1665793291; bh=SFEijbmXdXBpQ5hr6+0B4AUdc1+PmRc0lDFf2XCXVFM=; h=To:Subject:Date:In-Reply-To:References:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=NR0AmQoRoeZxpQpUqFP4YRNi5H8hx01mrO4ykD62ghiGMVT5wTGJmH+j+ktUt6nbO cQeAkfitQUk0lusvVOmC2+m7hnHc9tF9EDw+p9mJyZYegyhmmpjj7URzjJjzB9pUVA BmWSXdhZBg8f0/Q10ucOHv2xDkpwhNTaf/Iz/6tY= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-pj1-x1029.google.com (mail-pj1-x1029.google.com [IPv6:2607:f8b0:4864:20::1029]) by sourceware.org (Postfix) with ESMTPS id E90A53857811 for ; Sat, 15 Oct 2022 00:21:07 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org E90A53857811 Received: by mail-pj1-x1029.google.com with SMTP id cl1so6226081pjb.1 for ; Fri, 14 Oct 2022 17:21:07 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=SFEijbmXdXBpQ5hr6+0B4AUdc1+PmRc0lDFf2XCXVFM=; b=mVC0K4fdQVKC3zP7XNJKB4+pv12UgQYMxXqqCZ45p41A7FgI9ze8CxObRHLaOIdAom YAOma6sVS+UEFnVEz/FMiwYvy4vGQLM6Kp1FPb8gKhM118VoY+4RXo/Ef5Cq6zatQeMj qeDRBFpSk9dcqIUKcUmD2F4CKYZbDM6zfCCtBnzeqVjY4EryyShF8HS8eMSLPkfKQWMX yFyVJAb36/OWSxdBSJtiWGiNb5Lkwx0uGDR8XxkqhgQC79nQzRSTtPIBtG9BEiIuxJBI RUQp9c1kTAv+G2w432aYJ8GrlnrKs6hiaq7Zxgk/EnmAzNBoPrKM90Rn8iWSoOgeD56l D4iA== X-Gm-Message-State: ACrzQf1bv9yB/38fUUSJbwReG4oyPbUXewL+S/qmFcGuXe8NMVlFvyjJ lQsnm9ZvNj2ZIhJ4ytRlEcA1DnU9W4FXBw== X-Google-Smtp-Source: AMsMyM7GDQ/P//AitKPkcm1U87l2VIPJJsqOvo//+NqvT9wkC76fYuwqc51NB0bQTcGrZDA7NPHmfQ== X-Received: by 2002:a17:902:f707:b0:184:e44f:88cc with SMTP id h7-20020a170902f70700b00184e44f88ccmr416051plo.42.1665793266601; Fri, 14 Oct 2022 17:21:06 -0700 (PDT) Received: from noahgold-DESK.. ([192.55.60.38]) by smtp.gmail.com with ESMTPSA id e13-20020a17090301cd00b00177c488fea5sm2293670plh.12.2022.10.14.17.21.05 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 14 Oct 2022 17:21:06 -0700 (PDT) To: libc-alpha@sourceware.org Subject: [PATCH v9 2/6] x86: Update memrchr to use new VEC macros Date: Fri, 14 Oct 2022 19:20:56 -0500 Message-Id: <20221015002100.129511-2-goldstein.w.n@gmail.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20221015002100.129511-1-goldstein.w.n@gmail.com> References: <20221014164008.1325863-1-goldstein.w.n@gmail.com> <20221015002100.129511-1-goldstein.w.n@gmail.com> MIME-Version: 1.0 X-Spam-Status: No, score=-11.1 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, URIBL_BLACK autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Noah Goldstein via Libc-alpha From: Noah Goldstein Reply-To: Noah Goldstein Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org Sender: "Libc-alpha" Replace %VEC(n) -> %VMM(n) This commit does not change libc.so Tested build on x86-64 --- sysdeps/x86_64/multiarch/memrchr-evex.S | 42 ++++++++++++------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S index ea3a0a0a60..550b328c5a 100644 --- a/sysdeps/x86_64/multiarch/memrchr-evex.S +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S @@ -21,7 +21,7 @@ #if ISA_SHOULD_BUILD (4) # include -# include "evex256-vecs.h" +# include "x86-evex256-vecs.h" # if VEC_SIZE != 32 # error "VEC_SIZE != 32 unimplemented" # endif @@ -31,7 +31,7 @@ # endif # define PAGE_SIZE 4096 -# define VECMATCH VEC(0) +# define VMMMATCH VMM(0) .section SECTION(.text), "ax", @progbits ENTRY_P2ALIGN(MEMRCHR, 6) @@ -47,7 +47,7 @@ ENTRY_P2ALIGN(MEMRCHR, 6) correct page cross check and 2) it correctly sets up end ptr to be subtract by lzcnt aligned. */ leaq -1(%rdi, %rdx), %rax - vpbroadcastb %esi, %VECMATCH + vpbroadcastb %esi, %VMMMATCH /* Check if we can load 1x VEC without cross a page. */ testl $(PAGE_SIZE - VEC_SIZE), %eax @@ -55,7 +55,7 @@ ENTRY_P2ALIGN(MEMRCHR, 6) /* Don't use rax for pointer here because EVEX has better encoding with offset % VEC_SIZE == 0. */ - vpcmpb $0, -(VEC_SIZE)(%rdi, %rdx), %VECMATCH, %k0 + vpcmpb $0, -(VEC_SIZE)(%rdi, %rdx), %VMMMATCH, %k0 kmovd %k0, %ecx /* Fall through for rdx (len) <= VEC_SIZE (expect small sizes). */ @@ -96,7 +96,7 @@ L(more_1x_vec): movq %rax, %rdx /* Need no matter what. */ - vpcmpb $0, -(VEC_SIZE)(%rax), %VECMATCH, %k0 + vpcmpb $0, -(VEC_SIZE)(%rax), %VMMMATCH, %k0 kmovd %k0, %ecx subq %rdi, %rdx @@ -115,7 +115,7 @@ L(last_2x_vec): /* Don't use rax for pointer here because EVEX has better encoding with offset % VEC_SIZE == 0. */ - vpcmpb $0, -(VEC_SIZE * 2)(%rdi, %rdx), %VECMATCH, %k0 + vpcmpb $0, -(VEC_SIZE * 2)(%rdi, %rdx), %VMMMATCH, %k0 kmovd %k0, %ecx /* NB: 64-bit lzcnt. This will naturally add 32 to position. */ lzcntq %rcx, %rcx @@ -131,7 +131,7 @@ L(last_2x_vec): L(page_cross): movq %rax, %rsi andq $-VEC_SIZE, %rsi - vpcmpb $0, (%rsi), %VECMATCH, %k0 + vpcmpb $0, (%rsi), %VMMMATCH, %k0 kmovd %k0, %r8d /* Shift out negative alignment (because we are starting from endptr and working backwards). */ @@ -165,13 +165,13 @@ L(more_2x_vec): testl %ecx, %ecx jnz L(ret_vec_x0_dec) - vpcmpb $0, -(VEC_SIZE * 2)(%rax), %VECMATCH, %k0 + vpcmpb $0, -(VEC_SIZE * 2)(%rax), %VMMMATCH, %k0 kmovd %k0, %ecx testl %ecx, %ecx jnz L(ret_vec_x1) /* Need no matter what. */ - vpcmpb $0, -(VEC_SIZE * 3)(%rax), %VECMATCH, %k0 + vpcmpb $0, -(VEC_SIZE * 3)(%rax), %VMMMATCH, %k0 kmovd %k0, %ecx subq $(VEC_SIZE * 4), %rdx @@ -185,7 +185,7 @@ L(last_vec): /* Need no matter what. */ - vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0 + vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VMMMATCH, %k0 kmovd %k0, %ecx lzcntl %ecx, %ecx subq $(VEC_SIZE * 3 + 1), %rax @@ -220,7 +220,7 @@ L(more_4x_vec): testl %ecx, %ecx jnz L(ret_vec_x2) - vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0 + vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VMMMATCH, %k0 kmovd %k0, %ecx testl %ecx, %ecx @@ -243,17 +243,17 @@ L(more_4x_vec): L(loop_4x_vec): /* Store 1 were not-equals and 0 where equals in k1 (used to mask later on). */ - vpcmpb $4, (VEC_SIZE * 3)(%rax), %VECMATCH, %k1 + vpcmpb $4, (VEC_SIZE * 3)(%rax), %VMMMATCH, %k1 /* VEC(2/3) will have zero-byte where we found a CHAR. */ - vpxorq (VEC_SIZE * 2)(%rax), %VECMATCH, %VEC(2) - vpxorq (VEC_SIZE * 1)(%rax), %VECMATCH, %VEC(3) - vpcmpb $0, (VEC_SIZE * 0)(%rax), %VECMATCH, %k4 + vpxorq (VEC_SIZE * 2)(%rax), %VMMMATCH, %VMM(2) + vpxorq (VEC_SIZE * 1)(%rax), %VMMMATCH, %VMM(3) + vpcmpb $0, (VEC_SIZE * 0)(%rax), %VMMMATCH, %k4 /* Combine VEC(2/3) with min and maskz with k1 (k1 has zero bit where CHAR is found and VEC(2/3) have zero-byte where CHAR is found. */ - vpminub %VEC(2), %VEC(3), %VEC(3){%k1}{z} - vptestnmb %VEC(3), %VEC(3), %k2 + vpminub %VMM(2), %VMM(3), %VMM(3){%k1}{z} + vptestnmb %VMM(3), %VMM(3), %k2 /* Any 1s and we found CHAR. */ kortestd %k2, %k4 @@ -270,7 +270,7 @@ L(loop_4x_vec): L(last_4x_vec): /* Used no matter what. */ - vpcmpb $0, (VEC_SIZE * -1)(%rax), %VECMATCH, %k0 + vpcmpb $0, (VEC_SIZE * -1)(%rax), %VMMMATCH, %k0 kmovd %k0, %ecx cmpl $(VEC_SIZE * 2), %edx @@ -280,14 +280,14 @@ L(last_4x_vec): jnz L(ret_vec_x0_dec) - vpcmpb $0, (VEC_SIZE * -2)(%rax), %VECMATCH, %k0 + vpcmpb $0, (VEC_SIZE * -2)(%rax), %VMMMATCH, %k0 kmovd %k0, %ecx testl %ecx, %ecx jnz L(ret_vec_x1) /* Used no matter what. */ - vpcmpb $0, (VEC_SIZE * -3)(%rax), %VECMATCH, %k0 + vpcmpb $0, (VEC_SIZE * -3)(%rax), %VMMMATCH, %k0 kmovd %k0, %ecx cmpl $(VEC_SIZE * 3), %edx @@ -309,7 +309,7 @@ L(loop_end): testl %ecx, %ecx jnz L(ret_vec_x0_end) - vptestnmb %VEC(2), %VEC(2), %k0 + vptestnmb %VMM(2), %VMM(2), %k0 kmovd %k0, %ecx testl %ecx, %ecx jnz L(ret_vec_x1_end)