From patchwork Thu Mar 4 16:34:56 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Szabolcs Nagy X-Patchwork-Id: 42258 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id D739A3AAA0D4; Thu, 4 Mar 2021 16:35:34 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D739A3AAA0D4 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1614875734; bh=4uZEUyPD4xQFCJseLHLSd7Qw52xc/t/Qo+IQGDIkPec=; h=To:Subject:Date:In-Reply-To:References:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=gHta4vJ+4VLDKnOaVy/+XvtucIaO9LpcRaE5H2SZjSMG9Va12lxGK9TnCw0Pmayjs kPw98w3ObpLlk7N5TC6VYYKw9K4C1WIQkoe10i6HnDyrP4W1sum+OHNz74A11rY4Yi X0qaVqW+LpJkpVwodS+5e9hk9m1uqIZ6NBphkjR4= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from EUR03-VE1-obe.outbound.protection.outlook.com (mail-eopbgr50060.outbound.protection.outlook.com [40.107.5.60]) by sourceware.org (Postfix) with ESMTPS id A1373386F46E for ; Thu, 4 Mar 2021 16:35:30 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org A1373386F46E Received: from AM6P194CA0108.EURP194.PROD.OUTLOOK.COM (2603:10a6:209:8f::49) by DB6PR0801MB1749.eurprd08.prod.outlook.com (2603:10a6:4:3b::8) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.3912.19; Thu, 4 Mar 2021 16:35:28 +0000 Received: from AM5EUR03FT007.eop-EUR03.prod.protection.outlook.com (2603:10a6:209:8f:cafe::c6) by AM6P194CA0108.outlook.office365.com (2603:10a6:209:8f::49) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.3912.17 via Frontend Transport; Thu, 4 Mar 2021 16:35:28 +0000 X-MS-Exchange-Authentication-Results: spf=pass (sender IP is 
63.35.35.123) smtp.mailfrom=arm.com; sourceware.org; dkim=pass (signature was verified) header.d=armh.onmicrosoft.com;sourceware.org; dmarc=pass action=none header.from=arm.com; Received-SPF: Pass (protection.outlook.com: domain of arm.com designates 63.35.35.123 as permitted sender) receiver=protection.outlook.com; client-ip=63.35.35.123; helo=64aa7808-outbound-1.mta.getcheckrecipient.com; Received: from 64aa7808-outbound-1.mta.getcheckrecipient.com (63.35.35.123) by AM5EUR03FT007.mail.protection.outlook.com (10.152.16.145) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.3912.17 via Frontend Transport; Thu, 4 Mar 2021 16:35:27 +0000 Received: ("Tessian outbound 72bd4bdd1f59:v71"); Thu, 04 Mar 2021 16:35:26 +0000 X-CheckRecipientChecked: true X-CR-MTA-CID: 043cbbc893124910 X-CR-MTA-TID: 64aa7808 Received: from 6040653d020c.1 by 64aa7808-outbound-1.mta.getcheckrecipient.com id F40A11D7-59D6-4256-832E-EAB8AC6C593B.1; Thu, 04 Mar 2021 16:35:12 +0000 Received: from EUR05-VI1-obe.outbound.protection.outlook.com by 64aa7808-outbound-1.mta.getcheckrecipient.com with ESMTPS id 6040653d020c.1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384); Thu, 04 Mar 2021 16:35:12 +0000 ARC-Seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=eApdy0WDYIJSJmyeKPBEf98ULS8onHP7nwhbasdAkGp7zzsNCOs7L074H8r0JaVNihb+5S1PkxDD4V1iAVwtg6zGCI4S5Y6bML7HKkJEO8i6frhDBHgIsoLvSquXUkzfo+RecmpOlfk8ilrcE73XvU85vo84ZjEB60zoP2Y+/c3vtiw84TqQA7id7Dd0nGQGVwgjAk7BvlfOk9et8lcIkA6y4iWAlAQAXXYf1O/DV6vK/acTLqWhVJuLcHQcfLudZpgml9r/n3mCfSrgLkk0l04ZiefuWNcolsyx2BvVLr0nToFU+PBzIsf+SAwvnWdhDWkFfn7DBYkuEZOVoRXyfA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=4uZEUyPD4xQFCJseLHLSd7Qw52xc/t/Qo+IQGDIkPec=; 
b=Xrox5qE+afsFjG9JlOzSSihjAvKS4pyre6UXOJAy00d6aRi7Q95sAhznS/XKh/Oh9i2qQV1Zzv9xojmIT3EKMsnBPXVHSPGWHZwAkmal8zOYzcrZ0+ezDk7yKHDa/deOtbGON79HXeaL45wXAPjHN+TRjMRiiJ5BzAxn6+U8e+pCfzjWKVnBWIUiIniPeCJoCXaahyUTdxa7Kc8tS8spl4KgMu7yxhNqDizW6+YhatW4l31q3QkPyIbRu6vUWx+J4HaEDJ32g7WlBrcUljjgp3GzXr9QioOddlZqO8XwUYUDA7cn94iYHsr/2wseiZP/iW/IqS3QD0qZZLesSljClg== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=arm.com; dmarc=pass action=none header.from=arm.com; dkim=pass header.d=arm.com; arc=none Authentication-Results-Original: sourceware.org; dkim=none (message not signed) header.d=none;sourceware.org; dmarc=none action=none header.from=arm.com; Received: from PA4PR08MB6320.eurprd08.prod.outlook.com (2603:10a6:102:e5::9) by PAXPR08MB6574.eurprd08.prod.outlook.com (2603:10a6:102:12d::20) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.3912.17; Thu, 4 Mar 2021 16:35:02 +0000 Received: from PA4PR08MB6320.eurprd08.prod.outlook.com ([fe80::60f0:3773:69b8:e336]) by PA4PR08MB6320.eurprd08.prod.outlook.com ([fe80::60f0:3773:69b8:e336%2]) with mapi id 15.20.3912.021; Thu, 4 Mar 2021 16:35:02 +0000 To: libc-alpha@sourceware.org, Richard.Earnshaw@arm.com, DJ Delorie Subject: [PATCH 16/16] aarch64: Optimize __libc_mtag_tag_zero_region Date: Thu, 4 Mar 2021 16:34:56 +0000 Message-Id: <8d06a578d4431a8bfb2f424ec60fa442041a1173.1614874816.git.szabolcs.nagy@arm.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: References: X-Originating-IP: [217.140.106.55] X-ClientProxiedBy: LNXP265CA0079.GBRP265.PROD.OUTLOOK.COM (2603:10a6:600:76::19) To PA4PR08MB6320.eurprd08.prod.outlook.com (2603:10a6:102:e5::9) MIME-Version: 1.0 X-MS-Exchange-MessageSentRepresentingType: 1 Received: from localhost.localdomain (217.140.106.55) by LNXP265CA0079.GBRP265.PROD.OUTLOOK.COM (2603:10a6:600:76::19) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.3890.23 via Frontend Transport; Thu, 4 Mar 
2021 16:35:02 +0000 X-MS-PublicTrafficType: Email X-MS-Office365-Filtering-HT: Tenant X-MS-Office365-Filtering-Correlation-Id: cff06c59-74b3-444c-a993-08d8df2b8499 X-MS-TrafficTypeDiagnostic: PAXPR08MB6574:|DB6PR0801MB1749: X-MS-Exchange-Transport-Forked: True X-Microsoft-Antispam-PRVS: x-checkrecipientrouted: true NoDisclaimer: true X-MS-Oob-TLC-OOBClassifiers: OLM:10000;OLM:10000; X-MS-Exchange-SenderADCheck: 1 X-Microsoft-Antispam-Untrusted: BCL:0; X-Microsoft-Antispam-Message-Info-Original: ozfvQFrOhsVaxCfpbb6UyEB1e/JtGAqU9ymjyvWIYPMyaceJt2uYuUeeNd6d29uQg4sdrM84n9tmG6TwBpID7VJN9rbVBfVxW9TMvsa8i+FhGJdNGH6dMwRIdVZX12Yvd+qMlZJhCZzU0uxB9DkV74XNngc+t79W9GMIQnnZ7ZyjVubGLjaKUOx3T6s0+cGNHrhyzuLAMuRKpAltFM1vY0fc82KxCsapqElprSYhQS8bWsrsWCLw1SB2wSl0911W7EoFHj2nTVtCY7/IjwpXPxqn5/T6NmefYQ+0Sqipa9sr7Rlc7MAVsYNGwU9rwktQ46niZSIaHJkzali8hYDzldWTBKKvuGCmzBR4EkiAhFb9O3/m6O690V1QZrouh4cdBA0BbRB9RL4IyzyufLvSLR0KokzrtDH1sF1K56n2+cErpcu++FUXGzpGp30pD45+uuQKN+0r1zh4zJAws5IJpc/x9QEK8ztVYQgXp+mH6HQGSaxxTzyCmwKjjkool/L6RgoD/E1DOeplWe2rasq9wi6pQRlkpYTk1J/1Dp44kRDgTcMCgaMb/qRuFKb7m2dWXKJyyl9fu0mVhxitEL5hbw== X-Forefront-Antispam-Report-Untrusted: CIP:255.255.255.255; CTRY:; LANG:en; SCL:1; SRV:; IPV:NLI; SFV:NSPM; H:PA4PR08MB6320.eurprd08.prod.outlook.com; PTR:; CAT:NONE; SFS:(4636009)(366004)(136003)(346002)(376002)(396003)(39850400004)(69590400012)(86362001)(66556008)(83380400001)(6666004)(478600001)(36756003)(6486002)(6916009)(44832011)(8936002)(2906002)(2616005)(316002)(6512007)(186003)(16526019)(52116002)(5660300002)(26005)(956004)(66476007)(66946007)(8676002)(6506007); DIR:OUT; SFP:1101; X-MS-Exchange-AntiSpam-MessageData: 
T8K8cOCCTSaGdp9PeDDbhTabrmvW4bPZWatd1ntYaFnS/Hupa9BqmAJaCsr2V6lPfpK9c3Ruz6wjkjH6IfUUxPA3J4rFFJOl7UxmE1VXdsN5z9VbjCwkVrLNPguz1DDZUczb5iFPlvFjJhoTlrn4w8XgXw8vjBXssnqjHsP/lQzjLaTaDQYfXPYlhfeQsUaND3y/5wV1jC0cvFvU7DALzpvxVSTA5mj/OxNZz8eeKSkBEnEjTpIGYSQxChlZWyHUadxY7GcdVo5w0Ms0bm0gKBdpL8Owckabh/ZVHcvpBGAedbf74S65NwOYag8n5zVxbcsm2fKXP9fYdSId6Ctttx4c73LLe4Er41DF04E5xpkH/KLOQfrppcvxOqJmuKXub7+zIyxP9ESjXHofmN9fq2Qk3UrlZkq9cFjuUaQotIjKITJGOrFyyJ3AJiy9/enWwK3zpFnmmuZDFwvsUsYyy2WFoTcgQJ7Cx4cbDK+lEZ2ujmXyLZELDSmTwDK6Y3DqRo6ADAf7NBbtKb8ysHCCECn1uIHWmkE5mXfVJFRdNWuEWSADujFdGP7pu7RPvUn0Gix+6KF62XN0QS7kKNRy38ayuvtCw8ZCCq0yhoXm6E6W1gtQLJKYOfnaOxvJWYfLBYvjivcASAQqF+t/L/wMce++xhny1cvisRZNgW8onwfV+HVPmLoo7hJs5DwH84cVtTeM4gykDBA7fllzuRP0x6/fQean4teH3xwTafjDg2VlZO+VafCPGGXXaDb5Fi/aopfYeFk4wxDbYczO6BsTF+6mCzRk0GKeZ3X9Hg7CqO5Nl9YWR9jz5HkYIfrJVqHMuUUSGTnzpRheEFvKiOGfBi2S0YB29wwq+33eklFfQWqkZ9I27LcdOOLX/W7WSBgvtNTfyFOzg9iXRxy4ozg690dZd4lVYAu/qSs/os4VK+XSknk2cgdUIaU4mRTzI6esSjLUwUry2ofhNjEjmhm/RlDWAb0+nzlWCgWo+jLm1YmDSJPc2x6OKD2JK5FVjtezPgK6PYJIQMDmiN/bXH8OKJ8wq3pvJOXYDmj/V45u3EeXZ+fcNaQKpgSXr4bKMUL6TQTSjrPLMr5HWcjVxmWgk5AHR//zE7+qFjAh0bL5upNZLfQzMOMwGKqz17GqZRDWmd4z4yjtWlI8U9IsMX4dh5x/eFxlyJxi8Z1t6LtS06W/K4zzFj2CzRRu2mmKJl5ZNBBPYEy48ou5m9oZPVL6d4O52n78W/fHzarYcZB8rc08BfRwUcz+PnQ6vW7Y95PTBXEdu5RdpBtWNRKtiZheUXNcy/4In6ZiLcf9eLqiJ6DK8751764QKHfWX88BXfaz X-MS-Exchange-Transport-CrossTenantHeadersStamped: PAXPR08MB6574 Original-Authentication-Results: sourceware.org; dkim=none (message not signed) header.d=none; sourceware.org; dmarc=none action=none header.from=arm.com; X-EOPAttributedMessage: 0 X-MS-Exchange-Transport-CrossTenantHeadersStripped: AM5EUR03FT007.eop-EUR03.prod.protection.outlook.com X-MS-Office365-Filtering-Correlation-Id-Prvs: 1f9bc9ba-5565-4c5d-36d8-08d8df2b7588 X-Microsoft-Antispam: BCL:0; X-Microsoft-Antispam-Message-Info: 
y9lpeEcdcF/I28k3R6Ol9q1EhOe7j2zTyTDfwTqCubVcqpD4saXwN4DiwkAtzeXZ8gINDRW8ZmVI0Oy3XaBAl5XZoisIwJaGqQl/7aOFNnqXKwYqTI0guDIFsc8OsVKm/FhMtXSDT5xD7xPjJw4dXfs28Azabqj40xgtaKDVhxn51yCv9NsobEuCd+MLtC1TFEGkd8UYiRNxgRwGcib12a1YjY8At4ovhIq2CXYYxxk7P6aOR1N8e8PIHYdtK/SAsbVDbj/V5srfU5yrz4t5v6gtL45WdVme6z6dITRffrTFXTrqTPDsMNmYveviZhwe6M0LOAbUdGFhstwuS4+f+9S2qL8/mBrj4mguPhl8GkMN0jxk290Q1xDOzqx4X27bkSnQd6lNwt4VKsEDnV/8CRE16FDn00zZLdb5O/C9YfGzSCFOtxxiGzzIyrGTm8fz3VjcX0TZABD+2dCzPDWEtE9twwLJIuJfMYZcpst12xHOpYzpVLSitPrSiaHhwzQ7RRHi3E8jkI+U0n/81Cg/fm/nc36oX+/DqRgG42kRC4WSvI2ZgQ+jbFooGhopct2Po1JflpEUozKWs5fNBO/US3ojs6cXMwkO1wlhW8lC00KdBHT369VHRfJuQBOTJkMMPlkRUcTbjICeG6kBTyOH4pVhEl/WugxrNPWZT4i0F1mIwQyZm/iH0OhJSuBMm0DO X-Forefront-Antispam-Report: CIP:63.35.35.123; CTRY:IE; LANG:en; SCL:1; SRV:; IPV:CAL; SFV:NSPM; H:64aa7808-outbound-1.mta.getcheckrecipient.com; PTR:ec2-63-35-35-123.eu-west-1.compute.amazonaws.com; CAT:NONE; SFS:(4636009)(39850400004)(396003)(376002)(346002)(136003)(36840700001)(46966006)(82310400003)(6512007)(69590400012)(356005)(36860700001)(36756003)(81166007)(47076005)(82740400003)(83380400001)(86362001)(8676002)(44832011)(70206006)(6486002)(70586007)(6506007)(2616005)(956004)(186003)(16526019)(6666004)(2906002)(336012)(26005)(8936002)(316002)(478600001)(5660300002)(6862004); DIR:OUT; SFP:1101; X-OriginatorOrg: arm.com X-MS-Exchange-CrossTenant-OriginalArrivalTime: 04 Mar 2021 16:35:27.3536 (UTC) X-MS-Exchange-CrossTenant-Network-Message-Id: cff06c59-74b3-444c-a993-08d8df2b8499 X-MS-Exchange-CrossTenant-Id: f34e5979-57d9-4aaa-ad4d-b122a662184d X-MS-Exchange-CrossTenant-OriginalAttributedTenantConnectingIp: TenantId=f34e5979-57d9-4aaa-ad4d-b122a662184d; Ip=[63.35.35.123]; Helo=[64aa7808-outbound-1.mta.getcheckrecipient.com] X-MS-Exchange-CrossTenant-AuthSource: AM5EUR03FT007.eop-EUR03.prod.protection.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Anonymous X-MS-Exchange-CrossTenant-FromEntityHeader: HybridOnPrem X-MS-Exchange-Transport-CrossTenantHeadersStamped: 
DB6PR0801MB1749 X-Spam-Status: No, score=-13.9 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, GIT_PATCH_0, MSGID_FROM_MTA_HEADER, RCVD_IN_DNSWL_NONE, RCVD_IN_MSPIKE_H2, SPF_HELO_PASS, SPF_PASS, TXREP, UNPARSEABLE_RELAY autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Szabolcs Nagy via Libc-alpha From: Szabolcs Nagy Reply-To: Szabolcs Nagy Errors-To: libc-alpha-bounces@sourceware.org Sender: "Libc-alpha" This is a target hook for memory tagging; the original was a naive implementation. The new version uses the same algorithm as __libc_mtag_tag_region, but with instructions that also zero the memory. It has not been benchmarked on a real CPU, but it is expected to be faster than the naive implementation. --- sysdeps/aarch64/__mtag_tag_zero_region.S | 96 ++++++++++++++++++++---- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/sysdeps/aarch64/__mtag_tag_zero_region.S b/sysdeps/aarch64/__mtag_tag_zero_region.S index 74d398bba5..7d955fbd0c 100644 --- a/sysdeps/aarch64/__mtag_tag_zero_region.S +++ b/sysdeps/aarch64/__mtag_tag_zero_region.S @@ -20,30 +20,94 @@ #ifdef USE_MTAG +/* Assumptions: + * + * ARMv8-a, AArch64, MTE, LP64 ABI. + * + * Interface contract: + * Address is 16 byte aligned and size is multiple of 16. + * Returns the passed pointer. + * The memory region may remain untagged if tagging is not enabled. + */ .arch armv8.5-a .arch_extension memtag -/* NB, only supported on variants with 64-bit pointers. */ +#define dstin x0 +#define count x1 +#define dst x2 +#define dstend x3 +#define tmp x4 +#define zva_val x4 -/* FIXME: This is a minimal implementation. We could do much better than - this for large values of COUNT. 
*/ +ENTRY (__libc_mtag_tag_zero_region) + PTR_ARG (0) + SIZE_ARG (1) -#define dstin x0 -#define count x1 -#define dst x2 + add dstend, dstin, count -ENTRY(__libc_mtag_tag_zero_region) + cmp count, 96 + b.hi L(set_long) - mov dst, dstin -L(loop): - stzg dst, [dst], #16 - subs count, count, 16 - bne L(loop) -#if 0 - /* This is not currently needed, since for now we are only called - to tag memory that is taggable. */ - ldg dstin, [dstin] // Recover the tag created (might be untagged). + tbnz count, 6, L(set96) + + /* Set 0, 16, 32, or 48 bytes. */ + lsr tmp, count, 5 + add tmp, dstin, tmp, lsl 4 + cbz count, L(end) + stzg dstin, [dstin] + stzg dstin, [tmp] + stzg dstin, [dstend, -16] +L(end): + ret + + .p2align 4 + /* Set 64..96 bytes. Write 64 bytes from the start and + 32 bytes from the end. */ +L(set96): + stz2g dstin, [dstin] + stz2g dstin, [dstin, 32] + stz2g dstin, [dstend, -32] + ret + + .p2align 4 + /* Size is > 96 bytes. */ +L(set_long): + cmp count, 160 + b.lo L(no_zva) + +#ifndef SKIP_ZVA_CHECK + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne L(no_zva) #endif + stz2g dstin, [dstin] + stz2g dstin, [dstin, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 128 /* Adjust count and bias for loop. */ + + .p2align 4 +L(zva_loop): + add dst, dst, 64 + dc gzva, dst + subs count, count, 64 + b.hi L(zva_loop) + stz2g dstin, [dstend, -64] + stz2g dstin, [dstend, -32] ret + +L(no_zva): + sub dst, dstin, 32 /* Dst is biased by -32. */ + sub count, count, 64 /* Adjust count for loop. */ +L(no_zva_loop): + stz2g dstin, [dst, 32] + stz2g dstin, [dst, 64]! + subs count, count, 64 + b.hi L(no_zva_loop) + stz2g dstin, [dstend, -64] + stz2g dstin, [dstend, -32] + ret + END (__libc_mtag_tag_zero_region) #endif /* USE_MTAG */