From patchwork Tue Nov 30 14:20:09 2021
X-Patchwork-Submitter: "H.J. Lu"
X-Patchwork-Id: 48289
From: "H.J. Lu"
To: gcc-patches@gcc.gnu.org
Subject: [PATCH] libsanitizer: Use SSE to save and restore XMM registers
Date: Tue, 30 Nov 2021 06:20:09 -0800
Message-Id: <20211130142009.832466-1-hjl.tools@gmail.com>
X-Mailer: git-send-email 2.33.1
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
Lu" Reply-To: "H.J. Lu" Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org Sender: "Gcc-patches" Use SSE, instead of AVX, to save and restore XMM registers to support processors without AVX. The affected codes are unused in upstream since https://github.com/llvm/llvm-project/commit/66d4ce7e26a5 and will be removed in https://reviews.llvm.org/D112604 This fixed FAIL: g++.dg/tsan/pthread_cond_clockwait.C -O0 execution test FAIL: g++.dg/tsan/pthread_cond_clockwait.C -O2 execution test on machines without AVX. PR sanitizer/103466 * tsan/tsan_rtl_amd64.S (__tsan_trace_switch_thunk): Replace vmovdqu with movdqu. (__tsan_report_race_thunk): Likewise. --- libsanitizer/tsan/tsan_rtl_amd64.S | 128 ++++++++++++++--------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/libsanitizer/tsan/tsan_rtl_amd64.S b/libsanitizer/tsan/tsan_rtl_amd64.S index 632b19d1815..c15b01e49e5 100644 --- a/libsanitizer/tsan/tsan_rtl_amd64.S +++ b/libsanitizer/tsan/tsan_rtl_amd64.S @@ -45,22 +45,22 @@ ASM_SYMBOL(__tsan_trace_switch_thunk): # All XMM registers are caller-saved. sub $0x100, %rsp CFI_ADJUST_CFA_OFFSET(0x100) - vmovdqu %xmm0, 0x0(%rsp) - vmovdqu %xmm1, 0x10(%rsp) - vmovdqu %xmm2, 0x20(%rsp) - vmovdqu %xmm3, 0x30(%rsp) - vmovdqu %xmm4, 0x40(%rsp) - vmovdqu %xmm5, 0x50(%rsp) - vmovdqu %xmm6, 0x60(%rsp) - vmovdqu %xmm7, 0x70(%rsp) - vmovdqu %xmm8, 0x80(%rsp) - vmovdqu %xmm9, 0x90(%rsp) - vmovdqu %xmm10, 0xa0(%rsp) - vmovdqu %xmm11, 0xb0(%rsp) - vmovdqu %xmm12, 0xc0(%rsp) - vmovdqu %xmm13, 0xd0(%rsp) - vmovdqu %xmm14, 0xe0(%rsp) - vmovdqu %xmm15, 0xf0(%rsp) + movdqu %xmm0, 0x0(%rsp) + movdqu %xmm1, 0x10(%rsp) + movdqu %xmm2, 0x20(%rsp) + movdqu %xmm3, 0x30(%rsp) + movdqu %xmm4, 0x40(%rsp) + movdqu %xmm5, 0x50(%rsp) + movdqu %xmm6, 0x60(%rsp) + movdqu %xmm7, 0x70(%rsp) + movdqu %xmm8, 0x80(%rsp) + movdqu %xmm9, 0x90(%rsp) + movdqu %xmm10, 0xa0(%rsp) + movdqu %xmm11, 0xb0(%rsp) + movdqu %xmm12, 0xc0(%rsp) + movdqu %xmm13, 0xd0(%rsp) + movdqu %xmm14, 0xe0(%rsp) + movdqu %xmm15, 0xf0(%rsp) # Align stack frame. push %rbx # non-scratch CFI_ADJUST_CFA_OFFSET(8) @@ -78,22 +78,22 @@ ASM_SYMBOL(__tsan_trace_switch_thunk): pop %rbx CFI_ADJUST_CFA_OFFSET(-8) # Restore scratch registers. - vmovdqu 0x0(%rsp), %xmm0 - vmovdqu 0x10(%rsp), %xmm1 - vmovdqu 0x20(%rsp), %xmm2 - vmovdqu 0x30(%rsp), %xmm3 - vmovdqu 0x40(%rsp), %xmm4 - vmovdqu 0x50(%rsp), %xmm5 - vmovdqu 0x60(%rsp), %xmm6 - vmovdqu 0x70(%rsp), %xmm7 - vmovdqu 0x80(%rsp), %xmm8 - vmovdqu 0x90(%rsp), %xmm9 - vmovdqu 0xa0(%rsp), %xmm10 - vmovdqu 0xb0(%rsp), %xmm11 - vmovdqu 0xc0(%rsp), %xmm12 - vmovdqu 0xd0(%rsp), %xmm13 - vmovdqu 0xe0(%rsp), %xmm14 - vmovdqu 0xf0(%rsp), %xmm15 + movdqu 0x0(%rsp), %xmm0 + movdqu 0x10(%rsp), %xmm1 + movdqu 0x20(%rsp), %xmm2 + movdqu 0x30(%rsp), %xmm3 + movdqu 0x40(%rsp), %xmm4 + movdqu 0x50(%rsp), %xmm5 + movdqu 0x60(%rsp), %xmm6 + movdqu 0x70(%rsp), %xmm7 + movdqu 0x80(%rsp), %xmm8 + movdqu 0x90(%rsp), %xmm9 + movdqu 0xa0(%rsp), %xmm10 + movdqu 0xb0(%rsp), %xmm11 + movdqu 0xc0(%rsp), %xmm12 + movdqu 0xd0(%rsp), %xmm13 + movdqu 0xe0(%rsp), %xmm14 + movdqu 0xf0(%rsp), %xmm15 add $0x100, %rsp CFI_ADJUST_CFA_OFFSET(-0x100) pop %r11 @@ -163,22 +163,22 @@ ASM_SYMBOL(__tsan_report_race_thunk): # All XMM registers are caller-saved. 
   sub $0x100, %rsp
   CFI_ADJUST_CFA_OFFSET(0x100)
-  vmovdqu %xmm0, 0x0(%rsp)
-  vmovdqu %xmm1, 0x10(%rsp)
-  vmovdqu %xmm2, 0x20(%rsp)
-  vmovdqu %xmm3, 0x30(%rsp)
-  vmovdqu %xmm4, 0x40(%rsp)
-  vmovdqu %xmm5, 0x50(%rsp)
-  vmovdqu %xmm6, 0x60(%rsp)
-  vmovdqu %xmm7, 0x70(%rsp)
-  vmovdqu %xmm8, 0x80(%rsp)
-  vmovdqu %xmm9, 0x90(%rsp)
-  vmovdqu %xmm10, 0xa0(%rsp)
-  vmovdqu %xmm11, 0xb0(%rsp)
-  vmovdqu %xmm12, 0xc0(%rsp)
-  vmovdqu %xmm13, 0xd0(%rsp)
-  vmovdqu %xmm14, 0xe0(%rsp)
-  vmovdqu %xmm15, 0xf0(%rsp)
+  movdqu %xmm0, 0x0(%rsp)
+  movdqu %xmm1, 0x10(%rsp)
+  movdqu %xmm2, 0x20(%rsp)
+  movdqu %xmm3, 0x30(%rsp)
+  movdqu %xmm4, 0x40(%rsp)
+  movdqu %xmm5, 0x50(%rsp)
+  movdqu %xmm6, 0x60(%rsp)
+  movdqu %xmm7, 0x70(%rsp)
+  movdqu %xmm8, 0x80(%rsp)
+  movdqu %xmm9, 0x90(%rsp)
+  movdqu %xmm10, 0xa0(%rsp)
+  movdqu %xmm11, 0xb0(%rsp)
+  movdqu %xmm12, 0xc0(%rsp)
+  movdqu %xmm13, 0xd0(%rsp)
+  movdqu %xmm14, 0xe0(%rsp)
+  movdqu %xmm15, 0xf0(%rsp)
   # Align stack frame.
   push %rbx # non-scratch
   CFI_ADJUST_CFA_OFFSET(8)
@@ -196,22 +196,22 @@ ASM_SYMBOL(__tsan_report_race_thunk):
   pop %rbx
   CFI_ADJUST_CFA_OFFSET(-8)
   # Restore scratch registers.
-  vmovdqu 0x0(%rsp), %xmm0
-  vmovdqu 0x10(%rsp), %xmm1
-  vmovdqu 0x20(%rsp), %xmm2
-  vmovdqu 0x30(%rsp), %xmm3
-  vmovdqu 0x40(%rsp), %xmm4
-  vmovdqu 0x50(%rsp), %xmm5
-  vmovdqu 0x60(%rsp), %xmm6
-  vmovdqu 0x70(%rsp), %xmm7
-  vmovdqu 0x80(%rsp), %xmm8
-  vmovdqu 0x90(%rsp), %xmm9
-  vmovdqu 0xa0(%rsp), %xmm10
-  vmovdqu 0xb0(%rsp), %xmm11
-  vmovdqu 0xc0(%rsp), %xmm12
-  vmovdqu 0xd0(%rsp), %xmm13
-  vmovdqu 0xe0(%rsp), %xmm14
-  vmovdqu 0xf0(%rsp), %xmm15
+  movdqu 0x0(%rsp), %xmm0
+  movdqu 0x10(%rsp), %xmm1
+  movdqu 0x20(%rsp), %xmm2
+  movdqu 0x30(%rsp), %xmm3
+  movdqu 0x40(%rsp), %xmm4
+  movdqu 0x50(%rsp), %xmm5
+  movdqu 0x60(%rsp), %xmm6
+  movdqu 0x70(%rsp), %xmm7
+  movdqu 0x80(%rsp), %xmm8
+  movdqu 0x90(%rsp), %xmm9
+  movdqu 0xa0(%rsp), %xmm10
+  movdqu 0xb0(%rsp), %xmm11
+  movdqu 0xc0(%rsp), %xmm12
+  movdqu 0xd0(%rsp), %xmm13
+  movdqu 0xe0(%rsp), %xmm14
+  movdqu 0xf0(%rsp), %xmm15
   add $0x100, %rsp
   CFI_ADJUST_CFA_OFFSET(-0x100)
   pop %r11
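
For context on the change above: movdqu is baseline SSE2 and is available on
every x86-64 processor, while the VEX-encoded vmovdqu requires AVX and faults
(SIGILL on Linux) on processors without it, which is why the thunks cannot use
it unconditionally.  A minimal sketch, not part of the patch and assuming only
GCC's __builtin_cpu_supports, of a runtime check one could use to confirm AVX
availability on a given machine:

/* Sketch only: report whether this processor has AVX.  Without AVX,
   VEX-encoded instructions such as vmovdqu trap, while the SSE2
   movdqu used by this patch always works on x86-64.  */
#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();
  if (__builtin_cpu_supports ("avx"))
    printf ("AVX present: vmovdqu would execute here.\n");
  else
    printf ("No AVX: vmovdqu would fault; movdqu still works.\n");
  return 0;
}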