From patchwork Fri Jun 2 13:19:07 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: develop--- via Libc-alpha X-Patchwork-Id: 70507 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 1F7373858C36 for ; Fri, 2 Jun 2023 13:19:41 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 1F7373858C36 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1685711981; bh=X1Tftku1fhVFevFw6ZOS7YkYRVMhDLPbMPdk487XPlk=; h=To:Cc:Subject:Date:List-Id:List-Unsubscribe:List-Archive: List-Post:List-Help:List-Subscribe:From:Reply-To:From; b=iory+vZ/c5EkmVR2abMB8DL+O/Ph43xzaWeJqR9RVOiIiJ/QYnTxV8k5RQ2N+zfc5 Gr6HJYy8aLRi13AF81mBJG0P5zLQw/7N5towE7h4s0g85w+TEvC+AwR7bwpzUm0+LL ufpBS1xddOtVwYH3ErzfZxc/EKpDsy0bXcoJW9Sg= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-pf1-x435.google.com (mail-pf1-x435.google.com [IPv6:2607:f8b0:4864:20::435]) by sourceware.org (Postfix) with ESMTPS id 45D8D3858D32 for ; Fri, 2 Jun 2023 13:19:17 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 45D8D3858D32 Received: by mail-pf1-x435.google.com with SMTP id d2e1a72fcca58-64d4e4598f0so2348344b3a.2 for ; Fri, 02 Jun 2023 06:19:17 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1685711956; x=1688303956; h=content-transfer-encoding:mime-version:message-id:date:subject:cc :to:from:x-gm-message-state:from:to:cc:subject:date:message-id :reply-to; bh=X1Tftku1fhVFevFw6ZOS7YkYRVMhDLPbMPdk487XPlk=; b=Sg2gjCrmn2pq2VPU+ooi6/sDMhea78fATDqwYMgvgkupDxiMPUdw1S6YAFAj5M4JBu Lxav1DoMPEVqugt48z7OnEqIBd3VmXm+z5TUmpQNylMTlkD286077uK1Fx4+eHcHcjVM vqRFJf85hVRFXz+OiKlQewuaRo4PXum97ITfP2lQLotFJ/8WDYN6A/o85/UZSN0nvA2d HWjegwS9g4QOKq5NziSv9d+nV7sWNoyyHc4gEVYXDiO5W5vnoUe1oNVZCTU2rCurALC3 R05svXnojMnXIckJVTlSJwVtWQ4JhsuQSjCYwx+KMcgCPLpn8OcwSCBttRBUYWT0SWmv V07w== X-Gm-Message-State: AC+VfDxiMJVg/8YchTfVrRctw44u5k4EoE0DKOtwPpd+x13kncWhpSvt TmL1I3P2vT+kfZcMcuZNnYW//t24jns7D1iQ X-Google-Smtp-Source: ACHHUZ7jIet/9wXr9bqA/AjBGPaC8B4xsSjhGxe4LBAZCILkE3/8aasN6nliLVNXvSrk+IJxqzjm0w== X-Received: by 2002:a05:6a20:e185:b0:109:38b4:a210 with SMTP id ks5-20020a056a20e18500b0010938b4a210mr11093316pzb.29.1685711955651; Fri, 02 Jun 2023 06:19:15 -0700 (PDT) Received: from lib-genoa-01.amd.com ([165.204.156.251]) by smtp.gmail.com with ESMTPSA id f16-20020a63f750000000b00528da88275bsm1201294pgk.47.2023.06.02.06.19.13 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 02 Jun 2023 06:19:15 -0700 (PDT) X-Google-Original-From: sajan.karumanchi@amd.com To: libc-alpha@sourceware.org, carlos@redhat.com, fweimer@redhat.com Cc: Sajan Karumanchi , premachandra.mallappa@amd.com Subject: [PATCH] x86: Fix for cache computation on AMD legacy cpus. Date: Fri, 2 Jun 2023 13:19:07 +0000 Message-Id: <20230602131907.1375745-1-sajan.karumanchi@amd.com> X-Mailer: git-send-email 2.34.1 MIME-Version: 1.0 X-Spam-Status: No, score=-12.4 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: "sajan.karumanchi--- via Libc-alpha" From: develop--- via Libc-alpha Reply-To: sajan.karumanchi@gmail.com Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org Sender: "Libc-alpha" From: Sajan Karumanchi Some legacy AMD CPUs and hypervisors have the _cpuid_ '0x8000_001D' set to Zero, thus resulting in zeroed-out computed cache values. This patch reintroduces the old way of cache computation as a failsafe option to handle these exceptions. Reviewed-by: Premachandra Mallappa --- sysdeps/x86/dl-cacheinfo.h | 218 +++++++++++++++++++++++++++++++++---- 1 file changed, 194 insertions(+), 24 deletions(-) diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index 877e73d700..1d15b6bcd8 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -315,40 +315,203 @@ handle_amd (int name) { unsigned int eax; unsigned int ebx; - unsigned int ecx; + unsigned int ecx = 0; unsigned int edx; - unsigned int count = 0x1; + unsigned int max_cpuid = 0; + unsigned int fn = 0; /* No level 4 cache (yet). */ if (name > _SC_LEVEL3_CACHE_LINESIZE) return 0; - if (name >= _SC_LEVEL3_CACHE_SIZE) - count = 0x3; - else if (name >= _SC_LEVEL2_CACHE_SIZE) - count = 0x2; - else if (name >= _SC_LEVEL1_DCACHE_SIZE) - count = 0x0; + __cpuid(0x80000000, max_cpuid, ebx, ecx, edx); + + if (max_cpuid >= 0x8000001D) + /* Use __cpuid__ '0x8000_001D' to compute cache details. */ + { + unsigned int count = 0x1; + + if (name >= _SC_LEVEL3_CACHE_SIZE) + count = 0x3; + else if (name >= _SC_LEVEL2_CACHE_SIZE) + count = 0x2; + else if (name >= _SC_LEVEL1_DCACHE_SIZE) + count = 0x0; + + __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); + + if (ecx != 0) + { + switch (name) + { + case _SC_LEVEL1_ICACHE_ASSOC: + case _SC_LEVEL1_DCACHE_ASSOC: + case _SC_LEVEL2_CACHE_ASSOC: + case _SC_LEVEL3_CACHE_ASSOC: + return ((ebx >> 22) & 0x3ff) + 1; + case _SC_LEVEL1_ICACHE_LINESIZE: + case _SC_LEVEL1_DCACHE_LINESIZE: + case _SC_LEVEL2_CACHE_LINESIZE: + case _SC_LEVEL3_CACHE_LINESIZE: + return (ebx & 0xfff) + 1; + case _SC_LEVEL1_ICACHE_SIZE: + case _SC_LEVEL1_DCACHE_SIZE: + case _SC_LEVEL2_CACHE_SIZE: + case _SC_LEVEL3_CACHE_SIZE: + return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1); + default: + __builtin_unreachable (); + } + return -1; + } + } + + /* Legacy cache computation when __cpuid__ 0x8000_001D is Zero */ + fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); + + if (max_cpuid < fn) + return 0; + + __cpuid (fn, eax, ebx, ecx, edx); - __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); + if (name < _SC_LEVEL1_DCACHE_SIZE) + { + name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE; + ecx = edx; + } switch (name) { - case _SC_LEVEL1_ICACHE_ASSOC: - case _SC_LEVEL1_DCACHE_ASSOC: - case _SC_LEVEL2_CACHE_ASSOC: + case _SC_LEVEL1_DCACHE_SIZE: + return (ecx >> 14) & 0x3fc00; + + case _SC_LEVEL1_DCACHE_ASSOC: + ecx >>= 16; + if ((ecx & 0xff) == 0xff) + { + /* Fully associative. */ + return (ecx << 2) & 0x3fc00; + } + return ecx & 0xff; + + case _SC_LEVEL1_DCACHE_LINESIZE: + return ecx & 0xff; + + case _SC_LEVEL2_CACHE_SIZE: + return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00; + + case _SC_LEVEL2_CACHE_ASSOC: + switch ((ecx >> 12) & 0xf) + { + case 0: + case 1: + case 2: + case 4: + return (ecx >> 12) & 0xf; + case 6: + return 8; + case 8: + return 16; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); + default: + return 0; + } + + case _SC_LEVEL2_CACHE_LINESIZE: + return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; + + case _SC_LEVEL3_CACHE_SIZE: + { + long int total_l3_cache = 0, l3_cache_per_thread = 0; + unsigned int threads = 0; + const struct cpu_features *cpu_features; + + if ((edx & 0xf000) == 0) + return 0; + + total_l3_cache = (edx & 0x3ffc0000) << 1; + cpu_features = __get_cpu_features (); + + /* Figure out the number of logical threads that share L3. */ + if (max_cpuid >= 0x80000008) + { + /* Get width of APIC ID. */ + __cpuid (0x80000008, eax, ebx, ecx, edx); + threads = 1 << ((ecx >> 12) & 0x0f); + } + + if (threads == 0 || cpu_features->basic.family >= 0x17) + { + /* If APIC ID width is not available, use logical + processor count. */ + __cpuid (0x00000001, eax, ebx, ecx, edx); + if ((edx & (1 << 28)) != 0) + threads = (ebx >> 16) & 0xff; + } + + /* Cap usage of highest cache level to the number of + supported threads. */ + if (threads > 0) + l3_cache_per_thread = total_l3_cache/threads; + + /* Get shared cache per ccx for Zen architectures. */ + if (cpu_features->basic.family >= 0x17) + { + long int l3_cache_per_ccx = 0; + /* Get number of threads share the L3 cache in CCX. */ + __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); + unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; + l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx; + return l3_cache_per_ccx; + } + else + { + return l3_cache_per_thread; + } + } + case _SC_LEVEL3_CACHE_ASSOC: - return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0; - case _SC_LEVEL1_ICACHE_LINESIZE: - case _SC_LEVEL1_DCACHE_LINESIZE: - case _SC_LEVEL2_CACHE_LINESIZE: + switch ((edx >> 12) & 0xf) + { + case 0: + case 1: + case 2: + case 4: + return (edx >> 12) & 0xf; + case 6: + return 8; + case 8: + return 16; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((edx & 0x3ffc0000) << 1) / (edx & 0xff); + default: + return 0; + } + case _SC_LEVEL3_CACHE_LINESIZE: - return ecx ? (ebx & 0xfff) + 1 : 0; - case _SC_LEVEL1_ICACHE_SIZE: - case _SC_LEVEL1_DCACHE_SIZE: - case _SC_LEVEL2_CACHE_SIZE: - case _SC_LEVEL3_CACHE_SIZE: - return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0; + return (edx & 0xf000) == 0 ? 0 : edx & 0xff; + default: __builtin_unreachable (); } @@ -713,8 +876,15 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE); if (shared <= 0) - /* No shared L3 cache. All we have is the L2 cache. */ - shared = core; + { + /* No shared L3 cache. All we have is the L2 cache. */ + shared = core; + } + else if (cpu_features->basic.family < 0x17) + { + /* Account for exclusive L2 and L3 caches. */ + shared += core; + } } cpu_features->level1_icache_size = level1_icache_size;