395 files changed, 12329 insertions, 1828 deletions
diff --git a/Documentation/admin-guide/acpi/ssdt-overlays.rst b/Documentation/admin-guide/acpi/ssdt-overlays.rst index b5fbf54dca19..5ea9f4a3b76e 100644 --- a/Documentation/admin-guide/acpi/ssdt-overlays.rst +++ b/Documentation/admin-guide/acpi/ssdt-overlays.rst @@ -103,7 +103,7 @@ allows a persistent, OS independent way of storing the user defined SSDTs. There is also work underway to implement EFI support for loading user defined SSDTs and using this method will make it easier to convert to the EFI loading mechanism when that will arrive. To enable it, the -CONFIG_EFI_CUSTOM_SSDT_OVERLAYS shoyld be chosen to y. +CONFIG_EFI_CUSTOM_SSDT_OVERLAYS should be chosen to y. In order to load SSDTs from an EFI variable the ``"efivar_ssdt=..."`` kernel command line parameter can be used (the name has a limitation of 16 characters). diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 85fb0fa5d091..a1457995fd41 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4064,7 +4064,7 @@ extra details on the taint flags that users can pick to compose the bitmask to assign to panic_on_taint. - panic_on_warn panic() instead of WARN(). Useful to cause kdump + panic_on_warn=1 panic() instead of WARN(). Useful to cause kdump on a WARN(). parkbd.port= [HW] Parallel port number the keyboard adapter is diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 67bd239ead0b..38c0b5213736 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -25,6 +25,7 @@ description: | allOf: - $ref: /schemas/cpu.yaml# + - $ref: extensions.yaml properties: compatible: @@ -82,25 +83,6 @@ properties: description: The blocksize in bytes for the Zicboz cache operations. - riscv,isa: - description: - Identifies the specific RISC-V instruction set architecture - supported by the hart. These are documented in the RISC-V - User-Level ISA document, available from - https://riscv.org/specifications/ - - Due to revisions of the ISA specification, some deviations - have arisen over time. - Notably, riscv,isa was defined prior to the creation of the - Zicntr, Zicsr, Zifencei and Zihpm extensions and thus "i" - implies "zicntr_zicsr_zifencei_zihpm". - - While the isa strings in ISA specification are case - insensitive, letters in the riscv,isa string must be all - lowercase. - $ref: /schemas/types.yaml#/definitions/string - pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ - # RISC-V has multiple properties for cache op block sizes as the sizes # differ between individual CBO extensions cache-op-block-size: false @@ -139,8 +121,17 @@ properties: DMIPS/MHz, relative to highest capacity-dmips-mhz in the system. 
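The hunk above retires the monolithic riscv,isa string in favour of the riscv,isa-base and riscv,isa-extensions pair defined by the new extensions.yaml below. As a minimal sketch of how a consumer could probe the new properties with the generic OF helpers (the function name and the tested extension are illustrative, not part of this series):

    #include <linux/of.h>
    #include <linux/printk.h>

    /* Hypothetical example: report the base ISA of a cpu node and test
     * for one entry of the riscv,isa-extensions string array. */
    static int riscv_isa_probe_example(struct device_node *cpu)
    {
            const char *base;
            int ret;

            /* "rv32i" or "rv64i", per riscv,isa-base */
            ret = of_property_read_string(cpu, "riscv,isa-base", &base);
            if (ret)
                    return ret;

            /* of_property_match_string() returns the matching index, or a
             * negative errno when "zicboz" is not listed. */
            if (of_property_match_string(cpu, "riscv,isa-extensions",
                                         "zicboz") >= 0)
                    pr_info("%pOF: %s implements zicboz\n", cpu, base);

            return 0;
    }
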
+anyOf: + - required: + - riscv,isa + - required: + - riscv,isa-base + +dependencies: + riscv,isa-base: [ "riscv,isa-extensions" ] + riscv,isa-extensions: [ "riscv,isa-base" ] + required: - - riscv,isa - interrupt-controller unevaluatedProperties: false @@ -160,7 +151,9 @@ examples: i-cache-sets = <128>; i-cache-size = <16384>; reg = <0>; - riscv,isa = "rv64imac"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "c"; + cpu_intc0: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -183,8 +176,10 @@ examples: i-tlb-size = <32>; mmu-type = "riscv,sv39"; reg = <1>; - riscv,isa = "rv64imafdc"; tlb-split; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c"; + cpu_intc1: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -202,8 +197,10 @@ examples: device_type = "cpu"; reg = <0>; compatible = "riscv"; - riscv,isa = "rv64imafdc"; mmu-type = "riscv,sv48"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c"; + interrupt-controller { #interrupt-cells = <1>; interrupt-controller; diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml new file mode 100644 index 000000000000..cc1f546fdbdc --- /dev/null +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -0,0 +1,250 @@ +# SPDX-License-Identifier: (GPL-2.0 OR MIT) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/riscv/extensions.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: RISC-V ISA extensions + +maintainers: + - Paul Walmsley <[email protected]> + - Palmer Dabbelt <[email protected]> + - Conor Dooley <[email protected]> + +description: | + RISC-V has a large number of extensions, some of which are "standard" + extensions, meaning they are ratified by RISC-V International, and others + are "vendor" extensions. + This document defines properties that indicate whether a hart supports a + given extension. + + Once a standard extension has been ratified, no changes in behaviour can be + made without the creation of a new extension. + The properties for standard extensions therefore map to their originally + ratified states, with the exception of the I, Zicntr & Zihpm extensions. + See the "i" property for more information. + +select: + properties: + compatible: + contains: + const: riscv + +properties: + riscv,isa: + description: + Identifies the specific RISC-V instruction set architecture + supported by the hart. These are documented in the RISC-V + User-Level ISA document, available from + https://riscv.org/specifications/ + + Due to revisions of the ISA specification, some deviations + have arisen over time. + Notably, riscv,isa was defined prior to the creation of the + Zicntr, Zicsr, Zifencei and Zihpm extensions and thus "i" + implies "zicntr_zicsr_zifencei_zihpm". + + While the isa strings in ISA specification are case + insensitive, letters in the riscv,isa string must be all + lowercase. + $ref: /schemas/types.yaml#/definitions/string + pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ + deprecated: true + + riscv,isa-base: + description: + The base ISA implemented by this hart, as described by the 20191213 + version of the unprivileged ISA specification. + enum: + - rv32i + - rv64i + + riscv,isa-extensions: + $ref: /schemas/types.yaml#/definitions/string-array + minItems: 1 + description: Extensions supported by the hart. 
+ items: + anyOf: + # single letter extensions, in canonical order + - const: i + description: | + The base integer instruction set, as ratified in the 20191213 + version of the unprivileged ISA specification. + + This does not include Chapter 10, "Counters", which was moved into + the Zicntr and Zihpm extensions after the ratification of the + 20191213 version of the unprivileged specification. + + - const: m + description: + The standard M extension for integer multiplication and division, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: a + description: + The standard A extension for atomic instructions, as ratified in the + 20191213 version of the unprivileged ISA specification. + + - const: f + description: + The standard F extension for single-precision floating point, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: d + description: + The standard D extension for double-precision floating-point, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: q + description: + The standard Q extension for quad-precision floating-point, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: c + description: + The standard C extension for compressed instructions, as ratified in + the 20191213 version of the unprivileged ISA specification. + + - const: v + description: + The standard V extension for vector operations, as ratified + in-and-around commit 7a6c8ae ("Fix text that describes vfmv.v.f + encoding") of the riscv-v-spec. + + - const: h + description: + The standard H extension for hypervisors as ratified in the 20191213 + version of the privileged ISA specification. + + # multi-letter extensions, sorted alphanumerically + - const: smaia + description: | + The standard Smaia supervisor-level extension for the advanced + interrupt architecture for machine-mode-visible csr and behavioural + changes to interrupts as frozen at commit ccbddab ("Merge pull + request #42 from riscv/jhauser-2023-RC4") of riscv-aia. + + - const: ssaia + description: | + The standard Ssaia supervisor-level extension for the advanced + interrupt architecture for supervisor-mode-visible csr and + behavioural changes to interrupts as frozen at commit ccbddab + ("Merge pull request #42 from riscv/jhauser-2023-RC4") of riscv-aia. + + - const: sscofpmf + description: | + The standard Sscofpmf supervisor-level extension for count overflow + and mode-based filtering as ratified at commit 01d1df0 ("Add ability + to manually trigger workflow. (#2)") of riscv-count-overflow. + + - const: sstc + description: | + The standard Sstc supervisor-level extension for time compare as + ratified at commit 3f9ed34 ("Add ability to manually trigger + workflow. (#2)") of riscv-time-compare. + + - const: svinval + description: + The standard Svinval supervisor-level extension for fine-grained + address-translation cache invalidation as ratified in the 20191213 + version of the privileged ISA specification. + + - const: svnapot + description: + The standard Svnapot supervisor-level extensions for napot + translation contiguity as ratified in the 20191213 version of the + privileged ISA specification. + + - const: svpbmt + description: + The standard Svpbmt supervisor-level extensions for page-based + memory types as ratified in the 20191213 version of the privileged + ISA specification. 
+ + - const: zba + description: | + The standard Zba bit-manipulation extension for address generation + acceleration instructions as ratified at commit 6d33919 ("Merge pull + request #158 from hirooih/clmul-fix-loop-end-condition") of + riscv-bitmanip. + + - const: zbb + description: | + The standard Zbb bit-manipulation extension for basic bit-manipulation + as ratified at commit 6d33919 ("Merge pull request #158 from + hirooih/clmul-fix-loop-end-condition") of riscv-bitmanip. + + - const: zbc + description: | + The standard Zbc bit-manipulation extension for carry-less + multiplication as ratified at commit 6d33919 ("Merge pull request + #158 from hirooih/clmul-fix-loop-end-condition") of riscv-bitmanip. + + - const: zbs + description: | + The standard Zbs bit-manipulation extension for single-bit + instructions as ratified at commit 6d33919 ("Merge pull request #158 + from hirooih/clmul-fix-loop-end-condition") of riscv-bitmanip. + + - const: zicbom + description: + The standard Zicbom extension for base cache management operations as + ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + + - const: zicbop + description: + The standard Zicbop extension for cache-block prefetch instructions + as ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of + riscv-CMOs. + + - const: zicboz + description: + The standard Zicboz extension for cache-block zeroing as ratified + in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + + - const: zicntr + description: + The standard Zicntr extension for base counters and timers, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: zicsr + description: | + The standard Zicsr extension for control and status register + instructions, as ratified in the 20191213 version of the + unprivileged ISA specification. + + This does not include Chapter 10, "Counters", which documents + special case read-only CSRs, that were moved into the Zicntr and + Zihpm extensions after the ratification of the 20191213 version of + the unprivileged specification. + + - const: zifencei + description: + The standard Zifencei extension for instruction-fetch fence, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: zihintpause + description: + The standard Zihintpause extension for pause hints, as ratified in + commit d8ab5c7 ("Zihintpause is ratified") of the riscv-isa-manual. + + - const: zihpm + description: + The standard Zihpm extension for hardware performance counters, as + ratified in the 20191213 version of the unprivileged ISA + specification. + + - const: ztso + description: + The standard Ztso extension for total store ordering, as ratified + in commit 2e5236 ("Ztso is now ratified.") of the + riscv-isa-manual. + +additionalProperties: true +... diff --git a/Documentation/process/6.Followthrough.rst b/Documentation/process/6.Followthrough.rst index a173cd5f93d2..66fa400c6d94 100644 --- a/Documentation/process/6.Followthrough.rst +++ b/Documentation/process/6.Followthrough.rst @@ -51,6 +51,13 @@ mind: working toward the creation of the best kernel they can; they are not trying to create discomfort for their employers' competitors. + - Be prepared for seemingly silly requests for coding style changes + and requests to factor out some of your code to shared parts of + the kernel. One job the maintainers do is to keep things looking + the same. 
Sometimes this means that the clever hack in your driver + to get around a problem actually needs to become a generalized + kernel feature ready for next time. + What all of this comes down to is that, when reviewers send you comments, you need to pay attention to the technical observations that they are making. Do not let their form of expression or your own pride keep that diff --git a/Documentation/riscv/acpi.rst b/Documentation/riscv/acpi.rst new file mode 100644 index 000000000000..9870a282815b --- /dev/null +++ b/Documentation/riscv/acpi.rst @@ -0,0 +1,10 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============== +ACPI on RISC-V +============== + +The ISA string parsing rules for ACPI are defined by `Version ASCIIDOC +Conversion, 12/2022 of the RISC-V specifications, as defined by tag +"riscv-isa-release-1239329-2023-05-23" (commit 1239329 +) <https://github.com/riscv/riscv-isa-manual/releases/tag/riscv-isa-release-1239329-2023-05-23>`_ diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst index 95cf9c1e1da1..81cf6e616476 100644 --- a/Documentation/riscv/index.rst +++ b/Documentation/riscv/index.rst @@ -5,6 +5,7 @@ RISC-V architecture .. toctree:: :maxdepth: 1 + acpi boot-image-header vm-layout hwprobe diff --git a/Documentation/riscv/vector.rst b/Documentation/riscv/vector.rst index 48f189d79e41..165b7ed0ac4f 100644 --- a/Documentation/riscv/vector.rst +++ b/Documentation/riscv/vector.rst @@ -130,3 +130,11 @@ processes in form of sysctl knob: Modifying the system default enablement status does not affect the enablement status of any existing process of thread that do not make an execve() call. + +3. Vector Register State Across System Calls +--------------------------------------------- + +As indicated by version 1.0 of the V extension [1], vector registers are +clobbered by system calls. + +1: https://github.com/riscv/riscv-v-spec/blob/master/calling-convention.adoc diff --git a/Documentation/translations/zh_CN/process/2.Process.rst b/Documentation/translations/zh_CN/process/2.Process.rst index 4a6ed0219494..e68c9de0f7f8 100644 --- a/Documentation/translations/zh_CN/process/2.Process.rst +++ b/Documentation/translations/zh_CN/process/2.Process.rst @@ -358,7 +358,7 @@ Andrew Morton 为有抱负的内核开发人员提供了如下建议 机器上始终完美运行”。通常的方法是和其他人一起解决问题(这可能需 要坚持!),但就是如此——这是内核开发的一部分。 -(http://lwn.net/articles/283982/) +(http://lwn.net/Articles/283982/) 在没有明显问题需要解决的情况下,通常建议开发人员查看当前的回归和开放缺陷 列表。从来都不缺少需要解决的问题;通过解决这些问题,开发人员将从该过程获得 diff --git a/Documentation/translations/zh_CN/process/3.Early-stage.rst b/Documentation/translations/zh_CN/process/3.Early-stage.rst index de53dd12e911..2caba4753b75 100644 --- a/Documentation/translations/zh_CN/process/3.Early-stage.rst +++ b/Documentation/translations/zh_CN/process/3.Early-stage.rst @@ -44,7 +44,7 @@ 试图向这些人传达用户需求是浪费时间。他们太“聪明”了,根本听不到少数 人的话。 -(http://lwn.net/articles/131776/) +(http://lwn.net/Articles/131776/) 实际情况却是不同的;与特定模块相比,内核开发人员更关心系统稳定性、长期维护 以及找到问题的正确解决方案。这个故事的寓意是把重点放在问题上——而不是具体的 diff --git a/Documentation/translations/zh_CN/process/4.Coding.rst b/Documentation/translations/zh_CN/process/4.Coding.rst index 94f7f866f103..7cac9424f5d5 100644 --- a/Documentation/translations/zh_CN/process/4.Coding.rst +++ b/Documentation/translations/zh_CN/process/4.Coding.rst @@ -149,7 +149,7 @@ Linus对这个问题给出了最佳答案: 所以我们不会通过引入新问题来修复错误。这种方式是靠不住的,没人知道 是否真的有进展。是前进两步、后退一步,还是前进一步、后退两步? 
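The new vector.rst section above records that vector state does not survive a system call; the kernel-side counterpart is the riscv_v_vstate_discard() call added to do_trap_ecall_u() further down. A user-space sketch of the consequence, assuming a toolchain targeting rv64gcv (illustrative only):

    #include <unistd.h>

    static void vector_across_syscall(void)
    {
            long vl;

            /* configure the vector unit; real code would fill v0-v31 here */
            asm volatile ("vsetvli %0, x0, e8, m8, ta, ma" : "=r" (vl));

            write(1, "hi\n", 3);            /* any system call */

            /* v0-v31, vl and vtype must now be treated as dead, so
             * reconfigure before touching the vector unit again. */
            asm volatile ("vsetvli %0, x0, e8, m8, ta, ma" : "=r" (vl));
    }
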
-(http://lwn.net/articles/243460/) +(http://lwn.net/Articles/243460/) 特别不受欢迎的一种回归类型是用户空间ABI的任何变化。一旦接口被导出到用户空间, 就必须无限期地支持它。这一事实使得用户空间接口的创建特别具有挑战性:因为它们 diff --git a/Documentation/translations/zh_CN/process/7.AdvancedTopics.rst b/Documentation/translations/zh_CN/process/7.AdvancedTopics.rst index 6d0dadae13b1..57beca02181c 100644 --- a/Documentation/translations/zh_CN/process/7.AdvancedTopics.rst +++ b/Documentation/translations/zh_CN/process/7.AdvancedTopics.rst @@ -98,7 +98,7 @@ Git提供了一些强大的工具,可以让您重写开发历史。一个不� 你可以给我发补丁,但当我从你那里拉取一个Git补丁时,我需要知道你清楚 自己在做什么,我需要能够相信事情而 *无需* 手动检查每个单独的更改。 -(http://lwn.net/articles/224135/)。 +(http://lwn.net/Articles/224135/)。 为了避免这种情况,请确保给定分支中的所有补丁都与相关主题紧密相关;“驱动程序 修复”分支不应更改核心内存管理代码。而且,最重要的是,不要使用Git树来绕过 diff --git a/Documentation/translations/zh_TW/process/2.Process.rst b/Documentation/translations/zh_TW/process/2.Process.rst index b01cdd3a39ae..9d465df1f6c3 100644 --- a/Documentation/translations/zh_TW/process/2.Process.rst +++ b/Documentation/translations/zh_TW/process/2.Process.rst @@ -361,7 +361,7 @@ Andrew Morton 爲有抱負的內核開發人員提供了如下建議 機器上始終完美運行」。通常的方法是和其他人一起解決問題(這可能需 要堅持!),但就是如此——這是內核開發的一部分。 -(http://lwn.net/articles/283982/) +(http://lwn.net/Articles/283982/) 在沒有明顯問題需要解決的情況下,通常建議開發人員查看當前的回歸和開放缺陷 列表。從來都不缺少需要解決的問題;通過解決這些問題,開發人員將從該過程獲得 diff --git a/Documentation/translations/zh_TW/process/3.Early-stage.rst b/Documentation/translations/zh_TW/process/3.Early-stage.rst index ab2a45fd65a4..076873ca0905 100644 --- a/Documentation/translations/zh_TW/process/3.Early-stage.rst +++ b/Documentation/translations/zh_TW/process/3.Early-stage.rst @@ -47,7 +47,7 @@ 試圖向這些人傳達用戶需求是浪費時間。他們太「聰明」了,根本聽不到少數 人的話。 -(http://lwn.net/articles/131776/) +(http://lwn.net/Articles/131776/) 實際情況卻是不同的;與特定模塊相比,內核開發人員更關心系統穩定性、長期維護 以及找到問題的正確解決方案。這個故事的寓意是把重點放在問題上——而不是具體的 diff --git a/Documentation/translations/zh_TW/process/4.Coding.rst b/Documentation/translations/zh_TW/process/4.Coding.rst index ccc3946227a0..7fc0344ed16b 100644 --- a/Documentation/translations/zh_TW/process/4.Coding.rst +++ b/Documentation/translations/zh_TW/process/4.Coding.rst @@ -152,7 +152,7 @@ Linus對這個問題給出了最佳答案: 所以我們不會通過引入新問題來修復錯誤。這種方式是靠不住的,沒人知道 是否真的有進展。是前進兩步、後退一步,還是前進一步、後退兩步? -(http://lwn.net/articles/243460/) +(http://lwn.net/Articles/243460/) 特別不受歡迎的一種回歸類型是用戶空間ABI的任何變化。一旦接口被導出到用戶空間, 就必須無限期地支持它。這一事實使得用戶空間接口的創建特別具有挑戰性:因爲它們 diff --git a/Documentation/translations/zh_TW/process/7.AdvancedTopics.rst b/Documentation/translations/zh_TW/process/7.AdvancedTopics.rst index 3de093d0f170..4fbc104a37ca 100644 --- a/Documentation/translations/zh_TW/process/7.AdvancedTopics.rst +++ b/Documentation/translations/zh_TW/process/7.AdvancedTopics.rst @@ -101,7 +101,7 @@ Git提供了一些強大的工具,可以讓您重寫開發歷史。一個不� 你可以給我發補丁,但當我從你那裡拉取一個Git補丁時,我需要知道你清楚 自己在做什麼,我需要能夠相信事情而 *無需* 手動檢查每個單獨的更改。 -(http://lwn.net/articles/224135/)。 +(http://lwn.net/Articles/224135/)。 爲了避免這種情況,請確保給定分支中的所有補丁都與相關主題緊密相關;「驅動程序 修復」分支不應更改核心內存管理代碼。而且,最重要的是,不要使用Git樹來繞過 diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index 487b6328b3e7..995780088eb2 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -57,7 +57,7 @@ information, see the SEV Key Management spec [api-spec]_ The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP. 
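As the paragraph continues below, invoking this ioctl with a NULL argument probes whether SEV is enabled. A minimal user-space sketch of that probe; vm_fd is assumed to be an open KVM VM file descriptor:

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Returns 1 if SEV is enabled, 0 if disabled, negative on error. */
    static int sev_enabled(int vm_fd)
    {
            if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, NULL) == 0)
                    return 1;
            if (errno == ENOTTY)
                    return 0;
            /* older kernels run the ioctl anyway and fail with EFAULT */
            return errno == EFAULT ? 0 : -1;
    }
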
If the argument to KVM_MEMORY_ENCRYPT_OP is NULL, the ioctl returns 0 if SEV is enabled -and ``ENOTTY` if it is disabled (on some older versions of Linux, +and ``ENOTTY`` if it is disabled (on some older versions of Linux, the ioctl runs normally even with a NULL argument, and therefore will likely return ``EFAULT``). If non-NULL, the argument to KVM_MEMORY_ENCRYPT_OP must be a struct kvm_sev_cmd:: diff --git a/MAINTAINERS b/MAINTAINERS index b3648da1c5b2..64f10559d3b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1,81 +1,5 @@ -List of maintainers and how to submit kernel changes -==================================================== - -Please try to follow the guidelines below. This will make things -easier on the maintainers. Not all of these guidelines matter for every -trivial patch so apply some common sense. - -Tips for patch submitters -------------------------- - -1. Always *test* your changes, however small, on at least 4 or - 5 people, preferably many more. - -2. Try to release a few ALPHA test versions to the net. Announce - them onto the kernel channel and await results. This is especially - important for device drivers, because often that's the only way - you will find things like the fact version 3 firmware needs - a magic fix you didn't know about, or some clown changed the - chips on a board and not its name. (Don't laugh! Look at the - SMC etherpower for that.) - -3. Make sure your changes compile correctly in multiple - configurations. In particular check that changes work both as a - module and built into the kernel. - -4. When you are happy with a change make it generally available for - testing and await feedback. - -5. Make a patch available to the relevant maintainer in the list. Use - ``diff -u`` to make the patch easy to merge. Be prepared to get your - changes sent back with seemingly silly requests about formatting - and variable names. These aren't as silly as they seem. One - job the maintainers (and especially Linus) do is to keep things - looking the same. Sometimes this means that the clever hack in - your driver to get around a problem actually needs to become a - generalized kernel feature ready for next time. - - PLEASE check your patch with the automated style checker - (scripts/checkpatch.pl) to catch trivial style violations. - See Documentation/process/coding-style.rst for guidance here. - - PLEASE CC: the maintainers and mailing lists that are generated - by ``scripts/get_maintainer.pl.`` The results returned by the - script will be best if you have git installed and are making - your changes in a branch derived from Linus' latest git tree. - See Documentation/process/submitting-patches.rst for details. - - PLEASE try to include any credit lines you want added with the - patch. It avoids people being missed off by mistake and makes - it easier to know who wants adding and who doesn't. - - PLEASE document known bugs. If it doesn't work for everything - or does something very odd once a month document it. - - PLEASE remember that submissions must be made under the terms - of the Linux Foundation certificate of contribution and should - include a Signed-off-by: line. The current version of this - "Developer's Certificate of Origin" (DCO) is listed in the file - Documentation/process/submitting-patches.rst. - -6. Make sure you have the right to send any changes you make. If you - do changes at work you may find your employer owns the patch - not you. - -7. 
When sending security related changes or reports to a maintainer - please Cc: [email protected], especially if the maintainer - does not respond. Please keep in mind that the security team is - a small set of people who can be efficient only when working on - verified bugs. Please only Cc: this list when you have identified - that the bug would present a short-term risk to other users if it - were publicly disclosed. For example, reports of address leaks do - not represent an immediate threat and are better handled publicly, - and ideally, should come with a patch proposal. Please do not send - automated reports to this list either. Such bugs will be handled - better and faster in the usual public places. See - Documentation/process/security-bugs.rst for details. - -8. Happy hacking. +List of maintainers +=================== Descriptions of section entries and preferred order --------------------------------------------------- @@ -3575,18 +3499,24 @@ M: Yury Norov <[email protected]> R: Andy Shevchenko <[email protected]> R: Rasmus Villemoes <[email protected]> S: Maintained +F: include/linux/bitfield.h F: include/linux/bitmap.h +F: include/linux/bits.h F: include/linux/cpumask.h F: include/linux/find.h F: include/linux/nodemask.h +F: include/vdso/bits.h F: lib/bitmap.c F: lib/cpumask.c F: lib/cpumask_kunit.c F: lib/find_bit.c F: lib/find_bit_benchmark.c F: lib/test_bitmap.c +F: tools/include/linux/bitfield.h F: tools/include/linux/bitmap.h +F: tools/include/linux/bits.h F: tools/include/linux/find.h +F: tools/include/vdso/bits.h F: tools/lib/bitmap.c F: tools/lib/find_bit.c diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 757d01a68ffd..5ff1942b04fc 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -213,9 +213,9 @@ int main(void) DEFINE(FGRET_REGS_X7, offsetof(struct fgraph_ret_regs, regs[7])); DEFINE(FGRET_REGS_FP, offsetof(struct fgraph_ret_regs, fp)); DEFINE(FGRET_REGS_SIZE, sizeof(struct fgraph_ret_regs)); +#endif #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); #endif -#endif return 0; } diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 6aaf8dc60610..2a54fadbeaf5 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -240,7 +240,7 @@ config PPC_EARLY_DEBUG_40x config PPC_EARLY_DEBUG_CPM bool "Early serial debugging for Freescale CPM-based serial ports" - depends on SERIAL_CPM + depends on SERIAL_CPM=y help Select this to enable early debugging for Freescale chips using a CPM-based serial port. This assumes that the bootwrapper diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts index 6612160c19d5..dff1ea074d9d 100644 --- a/arch/powerpc/boot/dts/turris1x.dts +++ b/arch/powerpc/boot/dts/turris1x.dts @@ -476,12 +476,12 @@ * channel 1 (but only USB 2.0 subset) to USB 2.0 pins on mPCIe * slot 1 (CN5), channels 2 and 3 to connector P600. * - * P2020 PCIe Root Port uses 1MB of PCIe MEM and xHCI controller + * P2020 PCIe Root Port does not use PCIe MEM and xHCI controller * uses 64kB + 8kB of PCIe MEM. No PCIe IO is used or required. - * So allocate 2MB of PCIe MEM for this PCIe bus. + * So allocate 128kB of PCIe MEM for this PCIe bus. 
*/ reg = <0 0xffe08000 0 0x1000>; - ranges = <0x02000000 0x0 0xc0000000 0 0xc0000000 0x0 0x00200000>, /* MEM */ + ranges = <0x02000000 0x0 0xc0000000 0 0xc0000000 0x0 0x00020000>, /* MEM */ <0x01000000 0x0 0x00000000 0 0xffc20000 0x0 0x00010000>; /* IO */ pcie@0 { diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index cd632ba9ebff..0161226d8fec 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -24,6 +24,7 @@ #include <linux/stringify.h> #include <asm/machdep.h> +#include <asm/nmi.h> #include <asm/rtas.h> #include "pseries.h" #include "vas.h" /* vas_migration_handler() */ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b49793cf34eb..4c07b9189c86 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -100,11 +100,18 @@ config RISCV select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU + select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION select HAVE_EBPF_JIT if MMU select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION @@ -114,7 +121,8 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL - select HAVE_RETHOOK if !XIP_KERNEL + # https://github.com/ClangBuiltLinux/linux/issues/1881 + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI @@ -123,6 +131,7 @@ config RISCV select HAVE_PERF_USER_STACK_DUMP select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RETHOOK if !XIP_KERNEL select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS @@ -148,12 +157,6 @@ config RISCV select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU select ZONE_DMA32 if 64BIT - select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) - select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION config CLANG_SUPPORTS_DYNAMIC_FTRACE def_bool CC_IS_CLANG @@ -872,6 +875,9 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_HIBERNATION_HEADER def_bool HIBERNATION +config ARCH_SUSPEND_POSSIBLE + def_bool y + endmenu # "Power management options" menu "CPU Power Management" diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index c259dc925ec1..be84b14f0118 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -45,8 +45,11 @@ static bool errata_probe_cmo(unsigned int stage, if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return false; - riscv_cbom_block_size = L1_CACHE_BYTES; - riscv_noncoherent_supported(); + if (stage == RISCV_ALTERNATIVES_BOOT) { + 
riscv_cbom_block_size = L1_CACHE_BYTES; + riscv_noncoherent_supported(); + } + return true; } diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 04c0b07bf6cd..3d78930cab51 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -33,6 +33,11 @@ static inline void __riscv_v_vstate_clean(struct pt_regs *regs) regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN; } +static inline void __riscv_v_vstate_dirty(struct pt_regs *regs) +{ + regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY; +} + static inline void riscv_v_vstate_off(struct pt_regs *regs) { regs->status = (regs->status & ~SR_VS) | SR_VS_OFF; @@ -128,6 +133,34 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ riscv_v_disable(); } +static inline void __riscv_v_vstate_discard(void) +{ + unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1); + + riscv_v_enable(); + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vmv.v.i v0, -1\n\t" + "vmv.v.i v8, -1\n\t" + "vmv.v.i v16, -1\n\t" + "vmv.v.i v24, -1\n\t" + "vsetvl %0, x0, %1\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (vtype_inval) : "memory"); + riscv_v_disable(); +} + +static inline void riscv_v_vstate_discard(struct pt_regs *regs) +{ + if ((regs->status & SR_VS) == SR_VS_OFF) + return; + + __riscv_v_vstate_discard(); + __riscv_v_vstate_dirty(regs); +} + static inline void riscv_v_vstate_save(struct task_struct *task, struct pt_regs *regs) { @@ -173,6 +206,7 @@ static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return fals static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #define riscv_v_vsize (0) +#define riscv_v_vstate_discard(regs) do {} while (0) #define riscv_v_vstate_save(task, regs) do {} while (0) #define riscv_v_vstate_restore(task, regs) do {} while (0) #define __switch_to_vector(__prev, __next) do {} while (0) diff --git a/arch/riscv/include/uapi/asm/sigcontext.h b/arch/riscv/include/uapi/asm/sigcontext.h index 8b8a8541673a..8c8712aa9551 100644 --- a/arch/riscv/include/uapi/asm/sigcontext.h +++ b/arch/riscv/include/uapi/asm/sigcontext.h @@ -15,6 +15,8 @@ /* The size of END signal context header. 
*/ #define END_HDR_SIZE 0x0 +#ifndef __ASSEMBLY__ + struct __sc_riscv_v_state { struct __riscv_v_ext_state v_state; } __attribute__((aligned(16))); @@ -33,4 +35,6 @@ struct sigcontext { }; }; +#endif /*!__ASSEMBLY__*/ + #endif /* _UAPI_ASM_RISCV_SIGCONTEXT_H */ diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 23e533766a49..85bbce0f758c 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -58,7 +58,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid) if (cpuid_to_hartid_map(i) == hartid) return i; - pr_err("Couldn't find cpu id for hartid [%lu]\n", hartid); return -ENOENT; } diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index bb0b76e1a6d4..f4d6acb38dd0 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -238,10 +238,11 @@ asmlinkage __visible void smp_callin(void) mmgrab(mm); current->active_mm = mm; - riscv_ipi_enable(); - store_cpu_topology(curr_cpuid); notify_cpu_starting(curr_cpuid); + + riscv_ipi_enable(); + numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); probe_vendor_features(curr_cpuid); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 5158961ea977..f910dfccbf5d 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -150,12 +150,18 @@ DO_ERROR_INFO(do_trap_insn_fault, asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *regs) { + bool handled; + if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); local_irq_enable(); - if (!riscv_v_first_use_handler(regs)) + handled = riscv_v_first_use_handler(regs); + + local_irq_disable(); + + if (!handled) do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc, "Oops - illegal instruction"); @@ -296,6 +302,8 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + riscv_v_vstate_discard(regs); + syscall = syscall_enter_from_user_mode(regs, syscall); if (syscall < NR_syscalls) diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 9a68e7eaae4d..2cf76218a5bd 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -15,6 +15,7 @@ #include <asm/vdso.h> #include <linux/time_namespace.h> #include <vdso/datapage.h> +#include <vdso/vsyscall.h> enum vvar_pages { VVAR_DATA_PAGE_OFFSET, diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index f9c8e19ab301..8d92fb6c522c 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -167,6 +167,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) return true; } riscv_v_vstate_on(regs); + riscv_v_vstate_restore(current, regs); return true; } diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index eab9edc3b631..50767647fbc6 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -98,12 +98,6 @@ SECTIONS __soc_builtin_dtb_table_end = .; } - . = ALIGN(8); - .alternative : { - __alt_start = .; - *(.alternative) - __alt_end = .; - } __init_end = .; . = ALIGN(16); diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index e5f9f4677bbf..492dd4b8f3d6 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -85,11 +85,11 @@ SECTIONS INIT_DATA_SECTION(16) .init.pi : { - *(.init.pi*) + KEEP(*(.init.pi*)) } .init.bss : { - *(.init.bss) /* from the EFI stub */ + KEEP(*(.init.bss*)) /* from the EFI stub */ } .exit.data : { @@ -112,7 +112,7 @@ SECTIONS . 
= ALIGN(8); .alternative : { __alt_start = .; - *(.alternative) + KEEP(*(.alternative)) __alt_end = .; } __init_end = .; diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index fca532ddf3ec..fbc59b3f69f2 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -104,9 +104,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size); unsigned int riscv_cboz_block_size; EXPORT_SYMBOL_GPL(riscv_cboz_block_size); -static void cbo_get_block_size(struct device_node *node, - const char *name, u32 *block_size, - unsigned long *first_hartid) +static void __init cbo_get_block_size(struct device_node *node, + const char *name, u32 *block_size, + unsigned long *first_hartid) { unsigned long hartid; u32 val; @@ -126,7 +126,7 @@ static void cbo_get_block_size(struct device_node *node, } } -void riscv_init_cbo_blocksizes(void) +void __init riscv_init_cbo_blocksizes(void) { unsigned long cbom_hartid, cboz_hartid; u32 cbom_block_size = 0, cboz_block_size = 0; diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index d919efab6eba..d51a75864e53 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -10,7 +10,7 @@ #include <linux/mm.h> #include <asm/cacheflush.h> -static bool noncoherent_supported; +static bool noncoherent_supported __ro_after_init; void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 542883b3b49b..96225a8533ad 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -73,7 +73,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, } out: - WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); + if (pte) { + pte_t pteval = ptep_get_lockless(pte); + + WARN_ON_ONCE(pte_present(pteval) && !pte_huge(pteval)); + } return pte; } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 4b95d8999120..70fb31960b63 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -267,7 +267,6 @@ static void __init setup_bootmem(void) dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); - memblock_allow_resize(); } #ifdef CONFIG_MMU @@ -1370,6 +1369,9 @@ void __init paging_init(void) { setup_bootmem(); setup_vm_final(); + + /* Depend on that Linear Mapping is ready */ + memblock_allow_resize(); } void __init misc_mem_init(void) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ed646c583e4f..5ed242897b0d 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -27,6 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbac KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector +KBUILD_CFLAGS_DECOMPRESSOR += -fPIE KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index b07b0610950e..bbefe5e86bdf 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -26,10 +26,10 @@ #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/workqueue.h> +#include <linux/uaccess.h> +#include <linux/io.h> #include <asm/appldata.h> #include <asm/vtimer.h> -#include <linux/uaccess.h> 
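Two of the RISC-V hunks above apply standard lifetime annotations: the cbo block-size parsing becomes __init so the code is discarded once boot finishes, and noncoherent_supported becomes __ro_after_init so the flag is sealed read-only after init. A generic sketch of the pattern, with illustrative names:

    #include <linux/init.h>
    #include <linux/cache.h>

    /* written once during boot, mapped read-only afterwards */
    static bool feature_supported __ro_after_init;

    /* placed in .init.text and freed together with it after boot */
    static int __init feature_detect(void)
    {
            feature_supported = true;       /* result of a boot-time probe */
            return 0;
    }
    early_initcall(feature_detect);
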
-#include <asm/io.h> #include <asm/smp.h> #include "appldata.h" diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 21c3147bd92a..fc608f9b79ab 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -15,7 +15,7 @@ #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/slab.h> -#include <asm/io.h> +#include <linux/io.h> #include "appldata.h" diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 3f79b9efb803..637c29c3f6e3 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -67,7 +67,7 @@ ipl_start: jz .Lagain1 # skip dataset header larl %r13,.L_eof clc 0(3,%r4),0(%r13) # if it is EOFx - jz .Lagain1 # skip dateset trailer + jz .Lagain1 # skip data set trailer lgr %r5,%r2 la %r6,COMMAND_LINE-PARMAREA(%r12) lgr %r7,%r2 @@ -185,19 +185,19 @@ ipl_start: larl %r13,.Lcrash lpsw 0(%r13) - .align 8 + .balign 8 .Lwaitpsw: .quad 0x0202000180000000,.Lioint .Lnewpswmask: .quad 0x0000000180000000 - .align 8 + .balign 8 .Lorb: .long 0x00000000,0x0080ff00,.Lccws .Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 - .align 8 + .balign 8 .Lcr6: .quad 0x00000000ff000000 - .align 8 + .balign 8 .Lcrash:.long 0x000a0000,0x00000000 - .align 8 + .balign 8 .Lccws: .rept 19 .long 0x02600050,0x00000000 .endr @@ -207,7 +207,7 @@ ipl_start: .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" .L_eof: .long 0xc5d6c600 /* C'EOF' */ .L_hdr: .long 0xc8c4d900 /* C'HDR' */ - .align 8 + .balign 8 .Lcpuid:.fill 8,1,0 # @@ -265,7 +265,7 @@ SYM_CODE_START_LOCAL(startup_normal) brasl %r14,startup_kernel SYM_CODE_END(startup_normal) - .align 8 + .balign 8 6: .long 0x7fffffff,0xffffffff .Lext_new_psw: .quad 0x0002000180000000,0x1b0 # disabled wait diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S index f015469e7db9..f7107c76258c 100644 --- a/arch/s390/boot/head_kdump.S +++ b/arch/s390/boot/head_kdump.S @@ -82,12 +82,12 @@ SYM_CODE_START_LOCAL(startup_kdump) # # Startup of kdump (relocated new kernel) # -.align 2 + .balign 2 startup_kdump_relocated: basr %r13,0 0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... SYM_CODE_END(startup_kdump) -.align 8 + .balign 8 .Lrestart_psw: .quad 0x0000000080000000,0x0000000000000000 + startup #else @@ -95,7 +95,7 @@ SYM_CODE_START_LOCAL(startup_kdump) larl %r13,startup_kdump_crash lpswe 0(%r13) SYM_CODE_END(startup_kdump) -.align 8 + .balign 8 startup_kdump_crash: .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash #endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S index 6ea17628ea10..34ee47926891 100644 --- a/arch/s390/crypto/crc32be-vx.S +++ b/arch/s390/crypto/crc32be-vx.S @@ -48,7 +48,7 @@ * * Note that the constant definitions below are extended in order to compute * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction. - * The righmost doubleword can be 0 to prevent contribution to the result or + * The rightmost doubleword can be 0 to prevent contribution to the result or * can be multiplied by 1 to perform an XOR without the need for a separate * VECTOR EXCLUSIVE OR instruction. * diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index d5d967166bac..40c2b82f083a 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -333,7 +333,7 @@ union ap_qact_ap_info { }; /** - * ap_qact(): Query AP combatibility type. + * ap_qact(): Query AP compatibility type. * @qid: The AP queue number * @apinfo: On input the info about the AP queue. 
On output the * alternate AP queue info provided by the qact function diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h index c5bd9f4437e5..f2240392c708 100644 --- a/arch/s390/include/asm/appldata.h +++ b/arch/s390/include/asm/appldata.h @@ -8,8 +8,8 @@ #ifndef _ASM_S390_APPLDATA_H #define _ASM_S390_APPLDATA_H +#include <linux/io.h> #include <asm/diag.h> -#include <asm/io.h> #define APPLDATA_START_INTERVAL_REC 0x80 #define APPLDATA_STOP_REC 0x81 diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 55a02a153dfc..e6532477f126 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -25,7 +25,7 @@ #define __EX_TABLE(_section, _fault, _target, _type) \ stringify_in_c(.section _section,"a";) \ - stringify_in_c(.align 4;) \ + stringify_in_c(.balign 4;) \ stringify_in_c(.long (_fault) - .;) \ stringify_in_c(.long (_target) - .;) \ stringify_in_c(.short (_type);) \ @@ -34,7 +34,7 @@ #define __EX_TABLE_UA(_section, _fault, _target, _type, _regerr, _regaddr, _len)\ stringify_in_c(.section _section,"a";) \ - stringify_in_c(.align 4;) \ + stringify_in_c(.balign 4;) \ stringify_in_c(.long (_fault) - .;) \ stringify_in_c(.long (_target) - .;) \ stringify_in_c(.short (_type);) \ diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h index dec1c4ce628c..c260adb25997 100644 --- a/arch/s390/include/asm/dma.h +++ b/arch/s390/include/asm/dma.h @@ -2,7 +2,7 @@ #ifndef _ASM_S390_DMA_H #define _ASM_S390_DMA_H -#include <asm/io.h> +#include <linux/io.h> /* * MAX_DMA_ADDRESS is ambiguous because on s390 its completely unrelated diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 8aa1f6530a3e..69ccc464a430 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -118,8 +118,8 @@ struct lowcore { __u64 avg_steal_timer; /* 0x0300 */ __u64 last_update_timer; /* 0x0308 */ __u64 last_update_clock; /* 0x0310 */ - __u64 int_clock; /* 0x0318*/ - __u64 mcck_clock; /* 0x0320 */ + __u64 int_clock; /* 0x0318 */ + __u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */ __u64 clock_comparator; /* 0x0328 */ __u64 boot_clock[2]; /* 0x0330 */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 8a2a3b5d1e29..a9c138fcd2ad 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -19,7 +19,7 @@ #define PAGE_SHIFT _PAGE_SHIFT #define PAGE_SIZE _PAGE_SIZE #define PAGE_MASK _PAGE_MASK -#define PAGE_DEFAULT_ACC 0 +#define PAGE_DEFAULT_ACC _AC(0, UL) /* storage-protection override */ #define PAGE_SPO_ACC 9 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) @@ -179,8 +179,6 @@ int arch_make_page_accessible(struct page *page); #define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE #endif -#endif /* !__ASSEMBLY__ */ - #define __PAGE_OFFSET 0x0UL #define PAGE_OFFSET 0x0UL @@ -204,6 +202,8 @@ int arch_make_page_accessible(struct page *page); #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC +#endif /* !__ASSEMBLY__ */ + #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index bfb8c3cb8aee..d28bf8fb2799 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -23,7 +23,31 @@ #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS) -#ifndef __ASSEMBLY__ +#define PSW32_MASK_PER _AC(0x40000000, UL) +#define PSW32_MASK_DAT _AC(0x04000000, UL) +#define PSW32_MASK_IO 
_AC(0x02000000, UL) +#define PSW32_MASK_EXT _AC(0x01000000, UL) +#define PSW32_MASK_KEY _AC(0x00F00000, UL) +#define PSW32_MASK_BASE _AC(0x00080000, UL) /* Always one */ +#define PSW32_MASK_MCHECK _AC(0x00040000, UL) +#define PSW32_MASK_WAIT _AC(0x00020000, UL) +#define PSW32_MASK_PSTATE _AC(0x00010000, UL) +#define PSW32_MASK_ASC _AC(0x0000C000, UL) +#define PSW32_MASK_CC _AC(0x00003000, UL) +#define PSW32_MASK_PM _AC(0x00000f00, UL) +#define PSW32_MASK_RI _AC(0x00000080, UL) + +#define PSW32_ADDR_AMODE _AC(0x80000000, UL) +#define PSW32_ADDR_INSN _AC(0x7FFFFFFF, UL) + +#define PSW32_DEFAULT_KEY ((PAGE_DEFAULT_ACC) << 20) + +#define PSW32_ASC_PRIMARY _AC(0x00000000, UL) +#define PSW32_ASC_ACCREG _AC(0x00004000, UL) +#define PSW32_ASC_SECONDARY _AC(0x00008000, UL) +#define PSW32_ASC_HOME _AC(0x0000C000, UL) + +#define PSW_DEFAULT_KEY ((PAGE_DEFAULT_ACC) << 52) #define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \ PSW_MASK_EA | PSW_MASK_BA | PSW_MASK_DAT) @@ -31,6 +55,8 @@ PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \ PSW_MASK_PSTATE | PSW_ASC_PRIMARY) +#ifndef __ASSEMBLY__ + struct psw_bits { unsigned long : 1; unsigned long per : 1; /* PER-Mask */ @@ -71,30 +97,6 @@ enum { &(*(struct psw_bits *)(&(__psw))); \ })) -#define PSW32_MASK_PER 0x40000000UL -#define PSW32_MASK_DAT 0x04000000UL -#define PSW32_MASK_IO 0x02000000UL -#define PSW32_MASK_EXT 0x01000000UL -#define PSW32_MASK_KEY 0x00F00000UL -#define PSW32_MASK_BASE 0x00080000UL /* Always one */ -#define PSW32_MASK_MCHECK 0x00040000UL -#define PSW32_MASK_WAIT 0x00020000UL -#define PSW32_MASK_PSTATE 0x00010000UL -#define PSW32_MASK_ASC 0x0000C000UL -#define PSW32_MASK_CC 0x00003000UL -#define PSW32_MASK_PM 0x00000f00UL -#define PSW32_MASK_RI 0x00000080UL - -#define PSW32_ADDR_AMODE 0x80000000UL -#define PSW32_ADDR_INSN 0x7FFFFFFFUL - -#define PSW32_DEFAULT_KEY (((u32)PAGE_DEFAULT_ACC) << 20) - -#define PSW32_ASC_PRIMARY 0x00000000UL -#define PSW32_ASC_ACCREG 0x00004000UL -#define PSW32_ASC_SECONDARY 0x00008000UL -#define PSW32_ASC_HOME 0x0000C000UL - typedef struct { unsigned int mask; unsigned int addr; diff --git a/arch/s390/include/uapi/asm/cmb.h b/arch/s390/include/uapi/asm/cmb.h index ecbe94941403..115434ab98fb 100644 --- a/arch/s390/include/uapi/asm/cmb.h +++ b/arch/s390/include/uapi/asm/cmb.h @@ -31,7 +31,7 @@ struct cmbdata { __u64 size; __u64 elapsed_time; - /* basic and exended format: */ + /* basic and extended format: */ __u64 ssch_rsch_count; __u64 sample_count; __u64 device_connect_time; diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 9c49c3d67cd5..b11d98800458 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -24,7 +24,7 @@ /* * struct dasd_information2_t * represents any data about the device, which is visible to userspace. - * including foramt and featueres. + * including format and featueres. 
*/ typedef struct dasd_information2_t { unsigned int devno; /* S/390 devno */ diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index f7bae1c63bd6..5faf0a1d2c16 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -353,7 +353,7 @@ struct pkey_kblob2pkey2 { * Is able to find out which type of secure key is given (CCA AES secure * key, CCA AES cipher key, CCA ECC private key, EP11 AES key, EP11 ECC private * key) and tries to find all matching crypto cards based on the MKVP and maybe - * other criterias (like CCA AES cipher keys need a CEX5C or higher, EP11 keys + * other criteria (like CCA AES cipher keys need a CEX5C or higher, EP11 keys * with BLOB_PKEY_EXTRACTABLE need a CEX7 and EP11 api version 4). The list of * APQNs is further filtered by the key's mkvp which needs to match to either * the current mkvp (CCA and EP11) or the alternate mkvp (old mkvp, CCA adapters @@ -370,7 +370,7 @@ struct pkey_kblob2pkey2 { * is empty (apqn_entries is 0) the apqn_entries field is updated to the number * of apqn targets found and the ioctl returns with 0. If apqn_entries is > 0 * but the number of apqn targets does not fit into the list, the apqn_targets - * field is updatedd with the number of reqired entries but there are no apqn + * field is updated with the number of required entries but there are no apqn * values stored in the list and the ioctl returns with ENOSPC. If no matching * APQN is found, the ioctl returns with 0 but the apqn_entries value is 0. */ @@ -408,7 +408,7 @@ struct pkey_apqns4key { * is empty (apqn_entries is 0) the apqn_entries field is updated to the number * of apqn targets found and the ioctl returns with 0. If apqn_entries is > 0 * but the number of apqn targets does not fit into the list, the apqn_targets - * field is updatedd with the number of reqired entries but there are no apqn + * field is updated with the number of required entries but there are no apqn * values stored in the list and the ioctl returns with ENOSPC. If no matching * APQN is found, the ioctl returns with 0 but the apqn_entries value is 0. 
*/ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index ad64d673b5e6..f0fe3bcc78a8 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -166,6 +166,64 @@ #endif /* __s390x__ */ +#ifndef __s390x__ + +#define PSW_MASK_PER _AC(0x40000000, UL) +#define PSW_MASK_DAT _AC(0x04000000, UL) +#define PSW_MASK_IO _AC(0x02000000, UL) +#define PSW_MASK_EXT _AC(0x01000000, UL) +#define PSW_MASK_KEY _AC(0x00F00000, UL) +#define PSW_MASK_BASE _AC(0x00080000, UL) /* always one */ +#define PSW_MASK_MCHECK _AC(0x00040000, UL) +#define PSW_MASK_WAIT _AC(0x00020000, UL) +#define PSW_MASK_PSTATE _AC(0x00010000, UL) +#define PSW_MASK_ASC _AC(0x0000C000, UL) +#define PSW_MASK_CC _AC(0x00003000, UL) +#define PSW_MASK_PM _AC(0x00000F00, UL) +#define PSW_MASK_RI _AC(0x00000000, UL) +#define PSW_MASK_EA _AC(0x00000000, UL) +#define PSW_MASK_BA _AC(0x00000000, UL) + +#define PSW_MASK_USER _AC(0x0000FF00, UL) + +#define PSW_ADDR_AMODE _AC(0x80000000, UL) +#define PSW_ADDR_INSN _AC(0x7FFFFFFF, UL) + +#define PSW_ASC_PRIMARY _AC(0x00000000, UL) +#define PSW_ASC_ACCREG _AC(0x00004000, UL) +#define PSW_ASC_SECONDARY _AC(0x00008000, UL) +#define PSW_ASC_HOME _AC(0x0000C000, UL) + +#else /* __s390x__ */ + +#define PSW_MASK_PER _AC(0x4000000000000000, UL) +#define PSW_MASK_DAT _AC(0x0400000000000000, UL) +#define PSW_MASK_IO _AC(0x0200000000000000, UL) +#define PSW_MASK_EXT _AC(0x0100000000000000, UL) +#define PSW_MASK_BASE _AC(0x0000000000000000, UL) +#define PSW_MASK_KEY _AC(0x00F0000000000000, UL) +#define PSW_MASK_MCHECK _AC(0x0004000000000000, UL) +#define PSW_MASK_WAIT _AC(0x0002000000000000, UL) +#define PSW_MASK_PSTATE _AC(0x0001000000000000, UL) +#define PSW_MASK_ASC _AC(0x0000C00000000000, UL) +#define PSW_MASK_CC _AC(0x0000300000000000, UL) +#define PSW_MASK_PM _AC(0x00000F0000000000, UL) +#define PSW_MASK_RI _AC(0x0000008000000000, UL) +#define PSW_MASK_EA _AC(0x0000000100000000, UL) +#define PSW_MASK_BA _AC(0x0000000080000000, UL) + +#define PSW_MASK_USER _AC(0x0000FF0180000000, UL) + +#define PSW_ADDR_AMODE _AC(0x0000000000000000, UL) +#define PSW_ADDR_INSN _AC(0xFFFFFFFFFFFFFFFF, UL) + +#define PSW_ASC_PRIMARY _AC(0x0000000000000000, UL) +#define PSW_ASC_ACCREG _AC(0x0000400000000000, UL) +#define PSW_ASC_SECONDARY _AC(0x0000800000000000, UL) +#define PSW_ASC_HOME _AC(0x0000C00000000000, UL) + +#endif /* __s390x__ */ + #define NUM_GPRS 16 #define NUM_FPRS 16 #define NUM_CRS 16 @@ -214,69 +272,6 @@ typedef struct { unsigned long addr; } __attribute__ ((aligned(8))) psw_t; -#ifndef __s390x__ - -#define PSW_MASK_PER 0x40000000UL -#define PSW_MASK_DAT 0x04000000UL -#define PSW_MASK_IO 0x02000000UL -#define PSW_MASK_EXT 0x01000000UL -#define PSW_MASK_KEY 0x00F00000UL -#define PSW_MASK_BASE 0x00080000UL /* always one */ -#define PSW_MASK_MCHECK 0x00040000UL -#define PSW_MASK_WAIT 0x00020000UL -#define PSW_MASK_PSTATE 0x00010000UL -#define PSW_MASK_ASC 0x0000C000UL -#define PSW_MASK_CC 0x00003000UL -#define PSW_MASK_PM 0x00000F00UL -#define PSW_MASK_RI 0x00000000UL -#define PSW_MASK_EA 0x00000000UL -#define PSW_MASK_BA 0x00000000UL - -#define PSW_MASK_USER 0x0000FF00UL - -#define PSW_ADDR_AMODE 0x80000000UL -#define PSW_ADDR_INSN 0x7FFFFFFFUL - -#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) - -#define PSW_ASC_PRIMARY 0x00000000UL -#define PSW_ASC_ACCREG 0x00004000UL -#define PSW_ASC_SECONDARY 0x00008000UL -#define PSW_ASC_HOME 0x0000C000UL - -#else /* __s390x__ */ - -#define PSW_MASK_PER 0x4000000000000000UL -#define 
PSW_MASK_DAT 0x0400000000000000UL -#define PSW_MASK_IO 0x0200000000000000UL -#define PSW_MASK_EXT 0x0100000000000000UL -#define PSW_MASK_BASE 0x0000000000000000UL -#define PSW_MASK_KEY 0x00F0000000000000UL -#define PSW_MASK_MCHECK 0x0004000000000000UL -#define PSW_MASK_WAIT 0x0002000000000000UL -#define PSW_MASK_PSTATE 0x0001000000000000UL -#define PSW_MASK_ASC 0x0000C00000000000UL -#define PSW_MASK_CC 0x0000300000000000UL -#define PSW_MASK_PM 0x00000F0000000000UL -#define PSW_MASK_RI 0x0000008000000000UL -#define PSW_MASK_EA 0x0000000100000000UL -#define PSW_MASK_BA 0x0000000080000000UL - -#define PSW_MASK_USER 0x0000FF0180000000UL - -#define PSW_ADDR_AMODE 0x0000000000000000UL -#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL - -#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) - -#define PSW_ASC_PRIMARY 0x0000000000000000UL -#define PSW_ASC_ACCREG 0x0000400000000000UL -#define PSW_ASC_SECONDARY 0x0000800000000000UL -#define PSW_ASC_HOME 0x0000C00000000000UL - -#endif /* __s390x__ */ - - /* * The s390_regs structure is used to define the elf_gregset_t. */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 3f8e760298c2..81cf72088041 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -122,7 +122,6 @@ int main(void) OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); - OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 72e106cfd8c7..b210a29d3ee9 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -16,10 +16,10 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/mm.h> +#include <linux/io.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/cpcmd.h> -#include <asm/io.h> static DEFINE_SPINLOCK(cpcmd_lock); static char cpcmd_buf[241]; diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 90bbb4ea1d08..89dc826a8d2e 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -24,8 +24,8 @@ #include <linux/kdebug.h> #include <linux/uaccess.h> #include <linux/atomic.h> +#include <linux/io.h> #include <asm/dis.h> -#include <asm/io.h> #include <asm/cpcmd.h> #include <asm/lowcore.h> #include <asm/debug.h> @@ -516,7 +516,7 @@ void show_code(struct pt_regs *regs) if (copy_from_regs(regs, code + end, (void *)addr, 2)) break; } - /* Code snapshot useable ? */ + /* Code snapshot usable ? 
*/ if ((regs->psw.addr & 1) || start >= end) { printk("%s Code: Bad PSW.\n", mode); return; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index e5b6c1369e8e..a660f4b6d654 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -136,7 +136,7 @@ _LPP_OFFSET = __LC_LPP clgfrl %r14,.Lrange_size\@ jhe \outside_label .section .rodata, "a" - .align 4 + .balign 4 .Lrange_size\@: .long \end - \start .previous @@ -488,7 +488,6 @@ SYM_FUNC_END(psw_idle) * Machine check handler routines */ SYM_CODE_START(mcck_int_handler) - stckf __LC_MCCK_CLOCK BPOFF la %r1,4095 # validate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer @@ -598,8 +597,9 @@ SYM_CODE_START(restart_int_handler) TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f lctlg %c0,%c15,__LC_CREGS_SAVE_AREA -0: larl %r15,stosm_tmp - stosm 0(%r15),0x04 # turn dat on, keep irqs off +0: larl %r15,daton_psw + lpswe 0(%r15) # turn dat on, keep irqs off +.Ldaton: lg %r15,__LC_RESTART_STACK xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) @@ -646,7 +646,11 @@ SYM_CODE_END(stack_overflow) .balign 4 SYM_DATA_LOCAL(stop_lock, .long 0) SYM_DATA_LOCAL(this_cpu, .short 0) -SYM_DATA_LOCAL(stosm_tmp, .byte 0) + .balign 8 +SYM_DATA_START_LOCAL(daton_psw) + .quad PSW_KERNEL_BITS + .quad .Ldaton +SYM_DATA_END(daton_psw) .section .rodata, "a" #define SYSCALL(esame,emu) .quad __s390x_ ## esame diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index df77ba102096..45413b04efc5 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -36,5 +36,5 @@ SYM_CODE_START(startup_continue) lpswe dw_psw-.(%r13) # load disabled wait psw SYM_CODE_END(startup_continue) - .align 16 + .balign 16 SYM_DATA_LOCAL(dw_psw, .quad 0x0002000180000000,0x0000000000000000) diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S index b6335296dcd8..0fe4d725e98b 100644 --- a/arch/s390/kernel/kprobes_insn_page.S +++ b/arch/s390/kernel/kprobes_insn_page.S @@ -13,7 +13,7 @@ * would be in the data section instead. */ .section .kprobes.text, "ax" - .align 4096 + .balign 4096 SYM_CODE_START(kprobes_insn_page) .rept 2048 .word 0x07fe diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 717bbcc056e5..d1b16d83e49a 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -14,7 +14,7 @@ static int __init nobp_setup_early(char *str) return rc; if (enabled && test_facility(82)) { /* - * The user explicitely requested nobp=1, enable it and + * The user explicitly requested nobp=1, enable it and * disable the expoline support. 
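A note on the uapi ptrace.h hunk above: wrapping the PSW constants in _AC() is what lets the same header be consumed from both C and assembler now that the definitions sit in the uapi block. A minimal sketch of the mechanism, mirroring what include/uapi/linux/const.h provides (reproduced here for illustration, not part of the patch):

	#ifdef __ASSEMBLY__
	#define _AC(X, Y)	X		/* assembler: drop the type suffix */
	#else
	#define __AC(X, Y)	(X##Y)
	#define _AC(X, Y)	__AC(X, Y)	/* C: paste the suffix, e.g. UL */
	#endif

	#define PSW_MASK_PER	_AC(0x40000000, UL)	/* 0x40000000UL in C, plain 0x40000000 in .S */
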
*/ __set_facility(82, alt_stfle_fac_list); diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 90679143534b..850c11ea631a 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -172,9 +172,9 @@ static void cpum_cf_free_root(void) cpu_cf_root.cfptr = NULL; irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); on_each_cpu(cpum_cf_reset_cpu, NULL, 1); - debug_sprintf_event(cf_dbg, 4, "%s2 root.refcnt %u cfptr %px\n", + debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n", __func__, refcount_read(&cpu_cf_root.refcnt), - cpu_cf_root.cfptr); + !cpu_cf_root.cfptr); } /* @@ -975,10 +975,6 @@ static int cfdiag_push_sample(struct perf_event *event, } overflow = perf_event_overflow(event, &data, ®s); - debug_sprintf_event(cf_dbg, 3, - "%s event %#llx sample_type %#llx raw %d ov %d\n", - __func__, event->hw.config, - event->attr.sample_type, raw.size, overflow); if (overflow) event->pmu->stop(event, 0); @@ -1105,10 +1101,6 @@ static int cpum_cf_online_cpu(unsigned int cpu) { int rc = 0; - debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d " - "opencnt %d\n", __func__, cpu, - refcount_read(&cpu_cf_root.refcnt), - refcount_read(&cfset_opencnt)); /* * Ignore notification for perf_event_open(). * Handle only /dev/hwctr device sessions. @@ -1127,9 +1119,6 @@ static int cfset_offline_cpu(unsigned int cpu); static int cpum_cf_offline_cpu(unsigned int cpu) { - debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d opencnt %d\n", - __func__, cpu, refcount_read(&cpu_cf_root.refcnt), - refcount_read(&cfset_opencnt)); /* * During task exit processing of grouped perf events triggered by CPU * hotplug processing, pmu_disable() is called as part of perf context @@ -1337,8 +1326,6 @@ static void cfset_ioctl_off(void *parm) cpuhw->state, S390_HWCTR_DEVICE, rc); if (!cpuhw->dev_state) cpuhw->flags &= ~PMU_F_IN_USE; - debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", - __func__, rc, cpuhw->state, cpuhw->dev_state); } /* Start counter sets on particular CPU */ @@ -1360,8 +1347,6 @@ static void cfset_ioctl_on(void *parm) else pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); - debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", - __func__, rc, cpuhw->state, cpuhw->dev_state); } static void cfset_release_cpu(void *p) @@ -1369,8 +1354,6 @@ static void cfset_release_cpu(void *p) struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int rc; - debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", - __func__, cpuhw->state, cpuhw->dev_state); cpuhw->dev_state = 0; rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ if (rc) @@ -1459,7 +1442,6 @@ static int cfset_all_start(struct cfset_request *req) if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); rc = -EIO; - debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__); } free_cpumask_var(mask); return rc; @@ -1516,8 +1498,6 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask) if (put_user(cpus, &ctrset_read->no_cpus)) rc = -EFAULT; out: - debug_sprintf_event(cf_dbg, 4, "%s rc %d copied %ld\n", __func__, rc, - uptr - (void __user *)ctrset_read->data); return rc; } @@ -1565,8 +1545,6 @@ static void cfset_cpu_read(void *parm) cpuhw->used += space; cpuhw->sets += 1; } - debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, - cpuhw->sets, cpuhw->used); } } @@ -1661,8 +1639,6 @@ static long 
cfset_ioctl_start(unsigned long arg, struct file *file) if (!ret) { cfset_session_add(preq); file->private_data = preq; - debug_sprintf_event(cf_dbg, 4, "%s set %#lx need %ld ret %d\n", - __func__, preq->ctrset, need, ret); } else { kfree(preq); } @@ -1761,8 +1737,6 @@ static int cfset_offline_cpu(unsigned int cpu) static void cfdiag_read(struct perf_event *event) { - debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__, - event->attr.config, local64_read(&event->count)); } static int get_authctrsets(void) @@ -1807,8 +1781,6 @@ static int cfdiag_event_init2(struct perf_event *event) if (!event->hw.config_base) err = -EINVAL; - debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n", - __func__, err, event->hw.config_base); return err; } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 8ecfbce4ac92..06efad5b4f93 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -22,7 +22,7 @@ #include <asm/irq.h> #include <asm/debug.h> #include <asm/timex.h> -#include <asm-generic/io.h> +#include <linux/io.h> /* Minimum number of sample-data-block-tables: * At least one table is required for the sampling buffer structure. @@ -43,7 +43,7 @@ #define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) static inline int require_table_link(const void *sdbt) { - return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; + return ((unsigned long)sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; } /* Minimum and maximum sampling buffer sizes: @@ -192,7 +192,7 @@ static void free_sampling_buffer(struct sf_buffer *sfb) if (is_link_entry(curr)) { curr = get_next_sdbt(curr); if (sdbt) - free_page((unsigned long) sdbt); + free_page((unsigned long)sdbt); /* If the origin is reached, sampling buffer is freed */ if (curr == sfb->sdbt) @@ -278,7 +278,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, for (i = 0; i < num_sdb; i++) { /* Allocate a new SDB-table if it is full. 
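The sampling-buffer helpers above rely on a fixed page layout: each sample-data-block table is one page of eight-byte slots whose final slot links to the next table, which is what require_table_link() detects. A self-contained sketch of that check, assuming the usual s390 4 KiB page and 511 data slots per table (the driver derives both from PAGE_SIZE):

	#define PAGE_SIZE		4096UL
	#define PAGE_MASK		(~(PAGE_SIZE - 1))
	#define CPUM_SF_SDB_PER_TABLE	511UL	/* 512 slots per page, last one is the link */
	#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)

	static inline int require_table_link(const void *sdbt)
	{
		/* true once the cursor sits on the final (link) slot of its page */
		return ((unsigned long)sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
	}
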
*/ if (require_table_link(tail)) { - new = (unsigned long *) get_zeroed_page(gfp_flags); + new = (unsigned long *)get_zeroed_page(gfp_flags); if (!new) { rc = -ENOMEM; break; @@ -304,7 +304,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, */ if (tail_prev) { sfb->num_sdbt--; - free_page((unsigned long) new); + free_page((unsigned long)new); tail = tail_prev; } break; @@ -343,7 +343,7 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) return -EINVAL; /* Allocate the sample-data-block-table origin */ - sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (!sfb->sdbt) return -ENOMEM; sfb->num_sdb = 0; @@ -594,11 +594,10 @@ static DEFINE_MUTEX(pmc_reserve_mutex); #define PMC_FAILURE 2 static void setup_pmc_cpu(void *flags) { - int err; struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf); + int err = 0; - err = 0; - switch (*((int *) flags)) { + switch (*((int *)flags)) { case PMC_INIT: memset(cpusf, 0, sizeof(*cpusf)); err = qsi(&cpusf->qsi); @@ -606,22 +605,18 @@ static void setup_pmc_cpu(void *flags) break; cpusf->flags |= PMU_F_RESERVED; err = sf_disable(); - if (err) - pr_err("Switching off the sampling facility failed " - "with rc %i\n", err); break; case PMC_RELEASE: cpusf->flags &= ~PMU_F_RESERVED; err = sf_disable(); - if (err) { - pr_err("Switching off the sampling facility failed " - "with rc %i\n", err); - } else + if (!err) deallocate_buffers(cpusf); break; } - if (err) - *((int *) flags) |= PMC_FAILURE; + if (err) { + *((int *)flags) |= PMC_FAILURE; + pr_err("Switching off the sampling facility failed with rc %i\n", err); + } } static void release_pmc_hardware(void) @@ -963,10 +958,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event) return -ENOENT; } - /* Check online status of the CPU to which the event is pinned */ - if (event->cpu >= 0 && !cpu_online(event->cpu)) - return -ENODEV; - /* Force reset of idle/hv excludes regardless of what the * user requested. 
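setup_pmc_cpu() above is broadcast to every CPU and reports failure by OR-ing PMC_FAILURE into the flags word that all invocations share, so the caller inspects a single value for the whole system. A condensed sketch of that calling convention, roughly the shape of the driver's reserve_pmc_hardware() (details simplified, concurrent stores to the shared flag are benign here):

	static int reserve_pmc_hardware(void)
	{
		int flags = PMC_INIT;

		on_each_cpu(setup_pmc_cpu, &flags, 1);	/* run on all CPUs, wait for completion */
		irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
		return flags & PMC_FAILURE ? -EBUSY : 0;
	}
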
*/ @@ -1026,8 +1017,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu) err = lsctl(&cpuhw->lsctl); if (err) { cpuhw->flags &= ~PMU_F_ENABLED; - pr_err("Loading sampling controls failed: op %i err %i\n", - 1, err); + pr_err("Loading sampling controls failed: op 1 err %i\n", err); return; } @@ -1061,8 +1051,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) err = lsctl(&inactive); if (err) { - pr_err("Loading sampling controls failed: op %i err %i\n", - 2, err); + pr_err("Loading sampling controls failed: op 2 err %i\n", err); return; } @@ -1221,7 +1210,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, te = trailer_entry_ptr((unsigned long)sdbt); sample = (struct hws_basic_entry *)sdbt; - while ((unsigned long *) sample < (unsigned long *) te) { + while ((unsigned long *)sample < (unsigned long *)te) { /* Check for an empty sample */ if (!sample->def || sample->LS) break; @@ -1298,7 +1287,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) if (SAMPL_DIAG_MODE(&event->hw)) return; - sdbt = (unsigned long *) TEAR_REG(hwc); + sdbt = (unsigned long *)TEAR_REG(hwc); done = event_overflow = sampl_overflow = num_sdb = 0; while (!done) { /* Get the trailer entry of the sample-data-block */ @@ -1670,9 +1659,6 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) pr_err("The AUX buffer with %lu pages for the " "diagnostic-sampling mode is full\n", num_sdb); - debug_sprintf_event(sfdbg, 1, - "%s: AUX buffer used up\n", - __func__); break; } if (WARN_ON_ONCE(!aux)) @@ -1804,7 +1790,7 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages, /* Allocate the first SDBT */ sfb->num_sdbt = 0; - sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (!sfb->sdbt) goto no_sdbt; aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt; @@ -1816,7 +1802,7 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages, */ for (i = 0; i < nr_pages; i++, tail++) { if (require_table_link(tail)) { - new = (unsigned long *) get_zeroed_page(GFP_KERNEL); + new = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (!new) goto no_sdbt; aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new; @@ -1865,7 +1851,7 @@ static void cpumsf_pmu_read(struct perf_event *event) /* Nothing to do ... updates are interrupt-driven */ } -/* Check if the new sampling period/freqeuncy is appropriate. +/* Check if the new sampling period/frequency is appropriate. * * Return non-zero on error and zero on passed checks. */ @@ -1973,8 +1959,8 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); if (!SAMPL_DIAG_MODE(&event->hw)) { cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt); - cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; - TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt; + cpuhw->lsctl.dear = *(unsigned long *)cpuhw->sfb.sdbt; + TEAR_REG(&event->hw) = (unsigned long)cpuhw->sfb.sdbt; } /* Ensure sampling functions are in the disabled state. If disabled, diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 3b4f384f77f7..c57c1a203256 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -84,7 +84,7 @@ static int paiext_root_alloc(void) /* The memory is already zeroed. */ paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); if (!paiext_root.mapptr) { - /* Returing without refcnt adjustment is ok. The + /* Returning without refcnt adjustment is ok. 
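The paiext_root_alloc() comment above describes a refcounted lazy-init root: the first event allocates the per-CPU pointer array, later events only take a reference, and a failed allocation leaves the refcount untouched for the caller to unwind. A condensed, hypothetical sketch of that shape (locking and the driver's exact structure elided):

	static struct {
		refcount_t refcnt;
		struct paiext_mapptr __percpu *mapptr;
	} root;

	static int root_get(void)
	{
		if (refcount_inc_not_zero(&root.refcnt))
			return 0;		/* root already live, reference taken */
		root.mapptr = alloc_percpu(struct paiext_mapptr);
		if (!root.mapptr)
			return -ENOMEM;		/* refcnt still zero, nothing to undo */
		refcount_set(&root.refcnt, 1);
		return 0;
	}
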
The * error code is handled by paiext_alloc() which * decrements refcnt when an event can not be * created. @@ -190,7 +190,7 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING; } else { - /* Multiple invocation, check whats active. + /* Multiple invocation, check what is active. * Supported are multiple counter events or only one sampling * event concurrently at any one time. */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 87ca3a727604..258000417724 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -30,8 +30,8 @@ #include <linux/export.h> #include <linux/init_task.h> #include <linux/entry-common.h> +#include <linux/io.h> #include <asm/cpu_mf.h> -#include <asm/io.h> #include <asm/processor.h> #include <asm/vtimer.h> #include <asm/exec.h> diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index fe10da1a271e..00d76448319d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -529,7 +529,7 @@ static void __init setup_resources(void) res->start = start; /* * In memblock, end points to the first byte after the - * range while in resourses, end points to the last byte in + * range while in resources, end points to the last byte in * the range. */ res->end = end - 1; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 726de4f4df01..f9a2b755f510 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -113,7 +113,7 @@ early_param("smt", early_smt); /* * The smp_cpu_state_mutex must be held when changing the state or polarization - * member of a pcpu data structure within the pcpu_devices arreay. + * member of a pcpu data structure within the pcpu_devices array. */ DEFINE_MUTEX(smp_cpu_state_mutex); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 276278199c44..d34d3548c046 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -702,7 +702,7 @@ static void stp_work_fn(struct work_struct *work) if (!check_sync_clock()) /* - * There is a usable clock but the synchonization failed. + * There is a usable clock but the synchronization failed. * Retry after a second. 
*/ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index bafd3147eb4e..23e868b79a6c 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -19,6 +19,7 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) KBUILD_AFLAGS_32 += -m31 -s KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \ @@ -40,8 +41,11 @@ KCSAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check) + $(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE - $(call if_changed,ld) + $(call if_changed,vdso_and_check) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index a766d286e15f..fc1c6ff8178f 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -24,6 +24,7 @@ KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) KBUILD_AFLAGS_64 += -m64 KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64)) KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \ --hash-style=both --build-id=sha1 -T @@ -44,9 +45,12 @@ KCSAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check) + # link rule for the .so file, .lds has to be first $(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE - $(call if_changed,ld) + $(call if_changed,vdso_and_check) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 3eb85f254881..6d6bc19b37dc 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -478,7 +478,7 @@ struct trans_exc_code_bits { }; enum { - FSI_UNKNOWN = 0, /* Unknown wether fetch or store */ + FSI_UNKNOWN = 0, /* Unknown whether fetch or store */ FSI_STORE = 1, /* Exception was due to store operation */ FSI_FETCH = 2 /* Exception was due to fetch operation */ }; @@ -625,7 +625,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * Returns: - zero on success; @gpa contains the resulting absolute address * - a negative value if guest access failed due to e.g. broken * guest mapping - * - a positve value if an access exception happened. In this case + * - a positive value if an access exception happened. 
In this case * the returned value is the program interruption code as defined * by the architecture */ diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 2cda8d9d7c6e..954d39adf85c 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -630,7 +630,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - /* process PER, also if the instrution is processed in user space */ + /* process PER, also if the instruction is processed in user space */ if (vcpu->arch.sie_block->icptstatus & 0x02 && (!rc || rc == -EOPNOTSUPP)) per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 670019696464..d1e768bcfe1d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4161,7 +4161,7 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) vcpu->run->s.regs.fpc = 0; /* * Do not reset these registers in the protected case, as some of - * them are overlayed and they are not accessible in this case + * them are overlaid and they are not accessible in this case * anyway. */ if (!kvm_s390_pv_cpu_is_protected(vcpu)) { diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 7dab00f1e833..ffa7739c7a28 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -427,7 +427,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) /* - * Register device with the specified KVM. If interpetation facilities are + * Register device with the specified KVM. If interpretation facilities are * available, enable them and let userspace indicate whether or not they will * be used (specify SHM bit to disable). */ diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 9f8a192bd750..dc4cfa8795c0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -13,7 +13,7 @@ #include <linux/errno.h> #include <linux/mm_types.h> #include <linux/pgtable.h> - +#include <linux/io.h> #include <asm/asm-offsets.h> #include <asm/facility.h> #include <asm/current.h> @@ -22,7 +22,6 @@ #include <asm/sysinfo.h> #include <asm/page-states.h> #include <asm/gmap.h> -#include <asm/io.h> #include <asm/ptrace.h> #include <asm/sclp.h> #include <asm/ap.h> diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 3ce5f4351156..2f34c7c3c5ab 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -273,7 +273,7 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc) uvcb.header.rc, uvcb.header.rrc); WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x", kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc); - /* Inteded memory leak on "impossible" error */ + /* Intended memory leak on "impossible" error */ if (!cc) kvm_s390_pv_dealloc_vm(kvm); return cc ? -EIO : 0; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index cb747bf6c798..d9696b530064 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -469,7 +469,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) * * This interception will occur at the source cpu when a source cpu sends an * external call to a target cpu and the target cpu has the WAIT bit set in - * its cpuflags. Interception will occurr after the interrupt indicator bits at + * its cpuflags. Interception will occur after the interrupt indicator bits at * the target cpu have been set. All error cases will lead to instruction * interception, therefore nothing is to be checked or prepared. 
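The guest-access return convention documented in the gaccess.c comment above is tri-state: negative values are host-side errors, zero is success, and positive values are architected program interruption codes. A hypothetical caller, to make the split concrete (the guest_translate_address() signature is sketched from context, and inject_prog_irq() is an illustrative name rather than the KVM helper):

	static int translate_and_check(struct kvm_vcpu *vcpu, unsigned long gva,
				       unsigned long *gpa)
	{
		int rc = guest_translate_address(vcpu, gva, 0, gpa, GACC_FETCH);

		if (rc < 0)
			return rc;			/* host error, e.g. broken mapping */
		if (rc > 0)
			return inject_prog_irq(vcpu, rc);	/* forward the PIC to the guest */
		return 0;				/* *gpa holds the absolute address */
	}
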
*/ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 0333ee482eb8..61499293c2ac 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -504,7 +504,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->mso = new_mso; scb_s->prefix = new_prefix; - /* We have to definetly flush the tlb if this scb never ran */ + /* We have to definitely flush the tlb if this scb never ran */ if (scb_s->ihcpu != 0xffffU) scb_s->ihcpu = scb_o->ihcpu; @@ -901,7 +901,7 @@ static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr, (vaddr & 0xfffffffffffff000UL) | /* 52-53: store / fetch */ (((unsigned int) !write_flag) + 1) << 10, - /* 62-63: asce id (alway primary == 0) */ + /* 62-63: asce id (always primary == 0) */ .exc_access_id = 0, /* always primary */ .op_access_id = 0, /* not MVPG */ }; diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 04d4c6cf898e..81c53440b3e6 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -13,8 +13,8 @@ #include <linux/init.h> #include <linux/smp.h> #include <linux/percpu.h> +#include <linux/io.h> #include <asm/alternative.h> -#include <asm/io.h> int spin_retry = -1; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index f4b6fc746fce..989ebd0912b4 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1740,7 +1740,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow); * The r2t parameter specifies the address of the source table. The * four pages of the source table are made read-only in the parent gmap * address space. A write to the source table area @r2t will automatically - * remove the shadow r2 table and all of its decendents. + * remove the shadow r2 table and all of its descendants. * * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the * shadow table structure is incomplete, -ENOMEM if out of memory and diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index d02a61620cfa..cbe1df1e9c18 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -13,9 +13,9 @@ #include <linux/gfp.h> #include <linux/cpu.h> #include <linux/uio.h> +#include <linux/io.h> #include <asm/asm-extable.h> #include <asm/ctl_reg.h> -#include <asm/io.h> #include <asm/abs_lowcore.h> #include <asm/stacktrace.h> #include <asm/maccess.h> diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b9dcb4ae6c59..b26649233d12 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -481,6 +481,7 @@ static int remove_pagetable(unsigned long start, unsigned long end, bool direct) */ static int vmem_add_range(unsigned long start, unsigned long size) { + start = (unsigned long)__va(start); return add_pagetable(start, start + size, true); } @@ -489,6 +490,7 @@ static int vmem_add_range(unsigned long start, unsigned long size) */ static void vmem_remove_range(unsigned long start, unsigned long size) { + start = (unsigned long)__va(start); remove_pagetable(start, start + size, true); } @@ -556,7 +558,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size) * to any physical address. If missing, allocate segment- and region- * table entries along. Meeting a large segment- or region-table entry * while traversing is an error, since the function is expected to be - * called against virtual regions reserverd for 4KB mappings only. + * called against virtual regions reserved for 4KB mappings only. 
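vmem_add_range()/vmem_remove_range() above now translate their physical start address into the kernel's direct mapping before walking page tables. A generic sketch of the idea with an assumed constant direct-map offset (s390's real __va()/__pa() handling is more involved than this):

	#define DIRECT_MAP_OFFSET	0UL	/* assumed here; arch-specific in reality */
	#define my_va(paddr)	((void *)((unsigned long)(paddr) + DIRECT_MAP_OFFSET))

	static int vmem_add_range(unsigned long start, unsigned long size)
	{
		start = (unsigned long)my_va(start);	/* phys -> direct-map virt */
		return add_pagetable(start, start + size, true);
	}
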
*/ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) { diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f95d7e401b96..5e9371fbf3d5 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -523,12 +523,12 @@ extern const char bpf_plt_end[]; #define BPF_PLT_SIZE 32 asm( ".pushsection .rodata\n" - " .align 8\n" + " .balign 8\n" "bpf_plt:\n" " lgrl %r0,bpf_plt_ret\n" " lgrl %r1,bpf_plt_target\n" " br %r1\n" - " .align 8\n" + " .balign 8\n" "bpf_plt_ret: .quad 0\n" "bpf_plt_target: .quad 0\n" "bpf_plt_end:\n" diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 4ab0cf829999..ff8f24854c64 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -163,7 +163,7 @@ static void zpci_handle_cpu_local_irq(bool rescan) if (!rescan || irqs_on++) /* End of second scan with interrupts on. */ break; - /* First scan complete, reenable interrupts. */ + /* First scan complete, re-enable interrupts. */ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib)) break; bit = 0; @@ -202,7 +202,7 @@ static void zpci_handle_fallback_irq(void) if (irqs_on++) /* End of second scan with interrupts on. */ break; - /* First scan complete, reenable interrupts. */ + /* First scan complete, re-enable interrupts. */ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib)) break; cpu = 0; @@ -247,7 +247,7 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, if (irqs_on++) /* End of second scan with interrupts on. */ break; - /* First scan complete, reenable interrupts. */ + /* First scan complete, re-enable interrupts. */ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib)) break; si = 0; diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index e5bd1a503528..0f93f2e72eba 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -100,7 +100,7 @@ SYM_CODE_START(purgatory_start) * checksum verification only (%r2 = 0 -> verification only). 
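The three zPCI handlers above share a two-pass scan: process summary bits with interrupt delivery disabled, re-enable delivery, then scan once more so bits raised in the meantime are not lost. A simplified, hypothetical rendering of the loop shape (reenable_delivery() and handle_summary() are illustrative names, the real code works on airq vectors):

	static void scan_summary_bits(unsigned long *bv, unsigned long len)
	{
		unsigned long bit = 0;
		int irqs_on = 0;

		for (;;) {
			bit = find_next_bit(bv, len, bit);
			if (bit >= len) {
				if (irqs_on++)
					break;		/* end of second scan, delivery on */
				reenable_delivery();	/* first scan complete */
				bit = 0;
				continue;
			}
			clear_bit(bit, bv);
			handle_summary(bit);
			bit++;
		}
	}
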
* * Check now and preserve over C function call by storing in - * %r10 whith + * %r10 with * 1 -> checksum verification only * 0 -> load new kernel */ diff --git a/arch/sparc/video/fbdev.c b/arch/sparc/video/fbdev.c index 25837f128132..bff66dd1909a 100644 --- a/arch/sparc/video/fbdev.c +++ b/arch/sparc/video/fbdev.c @@ -21,3 +21,6 @@ int fb_is_primary_device(struct fb_info *info) return 0; } EXPORT_SYMBOL(fb_is_primary_device); + +MODULE_DESCRIPTION("Sparc fbdev helpers"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index fe00a5783f53..48d15dd785f6 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -9,6 +9,7 @@ */ #include <linux/acpi.h> +#include <linux/bits.h> #include <linux/device.h> #include <linux/err.h> #include <linux/kernel.h> @@ -19,13 +20,16 @@ #include "internal.h" +/* Exclude devices that have no _CRS resources provided */ +#define ACPI_ALLOW_WO_RESOURCES BIT(0) + static const struct acpi_device_id forbidden_id_list[] = { {"ACPI0009", 0}, /* IOxAPIC */ {"ACPI000A", 0}, /* IOAPIC */ {"PNP0000", 0}, /* PIC */ {"PNP0100", 0}, /* Timer */ {"PNP0200", 0}, /* AT DMA Controller */ - {"SMB0001", 0}, /* ACPI SMBUS virtual device */ + {ACPI_SMBUS_MS_HID, ACPI_ALLOW_WO_RESOURCES}, /* ACPI SMBUS virtual device */ { } }; @@ -83,6 +87,15 @@ static void acpi_platform_fill_resource(struct acpi_device *adev, dest->parent = pci_find_resource(to_pci_dev(parent), dest); } +static unsigned int acpi_platform_resource_count(struct acpi_resource *ares, void *data) +{ + bool *has_resources = data; + + *has_resources = true; + + return AE_CTRL_TERMINATE; +} + /** * acpi_create_platform_device - Create platform device for ACPI device node * @adev: ACPI device node to create a platform device for. @@ -100,6 +113,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev, struct acpi_device *parent = acpi_dev_parent(adev); struct platform_device *pdev = NULL; struct platform_device_info pdevinfo; + const struct acpi_device_id *match; struct resource_entry *rentry; struct list_head resource_list; struct resource *resources = NULL; @@ -109,8 +123,19 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev, if (adev->physical_node_count) return NULL; - if (!acpi_match_device_ids(adev, forbidden_id_list)) - return ERR_PTR(-EINVAL); + match = acpi_match_acpi_device(forbidden_id_list, adev); + if (match) { + if (match->driver_data & ACPI_ALLOW_WO_RESOURCES) { + bool has_resources = false; + + acpi_walk_resources(adev->handle, METHOD_NAME__CRS, + acpi_platform_resource_count, &has_resources); + if (has_resources) + return ERR_PTR(-EINVAL); + } else { + return ERR_PTR(-EINVAL); + } + } INIT_LIST_HEAD(&resource_list); count = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index e3e0bd0c5a50..2fc2b43a4ed3 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -682,7 +682,7 @@ bool acpi_device_is_first_physical_node(struct acpi_device *adev, * resources available from it but they will be matched normally using functions * provided by their bus types (and analogously for their modalias). 
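The acpi_platform hunk above moves per-ID policy into the match table's driver_data field and reads it back through the new acpi_match_acpi_device() helper (introduced in the bus.c hunk below). A minimal sketch of the pattern, with an illustrative flag and table:

	#define MY_QUIRK	BIT(0)

	static const struct acpi_device_id quirk_ids[] = {
		{ "SMB0001", MY_QUIRK },
		{ }
	};

	static bool has_quirk(const struct acpi_device *adev)
	{
		const struct acpi_device_id *match;

		match = acpi_match_acpi_device(quirk_ids, adev);
		return match && (match->driver_data & MY_QUIRK);
	}
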
*/ -struct acpi_device *acpi_companion_match(const struct device *dev) +const struct acpi_device *acpi_companion_match(const struct device *dev) { struct acpi_device *adev; @@ -706,7 +706,7 @@ struct acpi_device *acpi_companion_match(const struct device *dev) * identifiers and a _DSD object with the "compatible" property, use that * property to match against the given list of identifiers. */ -static bool acpi_of_match_device(struct acpi_device *adev, +static bool acpi_of_match_device(const struct acpi_device *adev, const struct of_device_id *of_match_table, const struct of_device_id **of_id) { @@ -808,7 +808,7 @@ static bool __acpi_match_device_cls(const struct acpi_device_id *id, return true; } -static bool __acpi_match_device(struct acpi_device *device, +static bool __acpi_match_device(const struct acpi_device *device, const struct acpi_device_id *acpi_ids, const struct of_device_id *of_ids, const struct acpi_device_id **acpi_id, @@ -851,6 +851,26 @@ out_acpi_match: } /** + * acpi_match_acpi_device - Match an ACPI device against a given list of ACPI IDs + * @ids: Array of struct acpi_device_id objects to match against. + * @adev: The ACPI device pointer to match. + * + * Match the ACPI device @adev against a given list of ACPI IDs @ids. + * + * Return: + * a pointer to the first matching ACPI ID on success or %NULL on failure. + */ +const struct acpi_device_id *acpi_match_acpi_device(const struct acpi_device_id *ids, + const struct acpi_device *adev) +{ + const struct acpi_device_id *id = NULL; + + __acpi_match_device(adev, ids, NULL, &id, NULL); + return id; +} +EXPORT_SYMBOL_GPL(acpi_match_acpi_device); + +/** * acpi_match_device - Match a struct device against a given list of ACPI IDs * @ids: Array of struct acpi_device_id object to match against. * @dev: The device structure to match. 
@@ -864,10 +884,7 @@ out_acpi_match: const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const struct device *dev) { - const struct acpi_device_id *id = NULL; - - __acpi_match_device(acpi_companion_match(dev), ids, NULL, &id, NULL); - return id; + return acpi_match_acpi_device(ids, acpi_companion_match(dev)); } EXPORT_SYMBOL_GPL(acpi_match_device); diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 0fbfbaa8d8e3..b9bbf0746199 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -283,7 +283,7 @@ int acpi_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env } EXPORT_SYMBOL_GPL(acpi_device_uevent_modalias); -static int __acpi_device_modalias(struct acpi_device *adev, char *buf, int size) +static int __acpi_device_modalias(const struct acpi_device *adev, char *buf, int size) { int len, count; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 06ad497067ac..f4148dc50b9c 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -11,6 +11,8 @@ #include <linux/idr.h> +extern struct acpi_device *acpi_root; + int early_acpi_osi_init(void); int acpi_osi_init(void); acpi_status acpi_os_initialize1(void); @@ -119,7 +121,7 @@ int acpi_bus_register_early_device(int type); /* -------------------------------------------------------------------------- Device Matching and Notification -------------------------------------------------------------------------- */ -struct acpi_device *acpi_companion_match(const struct device *dev); +const struct acpi_device *acpi_companion_match(const struct device *dev); int __acpi_device_uevent_modalias(const struct acpi_device *adev, struct kobj_uevent_env *env); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 1c3e1e2bb0b5..5b145f1aaa1b 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -23,8 +23,7 @@ #include <linux/dma-direct.h> #include "internal.h" - -extern struct acpi_device *acpi_root; +#include "sleep.h" #define ACPI_BUS_CLASS "system_bus" #define ACPI_BUS_HID "LNXSYBUS" @@ -930,26 +929,29 @@ static int acpi_bus_extract_wakeup_device_power_package(struct acpi_device *dev) return err; } +/* Do not use a button for S5 wakeup */ +#define ACPI_AVOID_WAKE_FROM_S5 BIT(0) + static bool acpi_wakeup_gpe_init(struct acpi_device *device) { static const struct acpi_device_id button_device_ids[] = { - {"PNP0C0C", 0}, /* Power button */ - {"PNP0C0D", 0}, /* Lid */ - {"PNP0C0E", 0}, /* Sleep button */ + {"PNP0C0C", 0}, /* Power button */ + {"PNP0C0D", ACPI_AVOID_WAKE_FROM_S5}, /* Lid */ + {"PNP0C0E", ACPI_AVOID_WAKE_FROM_S5}, /* Sleep button */ {"", 0}, }; struct acpi_device_wakeup *wakeup = &device->wakeup; + const struct acpi_device_id *match; acpi_status status; wakeup->flags.notifier_present = 0; /* Power button, Lid switch always enable wakeup */ - if (!acpi_match_device_ids(device, button_device_ids)) { - if (!acpi_match_device_ids(device, &button_device_ids[1])) { - /* Do not use Lid/sleep button for S5 wakeup */ - if (wakeup->sleep_state == ACPI_STATE_S5) - wakeup->sleep_state = ACPI_STATE_S4; - } + match = acpi_match_acpi_device(button_device_ids, device); + if (match) { + if ((match->driver_data & ACPI_AVOID_WAKE_FROM_S5) && + wakeup->sleep_state == ACPI_STATE_S5) + wakeup->sleep_state = ACPI_STATE_S4; acpi_mark_gpe_for_wake(wakeup->gpe_device, wakeup->gpe_number); device_set_wakeup_capable(&device->dev, true); return true; diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 9f5b2d28bff5..44e44b8d9ce6 100644 
--- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -92,17 +92,6 @@ config ZCRYPT_DEBUG If unsure, say N. -config ZCRYPT_MULTIDEVNODES - bool "Support for multiple zcrypt device nodes" - default y - depends on S390 - depends on ZCRYPT - help - With this option enabled the zcrypt device driver can - provide multiple devices nodes in /dev. Each device - node can get customized to limit access and narrow - down the use of the available crypto hardware. - config PKEY tristate "Kernel API for protected key handling" depends on S390 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a84bd4a0c421..2f9c14aca73c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -286,6 +286,9 @@ extern int amdgpu_user_partt_mode; #define AMDGPU_SMARTSHIFT_MAX_BIAS (100) #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) +/* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */ +#define AMDGPU_SWCTF_EXTRA_DELAY 50 + struct amdgpu_xcp_mgr; struct amdgpu_device; struct amdgpu_irq_src; @@ -1277,9 +1280,10 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); -#define for_each_inst(i, inst_mask) \ - for (i = ffs(inst_mask) - 1; inst_mask; \ - inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1) +#define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i)) +#define for_each_inst(i, inst_mask) \ + for (i = ffs(inst_mask); i-- != 0; \ + i = ffs(inst_mask & BIT_MASK_UPPER(i + 1))) #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 9ba4817a9148..f4e3c133a16c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1791,6 +1791,15 @@ const struct attribute_group amdgpu_vbios_version_attr_group = { .attrs = amdgpu_vbios_version_attrs }; +int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev) +{ + if (adev->mode_info.atom_context) + return devm_device_add_group(adev->dev, + &amdgpu_vbios_version_attr_group); + + return 0; +} + /** * amdgpu_atombios_fini - free the driver info and callbacks for atombios * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 4153d520e2a3..b639a80ee3fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -217,5 +217,6 @@ int amdgpu_atombios_get_data_table(struct amdgpu_device *adev, void amdgpu_atombios_fini(struct amdgpu_device *adev); int amdgpu_atombios_init(struct amdgpu_device *adev); +int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index ef4b9a41f20a..0b7f4c4d58e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -327,10 +327,13 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, mem_channel_number = igp_info->v11.umachannelnumber; if (!mem_channel_number) mem_channel_number = 1; - /* channel width is 64 */ - if (vram_width) - *vram_width = mem_channel_number * 64; mem_type = igp_info->v11.memorytype; + if (mem_type == LpDdr5MemType) + mem_channel_width = 32; + else + mem_channel_width = 64; + if (vram_width) + *vram_width = mem_channel_number * mem_channel_width; if (vram_type) *vram_type = 
convert_atom_mem_type_to_vram_type(adev, mem_type); break; @@ -345,10 +348,13 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, mem_channel_number = igp_info->v21.umachannelnumber; if (!mem_channel_number) mem_channel_number = 1; - /* channel width is 64 */ - if (vram_width) - *vram_width = mem_channel_number * 64; mem_type = igp_info->v21.memorytype; + if (mem_type == LpDdr5MemType) + mem_channel_width = 32; + else + mem_channel_width = 64; + if (vram_width) + *vram_width = mem_channel_number * mem_channel_width; if (vram_type) *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d9503882ea97..040f4cb6ab2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -136,9 +136,6 @@ static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - /* One for TTM and two for the CS job */ - p->uf_entry.tv.num_shared = 3; - drm_gem_object_put(gobj); size = amdgpu_bo_size(bo); @@ -912,15 +909,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, mutex_lock(&p->bo_list->bo_list_mutex); - /* One for TTM and one for the CS job */ + /* One for TTM and one for each CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->tv.num_shared = 2; + e->tv.num_shared = 1 + p->gang_size; + p->uf_entry.tv.num_shared = 1 + p->gang_size; amdgpu_bo_list_get_list(p->bo_list, &p->validated); INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); + /* Two for VM updates, one for TTM and one for each CS job */ + p->vm_pd.tv.num_shared = 3 + p->gang_size; + if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); @@ -1653,15 +1654,15 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, continue; r = dma_fence_wait_timeout(fence, true, timeout); + if (r > 0 && fence->error) + r = fence->error; + dma_fence_put(fence); if (r < 0) return r; if (r == 0) break; - - if (fence->error) - return fence->error; } memset(wait, 0, sizeof(*wait)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e25f085ee886..a92c6189b4b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2552,7 +2552,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = true; /* right after GMC hw init, we create CSA */ - if (amdgpu_mcbp) { + if (adev->gfx.mcbp) { r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, @@ -3673,6 +3673,23 @@ static const struct attribute *amdgpu_dev_attributes[] = { NULL }; +static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) +{ + if (amdgpu_mcbp == 1) + adev->gfx.mcbp = true; + + if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) && + (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) && + adev->gfx.num_gfx_rings) + adev->gfx.mcbp = true; + + if (amdgpu_sriov_vf(adev)) + adev->gfx.mcbp = true; + + if (adev->gfx.mcbp) + DRM_INFO("MCBP is enabled\n"); +} + /** * amdgpu_device_init - initialize the driver * @@ -3824,9 +3841,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); - 
if (amdgpu_mcbp) - DRM_INFO("MCBP is enabled\n"); - /* * Reset domain needs to be present early, before XGMI hive discovered * (if any) and intitialized to use reset sem and in_gpu reset flag @@ -3852,6 +3866,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + amdgpu_device_set_mcbp(adev); + /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) @@ -4018,6 +4034,11 @@ fence_driver_init: /* Get a log2 for easy divisions. */ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); + r = amdgpu_atombios_sysfs_init(adev); + if (r) + drm_err(&adev->ddev, + "registering atombios sysfs failed (%d).\n", r); + r = amdgpu_pm_sysfs_init(adev); if (r) DRM_ERROR("registering pm sysfs failed (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 3b711babd4e2..0593ef8fe0a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -180,7 +180,7 @@ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; -int amdgpu_mcbp; +int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; int amdgpu_mes_kiq; @@ -634,10 +634,10 @@ module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444); /** * DOC: mcbp (int) - * It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled) + * It is used to enable mid command buffer preemption. (0 = disabled, 1 = enabled, -1 auto (default)) */ MODULE_PARM_DESC(mcbp, - "Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)"); + "Enable Mid-command buffer preemption (0 = disabled, 1 = enabled), -1 = auto (default)"); module_param_named(mcbp, amdgpu_mcbp, int, 0444); /** @@ -2899,12 +2899,10 @@ static struct pci_error_handlers amdgpu_pci_err_handler = { extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; -extern const struct attribute_group amdgpu_vbios_version_attr_group; static const struct attribute_group *amdgpu_sysfs_groups[] = { &amdgpu_vram_mgr_attr_group, &amdgpu_gtt_mgr_attr_group, - &amdgpu_vbios_version_attr_group, NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ce0f7a8ad4b8..a4ff515ce896 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -434,6 +434,7 @@ struct amdgpu_gfx { uint16_t xcc_mask; uint32_t num_xcc_per_xcp; struct mutex partition_mutex; + bool mcbp; /* mid command buffer preemption */ }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 3add4b4f0667..2ff2897fd1db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -255,7 +255,8 @@ int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if * if (amdgpu_ras_is_supported(adev, ras_block->block)) { for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) + if (adev->jpeg.harvest_config & (1 << i) || + !adev->jpeg.inst[i].ras_poison_irq.funcs) continue; r = amdgpu_irq_get(adev, &adev->jpeg.inst[i].ras_poison_irq, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index e3531aa3c8bd..cca5a495611f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -805,7 +805,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->ids_flags = 0; if (adev->flags & AMD_IS_APU) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION; - if (amdgpu_mcbp) + if (adev->gfx.mcbp) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; if (amdgpu_is_tmz(adev)) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ; @@ -1247,7 +1247,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } - if (amdgpu_mcbp) { + if (adev->gfx.mcbp) { uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e15c27e05564..6d676bdd1505 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -839,6 +839,7 @@ static bool psp_skip_tmr(struct psp_context *psp) case IP_VERSION(11, 0, 9): case IP_VERSION(11, 0, 7): case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): return true; default: @@ -2039,6 +2040,8 @@ static int psp_securedisplay_initialize(struct psp_context *psp) psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status); dev_err(psp->adev->dev, "SECUREDISPLAY: query securedisplay TA failed. ret 0x%x\n", securedisplay_cmd->securedisplay_out_message.query_ta.query_cmd_ret); + /* don't try again */ + psp->securedisplay_context.context.bin_desc.size_bytes = 0; } return 0; @@ -3703,7 +3706,6 @@ static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); int amdgpu_psp_sysfs_init(struct amdgpu_device *adev) { int ret = 0; - struct psp_context *psp = &adev->psp; if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -3712,10 +3714,6 @@ int amdgpu_psp_sysfs_init(struct amdgpu_device *adev) case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 10): - if (!psp->adev) { - psp->adev = adev; - psp_v13_0_set_psp_funcs(psp); - } ret = sysfs_create_bin_file(&adev->dev->kobj, &psp_vbflash_bin_attr); if (ret) dev_err(adev->dev, "Failed to create device file psp_vbflash"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c index 12010c988c8b..123bcf5c2bb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c @@ -116,7 +116,6 @@ static const struct file_operations amdgpu_rap_debugfs_ops = { void amdgpu_rap_debugfs_init(struct amdgpu_device *adev) { -#if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; if (!adev->psp.rap_context.context.initialized) @@ -124,5 +123,4 @@ void amdgpu_rap_debugfs_init(struct amdgpu_device *adev) debugfs_create_file("rap_test", S_IWUSR, minor->debugfs_root, adev, &amdgpu_rap_debugfs_ops); -#endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4769a18304d7..8aaa427f8c0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2065,6 +2065,14 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET; reset_context.method = AMD_RESET_METHOD_MODE2; } + + /* Fatal error occurs in poison mode, mode1 reset is used to + * recover gpu. 
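Worth pausing on the for_each_inst() rework from the amdgpu.h hunk earlier: the old form consumed inst_mask while iterating, the new form leaves it intact. ffs() is 1-based (0 means no bit set), the postdecrement both terminates the loop and converts to a 0-based index, and BIT_MASK_UPPER() hides the bits already visited so ffs() finds the next higher one. A standalone worked example:

	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	#define BITS_PER_LONG	(8 * sizeof(long))
	#define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i))
	#define for_each_inst(i, inst_mask)			\
		for (i = ffs(inst_mask); i-- != 0;		\
		     i = ffs(inst_mask & BIT_MASK_UPPER(i + 1)))

	int main(void)
	{
		unsigned int i, inst_mask = 0xa;	/* bits 1 and 3 set */

		for_each_inst(i, inst_mask)
			printf("instance %u\n", i);	/* prints 1, then 3 */
		return !(inst_mask == 0xa);		/* mask survives iteration */
	}
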
+ */ + if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) { + ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + } } amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); @@ -2955,9 +2963,12 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) return; if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + dev_info(adev->dev, "uncorrectable hardware error" "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); + ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; amdgpu_ras_reset_gpu(adev); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 46bf1889a9d7..ffb49b2d533a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -340,6 +340,7 @@ enum amdgpu_ras_ret { #define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2) #define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0) +#define AMDGPU_RAS_GPU_RESET_MODE1_RESET (0x1 << 1) struct amdgpu_ras_err_status_reg_entry { uint32_t hwip; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index 73516abef662..b779ee4bbaa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -423,6 +423,9 @@ void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mu struct amdgpu_ring_mux *mux = &adev->gfx.muxer; unsigned offset; + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) + return; + offset = ring->wptr & ring->buf_mask; amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 78ec3420ef85..dacf281d2b21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -72,7 +72,7 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, int r; /* don't enable OS preemption on SDMA under SRIOV */ - if (amdgpu_sriov_vf(adev) || vmid == 0 || !amdgpu_mcbp) + if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp) return 0; if (ring->is_mes_queue) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index acbef1a24b9c..ae455aab5d29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1198,7 +1198,8 @@ int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r if (amdgpu_ras_is_supported(adev, ras_block->block)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) + if (adev->vcn.harvest_config & (1 << i) || + !adev->vcn.inst[i].ras_poison_irq.funcs) continue; r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 25b4d7f0bd35..41aa853a07d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -66,9 +66,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->cg_flags = 0; adev->pg_flags = 0; - /* enable mcbp for sriov */ - amdgpu_mcbp = 1; - /* Reduce kcq number to 2 to reduce latency */ if (amdgpu_num_kcq == -1) amdgpu_num_kcq = 2; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 143d11afe0e5..291977b93b1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1771,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, /* Insert partial mapping before the range */ if (!list_empty(&before->list)) { + struct amdgpu_bo *bo = before->bo_va->base.bo; + amdgpu_vm_it_insert(before, &vm->va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !before->bo_va->base.moved) + amdgpu_vm_bo_moved(&before->bo_va->base); } else { kfree(before); } /* Insert partial mapping after the range */ if (!list_empty(&after->list)) { + struct amdgpu_bo *bo = after->bo_va->base.bo; + amdgpu_vm_it_insert(after, &vm->va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !after->bo_va->base.moved) + amdgpu_vm_bo_moved(&after->bo_va->base); } else { kfree(after); } @@ -2233,16 +2245,16 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) return r; - /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { - r = -EINVAL; - goto unreserve_bo; - } - /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. */ if (pte_support_ats != vm->pte_support_ats) { + /* Sanity checks */ + if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + r = -EINVAL; + goto unreserve_bo; + } + vm->pte_support_ats = pte_support_ats; r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index d733fa6e7477..d175e862f222 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -132,6 +132,9 @@ int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode) for (i = 0; i < MAX_XCP; ++i) xcp_mgr->xcp[i].valid = false; + /* This is needed for figuring out memory id of xcp */ + xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions; + for (i = 0; i < num_xcps; ++i) { for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) { ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j, @@ -157,7 +160,6 @@ int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode) xcp_mgr->num_xcps = num_xcps; amdgpu_xcp_update_partition_sched_list(adev); - xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions; return 0; } @@ -232,7 +234,10 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev) ddev = adev_to_drm(adev); - for (i = 0; i < MAX_XCP; i++) { + /* xcp #0 shares drm device setting with adev */ + adev->xcp_mgr->xcp->ddev = ddev; + + for (i = 1; i < MAX_XCP; i++) { ret = amdgpu_xcp_drm_dev_alloc(&p_ddev); if (ret) return ret; @@ -322,7 +327,7 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev, if (!adev->xcp_mgr) return 0; - for (i = 0; i < MAX_XCP; i++) { + for (i = 1; i < MAX_XCP; i++) { ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data); if (ret) return ret; @@ -339,7 +344,7 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev) if (!adev->xcp_mgr) return; - for (i = 0; i < MAX_XCP; i++) { + for (i = 1; i < MAX_XCP; i++) { p_ddev = adev->xcp_mgr->xcp[i].ddev; drm_dev_unplug(p_ddev); p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index be984f8c71c7..44af8022b89f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -8307,7 +8307,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); if (flags & AMDGPU_IB_PREEMPTED) @@ -8482,7 +8482,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, { uint32_t dw2 = 0; - if (amdgpu_mcbp) + if (ring->adev->gfx.mcbp) gfx_v10_0_ring_emit_ce_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 690e121d9dda..3a7af59e83ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5311,7 +5311,7 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); if (flags & AMDGPU_IB_PREEMPTED) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c1ee54d4c3d3..9e3b835bdbb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -623,12 +623,28 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, int num_xccs_per_xcp) { - int ret; - - ret = psp_spatial_partition(&adev->psp, NUM_XCC(adev->gfx.xcc_mask) / - num_xccs_per_xcp); - if (ret) - return ret; + int ret, i, num_xcc; + u32 tmp = 0; + + if (adev->psp.funcs) { + ret = psp_spatial_partition(&adev->psp, + NUM_XCC(adev->gfx.xcc_mask) / + num_xccs_per_xcp); + if (ret) + return ret; + } else { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + for (i = 0; i < num_xcc; i++) { + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, + num_xccs_per_xcp); + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, + i % num_xccs_per_xcp); + WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL, + tmp); + } + ret = 0; + } adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp; @@ -1762,6 +1778,8 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); gfx_v9_4_3_xcc_mqd_init(ring, xcc_id); gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id); @@ -1960,6 +1978,16 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) if (amdgpu_gfx_disable_kcq(adev, xcc_id)) DRM_ERROR("XCD %d KCQ disable failed\n", xcc_id); + if (amdgpu_sriov_vf(adev)) { + /* polling must be disabled for SRIOV once the hw is done; otherwise + * the CPC engine may keep fetching a WB address that is already + * invalid after the sw side has finished, triggering a DMAR read + * error on the hypervisor side.
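The direct-programming fallback in gfx_v9_4_3_switch_compute_partition() above is easier to follow with concrete numbers. A standalone sketch, not driver code: NUM_XCC(), REG_SET_FIELD() and WREG32_SOC15() are stubbed with plain integers and printf(), and the 8-XCC / 2-per-XCP figures are hypothetical.

#include <stdio.h>

int main(void)
{
	int num_xcc = 8;		/* stand-in for NUM_XCC(adev->gfx.xcc_mask) */
	int num_xccs_per_xcp = 2;	/* requested partition width */

	for (int i = 0; i < num_xcc; i++) {
		/* Mirrors the two REG_SET_FIELD() calls: every XCC is told
		 * the partition width and its own slot within its partition.
		 */
		printf("XCC %d: xcp=%d NUM_XCC_IN_XCP=%d VIRTUAL_XCC_ID=%d\n",
		       i, i / num_xccs_per_xcp, num_xccs_per_xcp,
		       i % num_xccs_per_xcp);
	}
	return 0;
}

For the hypothetical 8/2 split this prints virtual IDs 0,1 repeating: each two-XCC partition numbers its members locally, which is exactly what i % num_xccs_per_xcp encodes.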
+ */ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0); + return; + } + /* Use deinitialize sequence from CAIL when unbinding device * from driver, otherwise KIQ is hanging when binding back */ @@ -1984,7 +2012,8 @@ static int gfx_v9_4_3_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v9_4_3_init_golden_registers(adev); + if (!amdgpu_sriov_vf(adev)) + gfx_v9_4_3_init_golden_registers(adev); gfx_v9_4_3_constants_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index aa761ff3a5fa..4038455d7998 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -345,8 +345,8 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev) } #define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1 -#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x00000009 // 1=1us, 9=1ms -#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 4ms +#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x0000000A // 1=1us, 9=1ms, 10=4ms +#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 400ms static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, bool enable) @@ -479,9 +479,12 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) WREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP5, data); def = data = RREG32_PCIE(smnPCIE_LC_CNTL); - data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; - data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; - data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT; + data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + if (pci_is_thunderbolt_attached(adev->pdev)) + data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + else + data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL, data); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index ea5e12390d18..f413898dda37 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -578,6 +578,9 @@ static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable, return; } + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) + return; + for_each_inst(i, inst_mask) { f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 
0 : 1); @@ -899,15 +902,12 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, WREG32_SDMA(i, regSDMA_CNTL, temp); if (!amdgpu_sriov_vf(adev)) { - ring = &adev->sdma.instance[i].ring; - adev->nbio.funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); - - /* unhalt engine */ - temp = RREG32_SDMA(i, regSDMA_F32_CNTL); - temp = REG_SET_FIELD(temp, SDMA_F32_CNTL, HALT, 0); - WREG32_SDMA(i, regSDMA_F32_CNTL, temp); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + /* unhalt engine */ + temp = RREG32_SDMA(i, regSDMA_F32_CNTL); + temp = REG_SET_FIELD(temp, SDMA_F32_CNTL, HALT, 0); + WREG32_SDMA(i, regSDMA_F32_CNTL, temp); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index b48bb5212488..259795098173 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1424,8 +1424,10 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) */ static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v4_0_pause_dpg_mode(adev, inst_idx, &state); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 9d4abfd8b55e..0b3dc754e06b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -138,9 +138,12 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(9, 4, 0): /* VEGA20 */ case IP_VERSION(9, 4, 1): /* ARCTURUS */ case IP_VERSION(9, 4, 2): /* ALDEBARAN */ - case IP_VERSION(9, 4, 3): /* GC 9.4.3 */ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; break; + case IP_VERSION(9, 4, 3): /* GC 9.4.3 */ + kfd->device_info.event_interrupt_class = + &event_interrupt_class_v9_4_3; + break; case IP_VERSION(10, 3, 1): /* VANGOGH */ case IP_VERSION(10, 3, 3): /* YELLOW_CARP */ case IP_VERSION(10, 3, 6): /* GC 10.3.6 */ @@ -518,6 +521,7 @@ static int kfd_gws_init(struct kfd_node *node) && kfd->mec2_fw_version >= 0x30) || (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2) && kfd->mec2_fw_version >= 0x28) || + (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) || (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) && kfd->mec2_fw_version >= 0x6b)))) @@ -598,6 +602,41 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes) } } +static void kfd_setup_interrupt_bitmap(struct kfd_node *node, + unsigned int kfd_node_idx) +{ + struct amdgpu_device *adev = node->adev; + uint32_t xcc_mask = node->xcc_mask; + uint32_t xcc, mapped_xcc; + /* + * The interrupt bitmap is set up for processing interrupts from + * different XCDs and AIDs. + * It is defined as follows: + * 1. Bits 0-15 - correspond to the NodeId field. + * Each bit corresponds to a NodeId number. For example, if + * a KFD node has its interrupt bitmap set to 0x7, then this + * KFD node will process interrupts with NodeId = 0, 1 and 2 + * in the IH cookie. + * 2. Bits 16-31 - unused. + * + * Note that the kfd_node_idx argument passed to this + * function is not related to the NodeId field received in the + * IH cookie.
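The bit packing in kfd_setup_interrupt_bitmap() is dense; a standalone sketch makes the resulting NodeId bits visible. Assumptions: GET_INST() is treated as the identity map, and the two-XCC mask (XCCs 2 and 3) is hypothetical.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t xcc_mask = 0x0c;	/* hypothetical: XCCs 2 and 3 */
	uint32_t bitmap = 0;

	for (int xcc = 0; xcc < 32; xcc++) {
		if (!(xcc_mask & (1u << xcc)))
			continue;
		/* mirrors (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2)):
		 * 3 = 0b0011 for even XCCs, 5 = 0b0101 for odd ones,
		 * shifted up one nibble per XCC pair */
		bitmap |= (xcc % 2 ? 5u : 3u) << (4 * (xcc / 2));
	}
	printf("interrupt_bitmap: 0x%x\n", bitmap);	/* prints 0x70 */
	return 0;
}

For this mask the node ends up owning NodeId bits 4, 5 and 6 (0x30 from XCC 2 ORed with 0x50 from XCC 3).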
+ * + * In CPX mode, a KFD node will process an interrupt if: + * - the Node Id matches the corresponding bit set in + * Bits 0-15. + * - AND VMID reported in the interrupt lies within the + * VMID range of the node. + */ + for_each_inst(xcc, xcc_mask) { + mapped_xcc = GET_INST(GC, xcc); + node->interrupt_bitmap |= (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2)); + } + dev_info(kfd_device, "Node: %d, interrupt_bitmap: %x\n", kfd_node_idx, + node->interrupt_bitmap); +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { @@ -797,6 +836,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, amdgpu_amdkfd_get_local_mem_info(kfd->adev, &node->local_mem_info, node->xcp); + if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) + kfd_setup_interrupt_bitmap(node, i); + /* Initialize the KFD node */ if (kfd_init_node(node)) { dev_err(kfd_device, "Error initializing KFD node\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index d5c9f30552e3..f0731a6a5306 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -446,7 +446,36 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, } } +static bool event_interrupt_isr_v9_4_3(struct kfd_node *node, + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, + bool *patched_flag) +{ + uint16_t node_id, vmid; + + /* + * For GFX 9.4.3, process the interrupt if: + * - NodeID field in IH entry matches the corresponding bit + * set in interrupt_bitmap Bits 0-15. + * OR + * - If partition mode is CPX and interrupt came from + * Node_id 0,4,8,12, then check if the Bit (16 + client id) + * is set in interrupt bitmap Bits 16-31. + */ + node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + if (kfd_irq_is_from_node(node, node_id, vmid)) + return event_interrupt_isr_v9(node, ih_ring_entry, + patched_ihre, patched_flag); + return false; +} + const struct kfd_event_interrupt_class event_interrupt_class_v9 = { .interrupt_isr = event_interrupt_isr_v9, .interrupt_wq = event_interrupt_wq_v9, }; + +const struct kfd_event_interrupt_class event_interrupt_class_v9_4_3 = { + .interrupt_isr = event_interrupt_isr_v9_4_3, + .interrupt_wq = event_interrupt_wq_v9, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7364a5d77c6e..d4c9ee3f9953 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1444,6 +1444,7 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd); /* Events */ extern const struct kfd_event_interrupt_class event_interrupt_class_cik; extern const struct kfd_event_interrupt_class event_interrupt_class_v9; +extern const struct kfd_event_interrupt_class event_interrupt_class_v9_4_3; extern const struct kfd_event_interrupt_class event_interrupt_class_v10; extern const struct kfd_event_interrupt_class event_interrupt_class_v11; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3d3611705d41..a844e68211ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -2142,6 +2142,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) int kfd_process_drain_interrupts(struct kfd_process_device *pdd) { uint32_t irq_drain_fence[8]; + uint8_t node_id = 0; int r = 0; if (!KFD_IS_SOC15(pdd->dev)) @@ -2154,6 +2155,14 @@ int 
kfd_process_drain_interrupts(struct kfd_process_device *pdd) KFD_IRQ_FENCE_CLIENTID; irq_drain_fence[3] = pdd->process->pasid; + /* + * For GFX 9.4.3, also send the NodeId in IH cookie DW[3] + */ + if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3)) { + node_id = ffs(pdd->dev->interrupt_bitmap) - 1; + irq_drain_fence[3] |= node_id << 16; + } + /* ensure stale irqs have scheduled their KFD interrupts, then send the drain fence. */ if (amdgpu_amdkfd_send_close_event_drain_irq(pdd->dev->adev, irq_drain_fence)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9ad1a2186a24..ba9d69054119 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -123,16 +123,24 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (gws) - ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, - gws, &mem); - else - ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, - pqn->q->gws); - if (unlikely(ret)) - return ret; + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) { + if (gws) + ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, + gws, &mem); + else + ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, + pqn->q->gws); + if (unlikely(ret)) + return ret; + pqn->q->gws = mem; + } else { + /* + * Intentionally set GWS to a non-NULL value + * for GFX 9.4.3. + */ + pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL; + } - pqn->q->gws = mem; pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, @@ -164,7 +172,8 @@ void pqm_uninit(struct process_queue_manager *pqm) struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - if (pqn->q && pqn->q->gws) + if (pqn->q && pqn->q->gws && + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, pqn->q->gws); kfd_procfs_del_queue(pqn->q); @@ -446,8 +455,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q->gws) { - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, - pqn->q->gws); + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) + amdgpu_amdkfd_remove_gws_from_process( + pqm->process->kgd_process_info, + pqn->q->gws); pdd->qpd.num_gws = 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 90b86a6ac7bd..61fc62f3e003 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2107,6 +2107,10 @@ int kfd_topology_add_device(struct kfd_node *gpu) if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; + if (dev->gpu->adev->gmc.is_app_apu || + dev->gpu->adev->gmc.xgmi.connected_to_cpu) + dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; + kfd_debug_print_topology(); kfd_notify_gpu_change(gpu_id, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h index e3f3b0b93a59..10138676f27f 100644 --- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -40,6 +40,7 @@ #define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf) #define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry)
(le32_to_cpu(entry[0]) >> 31 & 0x1) #define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff) +#define SOC15_NODEID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) >> 16 & 0xff) #define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4])) #define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5])) #define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6])) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 514f6785a020..ff0a217b9d56 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5063,11 +5063,7 @@ static inline void fill_dc_dirty_rect(struct drm_plane *plane, s32 y, s32 width, s32 height, int *i, bool ffu) { - if (*i > DC_MAX_DIRTY_RECTS) - return; - - if (*i == DC_MAX_DIRTY_RECTS) - goto out; + WARN_ON(*i >= DC_MAX_DIRTY_RECTS); dirty_rect->x = x; dirty_rect->y = y; @@ -5083,7 +5079,6 @@ static inline void fill_dc_dirty_rect(struct drm_plane *plane, "[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)", plane->base.id, x, y, width, height); -out: (*i)++; } @@ -5170,6 +5165,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, *dirty_regions_changed = bb_changed; + if ((num_clips + (bb_changed ? 2 : 0)) > DC_MAX_DIRTY_RECTS) + goto ffu; + if (bb_changed) { fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], new_plane_state->crtc_x, @@ -5199,9 +5197,6 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, new_plane_state->crtc_h, &i, false); } - if (i > DC_MAX_DIRTY_RECTS) - goto ffu; - flip_addrs->dirty_rect_count = i; return; @@ -7258,13 +7253,7 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) drm_add_modes_noedid(connector, 1920, 1080); } else { amdgpu_dm_connector_ddc_get_modes(connector, edid); - /* most eDP supports only timings from its edid, - * usually only detailed timings are available - * from eDP edid. 
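Putting the two KFD halves above together: kfd_process_drain_interrupts() packs the NodeId into IH cookie DW[3] with node_id << 16, and the new SOC15_NODEID_FROM_IH_ENTRY() macro (next to SOC15_PASID_FROM_IH_ENTRY()) takes it back apart. A minimal round-trip sketch, assuming a little-endian host so le32_to_cpu() can be dropped; the pasid and node_id values are hypothetical.

#include <stdio.h>
#include <stdint.h>

#define PASID_FROM_DW3(dw3)	((dw3) & 0xffff)	/* bits 0-15  */
#define NODEID_FROM_DW3(dw3)	((dw3) >> 16 & 0xff)	/* bits 16-23 */

int main(void)
{
	uint32_t pasid = 0x8001, node_id = 2;	/* hypothetical values */
	uint32_t dw3 = pasid | (node_id << 16);	/* as in irq_drain_fence[3] */

	printf("dw3=0x%08x pasid=0x%x node_id=%u\n",
	       dw3, PASID_FROM_DW3(dw3), NODEID_FROM_DW3(dw3));
	return 0;
}

The fields do not overlap, which is why the drain path can OR the NodeId into the same word that already carries the PASID.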
timings which are not from edid - * may damage eDP - */ - if (connector->connector_type != DRM_MODE_CONNECTOR_eDP) - amdgpu_dm_connector_add_common_modes(encoder, connector); + amdgpu_dm_connector_add_common_modes(encoder, connector); amdgpu_dm_connector_add_freesync_modes(connector, edid); } amdgpu_dm_fbc_init(connector); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 5ea3284b2b77..d63ee636483b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -336,6 +336,153 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, return size; } +static bool dp_mst_is_end_device(struct amdgpu_dm_connector *aconnector) +{ + bool is_end_device = false; + struct drm_dp_mst_topology_mgr *mgr = NULL; + struct drm_dp_mst_port *port = NULL; + + if (aconnector->mst_root && aconnector->mst_root->mst_mgr.mst_state) { + mgr = &aconnector->mst_root->mst_mgr; + port = aconnector->mst_output_port; + + drm_modeset_lock(&mgr->base.lock, NULL); + if (port->pdt == DP_PEER_DEVICE_SST_SINK || + port->pdt == DP_PEER_DEVICE_DP_LEGACY_CONV) + is_end_device = true; + drm_modeset_unlock(&mgr->base.lock); + } + + return is_end_device; +} + +/* Change MST link setting + * + * valid lane count value: 1, 2, 4 + * valid link rate value: + * 06h = 1.62Gbps per lane + * 0Ah = 2.7Gbps per lane + * 0Ch = 3.24Gbps per lane + * 14h = 5.4Gbps per lane + * 1Eh = 8.1Gbps per lane + * 3E8h = 10.0Gbps per lane + * 546h = 13.5Gbps per lane + * 7D0h = 20.0Gbps per lane + * + * debugfs is located at /sys/kernel/debug/dri/0/DP-x/mst_link_settings + * + * for example, to force 2 lanes at 10.0Gbps, + * echo 2 0x3e8 > /sys/kernel/debug/dri/0/DP-x/mst_link_settings + * + * Valid input will trigger a hotplug event so the new link setting gets applied + * Invalid input will trigger a reset of the training settings + * + * Usage follows the link_settings entry above + * + */ +static ssize_t dp_mst_link_setting(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; + struct dc_link *link = aconnector->dc_link; + struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev); + struct dc *dc = (struct dc *)link->dc; + struct dc_link_settings prefer_link_settings; + char *wr_buf = NULL; + const uint32_t wr_buf_size = 40; + /* 0: lane_count; 1: link_rate */ + int max_param_num = 2; + uint8_t param_nums = 0; + long param[2]; + bool valid_input = true; + + if (!dp_mst_is_end_device(aconnector)) + return -EINVAL; + + if (size == 0) + return -EINVAL; + + wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL); + if (!wr_buf) + return -ENOSPC; + + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, + (long *)param, buf, + max_param_num, + &param_nums)) { + kfree(wr_buf); + return -EINVAL; + } + + if (param_nums <= 0) { + kfree(wr_buf); + DRM_DEBUG_DRIVER("user data could not be read\n"); + return -EINVAL; + } + + switch (param[0]) { + case LANE_COUNT_ONE: + case LANE_COUNT_TWO: + case LANE_COUNT_FOUR: + break; + default: + valid_input = false; + break; + } + + switch (param[1]) { + case LINK_RATE_LOW: + case LINK_RATE_HIGH: + case LINK_RATE_RBR2: + case LINK_RATE_HIGH2: + case LINK_RATE_HIGH3: + case LINK_RATE_UHBR10: + case LINK_RATE_UHBR13_5: + case LINK_RATE_UHBR20: + break; + default: + valid_input = false; + break; + } + + if (!valid_input) { + kfree(wr_buf); + DRM_DEBUG_DRIVER("Invalid input
value. No HW will be programmed\n"); + mutex_lock(&adev->dm.dc_lock); + dc_link_set_preferred_training_settings(dc, NULL, NULL, link, false); + mutex_unlock(&adev->dm.dc_lock); + return -EINVAL; + } + + /* save the user-forced lane_count and link_rate to preferred settings; + * spread spectrum will not be changed + */ + prefer_link_settings.link_spread = link->cur_link_settings.link_spread; + prefer_link_settings.use_link_rate_set = false; + prefer_link_settings.lane_count = param[0]; + prefer_link_settings.link_rate = param[1]; + + /* skip the immediate retrain; train to the new link setting once the hotplug event is triggered */ + mutex_lock(&adev->dm.dc_lock); + dc_link_set_preferred_training_settings(dc, &prefer_link_settings, NULL, link, true); + mutex_unlock(&adev->dm.dc_lock); + + mutex_lock(&aconnector->base.dev->mode_config.mutex); + aconnector->base.force = DRM_FORCE_OFF; + mutex_unlock(&aconnector->base.dev->mode_config.mutex); + drm_kms_helper_hotplug_event(aconnector->base.dev); + + msleep(100); + + mutex_lock(&aconnector->base.dev->mode_config.mutex); + aconnector->base.force = DRM_FORCE_UNSPECIFIED; + mutex_unlock(&aconnector->base.dev->mode_config.mutex); + drm_kms_helper_hotplug_event(aconnector->base.dev); + + kfree(wr_buf); + return size; +} + /* function: get current DP PHY settings: voltage swing, pre-emphasis, * post-cursor2 (defined by VESA DP specification) * @@ -2668,6 +2815,12 @@ static const struct file_operations dp_dsc_disable_passthrough_debugfs_fops = { .llseek = default_llseek }; +static const struct file_operations dp_mst_link_settings_debugfs_fops = { + .owner = THIS_MODULE, + .write = dp_mst_link_setting, + .llseek = default_llseek +}; + static const struct { char *name; const struct file_operations *fops; @@ -2691,7 +2844,8 @@ static const struct { {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, {"is_mst_connector", &dp_is_mst_connector_fops}, {"mst_progress_status", &dp_mst_progress_status_fops}, - {"is_dpia_link", &is_dpia_link_fops} + {"is_dpia_link", &is_dpia_link_fops}, + {"mst_link_settings", &dp_mst_link_settings_debugfs_fops} }; static const struct { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index cd20cfc04996..d9a482908380 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -44,6 +44,30 @@ #include "dm_helpers.h" #include "ddc_service_types.h" +static u32 edid_extract_panel_id(struct edid *edid) +{ + return (u32)edid->mfg_id[0] << 24 | + (u32)edid->mfg_id[1] << 16 | + (u32)EDID_PRODUCT_ID(edid); +} + +static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) +{ + uint32_t panel_id = edid_extract_panel_id(edid); + + switch (panel_id) { + /* Workaround for some monitors which do not work well with FAMS */ + case drm_edid_encode_panel_id('S', 'A', 'M', 0x0E5E): + case drm_edid_encode_panel_id('S', 'A', 'M', 0x7053): + case drm_edid_encode_panel_id('S', 'A', 'M', 0x71AC): + DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id); + edid_caps->panel_patch.disable_fams = true; + break; + default: + return; + } +} + /* dm_helpers_parse_edid_caps * * Parse edid caps @@ -115,6 +139,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps( else edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION; + apply_edid_quirks(edid_buf, edid_caps); + kfree(sads); kfree(sadb); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index d647f68fd563..4f61d4f257cd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -24,6 +24,7 @@ */ #include "amdgpu_dm_psr.h" +#include "dc_dmub_srv.h" #include "dc.h" #include "dm_helpers.h" #include "amdgpu_dm.h" @@ -50,7 +51,7 @@ static bool link_supports_psrsu(struct dc_link *link) !link->dpcd_caps.psr_info.psr2_su_y_granularity_cap) return false; - return true; + return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub); } /* diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 6a811755e2e6..cb992aca760d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -541,9 +541,18 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.p_state_change_support = p_state_change_support; /* to disable P-State switching, set UCLK min = max */ - if (!clk_mgr_base->clks.p_state_change_support) - dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1].memclk_mhz); + if (!clk_mgr_base->clks.p_state_change_support) { + if (dc->clk_mgr->dc_mode_softmax_enabled) { + /* On DCN32x we will never have the functional UCLK min above the softmax + * since we calculate mode support based on softmax being the max UCLK + * frequency. + */ + dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + dc->clk_mgr->bw_params->dc_mode_softmax_memclk); + } else { + dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz); + } + } } /* Always update saved value, even if new value not set due to P-State switching unsupported. Also check safe_to_lower for FCLK */ @@ -808,8 +817,7 @@ static void dcn32_set_hard_max_memclk(struct clk_mgr *clk_mgr_base) if (!clk_mgr->smu_present) return; - dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1].memclk_mhz); + dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, clk_mgr_base->bw_params->max_memclk_mhz); } /* Get current memclk states, update bounding box */ @@ -827,6 +835,7 @@ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) &clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz, &num_entries_per_clk->num_memclk_levels); clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); + clk_mgr_base->bw_params->dc_mode_softmax_memclk = clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz; /* memclk must have at least one level */ num_entries_per_clk->num_memclk_levels = num_entries_per_clk->num_memclk_levels ? num_entries_per_clk->num_memclk_levels : 1; @@ -841,7 +850,8 @@ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) } else { num_levels = num_entries_per_clk->num_fclk_levels; } - + clk_mgr_base->bw_params->max_memclk_mhz = + clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz; clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? 
num_levels : 1; if (clk_mgr->dpm_present && !num_levels) @@ -894,6 +904,25 @@ static bool dcn32_is_smu_present(struct clk_mgr *clk_mgr_base) return clk_mgr->smu_present; } +static void dcn32_set_max_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + if (!clk_mgr->smu_present) + return; + + dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz); +} + +static void dcn32_set_min_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + if (!clk_mgr->smu_present) + return; + + dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz); +} static struct clk_mgr_funcs dcn32_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, @@ -904,6 +933,8 @@ static struct clk_mgr_funcs dcn32_funcs = { .notify_wm_ranges = dcn32_notify_wm_ranges, .set_hard_min_memclk = dcn32_set_hard_min_memclk, .set_hard_max_memclk = dcn32_set_hard_max_memclk, + .set_max_memclk = dcn32_set_max_memclk, + .set_min_memclk = dcn32_set_min_memclk, .get_memclk_states_from_smu = dcn32_get_memclk_states_from_smu, .are_clock_states_equal = dcn32_are_clock_states_equal, .enable_pme_wa = dcn32_enable_pme_wa, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index dd3a9d06c6e2..d133e4186a52 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1629,6 +1629,9 @@ bool dc_validate_boot_timing(const struct dc *dc, return false; } + if (dc->debug.force_odm_combine) + return false; + /* Check for enabled DIG to identify enabled display */ if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) return false; @@ -3577,6 +3580,13 @@ static void commit_planes_for_stream_fast(struct dc *dc, hwss_execute_sequence(dc, context->block_sequence, context->block_sequence_steps); + /* Clear update flags so next flip doesn't have redundant programming + * (if there's no stream update, the update flags are not cleared). 
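The UCLK pinning added to dcn32_update_clocks() earlier in this hunk reads more clearly in isolation. A sketch under stated assumptions: bare integers stand in for the clk_mgr state, and the P-State-supported branch is a placeholder, since the diff only shows the unsupported path.

#include <stdio.h>
#include <stdbool.h>

/* When P-State switching is unsupported, "set UCLK min = max" now means:
 * pin the hard minimum to the DC-mode softmax if that limit is enabled,
 * else to the absolute maximum memclk.
 */
static unsigned int uclk_hard_min_mhz(bool p_state_support,
				      bool softmax_enabled,
				      unsigned int softmax_mhz,
				      unsigned int max_memclk_mhz,
				      unsigned int lowest_dpm_mhz)
{
	if (p_state_support)
		return lowest_dpm_mhz;	/* placeholder: not part of the hunk */
	return softmax_enabled ? softmax_mhz : max_memclk_mhz;
}

int main(void)
{
	/* hypothetical clock levels, in MHz */
	printf("no p-state, softmax on:  %u MHz\n",
	       uclk_hard_min_mhz(false, true, 1000, 1250, 97));
	printf("no p-state, softmax off: %u MHz\n",
	       uclk_hard_min_mhz(false, false, 1000, 1250, 97));
	return 0;
}

The point of the softmax branch is that mode support was already validated against the softmax, so clamping to it (rather than the raw DPM maximum) keeps the functional UCLK floor consistent with what was promised to the display path.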
+ */ + if (top_pipe_to_program->plane_state) + top_pipe_to_program->plane_state->update_flags.raw = 0; + if (top_pipe_to_program->stream) + top_pipe_to_program->stream->update_flags.raw = 0; } static void commit_planes_for_stream(struct dc *dc, @@ -4233,6 +4243,117 @@ static void update_seamless_boot_flags(struct dc *dc, } } +static void populate_fast_updates(struct dc_fast_update *fast_update, + struct dc_surface_update *srf_updates, + int surface_count, + struct dc_stream_update *stream_update) +{ + int i = 0; + + if (stream_update) { + fast_update[0].out_transfer_func = stream_update->out_transfer_func; + fast_update[0].output_csc_transform = stream_update->output_csc_transform; + } + + for (i = 0; i < surface_count; i++) { + fast_update[i].flip_addr = srf_updates[i].flip_addr; + fast_update[i].gamma = srf_updates[i].gamma; + fast_update[i].gamut_remap_matrix = srf_updates[i].gamut_remap_matrix; + fast_update[i].input_csc_color_matrix = srf_updates[i].input_csc_color_matrix; + fast_update[i].coeff_reduction_factor = srf_updates[i].coeff_reduction_factor; + } +} + +static bool fast_updates_exist(struct dc_fast_update *fast_update, int surface_count) +{ + int i; + + if (fast_update[0].out_transfer_func || + fast_update[0].output_csc_transform) + return true; + + for (i = 0; i < surface_count; i++) { + if (fast_update[i].flip_addr || + fast_update[i].gamma || + fast_update[i].gamut_remap_matrix || + fast_update[i].input_csc_color_matrix || + fast_update[i].coeff_reduction_factor) + return true; + } + + return false; +} + +static bool full_update_required(struct dc_surface_update *srf_updates, + int surface_count, + struct dc_stream_update *stream_update, + struct dc_stream_state *stream) +{ + + int i; + struct dc_stream_status *stream_status; + + for (i = 0; i < surface_count; i++) { + if (srf_updates && + (srf_updates[i].plane_info || + srf_updates[i].scaling_info || + (srf_updates[i].hdr_mult.value && + srf_updates[i].hdr_mult.value != srf_updates->surface->hdr_mult.value) || + srf_updates[i].in_transfer_func || + srf_updates[i].func_shaper || + srf_updates[i].lut3d_func || + srf_updates[i].blend_tf)) + return true; + } + + if (stream_update && + (((stream_update->src.height != 0 && stream_update->src.width != 0) || + (stream_update->dst.height != 0 && stream_update->dst.width != 0) || + stream_update->integer_scaling_update) || + stream_update->hdr_static_metadata || + stream_update->abm_level || + stream_update->periodic_interrupt || + stream_update->vrr_infopacket || + stream_update->vsc_infopacket || + stream_update->vsp_infopacket || + stream_update->hfvsif_infopacket || + stream_update->vtem_infopacket || + stream_update->adaptive_sync_infopacket || + stream_update->dpms_off || + stream_update->allow_freesync || + stream_update->vrr_active_variable || + stream_update->vrr_active_fixed || + stream_update->gamut_remap || + stream_update->output_color_space || + stream_update->dither_option || + stream_update->wb_update || + stream_update->dsc_config || + stream_update->mst_bw_update || + stream_update->func_shaper || + stream_update->lut3d_func || + stream_update->pending_test_pattern || + stream_update->crtc_timing_adjust)) + return true; + + if (stream) { + stream_status = dc_stream_get_status(stream); + if (stream_status == NULL || stream_status->plane_count != surface_count) + return true; + } + + return false; +} + +static bool fast_update_only(struct dc_fast_update *fast_update, + struct dc_surface_update *srf_updates, + int surface_count, + struct dc_stream_update 
*stream_update, + struct dc_stream_state *stream) +{ + return fast_updates_exist(fast_update, surface_count) + && !full_update_required(srf_updates, surface_count, stream_update, stream); +} + bool dc_update_planes_and_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, @@ -4242,6 +4363,7 @@ bool dc_update_planes_and_stream(struct dc *dc, enum surface_update_type update_type; int i; struct mall_temp_config mall_temp_config; + struct dc_fast_update fast_update[MAX_SURFACES] = {0}; /* In cases where MPO and split or ODM are used transitions can * cause underflow. Apply stream configuration with minimal pipe @@ -4250,6 +4372,7 @@ bool dc_update_planes_and_stream(struct dc *dc, bool force_minimal_pipe_splitting; bool is_plane_addition; + populate_fast_updates(fast_update, srf_updates, surface_count, stream_update); force_minimal_pipe_splitting = could_mpcc_tree_change_for_active_pipes( dc, stream, @@ -4300,7 +4423,8 @@ bool dc_update_planes_and_stream(struct dc *dc, } update_seamless_boot_flags(dc, context, surface_count, stream); - if (!dc->debug.enable_legacy_fast_update && update_type == UPDATE_TYPE_FAST) { + if (fast_update_only(fast_update, srf_updates, surface_count, stream_update, stream) && + !dc->debug.enable_legacy_fast_update) { commit_planes_for_stream_fast(dc, srf_updates, surface_count, @@ -4357,7 +4481,9 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_state *context; struct dc_context *dc_ctx = dc->ctx; int i, j; + struct dc_fast_update fast_update[MAX_SURFACES] = {0}; + populate_fast_updates(fast_update, srf_updates, surface_count, stream_update); stream_status = dc_stream_get_status(stream); context = dc->current_state; @@ -4443,7 +4569,8 @@ void dc_commit_updates_for_stream(struct dc *dc, TRACE_DC_PIPE_STATE(pipe_ctx, i, MAX_PIPES); update_seamless_boot_flags(dc, context, surface_count, stream); - if (!dc->debug.enable_legacy_fast_update && update_type == UPDATE_TYPE_FAST) { + if (fast_update_only(fast_update, srf_updates, surface_count, stream_update, stream) && + !dc->debug.enable_legacy_fast_update) { commit_planes_for_stream_fast(dc, srf_updates, surface_count, @@ -4753,15 +4880,17 @@ static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int memcl */ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) { - uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; - unsigned int softMax, maxDPM, funcMin; + unsigned int softMax = 0, maxDPM = 0, funcMin = 0, i; bool p_state_change_support; - if (!ASICREV_IS_BEIGE_GOBY_P(hw_internal_rev)) + if (!dc->config.dc_mode_clk_limit_support) return; softMax = dc->clk_mgr->bw_params->dc_mode_softmax_memclk; - maxDPM = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz; + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz > maxDPM) + maxDPM = dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz; + } funcMin = (dc->clk_mgr->clks.dramclk_khz + 999) / 1000; p_state_change_support = dc->clk_mgr->clks.p_state_change_support; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index d7d00fefaab9..cb2bf9a466f5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -610,7 +610,7 @@ void hwss_build_fast_sequence(struct dc *dc, current_mpc_pipe = current_pipe; while 
(current_mpc_pipe) { - if (!current_mpc_pipe->bottom_pipe && !pipe_ctx->next_odm_pipe && + if (!current_mpc_pipe->bottom_pipe && !current_mpc_pipe->next_odm_pipe && current_mpc_pipe->stream && current_mpc_pipe->plane_state && current_mpc_pipe->plane_state->update_flags.bits.addr_update && !current_mpc_pipe->plane_state->skip_manual_trigger) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 26d05e225088..63948170fd6d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -45,7 +45,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.239" +#define DC_VER "3.2.241" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -416,7 +416,7 @@ struct dc_config { uint8_t force_bios_fixed_vs; int sdpif_request_limit_words_per_umc; bool use_old_fixed_vs_sequence; - bool disable_subvp_drr; + bool dc_mode_clk_limit_support; }; enum visual_confirm { @@ -850,6 +850,7 @@ struct dc_debug_options { /* Enable dmub aux for legacy ddc */ bool enable_dmub_aux_for_legacy_ddc; bool disable_fams; + bool disable_fams_gaming; /* FEC/PSR1 sequence enable delay in 100us */ uint8_t fec_enable_delay_in100us; bool enable_driver_sequence_debug; @@ -1264,6 +1265,16 @@ struct dc_scaling_info { struct scaling_taps scaling_quality; }; +struct dc_fast_update { + const struct dc_flip_addrs *flip_addr; + const struct dc_gamma *gamma; + const struct colorspace_transform *gamut_remap_matrix; + const struct dc_csc_transform *input_csc_color_matrix; + const struct fixed31_32 *coeff_reduction_factor; + struct dc_transfer_func *out_transfer_func; + struct dc_csc_transform *output_csc_transform; +}; + struct dc_surface_update { struct dc_plane_state *surface; @@ -1525,6 +1536,7 @@ struct dc_link { bool dpia_forced_tbt3_mode; bool dongle_mode_timing_override; bool blank_stream_on_ocs_change; + bool read_dpcd204h_on_irq_hpd; } wa_flags; struct link_mst_stream_allocation_table mst_stream_alloc_table; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index c52c40b16387..c753c6f30dd7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1011,3 +1011,10 @@ void dc_send_update_cursor_info_to_dmu( dm_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT); } } + +bool dc_dmub_check_min_version(struct dmub_srv *srv) +{ + if (!srv->hw_funcs.is_psrsu_supported) + return true; + return srv->hw_funcs.is_psrsu_supported(srv); +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index a5196a9292b3..099f94b6107c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -86,4 +86,5 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, struct dc_state *context, b void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv); void dc_send_update_cursor_info_to_dmu(struct pipe_ctx *pCtx, uint8_t pipe_idx); +bool dc_dmub_check_min_version(struct dmub_srv *srv); #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h index e6c06325742a..168cb7094c95 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h @@ -266,7 +266,24 @@ type MASTER_COMM_INTERRUPT; \ type MASTER_COMM_CMD_REG_BYTE0; \ type MASTER_COMM_CMD_REG_BYTE1; \ - type MASTER_COMM_CMD_REG_BYTE2 + type 
MASTER_COMM_CMD_REG_BYTE2; \ + type ABM1_HG_BIN_33_40_SHIFT_INDEX; \ + type ABM1_HG_BIN_33_64_SHIFT_FLAG; \ + type ABM1_HG_BIN_41_48_SHIFT_INDEX; \ + type ABM1_HG_BIN_49_56_SHIFT_INDEX; \ + type ABM1_HG_BIN_57_64_SHIFT_INDEX; \ + type ABM1_HG_RESULT_DATA; \ + type ABM1_HG_RESULT_INDEX; \ + type ABM1_ACE_SLOPE_DATA; \ + type ABM1_ACE_OFFSET_DATA; \ + type ABM1_ACE_OFFSET_SLOPE_INDEX; \ + type ABM1_ACE_THRES_INDEX; \ + type ABM1_ACE_IGNORE_MASTER_LOCK_EN; \ + type ABM1_ACE_READBACK_DB_REG_VALUE_EN; \ + type ABM1_ACE_DBUF_REG_UPDATE_PENDING; \ + type ABM1_ACE_LOCK; \ + type ABM1_ACE_THRES_DATA_1; \ + type ABM1_ACE_THRES_DATA_2 struct dce_abm_shift { ABM_REG_FIELD_LIST(uint8_t); @@ -288,6 +305,16 @@ struct dce_abm_registers { uint32_t DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES; uint32_t DC_ABM1_HGLS_REG_READ_PROGRESS; uint32_t DC_ABM1_ACE_OFFSET_SLOPE_0; + uint32_t DC_ABM1_ACE_OFFSET_SLOPE_DATA; + uint32_t DC_ABM1_ACE_PWL_CNTL; + uint32_t DC_ABM1_HG_BIN_33_40_SHIFT_INDEX; + uint32_t DC_ABM1_HG_BIN_33_64_SHIFT_FLAG; + uint32_t DC_ABM1_HG_BIN_41_48_SHIFT_INDEX; + uint32_t DC_ABM1_HG_BIN_49_56_SHIFT_INDEX; + uint32_t DC_ABM1_HG_BIN_57_64_SHIFT_INDEX; + uint32_t DC_ABM1_HG_RESULT_DATA; + uint32_t DC_ABM1_HG_RESULT_INDEX; + uint32_t DC_ABM1_ACE_THRES_DATA; uint32_t DC_ABM1_ACE_THRES_12; uint32_t MASTER_COMM_CNTL_REG; uint32_t MASTER_COMM_CMD_REG; diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 808855886183..e115ff91aaaa 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -974,10 +974,12 @@ enum dc_status resource_map_phy_clock_resources( || dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->clock_source = dc->res_pool->dp_clock_source; - else - pipe_ctx->clock_source = find_matching_pll( - &context->res_ctx, dc->res_pool, - stream); + else { + if (stream && stream->link && stream->link->link_enc) + pipe_ctx->clock_source = find_matching_pll( + &context->res_ctx, dc->res_pool, + stream); + } if (pipe_ctx->clock_source == NULL) return DC_NO_CLOCK_SOURCE_RESOURCE; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 7a00fe525dfb..3538973bd0c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -308,7 +308,10 @@ bool cm_helper_convert_to_custom_float( #define NUMBER_REGIONS 32 #define NUMBER_SW_SEGMENTS 16 -bool cm_helper_translate_curve_to_hw_format( +#define DC_LOGGER \ + ctx->logger + +bool cm_helper_translate_curve_to_hw_format(struct dc_context *ctx, const struct dc_transfer_func *output_tf, struct pwl_params *lut_params, bool fixpoint) { @@ -482,10 +485,18 @@ bool cm_helper_translate_curve_to_hw_format( rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); + if (fixpoint == true) { - rgb->delta_red_reg = dc_fixpt_clamp_u0d10(rgb->delta_red); - rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green); - rgb->delta_blue_reg = dc_fixpt_clamp_u0d10(rgb->delta_blue); + uint32_t red_clamp = dc_fixpt_clamp_u0d14(rgb->delta_red); + uint32_t green_clamp = dc_fixpt_clamp_u0d14(rgb->delta_green); + uint32_t blue_clamp = dc_fixpt_clamp_u0d14(rgb->delta_blue); + + if (red_clamp >> 10 || green_clamp >> 10 || blue_clamp >> 10) + DC_LOG_WARNING("Losing delta precision while programming shaper LUT."); + + 
rgb->delta_red_reg = red_clamp & 0x3ff; + rgb->delta_green_reg = green_clamp & 0x3ff; + rgb->delta_blue_reg = blue_clamp & 0x3ff; rgb->red_reg = dc_fixpt_clamp_u0d14(rgb->red); rgb->green_reg = dc_fixpt_clamp_u0d14(rgb->green); rgb->blue_reg = dc_fixpt_clamp_u0d14(rgb->blue); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h index 3b8cd7410498..0a68b63d6126 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h @@ -106,6 +106,7 @@ bool cm_helper_convert_to_custom_float( bool fixpoint); bool cm_helper_translate_curve_to_hw_format( + struct dc_context *ctx, const struct dc_transfer_func *output_tf, struct pwl_params *lut_params, bool fixpoint); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 20a1582be0b1..a50309039d08 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1843,7 +1843,7 @@ bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, /* dcn10_translate_regamma_to_hw_format takes 750us, only do it when full * update. */ - else if (cm_helper_translate_curve_to_hw_format( + else if (cm_helper_translate_curve_to_hw_format(dc->ctx, stream->out_transfer_func, &dpp->regamma_params, false)) { dpp->funcs->dpp_program_regamma_pwl( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index eaf9e9ccad2a..4492bc2392b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -867,7 +867,7 @@ bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, params = &stream->out_transfer_func->pwl; else if (pipe_ctx->stream->out_transfer_func->type == TF_TYPE_DISTRIBUTED_POINTS && - cm_helper_translate_curve_to_hw_format( + cm_helper_translate_curve_to_hw_format(dc->ctx, stream->out_transfer_func, &mpc->blender_params, false)) params = &mpc->blender_params; @@ -896,7 +896,7 @@ bool dcn20_set_blend_lut( if (plane_state->blend_tf->type == TF_TYPE_HWPWL) blend_lut = &plane_state->blend_tf->pwl; else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format( + cm_helper_translate_curve_to_hw_format(plane_state->ctx, plane_state->blend_tf, &dpp_base->regamma_params, false); blend_lut = &dpp_base->regamma_params; @@ -918,7 +918,7 @@ bool dcn20_set_shaper_3dlut( if (plane_state->in_shaper_func->type == TF_TYPE_HWPWL) shaper_lut = &plane_state->in_shaper_func->pwl; else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format( + cm_helper_translate_curve_to_hw_format(plane_state->ctx, plane_state->in_shaper_func, &dpp_base->shaper_params, true); shaper_lut = &dpp_base->shaper_params; @@ -1764,8 +1764,9 @@ static void dcn20_program_pipe( hws->funcs.set_hdr_multiplier(pipe_ctx); if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || - pipe_ctx->plane_state->update_flags.bits.gamma_change) + pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change || + pipe_ctx->plane_state->update_flags.bits.lut_3d) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* 
dcn10_translate_regamma_to_hw_format takes 750us to finish diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c index 6a3d3a0ec0a3..701c7d8bc038 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c @@ -280,7 +280,7 @@ bool dwb3_ogam_set_input_transfer_func( dwb_ogam_lut = kzalloc(sizeof(*dwb_ogam_lut), GFP_KERNEL); if (dwb_ogam_lut) { - cm_helper_translate_curve_to_hw_format( + cm_helper_translate_curve_to_hw_format(dwbc->ctx, in_transfer_func_dwb_ogam, dwb_ogam_lut, false); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index b9753867d97b..bf8864bc8a99 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -106,7 +106,7 @@ static bool dcn30_set_mpc_shaper_3dlut(struct pipe_ctx *pipe_ctx, if (stream->func_shaper->type == TF_TYPE_HWPWL) { shaper_lut = &stream->func_shaper->pwl; } else if (stream->func_shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format(stream->func_shaper, + cm_helper_translate_curve_to_hw_format(stream->ctx, stream->func_shaper, &dpp_base->shaper_params, true); shaper_lut = &dpp_base->shaper_params; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 1a0284a068b2..abe4c12a10b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -725,7 +725,8 @@ static const struct dc_debug_options debug_defaults_drv = { .dwb_fi_phase = -1, // -1 = disable, .dmub_command_table = true, .use_max_lb = true, - .exit_idle_opt_for_cursor_updates = true + .exit_idle_opt_for_cursor_updates = true, + .enable_legacy_fast_update = false, }; static const struct dc_panel_config panel_config_defaults = { @@ -1986,11 +1987,10 @@ bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, if (!is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(context)) return false; - // check if freesync enabled if (!context->streams[0]->allow_freesync) return false; - if (context->streams[0]->vrr_active_variable) + if (context->streams[0]->vrr_active_variable && dc->debug.disable_fams_gaming) return false; context->streams[0]->fpo_in_use = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index 7dc065ea247a..5ad6a22ee47d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -95,7 +95,8 @@ static const struct dc_debug_options debug_defaults_drv = { .dwb_fi_phase = -1, // -1 = disable, .dmub_command_table = true, .use_max_lb = true, - .exit_idle_opt_for_cursor_updates = true + .exit_idle_opt_for_cursor_updates = true, + .enable_legacy_fast_update = false, }; static const struct dc_panel_config panel_config_defaults = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 6d9761395288..45956ef6f3f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -1190,6 +1190,7 @@ static bool dcn303_resource_construct( dc->caps.dp_hdmi21_pcon_support = true; + dc->config.dc_mode_clk_limit_support = true; /* read VBIOS LTTPR 
caps */ if (ctx->dc_bios->funcs->get_lttpr_caps) { enum bp_result bp_query_result; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index cf23d7bc560a..0746ed31d1d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -332,7 +332,7 @@ static void dccg314_dpp_root_clock_control( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (dccg->dpp_clock_gated[dpp_inst] == clock_on) + if (dccg->dpp_clock_gated[dpp_inst] != clock_on) return; if (clock_on) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 7a43f8868500..4d2820ffe468 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -337,13 +337,14 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); } -void dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div) +unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div) { struct dc_stream_state *stream = pipe_ctx->stream; + unsigned int odm_combine_factor = 0; bool two_pix_per_container = false; two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); - get_odm_config(pipe_ctx, NULL); + odm_combine_factor = get_odm_config(pipe_ctx, NULL); if (stream->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { *k1_div = PIXEL_RATE_DIV_BY_1; @@ -361,11 +362,15 @@ void dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int } else { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_4; + if (odm_combine_factor == 2) + *k2_div = PIXEL_RATE_DIV_BY_2; } } if ((*k1_div == PIXEL_RATE_DIV_NA) && (*k2_div == PIXEL_RATE_DIV_NA)) ASSERT(false); + + return odm_combine_factor; } void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx) @@ -424,27 +429,6 @@ void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, hws->ctx->dc->res_pool->dccg, dpp_inst, clock_on); } -void dcn314_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on) -{ - struct dc_context *ctx = hws->ctx; - union dmub_rb_cmd cmd; - - if (hws->ctx->dc->debug.disable_hubp_power_gate) - return; - - PERF_TRACE(); - - memset(&cmd, 0, sizeof(cmd)); - cmd.domain_control.header.type = DMUB_CMD__VBIOS; - cmd.domain_control.header.sub_type = DMUB_CMD__VBIOS_DOMAIN_CONTROL; - cmd.domain_control.header.payload_bytes = sizeof(cmd.domain_control.data); - cmd.domain_control.data.inst = hubp_inst; - cmd.domain_control.data.power_gate = !power_on; - - dm_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); - - PERF_TRACE(); -} static void apply_symclk_on_tx_off_wa(struct dc_link *link) { /* There are use cases where SYMCLK is referenced by OTG. 
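Backing up to the dcn314_calculate_dccg_k1_k2_values() change above: for the default (non-128b/132b, non-420) path it boils down to one rule, divide the pixel rate by 4 unless ODM 2:1 combine is active. A reduced sketch; the local enum stands in for the PIXEL_RATE_DIV_* values and the other signal-type branches are omitted.

#include <stdio.h>

enum { DIV_BY_1 = 1, DIV_BY_2 = 2, DIV_BY_4 = 4 };	/* simplified stand-ins */

static void pick_k_divs(unsigned int odm_combine_factor,
			unsigned int *k1_div, unsigned int *k2_div)
{
	*k1_div = DIV_BY_1;
	*k2_div = DIV_BY_4;
	if (odm_combine_factor == 2)
		*k2_div = DIV_BY_2;	/* two OPPs now share the pixel work */
}

int main(void)
{
	unsigned int k1, k2;

	pick_k_divs(1, &k1, &k2);
	printf("odm=1 -> k1=/%u k2=/%u\n", k1, k2);
	pick_k_divs(2, &k1, &k2);
	printf("odm=2 -> k1=/%u k2=/%u\n", k1, k2);
	return 0;
}

This is also why the function now returns the ODM combine factor instead of discarding it: callers need the same factor the divider decision was based on.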
For instance diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h index 96035c75e0df..eafcc4ea6d24 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h @@ -37,14 +37,12 @@ void dcn314_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool po void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable); -void dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); +unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx); void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); -void dcn314_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on); - void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool clock_on); void dcn314_disable_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal); diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c index 86d6a514dec0..ca8fe55c33b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c @@ -139,7 +139,7 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .plane_atomic_power_down = dcn10_plane_atomic_power_down, .enable_power_gating_plane = dcn314_enable_power_gating_plane, .dpp_root_clock_control = dcn314_dpp_root_clock_control, - .hubp_pg_control = dcn314_hubp_pg_control, + .hubp_pg_control = dcn31_hubp_pg_control, .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, .update_odm = dcn314_update_odm, .dsc_pg_control = dcn314_dsc_pg_control, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index a840b008d660..6a9024aa3285 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1883,13 +1883,6 @@ static bool dcn314_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; - /* Disable pipe power gating when unsupported */ - if (ctx->asic_id.hw_internal_rev == 0x01 || - ctx->asic_id.hw_internal_rev == 0x80) { - dc->debug.disable_dpp_power_gate = true; - dc->debug.disable_hubp_power_gate = true; - } - /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { @@ -1910,6 +1903,14 @@ static bool dcn314_resource_construct( dc->debug = debug_defaults_drv; else dc->debug = debug_defaults_diags; + + /* Disable pipe power gating */ + dc->debug.disable_dpp_power_gate = true; + dc->debug.disable_hubp_power_gate = true; + + /* Disable root clock optimization */ + dc->debug.root_clock_optimization.u32All = 0; + // Init the vm_helper if (dc->vm_helper) vm_helper_init(dc->vm_helper, 16); diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index f1153941907e..df3a438abda8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -1610,7 +1610,7 @@ static int source_format_to_bpp (enum source_format_class SourcePixelFormat) { if (SourcePixelFormat == dm_444_64) 
return 8; - else if (SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_16) + else if (SourcePixelFormat == dm_444_16) return 2; else if (SourcePixelFormat == dm_444_8) return 1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c index 2d604f7ee782..ca5b4b28a664 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c @@ -179,6 +179,7 @@ static struct hubp_funcs dcn32_hubp_funcs = { .hubp_setup_interdependent = hubp2_setup_interdependent, .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, + .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp3_dcc_control, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp32_cursor_set_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index c586468872e2..d52d5feeb311 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -448,7 +448,7 @@ bool dcn32_set_mpc_shaper_3dlut( if (stream->func_shaper->type == TF_TYPE_HWPWL) shaper_lut = &stream->func_shaper->pwl; else if (stream->func_shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format( + cm_helper_translate_curve_to_hw_format(stream->ctx, stream->func_shaper, &dpp_base->shaper_params, true); shaper_lut = &dpp_base->shaper_params; @@ -484,7 +484,7 @@ bool dcn32_set_mcm_luts( if (plane_state->blend_tf->type == TF_TYPE_HWPWL) lut_params = &plane_state->blend_tf->pwl; else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format( + cm_helper_translate_curve_to_hw_format(plane_state->ctx, plane_state->blend_tf, &dpp_base->regamma_params, false); lut_params = &dpp_base->regamma_params; @@ -499,7 +499,7 @@ bool dcn32_set_mcm_luts( else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) { // TODO: dpp_base replace ASSERT(false); - cm_helper_translate_curve_to_hw_format( + cm_helper_translate_curve_to_hw_format(plane_state->ctx, plane_state->in_shaper_func, &dpp_base->shaper_params, true); lut_params = &dpp_base->shaper_params; @@ -1141,16 +1141,14 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * } } -void dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div) +unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div) { struct dc_stream_state *stream = pipe_ctx->stream; + unsigned int odm_combine_factor = 0; bool two_pix_per_container = false; - // For phantom pipes, use the same programming as the main pipes - if (pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) { - stream = pipe_ctx->stream->mall_stream_config.paired_stream; - } two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); + odm_combine_factor = get_odm_config(pipe_ctx, NULL); if (stream->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { *k1_div = PIXEL_RATE_DIV_BY_1; @@ -1168,13 +1166,15 @@ void dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int * } else { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_4; - if (dcn32_is_dp_dig_pixel_rate_div_policy(pipe_ctx)) + if ((odm_combine_factor == 2) || dcn32_is_dp_dig_pixel_rate_div_policy(pipe_ctx)) *k2_div = PIXEL_RATE_DIV_BY_2; } } if 
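Both dcn314_calculate_dccg_k1_k2_values (earlier) and the dcn32 version above now return the ODM combine factor they fetch from get_odm_config(), and a two-pipe combine halves the k2 divider instead of quartering it. A condensed sketch of the selection, with the signal checks reduced to booleans and the divider encodings illustrative only; the real functions have more branches:

enum pixel_rate_div {
	PIXEL_RATE_DIV_BY_1 = 0,
	PIXEL_RATE_DIV_BY_2 = 1,
	PIXEL_RATE_DIV_BY_4 = 3,	/* illustrative encodings */
};

static unsigned int calc_k1_k2(bool is_128b_132b, bool two_pix_per_container,
			       unsigned int odm_combine_factor,
			       unsigned int *k1_div, unsigned int *k2_div)
{
	*k1_div = PIXEL_RATE_DIV_BY_1;

	if (is_128b_132b || two_pix_per_container) {
		*k2_div = is_128b_132b ? PIXEL_RATE_DIV_BY_1 : PIXEL_RATE_DIV_BY_2;
	} else {
		*k2_div = PIXEL_RATE_DIV_BY_4;
		if (odm_combine_factor == 2)	/* two pipes split the stream */
			*k2_div = PIXEL_RATE_DIV_BY_2;
	}

	return odm_combine_factor;	/* now propagated to the caller */
}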
((*k1_div == PIXEL_RATE_DIV_NA) && (*k2_div == PIXEL_RATE_DIV_NA)) ASSERT(false); + + return odm_combine_factor; } void dcn32_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h index bf9bffabe0c0..2d2628f31bed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h @@ -71,7 +71,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context); void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx); -void dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); +unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); void dcn32_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c index c2490e16a66a..777b2fac20c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c @@ -56,6 +56,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .enable_audio_stream = dce110_enable_audio_stream, .disable_audio_stream = dce110_disable_audio_stream, .disable_plane = dcn20_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, .pipe_control_lock = dcn20_pipe_control_lock, .interdependent_update_lock = dcn10_lock_all_pipes, .cursor_lock = dcn10_cursor_lock, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 19f134caa8ad..1cc09799f92d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -732,6 +732,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dp_plus_plus_wa = true, .fpo_vactive_min_active_margin_us = 200, .fpo_vactive_max_blank_us = 1000, + .enable_legacy_fast_update = false, }; static struct dce_aux *dcn32_aux_engine_create( @@ -2214,6 +2215,7 @@ static bool dcn32_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index a9c41ef0751f..5be242a1b82c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -595,11 +595,10 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre if (!is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(fpo_candidate_stream, fpo_vactive_margin_us)) return NULL; - // check if freesync enabled if (!fpo_candidate_stream->allow_freesync) return NULL; - if (fpo_candidate_stream->vrr_active_variable) + if (fpo_candidate_stream->vrr_active_variable && dc->debug.disable_fams_gaming) return NULL; return fpo_candidate_stream; diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index ea204742ad35..a53478e15ce3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -730,6 +730,8 @@ static const struct 
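The resource-helper hunk above narrows the FPO disqualification in dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch: a stream with variable-rate VRR active is now rejected only when debug.disable_fams_gaming is also set, instead of unconditionally. A minimal sketch of the candidate filter, with a trimmed struct; the real code checks several more conditions before these:

struct stream {
	bool allow_freesync;
	bool vrr_active_variable;
};

/* returns the candidate, or NULL when FW-based vblank stretch must not be used */
static const struct stream *fpo_candidate(const struct stream *s,
					  bool disable_fams_gaming)
{
	if (!s->allow_freesync)
		return NULL;

	/* variable-rate VRR no longer disqualifies by itself */
	if (s->vrr_active_variable && disable_fams_gaming)
		return NULL;

	return s;
}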
dc_debug_options debug_defaults_drv = { .disable_subvp_high_refresh = false, .fpo_vactive_min_active_margin_us = 200, .fpo_vactive_max_blank_us = 1000, + .enable_legacy_fast_update = false, + .disable_dc_mode_overwrite = true, }; static struct dce_aux *dcn321_aux_engine_create( @@ -1754,6 +1756,7 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->config.dc_mode_clk_limit_support = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index 6266b0788387..7bf4bb7ad044 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -4356,12 +4356,16 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesLuma), locals->SwathHeightYPerState[i][j][k]); - - locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( - locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * - locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], - locals->EffectiveLBLatencyHidingSourceLinesChroma), - locals->SwathHeightCPerState[i][j][k]); + if (locals->LinesInDETChroma) { + locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + + dml_min(locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * + locals->BytePerPixelInDETC[k] * + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], + locals->EffectiveLBLatencyHidingSourceLinesChroma), + locals->SwathHeightCPerState[i][j][k]); + } else { + locals->EffectiveDETLBLinesChroma = 0; + } if (locals->BytePerPixelInDETC[k] == 0) { locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index c9afddd11589..d9e049e7ff0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -33,7 +33,7 @@ #include "dml/display_mode_vba.h" struct _vcs_dpi_ip_params_st dcn3_14_ip = { - .VBlankNomDefaultUS = 800, + .VBlankNomDefaultUS = 668, .gpuvm_enable = 1, .gpuvm_max_page_table_levels = 1, .hostvm_enable = 1, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index e2bb2b9971f3..a95034801712 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -485,24 +485,20 @@ static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) } } -void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, +static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, struct _vcs_dpi_voltage_scaling_st *entry) { int i = 0; int index = 0; - float net_bw_of_new_state = 0; dc_assert_fp_enabled(); - get_optimal_ntuple(entry); - if (*num_entries == 0) { table[0] = *entry; (*num_entries)++; } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + while (entry->net_bw_in_kbytes_sec > 
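The display_mode_vba_20 hunk above guards the chroma computation: when LinesInDETChroma is zero there is no chroma allocation to reason about, so the effective line count is forced to zero rather than running the floor/min derivation. A sketch with the bandwidth-scaled term precomputed and a dml_floor-like helper assumed:

#include <math.h>

/* assumed helper mirroring dml_floor(): floor x to a multiple of g */
static double floor_to(double x, double g)
{
	return g > 0.0 ? floor(x / g) * g : 0.0;
}

static double effective_det_lb_lines(double lines_in_det,
				     double bw_scaled_lines,
				     double lb_hiding_lines,
				     double swath_height)
{
	if (lines_in_det == 0.0)
		return 0.0;	/* no chroma data in the DET: skip the derivation */

	return floor_to(lines_in_det + fmin(bw_scaled_lines, lb_hiding_lines),
			swath_height);
}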
table[index].net_bw_in_kbytes_sec) { index++; if (index >= *num_entries) break; @@ -2349,6 +2345,63 @@ void dcn32_patch_dpm_table(struct clk_bw_params *bw_params) bw_params->clk_table.entries[0].memclk_mhz = dcn3_2_soc.clock_limits[0].dram_speed_mts / 16; } +static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry, + struct _vcs_dpi_voltage_scaling_st *second_entry) +{ + struct _vcs_dpi_voltage_scaling_st temp_entry = *first_entry; + *first_entry = *second_entry; + *second_entry = temp_entry; +} + +/* + * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by DCFCLK + */ +static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + unsigned int start_index = 0; + unsigned int end_index = 0; + unsigned int current_bw = 0; + + for (int i = 0; i < (*num_entries - 1); i++) { + if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { + current_bw = table[i].net_bw_in_kbytes_sec; + start_index = i; + end_index = ++i; + + while ((i < (*num_entries - 1)) && (table[i+1].net_bw_in_kbytes_sec == current_bw)) + end_index = ++i; + } + + if (start_index != end_index) { + for (int j = start_index; j < end_index; j++) { + for (int k = start_index; k < end_index; k++) { + if (table[k].dcfclk_mhz > table[k+1].dcfclk_mhz) + swap_table_entries(&table[k], &table[k+1]); + } + } + } + + start_index = 0; + end_index = 0; + + } +} + +/* + * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing + * and remove entries that do not + */ +static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + for (int i = 0; i < (*num_entries - 1); i++) { + if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { + if ((table[i].dram_speed_mts > table[i+1].dram_speed_mts) || + (table[i].fabricclk_mhz > table[i+1].fabricclk_mhz)) + remove_entry_from_table_at_index(table, num_entries, i); + } + } +} + /* * override_max_clk_values - Overwrite the max clock frequencies with the max DC mode timings * Input: @@ -2480,6 +2533,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -2488,6 +2543,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); // Insert the UCLK DPMS @@ -2496,6 +2553,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -2506,6 +2565,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); } } @@ -2515,6 +2576,8 @@ static int 
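insert_entry_into_table_sorted becomes static and compares a net_bw_in_kbytes_sec field that callers now precompute once (get_optimal_ntuple() followed by calculate_net_bw_in_kbytes_sec()), instead of recomputing the bandwidth of both operands on every loop iteration. The underlying pattern, sketched with a trimmed state struct and the FP-context assertion omitted:

#include <string.h>

struct soc_state {
	float net_bw_in_kbytes_sec;	/* precomputed sort key */
	/* clock fields elided */
};

static void insert_sorted(struct soc_state *table, unsigned int *num_entries,
			  const struct soc_state *entry)
{
	unsigned int i = 0;

	while (i < *num_entries &&
	       entry->net_bw_in_kbytes_sec > table[i].net_bw_in_kbytes_sec)
		i++;

	/* shift the tail up by one and drop the new entry into the gap */
	memmove(&table[i + 1], &table[i],
		(*num_entries - i) * sizeof(*table));
	table[i] = *entry;
	(*num_entries)++;
}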
build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = max_clk_data.fclk_mhz; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -2530,6 +2593,21 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk remove_entry_from_table_at_index(table, num_entries, i); } + // Insert entry with all max dc limits without bandwidth matching + if (!disable_dc_mode_overwrite) { + struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry; + + max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz; + max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz; + max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 16; + + max_dc_limits_entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry); + insert_entry_into_table_sorted(table, num_entries, &max_dc_limits_entry); + + sort_entries_with_same_bw(table, num_entries); + remove_inconsistent_entries(table, num_entries); + } + // At this point, the table only contains supported points of interest // it could be used as is, but some states may be redundant due to // coarse grained nature of some clocks, so we want to round up to diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index a4206b71d650..defbee866be6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -39,10 +39,6 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, uint8_t dcn32_predict_pipe_split(struct dc_state *context, display_e2e_pipe_params_st *pipe_e2e); -void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, - unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry); - void dcn32_set_phantom_stream_timing(struct dc *dc, struct dc_state *context, struct pipe_ctx *ref_pipe, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index f0683fd9d3f0..b26fcf86014c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -207,24 +207,20 @@ static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st * return limiting_bw_kbytes_sec; } -void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, +static void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, struct _vcs_dpi_voltage_scaling_st *entry) { int i = 0; int index = 0; - float net_bw_of_new_state = 0; dc_assert_fp_enabled(); - get_optimal_ntuple(entry); - if (*num_entries == 0) { table[0] = *entry; (*num_entries)++; } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + while (entry->net_bw_in_kbytes_sec > table[index].net_bw_in_kbytes_sec) { index++; if (index >= *num_entries) break; @@ -252,6 +248,63 @@ static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); } +static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry, + struct _vcs_dpi_voltage_scaling_st *second_entry) +{ + struct _vcs_dpi_voltage_scaling_st temp_entry = *first_entry; + 
*first_entry = *second_entry; + *second_entry = temp_entry; +} + +/* + * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by DCFCLK + */ +static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + unsigned int start_index = 0; + unsigned int end_index = 0; + unsigned int current_bw = 0; + + for (int i = 0; i < (*num_entries - 1); i++) { + if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { + current_bw = table[i].net_bw_in_kbytes_sec; + start_index = i; + end_index = ++i; + + while ((i < (*num_entries - 1)) && (table[i+1].net_bw_in_kbytes_sec == current_bw)) + end_index = ++i; + } + + if (start_index != end_index) { + for (int j = start_index; j < end_index; j++) { + for (int k = start_index; k < end_index; k++) { + if (table[k].dcfclk_mhz > table[k+1].dcfclk_mhz) + swap_table_entries(&table[k], &table[k+1]); + } + } + } + + start_index = 0; + end_index = 0; + + } +} + +/* + * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing + * and remove entries that do not follow this order + */ +static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + for (int i = 0; i < (*num_entries - 1); i++) { + if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { + if ((table[i].dram_speed_mts > table[i+1].dram_speed_mts) || + (table[i].fabricclk_mhz > table[i+1].fabricclk_mhz)) + remove_entry_from_table_at_index(table, num_entries, i); + } + } +} + /* * override_max_clk_values - Overwrite the max clock frequencies with the max DC mode timings * Input: @@ -362,11 +415,11 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk if (max_clk_data.fclk_mhz == 0) max_clk_data.fclk_mhz = max_clk_data.dcfclk_mhz * - dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / - dcn3_2_soc.pct_ideal_fabric_bw_after_urgent; + dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / + dcn3_21_soc.pct_ideal_fabric_bw_after_urgent; if (max_clk_data.phyclk_mhz == 0) - max_clk_data.phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; + max_clk_data.phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; *num_entries = 0; entry.dispclk_mhz = max_clk_data.dispclk_mhz; @@ -374,8 +427,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.dppclk_mhz = max_clk_data.dppclk_mhz; entry.dtbclk_mhz = max_clk_data.dtbclk_mhz; entry.phyclk_mhz = max_clk_data.phyclk_mhz; - entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; - entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; + entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; + entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; // Insert all the DCFCLK STAs for (i = 0; i < num_dcfclk_stas; i++) { @@ -383,6 +436,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -391,6 +446,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); // Insert the UCLK DPMS @@ -399,6 +456,8 @@ 
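sort_entries_with_same_bw and remove_inconsistent_entries, duplicated here for dcn321, post-process the table: each run of entries with equal net bandwidth is bubble-sorted by DCFCLK, and a neighbour whose MEMCLK or FCLK regresses within such a run is dropped so every clock stays monotonic. A compact restatement of the run-sorting pass, with simplified control flow:

struct vstate {
	float net_bw_in_kbytes_sec;
	unsigned int dcfclk_mhz;
	unsigned int fabricclk_mhz;
	unsigned int dram_speed_mts;
};

static void swap_states(struct vstate *a, struct vstate *b)
{
	struct vstate t = *a;
	*a = *b;
	*b = t;
}

static void sort_equal_bw_runs_by_dcfclk(struct vstate *t, unsigned int n)
{
	unsigned int i = 0;

	while (i + 1 < n) {
		unsigned int start = i, end = i;

		/* extend the run while the bandwidth key stays equal */
		while (end + 1 < n &&
		       t[end + 1].net_bw_in_kbytes_sec == t[start].net_bw_in_kbytes_sec)
			end++;

		for (unsigned int j = start; j < end; j++)	/* bubble sort the run */
			for (unsigned int k = start; k < end; k++)
				if (t[k].dcfclk_mhz > t[k + 1].dcfclk_mhz)
					swap_states(&t[k], &t[k + 1]);

		i = end + 1;
	}
}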
static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -409,6 +468,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); } } @@ -418,6 +479,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = max_clk_data.fclk_mhz; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -433,6 +496,23 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk remove_entry_from_table_at_index(table, num_entries, i); } + // Insert entry with all max dc limits without bandwidth matching + if (!disable_dc_mode_overwrite) { + struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry; + + max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz; + max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz; + max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 16; + + max_dc_limits_entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry); + dcn321_insert_entry_into_table_sorted(table, num_entries, &max_dc_limits_entry); + + sort_entries_with_same_bw(table, num_entries); + remove_inconsistent_entries(table, num_entries); + } + + + // At this point, the table only contains supported points of interest // it could be used as is, but some states may be redundant due to // coarse grained nature of some clocks, so we want to round up to diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h index e8fad9b4be69..c6623b3705ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h @@ -29,10 +29,6 @@ #include "dml/display_mode_vba.h" -void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, - unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry); - void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index ff0246a9458f..fb17f8868cb4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -167,6 +167,7 @@ struct _vcs_dpi_voltage_scaling_st { double phyclk_mhz; double dppclk_mhz; double dtbclk_mhz; + float net_bw_in_kbytes_sec; }; /** diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 6faf40fa5c69..ecb7bcc39469 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -230,6 +230,7 @@ struct clk_bw_params { unsigned int dram_channel_width_bytes; unsigned int dispclk_vco_khz; unsigned int dc_mode_softmax_memclk; + unsigned int
max_memclk_mhz; struct clk_limit_table clk_table; struct wm_table wm_table; struct dummy_pstate_entry dummy_pstate_table[4]; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h index a151865a3a20..4ca4192c1e12 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h @@ -156,7 +156,7 @@ struct hwseq_private_funcs { void (*program_mall_pipe_config)(struct dc *dc, struct dc_state *context); void (*update_force_pstate)(struct dc *dc, struct dc_state *context); void (*update_mall_sel)(struct dc *dc, struct dc_state *context); - void (*calculate_dccg_k1_k2_values)(struct pipe_ctx *pipe_ctx, + unsigned int (*calculate_dccg_k1_k2_values)(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); void (*set_pixels_per_cycle)(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index ba95facc4ee8..ef8739df91bc 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -82,8 +82,15 @@ bool dp_parse_link_loss_status( } /* Check interlane align.*/ - if (sink_status_changed || - !hpd_irq_dpcd_data->bytes.lane_status_updated.bits.INTERLANE_ALIGN_DONE) { + if (link_dp_get_encoding_format(&link->cur_link_settings) == DP_128b_132b_ENCODING && + (!hpd_irq_dpcd_data->bytes.lane_status_updated.bits.EQ_INTERLANE_ALIGN_DONE_128b_132b || + !hpd_irq_dpcd_data->bytes.lane_status_updated.bits.CDS_INTERLANE_ALIGN_DONE_128b_132b)) { + sink_status_changed = true; + } else if (!hpd_irq_dpcd_data->bytes.lane_status_updated.bits.INTERLANE_ALIGN_DONE) { + sink_status_changed = true; + } + + if (sink_status_changed) { DC_LOG_HW_HPD_IRQ("%s: Link Status changed.\n", __func__); @@ -201,6 +208,25 @@ void dp_handle_link_loss(struct dc_link *link) } } +static void read_dpcd204h_on_irq_hpd(struct dc_link *link, union hpd_irq_data *irq_data) +{ + enum dc_status retval; + union lane_align_status_updated dpcd_lane_status_updated; + + retval = core_link_read_dpcd( + link, + DP_LANE_ALIGN_STATUS_UPDATED, + &dpcd_lane_status_updated.raw, + sizeof(union lane_align_status_updated)); + + if (retval == DC_OK) { + irq_data->bytes.lane_status_updated.bits.EQ_INTERLANE_ALIGN_DONE_128b_132b = + dpcd_lane_status_updated.bits.EQ_INTERLANE_ALIGN_DONE_128b_132b; + irq_data->bytes.lane_status_updated.bits.CDS_INTERLANE_ALIGN_DONE_128b_132b = + dpcd_lane_status_updated.bits.CDS_INTERLANE_ALIGN_DONE_128b_132b; + } +} + enum dc_status dp_read_hpd_rx_irq_data( struct dc_link *link, union hpd_irq_data *irq_data) @@ -242,6 +268,13 @@ enum dc_status dp_read_hpd_rx_irq_data( irq_data->bytes.lane23_status.raw = tmp[DP_LANE2_3_STATUS_ESI - DP_SINK_COUNT_ESI]; irq_data->bytes.lane_status_updated.raw = tmp[DP_LANE_ALIGN_STATUS_UPDATED_ESI - DP_SINK_COUNT_ESI]; irq_data->bytes.sink_status.raw = tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI]; + + /* + * This display doesn't have correct values in DPCD200Eh. + * Read and check DPCD204h instead. 
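The read_dpcd204h_on_irq_hpd workaround re-reads DP_LANE_ALIGN_STATUS_UPDATED (DPCD 204h) and overrides only the two 128b/132b interlane-align bits when the panel reports stale values in the 200Eh ESI range. A sketch of the read-and-merge; the transport hook and the bit positions are assumptions, not the driver's actual layout:

#include <stddef.h>
#include <stdint.h>

#define DP_LANE_ALIGN_STATUS_UPDATED 0x204

// hypothetical transport hook; returns 0 on success
int read_dpcd(uint32_t address, uint8_t *buf, size_t len);

struct align_bits {
	uint8_t eq_done;	// EQ_INTERLANE_ALIGN_DONE_128b_132b
	uint8_t cds_done;	// CDS_INTERLANE_ALIGN_DONE_128b_132b
};

static void refresh_align_bits(struct align_bits *irq_bits)
{
	uint8_t raw;

	if (read_dpcd(DP_LANE_ALIGN_STATUS_UPDATED, &raw, 1) != 0)
		return;		// keep the possibly stale ESI values on failure

	irq_bits->eq_done = (raw >> 2) & 1;	// bit positions assumed
	irq_bits->cds_done = (raw >> 3) & 1;
}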
+ */ + if (link->wa_flags.read_dpcd204h_on_irq_hpd) + read_dpcd204h_on_irq_hpd(link, irq_data); } return retval; diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 7c9a2b34bd05..4585e0419da6 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -367,6 +367,8 @@ struct dmub_srv_hw_funcs { bool (*is_supported)(struct dmub_srv *dmub); + bool (*is_psrsu_supported)(struct dmub_srv *dmub); + bool (*is_hw_init)(struct dmub_srv *dmub); void (*enable_dmub_boot_options)(struct dmub_srv *dmub, @@ -492,7 +494,7 @@ struct dmub_notification { * of a firmware to know if feature or functionality is supported or present. */ #define DMUB_FW_VERSION(major, minor, revision) \ - ((((major) & 0xFF) << 24) | (((minor) & 0xFF) << 16) | ((revision) & 0xFFFF)) + ((((major) & 0xFF) << 24) | (((minor) & 0xFF) << 16) | (((revision) & 0xFF) << 8)) /** * dmub_srv_create() - creates the DMUB service. diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index ebf7aeec4029..5e952541e72d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -302,6 +302,11 @@ bool dmub_dcn31_is_supported(struct dmub_srv *dmub) return supported; } +bool dmub_dcn31_is_psrsu_supported(struct dmub_srv *dmub) +{ + return dmub->fw_version >= DMUB_FW_VERSION(4, 0, 59); +} + void dmub_dcn31_set_gpint(struct dmub_srv *dmub, union dmub_gpint_data_register reg) { diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h index 7d5c10ee539b..89c5a948b67d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h @@ -221,6 +221,8 @@ bool dmub_dcn31_is_hw_init(struct dmub_srv *dmub); bool dmub_dcn31_is_supported(struct dmub_srv *dmub); +bool dmub_dcn31_is_psrsu_supported(struct dmub_srv *dmub); + void dmub_dcn31_set_gpint(struct dmub_srv *dmub, union dmub_gpint_data_register reg); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c index 48a06dbd9be7..f161aeb7e7c4 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c @@ -60,3 +60,8 @@ const struct dmub_srv_dcn31_regs dmub_srv_dcn314_regs = { { DMUB_DCN31_FIELDS() }, #undef DMUB_SF }; + +bool dmub_dcn314_is_psrsu_supported(struct dmub_srv *dmub) +{ + return dmub->fw_version >= DMUB_FW_VERSION(8, 0, 16); +} diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h index 674267a2940e..f213bd82c911 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h @@ -30,4 +30,6 @@ extern const struct dmub_srv_dcn31_regs dmub_srv_dcn314_regs; +bool dmub_dcn314_is_psrsu_supported(struct dmub_srv *dmub); + #endif /* _DMUB_DCN314_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 9e9a6a44a7ac..bdaf43892f47 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -226,14 +226,17 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) case DMUB_ASIC_DCN314: case DMUB_ASIC_DCN315: case DMUB_ASIC_DCN316: - if (asic == DMUB_ASIC_DCN314) + if (asic == DMUB_ASIC_DCN314) { 
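The DMUB_FW_VERSION repack above moves the revision into bits 8-15, truncated to 8 bits, instead of bits 0-15, leaving the low byte free; the new is_psrsu_supported hooks compare firmware versions against thresholds built with it. Plain integer comparison still matches field-wise ordering, which a small self-contained check confirms:

#include <assert.h>
#include <stdint.h>

/* mirror of the corrected packing: 8 bits each for major/minor/revision */
static inline uint32_t fw_version(uint8_t major, uint8_t minor, uint8_t revision)
{
	return ((uint32_t)major << 24) | ((uint32_t)minor << 16) |
	       ((uint32_t)revision << 8);
}

int main(void)
{
	/* integer comparison matches (major, minor, revision) ordering */
	assert(fw_version(4, 0, 59) < fw_version(4, 0, 60));
	assert(fw_version(4, 0, 59) < fw_version(4, 1, 0));
	assert(fw_version(8, 0, 16) > fw_version(4, 0, 59));
	return 0;
}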
dmub->regs_dcn31 = &dmub_srv_dcn314_regs; - else if (asic == DMUB_ASIC_DCN315) + funcs->is_psrsu_supported = dmub_dcn314_is_psrsu_supported; + } else if (asic == DMUB_ASIC_DCN315) { dmub->regs_dcn31 = &dmub_srv_dcn315_regs; - else if (asic == DMUB_ASIC_DCN316) + } else if (asic == DMUB_ASIC_DCN316) { dmub->regs_dcn31 = &dmub_srv_dcn316_regs; - else + } else { dmub->regs_dcn31 = &dmub_srv_dcn31_regs; + funcs->is_psrsu_supported = dmub_dcn31_is_psrsu_supported; + } funcs->reset = dmub_dcn31_reset; funcs->reset_release = dmub_dcn31_reset_release; funcs->backdoor_load = dmub_dcn31_backdoor_load; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index a57952b93e73..9ef88a0b1b57 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -35,44 +35,6 @@ #include <linux/pm_runtime.h> #include <asm/processor.h> -static const struct cg_flag_name clocks[] = { - {AMD_CG_SUPPORT_GFX_FGCG, "Graphics Fine Grain Clock Gating"}, - {AMD_CG_SUPPORT_GFX_MGCG, "Graphics Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_GFX_MGLS, "Graphics Medium Grain memory Light Sleep"}, - {AMD_CG_SUPPORT_GFX_CGCG, "Graphics Coarse Grain Clock Gating"}, - {AMD_CG_SUPPORT_GFX_CGLS, "Graphics Coarse Grain memory Light Sleep"}, - {AMD_CG_SUPPORT_GFX_CGTS, "Graphics Coarse Grain Tree Shader Clock Gating"}, - {AMD_CG_SUPPORT_GFX_CGTS_LS, "Graphics Coarse Grain Tree Shader Light Sleep"}, - {AMD_CG_SUPPORT_GFX_CP_LS, "Graphics Command Processor Light Sleep"}, - {AMD_CG_SUPPORT_GFX_RLC_LS, "Graphics Run List Controller Light Sleep"}, - {AMD_CG_SUPPORT_GFX_3D_CGCG, "Graphics 3D Coarse Grain Clock Gating"}, - {AMD_CG_SUPPORT_GFX_3D_CGLS, "Graphics 3D Coarse Grain memory Light Sleep"}, - {AMD_CG_SUPPORT_MC_LS, "Memory Controller Light Sleep"}, - {AMD_CG_SUPPORT_MC_MGCG, "Memory Controller Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_SDMA_LS, "System Direct Memory Access Light Sleep"}, - {AMD_CG_SUPPORT_SDMA_MGCG, "System Direct Memory Access Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_BIF_MGCG, "Bus Interface Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_BIF_LS, "Bus Interface Light Sleep"}, - {AMD_CG_SUPPORT_UVD_MGCG, "Unified Video Decoder Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_VCE_MGCG, "Video Compression Engine Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_HDP_LS, "Host Data Path Light Sleep"}, - {AMD_CG_SUPPORT_HDP_MGCG, "Host Data Path Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_DRM_MGCG, "Digital Right Management Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_DRM_LS, "Digital Right Management Light Sleep"}, - {AMD_CG_SUPPORT_ROM_MGCG, "Rom Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_DF_MGCG, "Data Fabric Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_VCN_MGCG, "VCN Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_HDP_DS, "Host Data Path Deep Sleep"}, - {AMD_CG_SUPPORT_HDP_SD, "Host Data Path Shutdown"}, - {AMD_CG_SUPPORT_IH_CG, "Interrupt Handler Clock Gating"}, - {AMD_CG_SUPPORT_JPEG_MGCG, "JPEG Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_REPEATER_FGCG, "Repeater Fine Grain Clock Gating"}, - {AMD_CG_SUPPORT_GFX_PERF_CLK, "Perfmon Clock Gating"}, - {AMD_CG_SUPPORT_ATHUB_MGCG, "Address Translation Hub Medium Grain Clock Gating"}, - {AMD_CG_SUPPORT_ATHUB_LS, "Address Translation Hub Light Sleep"}, - {0, NULL}, -}; - static const struct hwmon_temp_label { enum PP_HWMON_TEMP channel; const char *label; @@ -2110,6 +2072,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(9, 4, 0): case IP_VERSION(9, 
4, 1): case IP_VERSION(9, 4, 2): + case IP_VERSION(9, 4, 3): case IP_VERSION(10, 3, 0): case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): @@ -2120,7 +2083,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ *states = ATTR_STATE_UNSUPPORTED; } } else if (DEVICE_ATTR_IS(pp_features)) { - if (adev->flags & AMD_IS_APU || gc_ver < IP_VERSION(9, 0, 0)) + if ((adev->flags & AMD_IS_APU && + gc_ver != IP_VERSION(9, 4, 3)) || + gc_ver < IP_VERSION(9, 0, 0)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(gpu_metrics)) { if (gc_ver < IP_VERSION(9, 1, 0)) @@ -3684,6 +3649,44 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a return 0; } +static const struct cg_flag_name clocks[] = { + {AMD_CG_SUPPORT_GFX_FGCG, "Graphics Fine Grain Clock Gating"}, + {AMD_CG_SUPPORT_GFX_MGCG, "Graphics Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_GFX_MGLS, "Graphics Medium Grain memory Light Sleep"}, + {AMD_CG_SUPPORT_GFX_CGCG, "Graphics Coarse Grain Clock Gating"}, + {AMD_CG_SUPPORT_GFX_CGLS, "Graphics Coarse Grain memory Light Sleep"}, + {AMD_CG_SUPPORT_GFX_CGTS, "Graphics Coarse Grain Tree Shader Clock Gating"}, + {AMD_CG_SUPPORT_GFX_CGTS_LS, "Graphics Coarse Grain Tree Shader Light Sleep"}, + {AMD_CG_SUPPORT_GFX_CP_LS, "Graphics Command Processor Light Sleep"}, + {AMD_CG_SUPPORT_GFX_RLC_LS, "Graphics Run List Controller Light Sleep"}, + {AMD_CG_SUPPORT_GFX_3D_CGCG, "Graphics 3D Coarse Grain Clock Gating"}, + {AMD_CG_SUPPORT_GFX_3D_CGLS, "Graphics 3D Coarse Grain memory Light Sleep"}, + {AMD_CG_SUPPORT_MC_LS, "Memory Controller Light Sleep"}, + {AMD_CG_SUPPORT_MC_MGCG, "Memory Controller Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_SDMA_LS, "System Direct Memory Access Light Sleep"}, + {AMD_CG_SUPPORT_SDMA_MGCG, "System Direct Memory Access Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_BIF_MGCG, "Bus Interface Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_BIF_LS, "Bus Interface Light Sleep"}, + {AMD_CG_SUPPORT_UVD_MGCG, "Unified Video Decoder Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_VCE_MGCG, "Video Compression Engine Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_HDP_LS, "Host Data Path Light Sleep"}, + {AMD_CG_SUPPORT_HDP_MGCG, "Host Data Path Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_DRM_MGCG, "Digital Right Management Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_DRM_LS, "Digital Right Management Light Sleep"}, + {AMD_CG_SUPPORT_ROM_MGCG, "Rom Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_DF_MGCG, "Data Fabric Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_VCN_MGCG, "VCN Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_HDP_DS, "Host Data Path Deep Sleep"}, + {AMD_CG_SUPPORT_HDP_SD, "Host Data Path Shutdown"}, + {AMD_CG_SUPPORT_IH_CG, "Interrupt Handler Clock Gating"}, + {AMD_CG_SUPPORT_JPEG_MGCG, "JPEG Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_REPEATER_FGCG, "Repeater Fine Grain Clock Gating"}, + {AMD_CG_SUPPORT_GFX_PERF_CLK, "Perfmon Clock Gating"}, + {AMD_CG_SUPPORT_ATHUB_MGCG, "Address Translation Hub Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_ATHUB_LS, "Address Translation Hub Light Sleep"}, + {0, NULL}, +}; + static void amdgpu_parse_cg_state(struct seq_file *m, u64 flags) { int i; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index d178f3f44081..42172b00be66 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -89,6 +89,8 @@ struct amdgpu_dpm_thermal { int max_mem_crit_temp; /* memory max 
emergency(shutdown) temp */ int max_mem_emergency_temp; + /* SWCTF threshold */ + int sw_ctf_threshold; /* was last interrupt low to high or high to low */ bool high_to_low; /* interrupt source */ diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 11b7b4cffaae..ff360c699171 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -26,6 +26,7 @@ #include <linux/gfp.h> #include <linux/slab.h> #include <linux/firmware.h> +#include <linux/reboot.h> #include "amd_shared.h" #include "amd_powerplay.h" #include "power_state.h" @@ -91,6 +92,45 @@ static int pp_early_init(void *handle) return 0; } +static void pp_swctf_delayed_work_handler(struct work_struct *work) +{ + struct pp_hwmgr *hwmgr = + container_of(work, struct pp_hwmgr, swctf_delayed_work.work); + struct amdgpu_device *adev = hwmgr->adev; + struct amdgpu_dpm_thermal *range = + &adev->pm.dpm.thermal; + uint32_t gpu_temperature, size; + int ret; + + /* + * If the hotspot/edge temperature is confirmed as below SW CTF setting point + * after the delay enforced, nothing will be done. + * Otherwise, a graceful shutdown will be performed to prevent further damage. + */ + if (range->sw_ctf_threshold && + hwmgr->hwmgr_func->read_sensor) { + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, + AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + &gpu_temperature, + &size); + /* + * For some legacy ASICs, hotspot temperature retrieving might be not + * supported. Check the edge temperature instead then. + */ + if (ret == -EOPNOTSUPP) + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, + AMDGPU_PP_SENSOR_EDGE_TEMP, + &gpu_temperature, + &size); + if (!ret && gpu_temperature / 1000 < range->sw_ctf_threshold) + return; + } + + dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n"); + dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n"); + orderly_poweroff(true); +} + static int pp_sw_init(void *handle) { struct amdgpu_device *adev = handle; @@ -101,6 +141,10 @@ static int pp_sw_init(void *handle) pr_debug("powerplay sw init %s\n", ret ? 
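pp_swctf_delayed_work_handler turns the SW CTF interrupt into a debounce: the IRQ path (see phm_irq_process below) only schedules delayed work, and the handler re-reads the hotspot, or failing that the edge, temperature after the delay, powering off only if the GPU is still above the threshold. The shape of the pattern, with the sensor access reduced to an assumed hook:

#include <linux/workqueue.h>
#include <linux/reboot.h>
#include <linux/printk.h>

struct swctf_ctx {
	struct delayed_work work;
	int threshold_c;				/* SW CTF trip point, degrees C */
	int (*read_temp_mc)(int *temp_millicelsius);	/* assumed hook, 0 on success */
};

static void swctf_work_fn(struct work_struct *work)
{
	struct swctf_ctx *ctx = container_of(work, struct swctf_ctx, work.work);
	int temp_mc;

	/* cooled below the trip point while the delay ran: false alarm */
	if (ctx->threshold_c && !ctx->read_temp_mc(&temp_mc) &&
	    temp_mc / 1000 < ctx->threshold_c)
		return;

	pr_emerg("GPU over temperature range (SW CTF); shutting down\n");
	orderly_poweroff(true);
}

/* IRQ path: schedule_delayed_work(&ctx->work, msecs_to_jiffies(extra_delay_ms)); */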
"failed" : "successfully"); + if (!ret) + INIT_DELAYED_WORK(&hwmgr->swctf_delayed_work, + pp_swctf_delayed_work_handler); + return ret; } @@ -135,6 +179,8 @@ static int pp_hw_fini(void *handle) struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + cancel_delayed_work_sync(&hwmgr->swctf_delayed_work); + hwmgr_hw_fini(hwmgr); return 0; @@ -221,6 +267,8 @@ static int pp_suspend(void *handle) struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + cancel_delayed_work_sync(&hwmgr->swctf_delayed_work); + return hwmgr_suspend(hwmgr); } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c index 981dc8c7112d..90452b66e107 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c @@ -241,7 +241,8 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) TEMP_RANGE_MAX, TEMP_RANGE_MIN, TEMP_RANGE_MAX, - TEMP_RANGE_MAX}; + TEMP_RANGE_MAX, + 0}; struct amdgpu_device *adev = hwmgr->adev; if (!hwmgr->not_vf) @@ -265,6 +266,7 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) adev->pm.dpm.thermal.min_mem_temp = range.mem_min; adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; + adev->pm.dpm.thermal.sw_ctf_threshold = range.sw_ctf_threshold; return ret; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index e10cc5e7928e..6841a4bce186 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -5432,6 +5432,8 @@ static int smu7_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = data->thermal_temp_setting.temperature_shutdown * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->sw_ctf_threshold = thermal_data->max; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c index bfe80ac0ad8c..d0b1ab6c4523 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c @@ -603,21 +603,17 @@ int phm_irq_process(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; uint32_t client_id = entry->client_id; uint32_t src_id = entry->src_id; if (client_id == AMDGPU_IRQ_CLIENTID_LEGACY) { if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH) { - dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n"); - /* - * SW CTF just occurred. - * Try to do a graceful shutdown to prevent further damage. - */ - dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n"); - orderly_poweroff(true); - } else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW) + schedule_delayed_work(&hwmgr->swctf_delayed_work, + msecs_to_jiffies(AMDGPU_SWCTF_EXTRA_DELAY)); + } else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW) { dev_emerg(adev->dev, "ERROR: GPU under temperature range detected!\n"); - else if (src_id == VISLANDS30_IV_SRCID_GPIO_19) { + } else if (src_id == VISLANDS30_IV_SRCID_GPIO_19) { dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature Fault(aka CTF) detected!\n"); /* * HW CTF just occurred. Shutdown to prevent further damage. 
@@ -626,15 +622,10 @@ int phm_irq_process(struct amdgpu_device *adev, orderly_poweroff(true); } } else if (client_id == SOC15_IH_CLIENTID_THM) { - if (src_id == 0) { - dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n"); - /* - * SW CTF just occurred. - * Try to do a graceful shutdown to prevent further damage. - */ - dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n"); - orderly_poweroff(true); - } else + if (src_id == 0) + schedule_delayed_work(&hwmgr->swctf_delayed_work, + msecs_to_jiffies(AMDGPU_SWCTF_EXTRA_DELAY)); + else dev_emerg(adev->dev, "ERROR: GPU under temperature range detected!\n"); } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) { dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature Fault(aka CTF) detected!\n"); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 99cd2e63afdd..c51dd4c74fe9 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -5241,6 +5241,9 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, { struct vega10_hwmgr *data = hwmgr->backend; PPTable_t *pp_table = &(data->smc_state_table.pp_table); + struct phm_ppt_v2_information *pp_table_info = + (struct phm_ppt_v2_information *)(hwmgr->pptable); + struct phm_tdp_table *tdp_table = pp_table_info->tdp_table; memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); @@ -5257,6 +5260,13 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + if (tdp_table->usSoftwareShutdownTemp > pp_table->ThotspotLimit && + tdp_table->usSoftwareShutdownTemp < VEGA10_THERMAL_MAXIMUM_ALERT_TEMP) + thermal_data->sw_ctf_threshold = tdp_table->usSoftwareShutdownTemp; + else + thermal_data->sw_ctf_threshold = VEGA10_THERMAL_MAXIMUM_ALERT_TEMP; + thermal_data->sw_ctf_threshold *= PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c index e9db137cd1c6..1937be1cf5b4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c @@ -2763,6 +2763,8 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *thermal_data) { + struct phm_ppt_v3_information *pptable_information = + (struct phm_ppt_v3_information *)hwmgr->pptable; struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); PPTable_t *pp_table = &(data->smc_state_table.pp_table); @@ -2781,6 +2783,8 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->sw_ctf_threshold = pptable_information->us_software_shutdown_temp * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_thermal.c index ed3dff0b52d2..ae342c58cd3e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_thermal.c @@ -192,7 +192,9 @@ static int 
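vega10 derives the new sw_ctf_threshold from the power-play table's usSoftwareShutdownTemp, but only trusts it when it lies strictly between the hotspot limit and the maximum alert temperature, falling back to the alert maximum otherwise. A sketch of the clamp, assuming PP_TEMPERATURE_UNITS_PER_CENTIGRADES is 1000 (degrees C in, millidegrees out):

/* assumed units: degrees C in, millidegrees C out */
static int pick_sw_ctf_threshold(int software_shutdown_temp,
				 int hotspot_limit, int max_alert_temp)
{
	int t = max_alert_temp;

	if (software_shutdown_temp > hotspot_limit &&
	    software_shutdown_temp < max_alert_temp)
		t = software_shutdown_temp;

	return t * 1000;
}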
vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, high); val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, low); - val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + val &= ~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK; + val &= ~THM_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; + val &= ~THM_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c index 0d4d4811527c..4e19ccbdb807 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c @@ -4206,6 +4206,8 @@ static int vega20_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *thermal_data) { + struct phm_ppt_v3_information *pptable_information = + (struct phm_ppt_v3_information *)hwmgr->pptable; struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); PPTable_t *pp_table = &(data->smc_state_table.pp_table); @@ -4224,6 +4226,8 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->sw_ctf_threshold = pptable_information->us_software_shutdown_temp * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c index f4f4efdbda79..e9737ca8418a 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c @@ -263,7 +263,9 @@ static int vega20_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, high); val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, low); - val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + val &= ~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK; + val &= ~THM_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; + val &= ~THM_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h index f1580a26a850..612d66aeaab9 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h @@ -811,6 +811,8 @@ struct pp_hwmgr { bool gfxoff_state_changed_by_workload; uint32_t pstate_sclk_peak; uint32_t pstate_mclk_peak; + + struct delayed_work swctf_delayed_work; }; int hwmgr_early_init(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h index a5f2227a3971..0ffc2347829d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h @@ -131,6 +131,7 @@ struct PP_TemperatureRange { int mem_min; int mem_crit_max; int mem_emergency_max; + int sw_ctf_threshold; }; struct PP_StateValidationBlock { diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 
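The vega12/vega20 thermal hunks extend the read-modify-write on THM_THERMAL_INT_CTRL to also clear the INTH and INTL mask bits, so both threshold crossings can actually raise the interrupt that feeds the SW CTF work. The shape of the change, with register accessors and mask values as placeholders rather than the hardware's real layout:

#include <stdint.h>

/* placeholder register accessors and mask bits */
uint32_t thm_read(void);
void thm_write(uint32_t val);

#define THERM_TRIGGER_MASK	(1u << 0)
#define THERM_INTH_MASK		(1u << 1)
#define THERM_INTL_MASK		(1u << 2)

static void unmask_thermal_interrupts(void)
{
	uint32_t val = thm_read();

	val &= ~THERM_TRIGGER_MASK;
	val &= ~THERM_INTH_MASK;	/* newly cleared: high-threshold interrupt */
	val &= ~THERM_INTL_MASK;	/* newly cleared: low-threshold interrupt */

	thm_write(val);
}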
4dea79a0c5b5..ce41a8309582 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include <linux/pci.h> +#include <linux/reboot.h> #include "amdgpu.h" #include "amdgpu_smu.h" @@ -1078,6 +1079,34 @@ static void smu_interrupt_work_fn(struct work_struct *work) smu->ppt_funcs->interrupt_work(smu); } +static void smu_swctf_delayed_work_handler(struct work_struct *work) +{ + struct smu_context *smu = + container_of(work, struct smu_context, swctf_delayed_work.work); + struct smu_temperature_range *range = + &smu->thermal_range; + struct amdgpu_device *adev = smu->adev; + uint32_t hotspot_tmp, size; + + /* + * If the hotspot temperature is confirmed as below SW CTF setting point + * after the delay enforced, nothing will be done. + * Otherwise, a graceful shutdown will be performed to prevent further damage. + */ + if (range->software_shutdown_temp && + smu->ppt_funcs->read_sensor && + !smu->ppt_funcs->read_sensor(smu, + AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + &hotspot_tmp, + &size) && + hotspot_tmp / 1000 < range->software_shutdown_temp) + return; + + dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n"); + dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n"); + orderly_poweroff(true); +} + static int smu_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1120,6 +1149,9 @@ static int smu_sw_init(void *handle) smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; smu->smu_dpm.requested_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; + INIT_DELAYED_WORK(&smu->swctf_delayed_work, + smu_swctf_delayed_work_handler); + ret = smu_smc_table_sw_init(smu); if (ret) { dev_err(adev->dev, "Failed to sw init smc table!\n"); @@ -1600,6 +1632,8 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) return ret; } + cancel_delayed_work_sync(&smu->swctf_delayed_work); + ret = smu_disable_dpms(smu); if (ret) { dev_err(adev->dev, "Fail to disable dpm features!\n"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 09469c750a96..6e2069dcb6b9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -573,6 +573,8 @@ struct smu_context u32 debug_param_reg; u32 debug_msg_reg; u32 debug_resp_reg; + + struct delayed_work swctf_delayed_work; }; struct i2c_adapter; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 275f708db636..c94d825a871b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1654,7 +1654,7 @@ static int navi10_force_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t mask) { - int ret = 0, size = 0; + int ret = 0; uint32_t soft_min_level = 0, soft_max_level = 0, min_freq = 0, max_freq = 0; soft_min_level = mask ? 
(ffs(mask) - 1) : 0; @@ -1675,15 +1675,15 @@ static int navi10_force_clk_levels(struct smu_context *smu, ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); if (ret) - return size; + return 0; ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq); if (ret) - return size; + return 0; ret = smu_v11_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq); if (ret) - return size; + return 0; break; case SMU_DCEFCLK: dev_info(smu->adev->dev,"Setting DCEFCLK min/max dpm level is not supported!\n"); @@ -1693,7 +1693,7 @@ static int navi10_force_clk_levels(struct smu_context *smu, break; } - return size; + return 0; } static int navi10_populate_umd_state_clk(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index e1ef88ee1ed3..aa4a5498a12f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1412,13 +1412,8 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { case THM_11_0__SRCID__THM_DIG_THERM_L2H: - dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n"); - /* - * SW CTF just occurred. - * Try to do a graceful shutdown to prevent further damage. - */ - dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n"); - orderly_poweroff(true); + schedule_delayed_work(&smu->swctf_delayed_work, + msecs_to_jiffies(AMDGPU_SWCTF_EXTRA_DELAY)); break; case THM_11_0__SRCID__THM_DIG_THERM_H2L: dev_emerg(adev->dev, "ERROR: GPU under temperature range detected\n"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index e52c563f0dac..3856da6c3f3d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1353,13 +1353,8 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { case THM_11_0__SRCID__THM_DIG_THERM_L2H: - dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n"); - /* - * SW CTF just occurred. - * Try to do a graceful shutdown to prevent further damage. 
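navi10_force_clk_levels decodes the requested DPM window from a bitmask, lowest set bit for the soft minimum level and highest for the soft maximum, and the cleanup above replaces the always-zero local size with a literal return 0 so the status-style return is explicit. Portable stand-ins for the kernel's ffs()/fls() helpers make the decoding concrete:

// portable stand-ins for the kernel's ffs()/fls() bit helpers
static unsigned int lowest_set(unsigned int x)
{
	unsigned int n = 0;

	while (x && !(x & 1u)) {
		x >>= 1;
		n++;
	}
	return n;
}

static unsigned int highest_set(unsigned int x)
{
	unsigned int n = 0;

	while (x >>= 1)
		n++;
	return n;
}

static void mask_to_levels(unsigned int mask,
			   unsigned int *soft_min, unsigned int *soft_max)
{
	*soft_min = mask ? lowest_set(mask) : 0;	// ffs(mask) - 1
	*soft_max = mask ? highest_set(mask) : 0;	// fls(mask) - 1
}

// e.g. mask 0x6 (levels 1 and 2 requested) yields soft_min = 1, soft_max = 2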
- */ - dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n"); - orderly_poweroff(true); + schedule_delayed_work(&smu->swctf_delayed_work, + msecs_to_jiffies(AMDGPU_SWCTF_EXTRA_DELAY)); break; case THM_11_0__SRCID__THM_DIG_THERM_H2L: dev_emerg(adev->dev, "ERROR: GPU under temperature range detected\n"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a6083957ae51..124287cbbff8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1710,6 +1710,7 @@ static int smu_v13_0_0_get_thermal_temperature_range(struct smu_context *smu, range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->software_shutdown_temp = powerplay_table->software_shutdown_temp; + range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset; return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index a92ea4601ea4..6ef12252beb5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -200,7 +200,6 @@ struct PPTable_t { }; #define SMUQ10_TO_UINT(x) ((x) >> 10) -#define SMUQ16_TO_UINT(x) ((x) >> 16) struct smu_v13_0_6_dpm_map { enum smu_clk_type clk_type; @@ -1994,8 +1993,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->average_socket_power = SMUQ10_TO_UINT(metrics->SocketPower); + /* Energy is reported in 15.625mJ units */ gpu_metrics->energy_accumulator = - SMUQ16_TO_UINT(metrics->SocketEnergyAcc); + SMUQ10_TO_UINT(metrics->SocketEnergyAcc); gpu_metrics->current_gfxclk = SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc0]); diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 0600fdcd06ef..719447ce86e7 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2435,7 +2435,8 @@ static void intel_program_port_clock_ctl(struct intel_encoder *encoder, intel_de_rmw(i915, XELPDP_PORT_CLOCK_CTL(encoder->port), XELPDP_LANE1_PHY_CLOCK_SELECT | XELPDP_FORWARD_CLOCK_UNGATE | - XELPDP_DDI_CLOCK_SELECT_MASK | XELPDP_SSC_ENABLE_PLLB, val); + XELPDP_DDI_CLOCK_SELECT_MASK | XELPDP_SSC_ENABLE_PLLA | + XELPDP_SSC_ENABLE_PLLB, val); } static u32 intel_cx0_get_powerdown_update(u8 lane_mask) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index be1a87bde0c9..df38632c6237 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -6,6 +6,9 @@ #ifndef __INTEL_DISPLAY_POWER_H__ #define __INTEL_DISPLAY_POWER_H__ +#include <linux/mutex.h> +#include <linux/workqueue.h> + #include "intel_wakeref.h" enum aux_ch; @@ -16,6 +19,7 @@ enum port; struct drm_i915_private; struct i915_power_well; struct intel_encoder; +struct seq_file; /* * Keep the pipe, transcoder, port (DDI_LANES,DDI_IO,AUX) domain instances diff --git a/drivers/gpu/drm/i915/display/intel_display_power_map.c b/drivers/gpu/drm/i915/display/intel_display_power_map.c index 1118ee9d224c..5ad04cd42c15 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_map.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_map.c @@ -1252,10 +1252,18 @@ 
I915_DECL_PW_DOMAINS(xelpd_pwdoms_pw_a, POWER_DOMAIN_INIT); #define XELPD_DC_OFF_PORT_POWER_DOMAINS \ + POWER_DOMAIN_PORT_DDI_LANES_C, \ + POWER_DOMAIN_PORT_DDI_LANES_D, \ + POWER_DOMAIN_PORT_DDI_LANES_E, \ POWER_DOMAIN_PORT_DDI_LANES_TC1, \ POWER_DOMAIN_PORT_DDI_LANES_TC2, \ POWER_DOMAIN_PORT_DDI_LANES_TC3, \ POWER_DOMAIN_PORT_DDI_LANES_TC4, \ + POWER_DOMAIN_VGA, \ + POWER_DOMAIN_AUDIO_PLAYBACK, \ + POWER_DOMAIN_AUX_IO_C, \ + POWER_DOMAIN_AUX_IO_D, \ + POWER_DOMAIN_AUX_IO_E, \ POWER_DOMAIN_AUX_C, \ POWER_DOMAIN_AUX_D, \ POWER_DOMAIN_AUX_E, \ @@ -1272,14 +1280,6 @@ I915_DECL_PW_DOMAINS(xelpd_pwdoms_pw_a, XELPD_PW_B_POWER_DOMAINS, \ XELPD_PW_C_POWER_DOMAINS, \ XELPD_PW_D_POWER_DOMAINS, \ - POWER_DOMAIN_PORT_DDI_LANES_C, \ - POWER_DOMAIN_PORT_DDI_LANES_D, \ - POWER_DOMAIN_PORT_DDI_LANES_E, \ - POWER_DOMAIN_VGA, \ - POWER_DOMAIN_AUDIO_PLAYBACK, \ - POWER_DOMAIN_AUX_IO_C, \ - POWER_DOMAIN_AUX_IO_D, \ - POWER_DOMAIN_AUX_IO_E, \ XELPD_DC_OFF_PORT_POWER_DOMAINS I915_DECL_PW_DOMAINS(xelpd_pwdoms_pw_2, diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.h b/drivers/gpu/drm/i915/display/intel_display_power_well.h index e494df379e6c..1015bba4af01 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.h +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.h @@ -12,6 +12,8 @@ struct drm_i915_private; struct i915_power_well; +struct i915_power_well_ops; +struct intel_encoder; #define for_each_power_well(__dev_priv, __power_well) \ for ((__power_well) = (__dev_priv)->display.power.domains.power_wells; \ diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 5ed450111f77..34fabadefaf6 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -2358,7 +2358,7 @@ int intel_hdcp_enable(struct intel_atomic_state *state, mutex_lock(&dig_port->hdcp_mutex); drm_WARN_ON(&i915->drm, hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED); - hdcp->content_type = (u8)conn_state->content_type; + hdcp->content_type = (u8)conn_state->hdcp_content_type; if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) { hdcp->cpu_transcoder = pipe_config->mst_master_transcoder; diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index d58ed9b62e67..56c17283ba2d 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -933,9 +933,9 @@ static bool _compute_psr2_wake_times(struct intel_dp *intel_dp, } io_wake_lines = intel_usecs_to_scanlines( - &crtc_state->uapi.adjusted_mode, io_wake_time); + &crtc_state->hw.adjusted_mode, io_wake_time); fast_wake_lines = intel_usecs_to_scanlines( - &crtc_state->uapi.adjusted_mode, fast_wake_time); + &crtc_state->hw.adjusted_mode, fast_wake_time); if (io_wake_lines > max_wake_lines || fast_wake_lines > max_wake_lines) diff --git a/drivers/gpu/drm/i915/display/intel_psr_regs.h b/drivers/gpu/drm/i915/display/intel_psr_regs.h index 0f7db617425a..8750cb0d8d9d 100644 --- a/drivers/gpu/drm/i915/display/intel_psr_regs.h +++ b/drivers/gpu/drm/i915/display/intel_psr_regs.h @@ -81,7 +81,7 @@ #define _SRD_AUX_DATA_A 0x60814 #define _SRD_AUX_DATA_EDP 0x6f814 -#define EDP_PSR_AUX_DATA(tran, i) _MMIO_TRANS2(tran, _SRD_AUX_DATA_A + (i) + 4) /* 5 registers */ +#define EDP_PSR_AUX_DATA(tran, i) _MMIO_TRANS2(tran, _SRD_AUX_DATA_A + (i) * 4) /* 5 registers */ #define _SRD_STATUS_A 0x60840 #define _SRD_STATUS_EDP 0x6f840 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
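/*
 * The intel_psr_regs.h one-liner above is a classic stride bug:
 * "+ (i) + 4" stepped through the five 32-bit AUX data registers one byte
 * per index, while "+ (i) * 4" advances one whole register per index.
 * A minimal sketch of indexed MMIO addressing; the helper name is mine,
 * the base offset comes from the hunk:
 */
#include <linux/types.h>

#define SRD_AUX_DATA_BASE	0x60814

static inline u32 srd_aux_data_offset(unsigned int i)
{
	/* i in [0, 4]: consecutive 32-bit registers, 4-byte stride */
	return SRD_AUX_DATA_BASE + i * 4;
}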
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 01b75529311c..ee9f83af7cf6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -606,7 +606,7 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) if (unlikely(ret)) return ret; slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit; - } else if (slpc->min_freq_softlimit != slpc->min_freq) { + } else { return intel_guc_slpc_set_min_freq(slpc, slpc->min_freq_softlimit); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 09d4bbcdcdbf..4de6a4e8280d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -118,15 +118,31 @@ static void mock_gt_probe(struct drm_i915_private *i915) i915->gt[0]->name = "Mock GT"; } +static const struct intel_device_info mock_info = { + .__runtime.graphics.ip.ver = -1, + .__runtime.page_sizes = (I915_GTT_PAGE_SIZE_4K | + I915_GTT_PAGE_SIZE_64K | + I915_GTT_PAGE_SIZE_2M), + .__runtime.memory_regions = REGION_SMEM, + .__runtime.platform_engine_mask = BIT(0), + + /* simply use legacy cache level for mock device */ + .max_pat_index = 3, + .cachelevel_to_pat = { + [I915_CACHE_NONE] = 0, + [I915_CACHE_LLC] = 1, + [I915_CACHE_L3_LLC] = 2, + [I915_CACHE_WT] = 3, + }, +}; + struct drm_i915_private *mock_gem_device(void) { #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) static struct dev_iommu fake_iommu = { .priv = (void *)-1 }; #endif struct drm_i915_private *i915; - struct intel_device_info *i915_info; struct pci_dev *pdev; - unsigned int i; int ret; pdev = kzalloc(sizeof(*pdev), GFP_KERNEL); @@ -159,15 +175,18 @@ struct drm_i915_private *mock_gem_device(void) pci_set_drvdata(pdev, i915); + /* Device parameters start as a copy of module parameters. */ + i915_params_copy(&i915->params, &i915_modparams); + + /* Set up device info and initial runtime info. 
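/*
 * mock_gem_device() above now seeds device info from a static const table
 * with designated initializers instead of patching RUNTIME_INFO() fields
 * after creation, keeping the canonical intel_device_info read-only.
 * The general shape, with made-up field names:
 */
struct chip_info {
	unsigned int ip_ver;
	unsigned int page_sizes;
	unsigned int max_pat_index;
};

static const struct chip_info mock_chip = {
	.ip_ver		= -1,	/* "not real hardware" marker */
	.page_sizes	= (1u << 12) | (1u << 16) | (1u << 21), /* 4K/64K/2M */
	.max_pat_index	= 3,
};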
*/ + intel_device_info_driver_create(i915, pdev->device, &mock_info); + dev_pm_domain_set(&pdev->dev, &pm_domain); pm_runtime_enable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); if (pm_runtime_enabled(&pdev->dev)) WARN_ON(pm_runtime_get_sync(&pdev->dev)); - - i915_params_copy(&i915->params, &i915_modparams); - intel_runtime_pm_init_early(&i915->runtime_pm); /* wakeref tracking has significant overhead */ i915->runtime_pm.no_wakeref_tracking = true; @@ -175,21 +194,6 @@ struct drm_i915_private *mock_gem_device(void) /* Using the global GTT may ask questions about KMS users, so prepare */ drm_mode_config_init(&i915->drm); - RUNTIME_INFO(i915)->graphics.ip.ver = -1; - - RUNTIME_INFO(i915)->page_sizes = - I915_GTT_PAGE_SIZE_4K | - I915_GTT_PAGE_SIZE_64K | - I915_GTT_PAGE_SIZE_2M; - - RUNTIME_INFO(i915)->memory_regions = REGION_SMEM; - - /* simply use legacy cache level for mock device */ - i915_info = (struct intel_device_info *)INTEL_INFO(i915); - i915_info->max_pat_index = 3; - for (i = 0; i < I915_MAX_CACHE_LEVEL; i++) - i915_info->cachelevel_to_pat[i] = i; - intel_memory_regions_hw_probe(i915); spin_lock_init(&i915->gpu_error.lock); @@ -223,7 +227,6 @@ struct drm_i915_private *mock_gem_device(void) mock_init_ggtt(to_gt(i915)); to_gt(i915)->vm = i915_vm_get(&to_gt(i915)->ggtt->vm); - RUNTIME_INFO(i915)->platform_engine_mask = BIT(0); to_gt(i915)->info.engine_mask = BIT(0); to_gt(i915)->engine[RCS0] = mock_engine(i915, "mock", RCS0); diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index 3cc9fb0d4f5d..dc276c346fd1 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -2139,9 +2139,9 @@ static const struct panel_desc starry_himax83102_j02_desc = { static const struct drm_display_mode starry_ili9882t_default_mode = { .clock = 165280, .hdisplay = 1200, - .hsync_start = 1200 + 32, - .hsync_end = 1200 + 32 + 30, - .htotal = 1200 + 32 + 30 + 32, + .hsync_start = 1200 + 72, + .hsync_end = 1200 + 72 + 30, + .htotal = 1200 + 72 + 30 + 72, .vdisplay = 1920, .vsync_start = 1920 + 68, .vsync_end = 1920 + 68 + 2, diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index 104570292241..421735acfa14 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -13,9 +13,6 @@ #include <linux/i2c.h> #include <linux/acpi.h> -/* SMBUS HID definition as supported by Microsoft Windows */ -#define ACPI_SMBUS_MS_HID "SMB0001" - struct smbus_methods_t { char *mt_info; char *mt_sbr; diff --git a/drivers/md/dm-verity-loadpin.c b/drivers/md/dm-verity-loadpin.c index 4f78cc55c251..0666699b6858 100644 --- a/drivers/md/dm-verity-loadpin.c +++ b/drivers/md/dm-verity-loadpin.c @@ -58,6 +58,9 @@ bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev) int srcu_idx; bool trusted = false; + if (bdev == NULL) + return false; + if (list_empty(&dm_verity_loadpin_trusted_root_digests)) return false; diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index ec4108a3e5b9..3d3e0ca52614 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2199,8 +2199,10 @@ int mmc_card_alternative_gpt_sector(struct mmc_card *card, sector_t *gpt_sector) } EXPORT_SYMBOL(mmc_card_alternative_gpt_sector); -static void __mmc_rescan(struct mmc_host *host) +void mmc_rescan(struct work_struct *work) { + struct mmc_host *host = + container_of(work, struct mmc_host, detect.work); int i; if (host->rescan_disable) @@ -2272,14 +2274,6 @@ static void 
__mmc_rescan(struct mmc_host *host) mmc_schedule_delayed_work(&host->detect, HZ); } -void mmc_rescan(struct work_struct *work) -{ - struct mmc_host *host = - container_of(work, struct mmc_host, detect.work); - - __mmc_rescan(host); -} - void mmc_start_host(struct mmc_host *host) { host->f_init = max(min(freqs[0], host->f_max), host->f_min); @@ -2292,8 +2286,7 @@ void mmc_start_host(struct mmc_host *host) } mmc_gpiod_request_cd_irq(host); - host->detect_change = 1; - __mmc_rescan(host); + _mmc_detect_change(host, 0, false); } void __mmc_stop_host(struct mmc_host *host) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 08d33290296b..823f8e6e4801 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -1047,6 +1047,7 @@ config REGULATOR_QCOM_USB_VBUS config REGULATOR_RAA215300 tristate "Renesas RAA215300 driver" select REGMAP_I2C + depends on COMMON_CLK depends on I2C help Support for the Renesas RAA215300 PMIC. diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index f956a4ac9881..2e4e555b37c3 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -24,7 +24,7 @@ #include <asm/debug.h> #include <asm/diag.h> #include <asm/ebcdic.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/irq.h> #include <asm/vtoc.h> diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 113c509bf6d0..8587e423169e 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -21,13 +21,13 @@ #include <linux/compat.h> #include <linux/init.h> #include <linux/seq_file.h> +#include <linux/uaccess.h> +#include <linux/io.h> #include <asm/css_chars.h> #include <asm/debug.h> #include <asm/idals.h> #include <asm/ebcdic.h> -#include <asm/io.h> -#include <linux/uaccess.h> #include <asm/cio.h> #include <asm/ccwdev.h> #include <asm/itcw.h> diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index bcb67fa747a7..c06fa2b27120 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -16,10 +16,10 @@ #include <linux/bio.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/io.h> #include <asm/idals.h> #include <asm/ebcdic.h> -#include <asm/io.h> #include <asm/ccwdev.h> #include "dasd_int.h" diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 405d76df9427..09acf3853a77 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -20,8 +20,8 @@ #include <linux/pfn_t.h> #include <linux/uio.h> #include <linux/dax.h> +#include <linux/io.h> #include <asm/extmem.h> -#include <asm/io.h> #define DCSSBLK_NAME "dcssblk" #define DCSSBLK_MINORS_PER_DISK 1 diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 0b05cd76b7d0..a1fef666c9b0 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -25,7 +25,7 @@ #include <linux/slab.h> #include <asm/ccwdev.h> #include <asm/cio.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/ebcdic.h> #include <linux/uaccess.h> #include <asm/delay.h> diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 9cd1ea92d619..bc5193d81f9c 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -22,8 +22,8 @@ #include <linux/mutex.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/io.h> #include <asm/ebcdic.h> -#include <asm/io.h> #include <asm/appldata.h> #include <asm/monwriter.h> diff --git a/drivers/s390/cio/ccwgroup.c 
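/*
 * The mmc hunk above folds __mmc_rescan() back into the workqueue callback,
 * which recovers its host with container_of(). The idiom, detached from mmc
 * specifics (struct and function names are mine):
 */
#include <linux/workqueue.h>

struct scan_host {
	struct delayed_work detect;
};

static void scan_worker(struct work_struct *work)
{
	/* work is the work_struct embedded in detect inside scan_host */
	struct scan_host *host = container_of(work, struct scan_host,
					      detect.work);
	(void)host;	/* ... rescan using host ... */
}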
b/drivers/s390/cio/ccwgroup.c index f0538609dfe4..aa3292e57e38 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -152,7 +152,7 @@ static ssize_t ccwgroup_online_show(struct device *dev, /* * Provide an 'ungroup' attribute so the user can remove group devices no - * longer needed or accidentially created. Saves memory :) + * longer needed or accidentally created. Saves memory :) */ static void ccwgroup_ungroup(struct ccwgroup_device *gdev) { diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index c0d620ffea61..4ca5adce9107 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -943,7 +943,7 @@ static int ccw_device_move_to_sch(struct ccw_device *cdev, cdev->private->dev_id.devno, sch->schid.ssid, sch->schib.pmcw.dev, rc); if (old_enabled) { - /* Try to reenable the old subchannel. */ + /* Try to re-enable the old subchannel. */ spin_lock_irq(old_sch->lock); cio_enable_subchannel(old_sch, (u32)virt_to_phys(old_sch)); spin_unlock_irq(old_sch->lock); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 2b2058427a2b..c396ac3e3a32 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -310,7 +310,7 @@ static void ccw_device_oper_notify(struct ccw_device *cdev) struct subchannel *sch = to_subchannel(cdev->dev.parent); if (ccw_device_notify(cdev, CIO_OPER) == NOTIFY_OK) { - /* Reenable channel measurements, if needed. */ + /* Re-enable channel measurements, if needed. */ ccw_device_sched_todo(cdev, CDEV_TODO_ENABLE_CMF); /* Save indication for new paths. */ cdev->private->path_new_mask = sch->vpm; @@ -947,7 +947,7 @@ void ccw_device_trigger_reprobe(struct ccw_device *cdev) */ sch->lpm = sch->schib.pmcw.pam & sch->opm; /* - * Use the initial configuration since we can't be shure that the old + * Use the initial configuration since we can't be sure that the old * paths are valid. */ io_subchannel_init_config(sch); diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 1c31e81ca8de..aafd66305ead 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -672,7 +672,7 @@ out_init: /* * Fetch one ccw. * To reduce memory copy, we'll pin the cda page in memory, - * and to get rid of the cda 2G limitiaion of ccw1, we'll translate + * and to get rid of the cda 2G limitation of ccw1, we'll translate * direct ccws to idal ccws. */ static int ccwchain_fetch_one(struct ccw1 *ccw, @@ -787,7 +787,7 @@ void cp_free(struct channel_program *cp) * program. * * These APIs will copy the ccws into kernel-space buffers, and update - * the guest phsical addresses with their corresponding host physical + * the guest physical addresses with their corresponding host physical * addresses. Then channel I/O device drivers could issue the * translated channel program to real devices to perform an I/O * operation. diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 8d6b9a52bf3c..420120be300f 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -497,7 +497,7 @@ static void ap_tasklet_fn(unsigned long dummy) enum ap_sm_wait wait = AP_SM_WAIT_NONE; /* Reset the indicator if interrupts are used. Thus new interrupts can - * be received. Doing it in the beginning of the tasklet is therefor + * be received. Doing it in the beginning of the tasklet is therefore * important that no requests on any AP get lost. 
*/ if (ap_irq_flag) @@ -2289,7 +2289,7 @@ static int __init ap_module_init(void) timer_setup(&ap_config_timer, ap_config_timeout, 0); /* - * Setup the high resultion poll timer. + * Setup the high resolution poll timer. * If we are running under z/VM adjust polling to z/VM polling rate. */ if (MACHINE_IS_VM) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 101fb324476f..0d7b7eb374ad 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -233,30 +233,6 @@ struct ap_queue { typedef enum ap_sm_wait (ap_func_t)(struct ap_queue *queue); -/* failure injection cmd struct */ -struct ap_fi { - union { - u16 cmd; /* fi flags + action */ - struct { - u8 flags; /* fi flags only */ - u8 action; /* fi action only */ - }; - }; -}; - -/* all currently known fi actions */ -enum ap_fi_actions { - AP_FI_ACTION_CCA_AGENT_FF = 0x01, - AP_FI_ACTION_CCA_DOM_INVAL = 0x02, - AP_FI_ACTION_NQAP_QID_INVAL = 0x03, -}; - -/* all currently known fi flags */ -enum ap_fi_flags { - AP_FI_FLAG_NO_RETRY = 0x01, - AP_FI_FLAG_TOGGLE_SPECIAL = 0x02, -}; - struct ap_message { struct list_head list; /* Request queueing. */ unsigned long psmid; /* Message id. */ @@ -264,7 +240,6 @@ struct ap_message { size_t len; /* actual msg len in msg buffer */ size_t bufsize; /* allocated msg buffer size */ u16 flags; /* Flags, see AP_MSG_FLAG_xxx */ - struct ap_fi fi; /* Failure Injection cmd */ int rc; /* Return code for this message */ void *private; /* ap driver private pointer. */ /* receive is called from tasklet context */ @@ -384,7 +359,7 @@ int ap_apqn_in_matrix_owned_by_def_drv(unsigned long *apm, * like "+1-16,-32,-0x40,+128" where only single bits or ranges of * bits are cleared or set. Distinction is done based on the very * first character which may be '+' or '-' for the relative string - * and othewise assume to be an absolute value string. If parsing fails + * and otherwise assume to be an absolute value string. If parsing fails * a negative errno value is returned. All arguments and bitmaps are * big endian order. */ diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index ed8f813653fe..30df83735adf 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -274,13 +274,6 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) /* Start the next request on the queue. 
*/ ap_msg = list_entry(aq->requestq.next, struct ap_message, list); -#ifdef CONFIG_ZCRYPT_DEBUG - if (ap_msg->fi.action == AP_FI_ACTION_NQAP_QID_INVAL) { - AP_DBF_WARN("%s fi cmd 0x%04x: forcing invalid qid 0xFF00\n", - __func__, ap_msg->fi.cmd); - qid = 0xFF00; - } -#endif status = __ap_send(qid, ap_msg->psmid, ap_msg->msg, ap_msg->len, ap_msg->flags & AP_MSG_FLAG_SPECIAL); diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index a8f58e133e6e..b441745b0418 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -445,7 +445,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, q->saved_isc = isc; break; case AP_RESPONSE_OTHERWISE_CHANGED: - /* We could not modify IRQ setings: clear new configuration */ + /* We could not modify IRQ settings: clear new configuration */ vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1); kvm_s390_gisc_unregister(kvm, isc); break; @@ -524,7 +524,7 @@ static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid) * Response.status may be set to following Response Code: * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available * - AP_RESPONSE_DECONFIGURED: if the queue is not configured - * - AP_RESPONSE_NORMAL (0) : in case of successs + * - AP_RESPONSE_NORMAL (0) : in case of success * Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC. * We take the matrix_dev lock to ensure serialization on queues and * mediated device access. diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 444ef95d3f59..4b23c9f7f3e5 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -111,8 +111,6 @@ EXPORT_SYMBOL(zcrypt_msgtype); * Multi device nodes extension functions. */ -#ifdef CONFIG_ZCRYPT_MULTIDEVNODES - struct zcdn_device; static struct class *zcrypt_class; @@ -477,8 +475,6 @@ static void zcdn_destroy_all(void) mutex_unlock(&ap_perms_mutex); } -#endif - /* * zcrypt_read (): Not supported beyond zcrypt 1.3.1. 
* @@ -510,7 +506,6 @@ static int zcrypt_open(struct inode *inode, struct file *filp) { struct ap_perms *perms = &ap_perms; -#ifdef CONFIG_ZCRYPT_MULTIDEVNODES if (filp->f_inode->i_cdev == &zcrypt_cdev) { struct zcdn_device *zcdndev; @@ -522,7 +517,6 @@ static int zcrypt_open(struct inode *inode, struct file *filp) if (zcdndev) perms = &zcdndev->perms; } -#endif filp->private_data = (void *)perms; atomic_inc(&zcrypt_open_count); @@ -536,7 +530,6 @@ static int zcrypt_open(struct inode *inode, struct file *filp) */ static int zcrypt_release(struct inode *inode, struct file *filp) { -#ifdef CONFIG_ZCRYPT_MULTIDEVNODES if (filp->f_inode->i_cdev == &zcrypt_cdev) { struct zcdn_device *zcdndev; @@ -549,7 +542,6 @@ static int zcrypt_release(struct inode *inode, struct file *filp) put_device(&zcdndev->device); } } -#endif atomic_dec(&zcrypt_open_count); return 0; @@ -661,11 +653,6 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, ap_init_message(&ap_msg); -#ifdef CONFIG_ZCRYPT_DEBUG - if (tr && tr->fi.cmd) - ap_msg.fi.cmd = tr->fi.cmd; -#endif - if (mex->outputdatalength < mex->inputdatalength) { func_code = 0; rc = -EINVAL; @@ -687,7 +674,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, pref_zq = NULL; spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { - /* Check for usable accelarator or CCA card */ + /* Check for usable accelerator or CCA card */ if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x18000000)) continue; @@ -771,11 +758,6 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, ap_init_message(&ap_msg); -#ifdef CONFIG_ZCRYPT_DEBUG - if (tr && tr->fi.cmd) - ap_msg.fi.cmd = tr->fi.cmd; -#endif - if (crt->outputdatalength < crt->inputdatalength) { func_code = 0; rc = -EINVAL; @@ -797,7 +779,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, pref_zq = NULL; spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { - /* Check for usable accelarator or CCA card */ + /* Check for usable accelerator or CCA card */ if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x18000000)) continue; @@ -883,16 +865,6 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, xcrb->status = 0; ap_init_message(&ap_msg); -#ifdef CONFIG_ZCRYPT_DEBUG - if (tr && tr->fi.cmd) - ap_msg.fi.cmd = tr->fi.cmd; - if (tr && tr->fi.action == AP_FI_ACTION_CCA_AGENT_FF) { - ZCRYPT_DBF_WARN("%s fi cmd 0x%04x: forcing invalid agent_ID 'FF'\n", - __func__, tr->fi.cmd); - xcrb->agent_ID = 0x4646; - } -#endif - rc = prep_cca_ap_msg(userspace, xcrb, &ap_msg, &func_code, &domain); if (rc) goto out; @@ -982,14 +954,6 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, if (*domain == AUTOSEL_DOM) *domain = AP_QID_QUEUE(qid); -#ifdef CONFIG_ZCRYPT_DEBUG - if (tr && tr->fi.action == AP_FI_ACTION_CCA_DOM_INVAL) { - ZCRYPT_DBF_WARN("%s fi cmd 0x%04x: forcing invalid domain\n", - __func__, tr->fi.cmd); - *domain = 99; - } -#endif - rc = pref_zq->ops->send_cprb(userspace, pref_zq, xcrb, &ap_msg); spin_lock(&zcrypt_list_lock); @@ -1058,11 +1022,6 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, ap_init_message(&ap_msg); -#ifdef CONFIG_ZCRYPT_DEBUG - if (tr && tr->fi.cmd) - ap_msg.fi.cmd = tr->fi.cmd; -#endif - target_num = (unsigned short)xcrb->targets_num; /* empty list indicates autoselect (all available targets) */ @@ -1473,23 +1432,10 @@ static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg) if (copy_from_user(&mex, umex, sizeof(mex))) return -EFAULT; 
-#ifdef CONFIG_ZCRYPT_DEBUG - if (mex.inputdatalength & (1U << 31)) { - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - tr.fi.cmd = (u16)(mex.inputdatalength >> 16); - } - mex.inputdatalength &= 0x0000FFFF; -#endif - do { rc = zcrypt_rsa_modexpo(perms, &tr, &mex); if (rc == -EAGAIN) tr.again_counter++; -#ifdef CONFIG_ZCRYPT_DEBUG - if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY)) - break; -#endif } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) @@ -1518,23 +1464,10 @@ static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg) if (copy_from_user(&crt, ucrt, sizeof(crt))) return -EFAULT; -#ifdef CONFIG_ZCRYPT_DEBUG - if (crt.inputdatalength & (1U << 31)) { - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - tr.fi.cmd = (u16)(crt.inputdatalength >> 16); - } - crt.inputdatalength &= 0x0000FFFF; -#endif - do { rc = zcrypt_rsa_crt(perms, &tr, &crt); if (rc == -EAGAIN) tr.again_counter++; -#ifdef CONFIG_ZCRYPT_DEBUG - if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY)) - break; -#endif } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) @@ -1563,23 +1496,10 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg) if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb))) return -EFAULT; -#ifdef CONFIG_ZCRYPT_DEBUG - if ((xcrb.status & 0x8000FFFF) == 0x80004649 /* 'FI' */) { - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - tr.fi.cmd = (u16)(xcrb.status >> 16); - } - xcrb.status = 0; -#endif - do { rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb); if (rc == -EAGAIN) tr.again_counter++; -#ifdef CONFIG_ZCRYPT_DEBUG - if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY)) - break; -#endif } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) @@ -1609,23 +1529,10 @@ static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg) if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb))) return -EFAULT; -#ifdef CONFIG_ZCRYPT_DEBUG - if (xcrb.req_len & (1ULL << 63)) { - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - tr.fi.cmd = (u16)(xcrb.req_len >> 48); - } - xcrb.req_len &= 0x0000FFFFFFFFFFFFULL; -#endif - do { rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb); if (rc == -EAGAIN) tr.again_counter++; -#ifdef CONFIG_ZCRYPT_DEBUG - if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY)) - break; -#endif } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) @@ -1668,14 +1575,16 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, size_t total_size = MAX_ZDEV_ENTRIES_EXT * sizeof(struct zcrypt_device_status_ext); - device_status = kzalloc(total_size, GFP_KERNEL); + device_status = kvmalloc_array(MAX_ZDEV_ENTRIES_EXT, + sizeof(struct zcrypt_device_status_ext), + GFP_KERNEL); if (!device_status) return -ENOMEM; zcrypt_device_status_mask_ext(device_status); if (copy_to_user((char __user *)arg, device_status, total_size)) rc = -EFAULT; - kfree(device_status); + kvfree(device_status); return rc; } case ZCRYPT_STATUS_MASK: { @@ -2144,8 +2053,6 @@ void zcrypt_debug_exit(void) debug_unregister(zcrypt_dbf_info); } -#ifdef CONFIG_ZCRYPT_MULTIDEVNODES - static int __init 
zcdn_init(void) { int rc; @@ -2203,8 +2110,6 @@ static void zcdn_exit(void) class_destroy(zcrypt_class); } -#endif - /* * zcrypt_api_init(): Module initialization. * @@ -2218,11 +2123,9 @@ int __init zcrypt_api_init(void) if (rc) goto out; -#ifdef CONFIG_ZCRYPT_MULTIDEVNODES rc = zcdn_init(); if (rc) goto out; -#endif /* Register the request sprayer. */ rc = misc_register(&zcrypt_misc_device); @@ -2235,9 +2138,7 @@ int __init zcrypt_api_init(void) return 0; out_misc_register_failed: -#ifdef CONFIG_ZCRYPT_MULTIDEVNODES zcdn_exit(); -#endif zcrypt_debug_exit(); out: return rc; @@ -2250,9 +2151,7 @@ out: */ void __exit zcrypt_api_exit(void) { -#ifdef CONFIG_ZCRYPT_MULTIDEVNODES zcdn_exit(); -#endif misc_deregister(&zcrypt_misc_device); zcrypt_msgtype6_exit(); zcrypt_msgtype50_exit(); diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index f299deb8b8c7..de659954c8f7 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -60,9 +60,6 @@ struct zcrypt_track { int again_counter; /* retry attempts counter */ int last_qid; /* last qid used */ int last_rc; /* last return code */ -#ifdef CONFIG_ZCRYPT_DEBUG - struct ap_fi fi; /* failure injection cmd */ -#endif }; /* defines related to message tracking */ diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 8c8808cc68a4..263fe182648b 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -689,7 +689,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain, goto out; } - /* copy the tanslated protected key */ + /* copy the translated protected key */ switch (prepparm->lv3.ckb.len) { case 16 + 32: /* AES 128 protected key */ diff --git a/drivers/s390/crypto/zcrypt_ccamisc.h b/drivers/s390/crypto/zcrypt_ccamisc.h index 78bf5631848e..5ddf02f965f9 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.h +++ b/drivers/s390/crypto/zcrypt_ccamisc.h @@ -115,7 +115,7 @@ struct eccprivkeytoken { u64 mkvp; /* master key verification pattern */ u8 opk[48]; /* encrypted object protection key data */ u16 adatalen; /* associated data length in bytes */ - u16 fseclen; /* formated section length in bytes */ + u16 fseclen; /* formatted section length in bytes */ u8 more_data[]; /* more data follows */ } __packed; @@ -232,7 +232,7 @@ int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify); * the number of apqns stored into the list is returned in *nr_apqns. One apqn * entry is simple a 32 bit value with 16 bit cardnr and 16 bit domain nr and * may be casted to struct pkey_apqn. The return value is either 0 for success - * or a negative errno value. If no apqn meeting the criterias is found, + * or a negative errno value. If no apqn meeting the criteria is found, * -ENODEV is returned. 
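/*
 * The ZCRYPT_DEVICE_STATUS hunk earlier replaces kzalloc() with
 * kvmalloc_array(): MAX_ZDEV_ENTRIES_EXT entries add up to a buffer large
 * enough that a physically contiguous allocation can fail, and kvmalloc
 * transparently falls back to vmalloc. A sketch of the pairing, assuming
 * made-up function and parameter names:
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

static int copy_table_to_user(void __user *uptr, size_t n, size_t esize)
{
	void *buf = kvmalloc_array(n, esize, GFP_KERNEL);
	int rc = 0;

	if (!buf)
		return -ENOMEM;
	/* ... fill buf ... */
	if (copy_to_user(uptr, buf, n * esize))
		rc = -EFAULT;
	kvfree(buf);	/* kvfree() copes with either backing store */
	return rc;
}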
*/ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index f67d19d08571..958f5ee47f1b 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -1368,7 +1368,7 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen, goto out; } - /* copy the tanslated protected key */ + /* copy the translated protected key */ if (wki->pkeysize > *protkeylen) { DEBUG_ERR("%s wk info pkeysize %llu > protkeysize %u\n", __func__, wki->pkeysize, *protkeylen); diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h index 07445041869f..a3eddf51242d 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.h +++ b/drivers/s390/crypto/zcrypt_ep11misc.h @@ -131,14 +131,14 @@ int ep11_clr2keyblob(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, * - if minapi > 0 only apqns with API_ord_nr >= minapi * - if wkvp != NULL only apqns where the wkvp (EP11_WKVPLEN bytes) matches * to the first EP11_WKVPLEN bytes of the wkvp of the current wrapping - * key for this domain. When a wkvp is given there will aways be a re-fetch + * key for this domain. When a wkvp is given there will always be a re-fetch * of the domain info for the potential apqn - so this triggers an request * reply to each apqn eligible. * The array of apqn entries is allocated with kmalloc and returned in *apqns; * the number of apqns stored into the list is returned in *nr_apqns. One apqn * entry is simple a 32 bit value with 16 bit cardnr and 16 bit domain nr and * may be casted to struct pkey_apqn. The return value is either 0 for success - * or a negative errno value. If no apqn meeting the criterias is found, + * or a negative errno value. If no apqn meeting the criteria is found, * -ENODEV is returned. 
*/ int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 05ace18c12b0..51f8f7a463f7 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -246,11 +246,6 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq, copy_from_user(inp, mex->inputdata, mod_len)) return -EFAULT; -#ifdef CONFIG_ZCRYPT_DEBUG - if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL) - ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; -#endif - return 0; } @@ -338,11 +333,6 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq, copy_from_user(inp, crt->inputdata, mod_len)) return -EFAULT; -#ifdef CONFIG_ZCRYPT_DEBUG - if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL) - ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; -#endif - return 0; } diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 2f9bf23fbb44..67fd2ec9c5a1 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -425,11 +425,6 @@ static int xcrb_msg_to_type6cprb_msgx(bool userspace, struct ap_message *ap_msg, memcmp(function_code, "AU", 2) == 0) ap_msg->flags |= AP_MSG_FLAG_SPECIAL; -#ifdef CONFIG_ZCRYPT_DEBUG - if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL) - ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; -#endif - /* check CPRB minor version, set info bits in ap_message flag field */ switch (*(unsigned short *)(&msg->cprbx.func_id[0])) { case 0x5432: /* "T2" */ @@ -535,11 +530,6 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap if (msg->cprbx.flags & 0x20) ap_msg->flags |= AP_MSG_FLAG_SPECIAL; -#ifdef CONFIG_ZCRYPT_DEBUG - if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL) - ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; -#endif - /* set info bits in ap_message flag field */ if (msg->cprbx.flags & 0x80) ap_msg->flags |= AP_MSG_FLAG_ADMIN; @@ -1143,6 +1133,9 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); } + if (rc == -EAGAIN && ap_msg->flags & AP_MSG_FLAG_ADMIN) + rc = -EIO; /* do not retry administrative requests */ + out: if (rc) ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", @@ -1263,6 +1256,9 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * ap_cancel_message(zq->queue, ap_msg); } + if (rc == -EAGAIN && ap_msg->flags & AP_MSG_FLAG_ADMIN) + rc = -EIO; /* do not retry administrative requests */ + out: if (rc) ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c index c44ba88f9f47..7a2f34a5e0e0 100644 --- a/drivers/s390/net/ctcm_mpc.c +++ b/drivers/s390/net/ctcm_mpc.c @@ -43,13 +43,13 @@ #include <linux/netdevice.h> #include <net/dst.h> -#include <linux/io.h> /* instead of <asm/io.h> ok ? */ -#include <asm/ccwdev.h> -#include <asm/ccwgroup.h> -#include <linux/bitops.h> /* instead of <asm/bitops.h> ok ? */ -#include <linux/uaccess.h> /* instead of <asm/uaccess.h> ok ? 
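/*
 * The two msgtype6 hunks above convert -EAGAIN into -EIO for
 * AP_MSG_FLAG_ADMIN messages so the ioctl-level retry loops never replay an
 * administrative request. The caller-side pattern, reduced to its shape
 * (names are mine, TRACK_AGAIN_MAX plays the role of max_tries):
 */
static int send_bounded_retry(int (*send)(void *msg), void *msg, int max_tries)
{
	int rc, tries = 0;

	do {
		rc = send(msg);	/* -EAGAIN marks a transient failure */
	} while (rc == -EAGAIN && ++tries < max_tries);
	return rc;
}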
*/ +#include <linux/io.h> +#include <linux/bitops.h> +#include <linux/uaccess.h> #include <linux/wait.h> #include <linux/moduleparam.h> +#include <asm/ccwdev.h> +#include <asm/ccwgroup.h> #include <asm/idals.h> #include "ctcm_main.h" diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 66076cada8ae..8852b03f943b 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -47,7 +47,7 @@ #include <linux/ctype.h> #include <net/dst.h> -#include <asm/io.h> +#include <linux/io.h> #include <linux/uaccess.h> #include <asm/ebcdic.h> diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index abbd1fb5fbc0..8962b2557615 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -826,7 +826,7 @@ config SPI_RSPI SPI driver for Renesas RSPI and QSPI blocks. config SPI_RZV2M_CSI - tristate "Renesas RZV2M CSI controller" + tristate "Renesas RZ/V2M CSI controller" depends on ARCH_RENESAS || COMPILE_TEST help SPI driver for Renesas RZ/V2M Clocked Serial Interface (CSI) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 6b46a3b67c41..d91dfbe47aa5 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1543,13 +1543,9 @@ int bcm_qspi_probe(struct platform_device *pdev, res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mspi"); - if (res) { - qspi->base[MSPI] = devm_ioremap_resource(dev, res); - if (IS_ERR(qspi->base[MSPI])) - return PTR_ERR(qspi->base[MSPI]); - } else { - return 0; - } + qspi->base[MSPI] = devm_ioremap_resource(dev, res); + if (IS_ERR(qspi->base[MSPI])) + return PTR_ERR(qspi->base[MSPI]); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bspi"); if (res) { diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c index ee2528dad02d..9e218e143263 100644 --- a/drivers/spi/spi-bcm63xx-hsspi.c +++ b/drivers/spi/spi-bcm63xx-hsspi.c @@ -2,7 +2,7 @@ * Broadcom BCM63XX High Speed SPI Controller driver * * Copyright 2000-2010 Broadcom Corporation - * Copyright 2012-2013 Jonas Gorski <[email protected]> + * Copyright 2012-2013 Jonas Gorski <[email protected]> * * Licensed under the GNU/GPL. See COPYING for details. */ diff --git a/drivers/spi/spi-bcmbca-hsspi.c b/drivers/spi/spi-bcmbca-hsspi.c index 8cbd01619789..ca1b4741e9f4 100644 --- a/drivers/spi/spi-bcmbca-hsspi.c +++ b/drivers/spi/spi-bcmbca-hsspi.c @@ -3,7 +3,7 @@ * Broadcom BCMBCA High Speed SPI Controller driver * * Copyright 2000-2010 Broadcom Corporation - * Copyright 2012-2013 Jonas Gorski <[email protected]> + * Copyright 2012-2013 Jonas Gorski <[email protected]> * Copyright 2019-2022 Broadcom Ltd */ diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 26ce959d98df..1df9d4844a68 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -1097,6 +1097,12 @@ static int spi_geni_probe(struct platform_device *pdev) if (mas->cur_xfer_mode == GENI_SE_FIFO) spi->set_cs = spi_geni_set_cs; + /* + * TX is required per GSI spec, see setup_gsi_xfer(). 
+ */ + if (mas->cur_xfer_mode == GENI_GPI_DMA) + spi->flags = SPI_CONTROLLER_MUST_TX; + ret = request_irq(mas->irq, geni_spi_isr, 0, dev_name(dev), spi); if (ret) goto spi_geni_release_dma; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6bb251a4d613..59cbfb80edbd 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -187,16 +187,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_layout *lo = &ci->i_layout; + unsigned long max_pages = inode->i_sb->s_bdi->ra_pages; + loff_t end = rreq->start + rreq->len, new_end; + struct ceph_netfs_request_data *priv = rreq->netfs_priv; + unsigned long max_len; u32 blockoff; - u64 blockno; - /* Expand the start downward */ - blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff); - rreq->start = blockno * lo->stripe_unit; - rreq->len += blockoff; + if (priv) { + /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */ + if (priv->file_ra_disabled) + max_pages = 0; + else + max_pages = priv->file_ra_pages; + + } - /* Now, round up the length to the next block */ - rreq->len = roundup(rreq->len, lo->stripe_unit); + /* Readahead is disabled */ + if (!max_pages) + return; + + max_len = max_pages << PAGE_SHIFT; + + /* + * Try to expand the length forward by rounding it up to the next + * block, but do not exceed the file size, unless the original + * request already exceeds it. + */ + new_end = min(round_up(end, lo->stripe_unit), rreq->i_size); + if (new_end > end && new_end <= rreq->start + max_len) + rreq->len = new_end - rreq->start; + + /* Try to expand the start downward */ + div_u64_rem(rreq->start, lo->stripe_unit, &blockoff); + if (rreq->len + blockoff <= max_len) { + rreq->start -= blockoff; + rreq->len += blockoff; + } } static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) @@ -362,18 +388,28 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) { struct inode *inode = rreq->inode; int got = 0, want = CEPH_CAP_FILE_CACHE; + struct ceph_netfs_request_data *priv; int ret = 0; if (rreq->origin != NETFS_READAHEAD) return 0; + priv = kzalloc(sizeof(*priv), GFP_NOFS); + if (!priv) + return -ENOMEM; + if (file) { struct ceph_rw_context *rw_ctx; struct ceph_file_info *fi = file->private_data; + priv->file_ra_pages = file->f_ra.ra_pages; + priv->file_ra_disabled = file->f_mode & FMODE_RANDOM; + rw_ctx = ceph_find_rw_context(fi); - if (rw_ctx) + if (rw_ctx) { + rreq->netfs_priv = priv; return 0; + } } /* @@ -383,27 +419,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got); if (ret < 0) { dout("start_read %p, error getting cap\n", inode); - return ret; + goto out; } if (!(got & want)) { dout("start_read %p, no cache cap\n", inode); - return -EACCES; + ret = -EACCES; + goto out; + } + if (ret == 0) { + ret = -EACCES; + goto out; } - if (ret == 0) - return -EACCES; - rreq->netfs_priv = (void *)(uintptr_t)got; - return 0; + priv->caps = got; + rreq->netfs_priv = priv; + +out: + if (ret < 0) + kfree(priv); + + return ret; } static void ceph_netfs_free_request(struct netfs_io_request *rreq) { - struct ceph_inode_info *ci = ceph_inode(rreq->inode); - int got = (uintptr_t)rreq->netfs_priv; + struct ceph_netfs_request_data *priv = rreq->netfs_priv; + + if (!priv) + return; - if (got) - ceph_put_cap_refs(ci, got); + if (priv->caps) + ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps); +
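/*
 * ceph_netfs_expand_readahead() above now grows the request forward to the
 * next stripe-unit boundary, capped by i_size and the per-file readahead
 * budget, and only then expands the start downward if the budget still
 * allows. The arithmetic in plain integers; all names are local to this
 * sketch, and the real code uses div_u64_rem() rather than the % operator
 * for 64-bit-safe division:
 */
#include <linux/math.h>
#include <linux/minmax.h>
#include <linux/types.h>

static void expand_ra_window(u64 *start, u64 *len, u64 unit, u64 isize,
			     u64 max_len)
{
	u64 end = *start + *len;
	u64 new_end = min(roundup(end, unit), isize);
	u64 off = *start % unit;

	if (new_end > end && new_end - *start <= max_len)
		*len = new_end - *start;	/* forward, within budget */
	if (*len + off <= max_len) {		/* then backward */
		*start -= off;
		*len += off;
	}
}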
kfree(priv); + rreq->netfs_priv = NULL; } const struct netfs_request_ops ceph_netfs_ops = { diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 2321e5ddb664..e2bb0d0072da 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3109,6 +3109,12 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, } if (had & CEPH_CAP_FILE_WR) { if (--ci->i_wr_ref == 0) { + /* + * The Fb caps will always be taken and released + * together with the Fw caps. + */ + WARN_ON_ONCE(ci->i_wb_ref); + last++; check_flushsnaps = true; if (ci->i_wrbuffer_ref_head == 0 && @@ -3560,6 +3566,15 @@ static void handle_cap_grant(struct inode *inode, } BUG_ON(cap->issued & ~cap->implemented); + /* don't let check_caps skip sending a response to MDS for revoke msgs */ + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + cap->mds_wanted = 0; + if (cap == ci->i_auth_cap) + check_caps = 1; /* check auth cap only */ + else + check_caps = 2; /* check all caps */ + } + if (extra_info->inline_version > 0 && extra_info->inline_version >= ci->i_inline_version) { ci->i_inline_version = extra_info->inline_version; @@ -4086,6 +4101,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct cap_extra_info extra_info = {}; bool queue_trunc; bool close_sessions = false; + bool do_cap_release = false; dout("handle_caps from mds%d\n", session->s_mds); @@ -4192,17 +4208,14 @@ void ceph_handle_caps(struct ceph_mds_session *session, if (!inode) { dout(" i don't have ino %llx\n", vino.ino); - if (op == CEPH_CAP_OP_IMPORT) { - cap = ceph_get_cap(mdsc, NULL); - cap->cap_ino = vino.ino; - cap->queue_release = 1; - cap->cap_id = le64_to_cpu(h->cap_id); - cap->mseq = mseq; - cap->seq = seq; - cap->issue_seq = seq; - spin_lock(&session->s_cap_lock); - __ceph_queue_cap_release(session, cap); - spin_unlock(&session->s_cap_lock); + switch (op) { + case CEPH_CAP_OP_IMPORT: + case CEPH_CAP_OP_REVOKE: + case CEPH_CAP_OP_GRANT: + do_cap_release = true; + break; + default: + break; } goto flush_cap_releases; } @@ -4252,6 +4265,14 @@ void ceph_handle_caps(struct ceph_mds_session *session, inode, ceph_ino(inode), ceph_snap(inode), session->s_mds); spin_unlock(&ci->i_ceph_lock); + switch (op) { + case CEPH_CAP_OP_REVOKE: + case CEPH_CAP_OP_GRANT: + do_cap_release = true; + break; + default: + break; + } goto flush_cap_releases; } @@ -4302,6 +4323,18 @@ flush_cap_releases: * along for the mds (who clearly thinks we still have this * cap).
*/ + if (do_cap_release) { + cap = ceph_get_cap(mdsc, NULL); + cap->cap_ino = vino.ino; + cap->queue_release = 1; + cap->cap_id = le64_to_cpu(h->cap_id); + cap->mseq = mseq; + cap->seq = seq; + cap->issue_seq = seq; + spin_lock(&session->s_cap_lock); + __ceph_queue_cap_release(session, cap); + spin_unlock(&session->s_cap_lock); + } ceph_flush_cap_releases(mdsc, session); goto done; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index cb67ac821f0e..4a2b39d9a61a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -886,7 +886,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | + CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; @@ -953,7 +954,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | + CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; @@ -1022,7 +1024,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mkdir.mode = cpu_to_le32(mode); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | + CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; @@ -1079,7 +1082,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, req->r_parent = dir; ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_SHARED on source inode (mds will lock it) */ req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; @@ -1218,7 +1221,7 @@ retry: req->r_num_caps = 2; req->r_parent = dir; ihold(dir); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_inode_drop = ceph_drop_caps_for_unlink(inode); @@ -1320,9 +1323,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, req->r_parent = new_dir; ihold(new_dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); - req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL; req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_RDCACHE on source inode (mds will lock it) */ req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b1925232dc08..63efe5389783 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -791,7 +791,8 @@ retry: if (flags & O_CREAT) { struct ceph_file_layout lo; - req->r_dentry_drop = CEPH_CAP_FILE_SHARED | 
CEPH_CAP_AUTH_EXCL; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | + CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4c0f22acf53d..66048a86c480 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -645,6 +645,7 @@ bad: err = -EIO; out_bad: pr_err("mds parse_reply err %d\n", err); + ceph_msg_dump(msg); return err; } @@ -3538,6 +3539,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, bad: pr_err("mdsc_handle_forward decode error err=%d\n", err); + ceph_msg_dump(msg); } static int __decode_session_metadata(void **p, void *end, @@ -5258,6 +5260,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) bad: pr_err("error decoding fsmap %d. Shutting down mount.\n", err); ceph_umount_begin(mdsc->fsc->sb); + ceph_msg_dump(msg); err_out: mutex_lock(&mdsc->mutex); mdsc->mdsmap_err = err; @@ -5326,6 +5329,7 @@ bad_unlock: bad: pr_err("error decoding mdsmap %d. Shutting down mount.\n", err); ceph_umount_begin(mdsc->fsc->sb); + ceph_msg_dump(msg); return; } diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index c47347d2e84e..cce78d769f55 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -36,6 +36,14 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, s32 items = 0; s32 len; + /* Do not send the metrics until the MDS rank is ready */ + mutex_lock(&mdsc->mutex); + if (ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) != CEPH_MDS_STATE_ACTIVE) { + mutex_unlock(&mdsc->mutex); + return false; + } + mutex_unlock(&mdsc->mutex); + len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) + sizeof(*meta) + sizeof(*dlease) + sizeof(*files) + sizeof(*icaps) + sizeof(*inodes) + sizeof(*rsize) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 2e73ba62bd7a..343d738448dc 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -675,14 +675,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, return 0; } - /* Fb cap still in use, delay it */ - if (ci->i_wb_ref) { + /* + * Defer flushing the capsnap if the dirty buffer is not flushed yet, + * and trigger an immediate flush of the buffer. + */ + if (ci->i_wrbuffer_ref) { dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " "used WRBUFFER, delaying\n", __func__, inode, ceph_vinop(inode), capsnap, capsnap->context, capsnap->context->seq, ceph_cap_string(capsnap->dirty), capsnap->size); - capsnap->writing = 1; + ceph_queue_writeback(inode); return 0; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d24bf0db5234..3bfddf34d488 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -451,6 +451,19 @@ struct ceph_inode_info { unsigned long i_work_mask; }; +struct ceph_netfs_request_data { + int caps; + + /* + * Maximum size of a file readahead request. + * An fadvise call may override the bdi's default ra_pages.
+ */ + unsigned int file_ra_pages; + + /* Set it if fadvise disables file readahead entirely */ + bool file_ra_disabled; +}; + static inline struct ceph_inode_info * ceph_inode(const struct inode *inode) { diff --git a/fs/inode.c b/fs/inode.c index d37fad91c8da..8fefb69e1f84 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1156,8 +1156,10 @@ lock: */ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) { - WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); - WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); + if (inode1) + WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); + if (inode2) + WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2); } EXPORT_SYMBOL(lock_two_nondirectories); diff --git a/fs/namei.c b/fs/namei.c index 91171da719c5..e56ff39a79bc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4874,8 +4874,7 @@ int vfs_rename(struct renamedata *rd) d_exchange(old_dentry, new_dentry); } out: - if (source) - inode_unlock(source); + inode_unlock(source); if (target) inode_unlock(target); dput(new_dentry); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 95d7d8790bc3..f69c451018e3 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1623,6 +1623,20 @@ static int fanotify_events_supported(struct fsnotify_group *group, return -EINVAL; /* + * mount and sb marks are not allowed on kernel internal pseudo fs, + * like pipe_mnt, because that would subscribe to events on all the + * anonymous pipes in the system. + * + * SB_NOUSER covers all of the internal pseudo fs whose objects are not + * exposed to the user's mount namespace, but there are other SB_KERNMOUNT + * fs, like nsfs, debugfs, for which the value of allowing sb and mount + * marks is questionable. For now we leave them alone. + */ + if (mark_type != FAN_MARK_INODE && + path->mnt->mnt_sb->s_flags & SB_NOUSER) + return -EINVAL; + + /* * We shouldn't have allowed setting dirent events and the directory * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode, * but because we always allowed it, error only when using new APIs. diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 0b8bc66377db..a9d82bbb4729 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -573,7 +573,7 @@ add_alloc_in_same_attr_seg: sbi, run, vcn, lcn, to_allocate, &pre_alloc, is_mft ? ALLOCATE_MFT : ALLOCATE_DEF, &alen, is_mft ? 0 : - (sbi->record_size - + (sbi->record_size - le32_to_cpu(rec->used) + 8) / 3 + 1, diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index c0c6bcbc8c05..42631b31adf1 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -52,7 +52,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (!attr->non_res) { lsize = le32_to_cpu(attr->res.data_size); - le = kmalloc(al_aligned(lsize), GFP_NOFS); + le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); if (!le) { err = -ENOMEM; goto out; @@ -80,7 +80,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (err < 0) goto out; - le = kmalloc(al_aligned(lsize), GFP_NOFS); + le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); if (!le) { err = -ENOMEM; goto out; @@ -375,8 +375,7 @@ bool al_remove_le(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le) * al_delete_le - Delete first le from the list which matches its parameters.
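/*
 * The ntfs_load_attr_list() hunks above add __GFP_NOWARN because the
 * allocation size comes straight from on-disk metadata: a corrupt volume
 * can request an absurd size, which should fail quietly with -ENOMEM
 * instead of triggering an allocation warning splat. The pattern, with an
 * illustrative function name:
 */
#include <linux/slab.h>

static void *alloc_from_disk_size(size_t disk_size)
{
	/* size is untrusted: no warning on failure, just return NULL */
	return kmalloc(disk_size, GFP_NOFS | __GFP_NOWARN);
}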
*/ bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn, - const __le16 *name, size_t name_len, - const struct MFT_REF *ref) + const __le16 *name, u8 name_len, const struct MFT_REF *ref) { u16 size; struct ATTR_LIST_ENTRY *le; diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index 9a6c6a09d70c..107e808e06ea 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -287,8 +287,8 @@ static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len, /* Check bits before 'bit'. */ ib = wnd->zone_bit == wnd->zone_end || bit < wnd->zone_end ? - 0 : - wnd->zone_end; + 0 : + wnd->zone_end; while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) { bit -= 1; @@ -298,8 +298,8 @@ static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len, /* Check bits after 'end_in'. */ ib = wnd->zone_bit == wnd->zone_end || end_in > wnd->zone_bit ? - wnd->nbits : - wnd->zone_bit; + wnd->nbits : + wnd->zone_bit; while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) { end_in += 1; @@ -418,7 +418,7 @@ static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len) n3 = rb_first(&wnd->count_tree); wnd->extent_max = n3 ? rb_entry(n3, struct e_node, count.node)->count.key : - 0; + 0; return; } diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 9be3e8edf4f3..1d6c824246c4 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -179,7 +179,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) { int err = 0; struct address_space *mapping = inode->i_mapping; - u32 blocksize = 1 << inode->i_blkbits; + u32 blocksize = i_blocksize(inode); pgoff_t idx = vbo >> PAGE_SHIFT; u32 from = vbo & (PAGE_SIZE - 1); pgoff_t idx_end = (vbo_to + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -192,7 +192,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) for (; idx < idx_end; idx += 1, from = 0) { page_off = (loff_t)idx << PAGE_SHIFT; to = (page_off + PAGE_SIZE) > vbo_to ? (vbo_to - page_off) : - PAGE_SIZE; + PAGE_SIZE; iblock = page_off >> inode->i_blkbits; page = find_or_create_page(mapping, idx, @@ -1078,7 +1078,7 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; ret = is_compressed(ni) ? ntfs_compress_write(iocb, from) : - __generic_file_write_iter(iocb, from); + __generic_file_write_iter(iocb, from); out: inode_unlock(inode); diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 2bfcf1a989c9..16bd9faa2d28 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -77,7 +77,7 @@ struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni) attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL); return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO)) : - NULL; + NULL; } /* @@ -92,7 +92,7 @@ struct ATTR_STD_INFO5 *ni_std5(struct ntfs_inode *ni) attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL); return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO5)) : - NULL; + NULL; } /* @@ -236,6 +236,7 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, return attr; out: + ntfs_inode_err(&ni->vfs_inode, "failed to parse mft record"); ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); return NULL; } @@ -384,7 +385,7 @@ bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi) * ni_remove_attr - Remove all attributes for the given type/name/id. 
*/ int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, - const __le16 *name, size_t name_len, bool base_only, + const __le16 *name, u8 name_len, bool base_only, const __le16 *id) { int err; @@ -517,6 +518,9 @@ out: */ static int ni_repack(struct ntfs_inode *ni) { +#if 1 + return 0; +#else int err = 0; struct ntfs_sb_info *sbi = ni->mi.sbi; struct mft_inode *mi, *mi_p = NULL; @@ -639,6 +643,7 @@ static int ni_repack(struct ntfs_inode *ni) run_close(&run); return err; +#endif } /* @@ -813,10 +818,8 @@ int ni_create_attr_list(struct ntfs_inode *ni) * Looks like one record_size is always enough. */ le = kmalloc(al_aligned(rs), GFP_NOFS); - if (!le) { - err = -ENOMEM; - goto out; - } + if (!le) + return -ENOMEM; mi_get_ref(&ni->mi, &le->ref); ni->attr_list.le = le; @@ -865,15 +868,16 @@ int ni_create_attr_list(struct ntfs_inode *ni) if (to_free > free_b) { err = -EINVAL; - goto out1; + goto out; } } /* Allocate child MFT. */ err = ntfs_look_free_mft(sbi, &rno, is_mft, ni, &mi); if (err) - goto out1; + goto out; + err = -EINVAL; /* Call mi_remove_attr() in reverse order to keep pointers 'arr_move' valid. */ while (to_free > 0) { struct ATTRIB *b = arr_move[--nb]; @@ -882,7 +886,8 @@ int ni_create_attr_list(struct ntfs_inode *ni) attr = mi_insert_attr(mi, b->type, Add2Ptr(b, name_off), b->name_len, asize, name_off); - WARN_ON(!attr); + if (!attr) + goto out; mi_get_ref(mi, &le_b[nb]->ref); le_b[nb]->id = attr->id; @@ -892,17 +897,20 @@ int ni_create_attr_list(struct ntfs_inode *ni) attr->id = le_b[nb]->id; /* Remove from primary record. */ - WARN_ON(!mi_remove_attr(NULL, &ni->mi, b)); + if (!mi_remove_attr(NULL, &ni->mi, b)) + goto out; if (to_free <= asize) break; to_free -= asize; - WARN_ON(!nb); + if (!nb) + goto out; } attr = mi_insert_attr(&ni->mi, ATTR_LIST, NULL, 0, lsize + SIZEOF_RESIDENT, SIZEOF_RESIDENT); - WARN_ON(!attr); + if (!attr) + goto out; attr->non_res = 0; attr->flags = 0; @@ -916,14 +924,12 @@ int ni_create_attr_list(struct ntfs_inode *ni) ni->attr_list.dirty = false; mark_inode_dirty(&ni->vfs_inode); - goto out; + return 0; -out1: +out: kfree(ni->attr_list.le); ni->attr_list.le = NULL; ni->attr_list.size = 0; - -out: return err; } @@ -1638,14 +1644,13 @@ int ni_delete_all(struct ntfs_inode *ni) * Return: File name attribute by its value. */ struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni, - const struct cpu_str *uni, + const struct le_str *uni, const struct MFT_REF *home_dir, struct mft_inode **mi, struct ATTR_LIST_ENTRY **le) { struct ATTRIB *attr = NULL; struct ATTR_FILE_NAME *fname; - struct le_str *fns; if (le) *le = NULL; @@ -1669,10 +1674,9 @@ next: if (uni->len != fname->name_len) goto next; - fns = (struct le_str *)&fname->name_len; - if (ntfs_cmp_names_cpu(uni, fns, NULL, false)) + if (ntfs_cmp_names(uni->name, uni->len, fname->name, uni->len, NULL, + false)) goto next; - return fname; } @@ -1757,8 +1761,8 @@ int ni_new_attr_flags(struct ntfs_inode *ni, enum FILE_ATTRIBUTE new_fa) /* Resize nonresident empty attribute in-place only. */ new_asize = (new_aflags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) ? - (SIZEOF_NONRESIDENT_EX + 8) : - (SIZEOF_NONRESIDENT + 8); + (SIZEOF_NONRESIDENT_EX + 8) : + (SIZEOF_NONRESIDENT + 8); if (!mi_resize_attr(mi, attr, new_asize - le32_to_cpu(attr->size))) return -EOPNOTSUPP; @@ -2910,7 +2914,7 @@ int ni_remove_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, /* Find name in record. 
*/ mi_get_ref(&dir_ni->mi, &de_name->home); - fname = ni_fname_name(ni, (struct cpu_str *)&de_name->name_len, + fname = ni_fname_name(ni, (struct le_str *)&de_name->name_len, &de_name->home, &mi, &le); if (!fname) return -ENOENT; @@ -3160,8 +3164,8 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup, __le64 valid_le; dup->alloc_size = is_attr_ext(attr) ? - attr->nres.total_size : - attr->nres.alloc_size; + attr->nres.total_size : + attr->nres.alloc_size; dup->data_size = attr->nres.data_size; if (new_valid > data_size) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 57762c5fe68b..12f28cdf5c83 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -828,8 +828,8 @@ static inline struct RESTART_TABLE *extend_rsttbl(struct RESTART_TABLE *tbl, memcpy(rt + 1, tbl + 1, esize * used); rt->free_goal = free_goal == ~0u ? - cpu_to_le32(~0u) : - cpu_to_le32(sizeof(struct RESTART_TABLE) + + cpu_to_le32(~0u) : + cpu_to_le32(sizeof(struct RESTART_TABLE) + free_goal * esize); if (tbl->first_free) { @@ -1090,8 +1090,8 @@ static inline u64 base_lsn(struct ntfs_log *log, << log->file_data_bits) + ((((is_log_record_end(hdr) && h_lsn <= le64_to_cpu(hdr->record_hdr.last_end_lsn)) ? - le16_to_cpu(hdr->record_hdr.next_record_off) : - log->page_size) + + le16_to_cpu(hdr->record_hdr.next_record_off) : + log->page_size) + lsn) >> 3); @@ -1299,8 +1299,8 @@ static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size, log->clst_per_page = 1; log->first_page = major_ver >= 2 ? - 0x22 * page_size : - ((sys_page_size << 1) + (page_size << 1)); + 0x22 * page_size : + ((sys_page_size << 1) + (page_size << 1)); log->major_ver = major_ver; log->minor_ver = minor_ver; } @@ -1513,8 +1513,8 @@ static u32 current_log_avail(struct ntfs_log *log) * If there is no oldest lsn then start at the first page of the file. */ oldest_off = (log->l_flags & NTFSLOG_NO_OLDEST_LSN) ? - log->first_page : - (log->oldest_lsn_off & ~log->sys_page_mask); + log->first_page : + (log->oldest_lsn_off & ~log->sys_page_mask); /* * We will use the next log page offset to compute the next free page. @@ -1522,9 +1522,9 @@ static u32 current_log_avail(struct ntfs_log *log) * If we are at the first page then use the end of the file. */ next_free_off = (log->l_flags & NTFSLOG_REUSE_TAIL) ? - log->next_page + log->page_size : + log->next_page + log->page_size : log->next_page == log->first_page ? log->l_size : - log->next_page; + log->next_page; /* If the two offsets are the same then there is no available space. */ if (oldest_off == next_free_off) @@ -1535,8 +1535,8 @@ static u32 current_log_avail(struct ntfs_log *log) */ free_bytes = oldest_off < next_free_off ? - log->total_avail_pages - (next_free_off - oldest_off) : - oldest_off - next_free_off; + log->total_avail_pages - (next_free_off - oldest_off) : + oldest_off - next_free_off; free_bytes >>= log->page_bits; return free_bytes * log->reserved; @@ -1671,7 +1671,7 @@ next_tail: best_lsn1 = first_tail ? base_lsn(log, first_tail, first_file_off) : 0; best_lsn2 = second_tail ? base_lsn(log, second_tail, second_file_off) : - 0; + 0; if (first_tail && second_tail) { if (best_lsn1 > best_lsn2) { @@ -1767,7 +1767,7 @@ tail_read: page_cnt = page_pos = 1; curpage_off = seq_base == log->seq_num ? min(log->next_page, page_off) : - log->next_page; + log->next_page; wrapped_file = curpage_off == log->first_page && @@ -1826,8 +1826,8 @@ use_cur_page: ((lsn_cur >> log->file_data_bits) + ((curpage_off < (lsn_to_vbo(log, lsn_cur) & ~log->page_mask)) ? 
- 1 : - 0)) != expected_seq) { + 1 : + 0)) != expected_seq) { goto check_tail; } @@ -2643,8 +2643,8 @@ static inline bool check_index_root(const struct ATTRIB *attr, const struct INDEX_ROOT *root = resident_data(attr); u8 index_bits = le32_to_cpu(root->index_block_size) >= sbi->cluster_size ? - sbi->cluster_bits : - SECTOR_SHIFT; + sbi->cluster_bits : + SECTOR_SHIFT; u8 block_clst = root->index_block_clst; if (le32_to_cpu(attr->res.data_size) < sizeof(struct INDEX_ROOT) || @@ -3861,9 +3861,9 @@ check_restart_area: /* If we have a valid page then grab a pointer to the restart area. */ ra2 = rst_info.valid_page ? - Add2Ptr(rst_info.r_page, + Add2Ptr(rst_info.r_page, le16_to_cpu(rst_info.r_page->ra_off)) : - NULL; + NULL; if (rst_info.chkdsk_was_run || (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) { diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 28cc421102e5..33afee0f5559 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -9,6 +9,7 @@ #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/kernel.h> +#include <linux/nls.h> #include "debug.h" #include "ntfs.h" @@ -173,12 +174,12 @@ int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, fo = le16_to_cpu(rhdr->fix_off); fn = simple ? ((bytes >> SECTOR_SHIFT) + 1) : - le16_to_cpu(rhdr->fix_num); + le16_to_cpu(rhdr->fix_num); /* Check errors. */ if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || fn * SECTOR_SIZE > bytes) { - return -EINVAL; /* Native chkntfs returns ok! */ + return -E_NTFS_CORRUPT; } /* Get fixup pointer. */ @@ -1661,7 +1662,8 @@ int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run, return 0; } -struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir) +struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, + enum RECORD_FLAG flag) { int err = 0; struct super_block *sb = sbi->sb; @@ -1673,8 +1675,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir) ni = ntfs_i(inode); - err = mi_format_new(&ni->mi, sbi, rno, dir ? RECORD_FLAG_DIR : 0, - false); + err = mi_format_new(&ni->mi, sbi, rno, flag, false); if (err) goto out; @@ -1937,7 +1938,7 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) break; sii_e = (struct NTFS_DE_SII *)ne; - if (le16_to_cpu(ne->view.data_size) < SIZEOF_SECURITY_HDR) + if (le16_to_cpu(ne->view.data_size) < sizeof(sii_e->sec_hdr)) continue; next_id = le32_to_cpu(sii_e->sec_id) + 1; @@ -1998,18 +1999,18 @@ int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id, goto out; t32 = le32_to_cpu(sii_e->sec_hdr.size); - if (t32 < SIZEOF_SECURITY_HDR) { + if (t32 < sizeof(struct SECURITY_HDR)) { err = -EINVAL; goto out; } - if (t32 > SIZEOF_SECURITY_HDR + 0x10000) { + if (t32 > sizeof(struct SECURITY_HDR) + 0x10000) { /* Looks like too big security. 0x10000 - is arbitrary big number. 
*/ err = -EFBIG; goto out; } - *size = t32 - SIZEOF_SECURITY_HDR; + *size = t32 - sizeof(struct SECURITY_HDR); p = kmalloc(*size, GFP_NOFS); if (!p) { @@ -2023,14 +2024,14 @@ int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id, if (err) goto out; - if (memcmp(&d_security, &sii_e->sec_hdr, SIZEOF_SECURITY_HDR)) { + if (memcmp(&d_security, &sii_e->sec_hdr, sizeof(d_security))) { err = -EINVAL; goto out; } err = ntfs_read_run_nb(sbi, &ni->file.run, le64_to_cpu(sii_e->sec_hdr.off) + - SIZEOF_SECURITY_HDR, + sizeof(struct SECURITY_HDR), p, *size, NULL); if (err) goto out; @@ -2069,7 +2070,7 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi, struct NTFS_DE_SDH sdh_e; struct NTFS_DE_SII sii_e; struct SECURITY_HDR *d_security; - u32 new_sec_size = size_sd + SIZEOF_SECURITY_HDR; + u32 new_sec_size = size_sd + sizeof(struct SECURITY_HDR); u32 aligned_sec_size = ALIGN(new_sec_size, 16); struct SECURITY_KEY hash_key; struct ntfs_fnd *fnd_sdh = NULL; @@ -2207,14 +2208,14 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi, /* Fill SII entry. */ sii_e.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_SII, sec_hdr)); - sii_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR); + sii_e.de.view.data_size = cpu_to_le16(sizeof(struct SECURITY_HDR)); sii_e.de.view.res = 0; - sii_e.de.size = cpu_to_le16(SIZEOF_SII_DIRENTRY); + sii_e.de.size = cpu_to_le16(sizeof(struct NTFS_DE_SII)); sii_e.de.key_size = cpu_to_le16(sizeof(d_security->key.sec_id)); sii_e.de.flags = 0; sii_e.de.res = 0; sii_e.sec_id = d_security->key.sec_id; - memcpy(&sii_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR); + memcpy(&sii_e.sec_hdr, d_security, sizeof(struct SECURITY_HDR)); err = indx_insert_entry(indx_sii, ni, &sii_e.de, NULL, NULL, 0); if (err) @@ -2223,7 +2224,7 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi, /* Fill SDH entry. */ sdh_e.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_SDH, sec_hdr)); - sdh_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR); + sdh_e.de.view.data_size = cpu_to_le16(sizeof(struct SECURITY_HDR)); sdh_e.de.view.res = 0; sdh_e.de.size = cpu_to_le16(SIZEOF_SDH_DIRENTRY); sdh_e.de.key_size = cpu_to_le16(sizeof(sdh_e.key)); @@ -2231,7 +2232,7 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi, sdh_e.de.res = 0; sdh_e.key.hash = d_security->key.hash; sdh_e.key.sec_id = d_security->key.sec_id; - memcpy(&sdh_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR); + memcpy(&sdh_e.sec_hdr, d_security, sizeof(struct SECURITY_HDR)); sdh_e.magic[0] = cpu_to_le16('I'); sdh_e.magic[1] = cpu_to_le16('I'); @@ -2522,7 +2523,8 @@ out: /* * run_deallocate - Deallocate clusters. 
*/ -int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim) +int run_deallocate(struct ntfs_sb_info *sbi, const struct runs_tree *run, + bool trim) { CLST lcn, len; size_t idx = 0; @@ -2578,13 +2580,13 @@ static inline bool name_has_forbidden_chars(const struct le_str *fname) return false; } -static inline bool is_reserved_name(struct ntfs_sb_info *sbi, +static inline bool is_reserved_name(const struct ntfs_sb_info *sbi, const struct le_str *fname) { int port_digit; const __le16 *name = fname->name; int len = fname->len; - u16 *upcase = sbi->upcase; + const u16 *upcase = sbi->upcase; /* check for 3 chars reserved names (device names) */ /* name by itself or with any extension is forbidden */ @@ -2618,3 +2620,60 @@ bool valid_windows_name(struct ntfs_sb_info *sbi, const struct le_str *fname) return !name_has_forbidden_chars(fname) && !is_reserved_name(sbi, fname); } + +/* + * ntfs_set_label - updates current ntfs label. + */ +int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) +{ + int err; + struct ATTRIB *attr; + struct ntfs_inode *ni = sbi->volume.ni; + const u8 max_ulen = 0x80; /* TODO: use attrdef to get maximum length */ + /* Allocate PATH_MAX bytes. */ + struct cpu_str *uni = __getname(); + + if (!uni) + return -ENOMEM; + + err = ntfs_nls_to_utf16(sbi, label, len, uni, (PATH_MAX - 2) / 2, + UTF16_LITTLE_ENDIAN); + if (err < 0) + goto out; + + if (uni->len > max_ulen) { + ntfs_warn(sbi->sb, "new label is too long"); + err = -EFBIG; + goto out; + } + + ni_lock(ni); + + /* Ignore any errors. */ + ni_remove_attr(ni, ATTR_LABEL, NULL, 0, false, NULL); + + err = ni_insert_resident(ni, uni->len * sizeof(u16), ATTR_LABEL, NULL, + 0, &attr, NULL, NULL); + if (err < 0) + goto unlock_out; + + /* write new label in on-disk struct. */ + memcpy(resident_data(attr), uni->name, uni->len * sizeof(u16)); + + /* update cached value of current label. */ + if (len >= ARRAY_SIZE(sbi->volume.label)) + len = ARRAY_SIZE(sbi->volume.label) - 1; + memcpy(sbi->volume.label, label, len); + sbi->volume.label[len] = 0; + mark_inode_dirty_sync(&ni->vfs_inode); + +unlock_out: + ni_unlock(ni); + + if (!err) + err = _ni_write_inode(&ni->vfs_inode, 0); + +out: + __putname(uni); + return err; +}
\ No newline at end of file diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 0a48d2d67219..124c6e822623 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -432,8 +432,8 @@ next_run: nbits = 8 * (data_size - vbo); ok = nbits > from ? - (*fn)((ulong *)bh->b_data, from, nbits, ret) : - false; + (*fn)((ulong *)bh->b_data, from, nbits, ret) : + false; put_bh(bh); if (ok) { @@ -1113,6 +1113,12 @@ ok: *node = in; out: + if (err == -E_NTFS_CORRUPT) { + ntfs_inode_err(&ni->vfs_inode, "directory corrupted"); + ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + err = -EINVAL; + } + if (ib != in->index) kfree(ib); @@ -1676,8 +1682,8 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni, /* Create alloc and bitmap attributes (if not). */ err = run_is_empty(&indx->alloc_run) ? - indx_create_allocate(indx, ni, &new_vbn) : - indx_add_allocate(indx, ni, &new_vbn); + indx_create_allocate(indx, ni, &new_vbn) : + indx_add_allocate(indx, ni, &new_vbn); /* Layout of record may be changed, so rescan root. */ root = indx_get_root(indx, ni, &attr, &mi); @@ -1868,8 +1874,8 @@ indx_insert_into_buffer(struct ntfs_index *indx, struct ntfs_inode *ni, (*indx->cmp)(new_de + 1, le16_to_cpu(new_de->key_size), up_e + 1, le16_to_cpu(up_e->key_size), ctx) < 0 ? - hdr2 : - hdr1, + hdr2 : + hdr1, new_de, NULL, ctx); indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits); @@ -2340,7 +2346,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, re, ctx, fnd->level - 1, fnd) : - indx_insert_into_root(indx, ni, re, e, + indx_insert_into_root(indx, ni, re, e, ctx, fnd, 0); kfree(re); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 6c560245eef4..dc7e7ab701c6 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -263,7 +263,7 @@ next_attr: goto next_attr; run = ino == MFT_REC_BITMAP ? &sbi->used.bitmap.run : - &ni->file.run; + &ni->file.run; break; case ATTR_ROOT: @@ -291,8 +291,8 @@ next_attr: goto out; mode = sb->s_root ? - (S_IFDIR | (0777 & sbi->options->fs_dmask_inv)) : - (S_IFDIR | 0777); + (S_IFDIR | (0777 & sbi->options->fs_dmask_inv)) : + (S_IFDIR | 0777); goto next_attr; case ATTR_ALLOC: @@ -450,7 +450,7 @@ end_enum: inode->i_op = &ntfs_file_inode_operations; inode->i_fop = &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : - &ntfs_aops; + &ntfs_aops; if (ino != MFT_REC_MFT) init_rwsem(&ni->file.run_lock); } else if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || @@ -787,7 +787,7 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ret = blockdev_direct_IO(iocb, inode, iter, wr ? ntfs_get_block_direct_IO_W : - ntfs_get_block_direct_IO_R); + ntfs_get_block_direct_IO_R); if (ret > 0) end = vbo + ret; @@ -1191,11 +1191,11 @@ out: * - ntfs_symlink * - ntfs_mkdir * - ntfs_atomic_open - * + * * NOTE: if fnd != NULL (ntfs_atomic_open) then @dir is locked */ -struct inode *ntfs_create_inode(struct mnt_idmap *idmap, - struct inode *dir, struct dentry *dentry, +struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, const struct cpu_str *uni, umode_t mode, dev_t dev, const char *symname, u32 size, struct ntfs_fnd *fnd) @@ -1309,7 +1309,7 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, if (err) goto out2; - ni = ntfs_new_inode(sbi, ino, fa & FILE_ATTRIBUTE_DIRECTORY); + ni = ntfs_new_inode(sbi, ino, S_ISDIR(mode) ? 
RECORD_FLAG_DIR : 0); if (IS_ERR(ni)) { err = PTR_ERR(ni); ni = NULL; @@ -1437,8 +1437,7 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, root = Add2Ptr(attr, sizeof(I30_NAME) + SIZEOF_RESIDENT); memcpy(root, dir_root, offsetof(struct INDEX_ROOT, ihdr)); - root->ihdr.de_off = - cpu_to_le32(sizeof(struct INDEX_HDR)); // 0x10 + root->ihdr.de_off = cpu_to_le32(sizeof(struct INDEX_HDR)); root->ihdr.used = cpu_to_le32(sizeof(struct INDEX_HDR) + sizeof(struct NTFS_DE)); root->ihdr.total = root->ihdr.used; @@ -1605,7 +1604,7 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, inode->i_op = &ntfs_file_inode_operations; inode->i_fop = &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : - &ntfs_aops; + &ntfs_aops; init_rwsem(&ni->file.run_lock); } else { inode->i_op = &ntfs_special_inode_operations; diff --git a/fs/ntfs3/lznt.c b/fs/ntfs3/lznt.c index 61e161c7c567..4aae598d6d88 100644 --- a/fs/ntfs3/lznt.c +++ b/fs/ntfs3/lznt.c @@ -297,7 +297,7 @@ next: struct lznt *get_lznt_ctx(int level) { struct lznt *r = kzalloc(level ? offsetof(struct lznt, hash) : - sizeof(struct lznt), + sizeof(struct lznt), GFP_NOFS); if (r) @@ -393,8 +393,8 @@ ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc, } else { /* This chunk does not contain compressed data. */ unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end ? - unc_end - unc_chunk : - LZNT_CHUNK_SIZE; + unc_end - unc_chunk : + LZNT_CHUNK_SIZE; if (cmpr_chunk + sizeof(chunk_hdr) + unc_use > cmpr_end) { diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index 9736b1e4a0f6..70f8c859e0ad 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -109,8 +109,8 @@ static int ntfs_create(struct mnt_idmap *idmap, struct inode *dir, { struct inode *inode; - inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFREG | mode, - 0, NULL, 0, NULL); + inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFREG | mode, 0, + NULL, 0, NULL); return IS_ERR(inode) ? PTR_ERR(inode) : 0; } @@ -125,8 +125,8 @@ static int ntfs_mknod(struct mnt_idmap *idmap, struct inode *dir, { struct inode *inode; - inode = ntfs_create_inode(idmap, dir, dentry, NULL, mode, rdev, - NULL, 0, NULL); + inode = ntfs_create_inode(idmap, dir, dentry, NULL, mode, rdev, NULL, 0, + NULL); return IS_ERR(inode) ? PTR_ERR(inode) : 0; } @@ -199,8 +199,8 @@ static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir, u32 size = strlen(symname); struct inode *inode; - inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777, - 0, symname, size, NULL); + inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777, 0, + symname, size, NULL); return IS_ERR(inode) ? PTR_ERR(inode) : 0; } @@ -213,8 +213,8 @@ static int ntfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, { struct inode *inode; - inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFDIR | mode, - 0, NULL, 0, NULL); + inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFDIR | mode, 0, + NULL, 0, NULL); return IS_ERR(inode) ? PTR_ERR(inode) : 0; } @@ -422,19 +422,10 @@ static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry, * fnd contains tree's path to insert to. * If fnd is not NULL then dir is locked. */ - - /* - * Unfortunately I don't know how to get here correct 'struct nameidata *nd' - * or 'struct mnt_idmap *idmap'. - * See atomic_open in fs/namei.c. - * This is why xfstest/633 failed. - * Looks like ntfs_atomic_open must accept 'struct mnt_idmap *idmap' as argument. 
- */ - - inode = ntfs_create_inode(&nop_mnt_idmap, dir, dentry, uni, mode, 0, - NULL, 0, fnd); + inode = ntfs_create_inode(mnt_idmap(file->f_path.mnt), dir, dentry, uni, + mode, 0, NULL, 0, fnd); err = IS_ERR(inode) ? PTR_ERR(inode) : - finish_open(file, dentry, ntfs_file_open); + finish_open(file, dentry, ntfs_file_open); dput(d); out2: diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 90151e56c122..98b76d1b09e7 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -95,11 +95,10 @@ enum RECORD_NUM { MFT_REC_BITMAP = 6, MFT_REC_BOOT = 7, MFT_REC_BADCLUST = 8, - //MFT_REC_QUOTA = 9, - MFT_REC_SECURE = 9, // NTFS 3.0 + MFT_REC_SECURE = 9, MFT_REC_UPCASE = 10, - MFT_REC_EXTEND = 11, // NTFS 3.0 - MFT_REC_RESERVED = 11, + MFT_REC_EXTEND = 11, + MFT_REC_RESERVED = 12, MFT_REC_FREE = 16, MFT_REC_USER = 24, }; @@ -109,7 +108,6 @@ enum ATTR_TYPE { ATTR_STD = cpu_to_le32(0x10), ATTR_LIST = cpu_to_le32(0x20), ATTR_NAME = cpu_to_le32(0x30), - // ATTR_VOLUME_VERSION on Nt4 ATTR_ID = cpu_to_le32(0x40), ATTR_SECURE = cpu_to_le32(0x50), ATTR_LABEL = cpu_to_le32(0x60), @@ -118,7 +116,6 @@ enum ATTR_TYPE { ATTR_ROOT = cpu_to_le32(0x90), ATTR_ALLOC = cpu_to_le32(0xA0), ATTR_BITMAP = cpu_to_le32(0xB0), - // ATTR_SYMLINK on Nt4 ATTR_REPARSE = cpu_to_le32(0xC0), ATTR_EA_INFO = cpu_to_le32(0xD0), ATTR_EA = cpu_to_le32(0xE0), @@ -144,6 +141,7 @@ enum FILE_ATTRIBUTE { FILE_ATTRIBUTE_ENCRYPTED = cpu_to_le32(0x00004000), FILE_ATTRIBUTE_VALID_FLAGS = cpu_to_le32(0x00007fb7), FILE_ATTRIBUTE_DIRECTORY = cpu_to_le32(0x10000000), + FILE_ATTRIBUTE_INDEX = cpu_to_le32(0x20000000) }; static_assert(sizeof(enum FILE_ATTRIBUTE) == 4); @@ -266,7 +264,7 @@ enum RECORD_FLAG { RECORD_FLAG_IN_USE = cpu_to_le16(0x0001), RECORD_FLAG_DIR = cpu_to_le16(0x0002), RECORD_FLAG_SYSTEM = cpu_to_le16(0x0004), - RECORD_FLAG_UNKNOWN = cpu_to_le16(0x0008), + RECORD_FLAG_INDEX = cpu_to_le16(0x0008), }; /* MFT Record structure. */ @@ -290,6 +288,15 @@ struct MFT_REC { #define MFTRECORD_FIXUP_OFFSET_1 offsetof(struct MFT_REC, res) #define MFTRECORD_FIXUP_OFFSET_3 offsetof(struct MFT_REC, fixups) +/* + * define MFTRECORD_FIXUP_OFFSET as MFTRECORD_FIXUP_OFFSET_3 (0x30) + * to format new mft records with bigger header (as current ntfs.sys does) + * + * define MFTRECORD_FIXUP_OFFSET as MFTRECORD_FIXUP_OFFSET_1 (0x2A) + * to format new mft records with smaller header (as old ntfs.sys did) + * Both variants are valid. + */ +#define MFTRECORD_FIXUP_OFFSET MFTRECORD_FIXUP_OFFSET_1 static_assert(MFTRECORD_FIXUP_OFFSET_1 == 0x2A); static_assert(MFTRECORD_FIXUP_OFFSET_3 == 0x30); @@ -331,18 +338,18 @@ struct ATTR_NONRESIDENT { __le64 svcn; // 0x10: Starting VCN of this segment. __le64 evcn; // 0x18: End VCN of this segment. __le16 run_off; // 0x20: Offset to packed runs. - // Unit of Compression size for this stream, expressed - // as a log of the cluster size. + // Unit of Compression size for this stream, expressed + // as a log of the cluster size. // - // 0 means file is not compressed - // 1, 2, 3, and 4 are potentially legal values if the - // stream is compressed, however the implementation - // may only choose to use 4, or possibly 3. Note - // that 4 means cluster size time 16. If convenient - // the implementation may wish to accept a - // reasonable range of legal values here (1-5?), - // even if the implementation only generates - // a smaller set of values itself. 
+ // 0 means file is not compressed
+ // 1, 2, 3, and 4 are potentially legal values if the
+ // stream is compressed, however the implementation
+ // may only choose to use 4, or possibly 3.
+ // Note that 4 means cluster size times 16.
+ // If convenient the implementation may wish to accept a
+ // reasonable range of legal values here (1-5?),
+ // even if the implementation only generates
+ // a smaller set of values itself.
 u8 c_unit; // 0x22:
 u8 res1[5]; // 0x23:
 __le64 alloc_size; // 0x28: The allocated size of attribute in bytes.
@@ -836,16 +843,22 @@ static_assert(sizeof(struct ATTR_DEF_ENTRY) == 0xa0);
 /* Object ID (0x40) */
 struct OBJECT_ID {
 struct GUID ObjId; // 0x00: Unique Id assigned to file.
- struct GUID BirthVolumeId; // 0x10: Birth Volume Id is the Object Id of the Volume on.
- // which the Object Id was allocated. It never changes.
- struct GUID BirthObjectId; // 0x20: Birth Object Id is the first Object Id that was
- // ever assigned to this MFT Record. I.e. If the Object Id
- // is changed for some reason, this field will reflect the
- // original value of the Object Id.
- struct GUID DomainId; // 0x30: Domain Id is currently unused but it is intended to be
- // used in a network environment where the local machine is
- // part of a Windows 2000 Domain. This may be used in a Windows
- // 2000 Advanced Server managed domain.
+
+ // Birth Volume Id is the Object Id of the Volume on
+ // which the Object Id was allocated. It never changes.
+ struct GUID BirthVolumeId; // 0x10:
+
+ // Birth Object Id is the first Object Id that was
+ // ever assigned to this MFT Record. I.e. If the Object Id
+ // is changed for some reason, this field will reflect the
+ // original value of the Object Id.
+ struct GUID BirthObjectId; // 0x20:
+
+ // Domain Id is currently unused but it is intended to be
+ // used in a network environment where the local machine is
+ // part of a Windows 2000 Domain. This may be used in a Windows
+ // 2000 Advanced Server managed domain.
+ struct GUID DomainId; // 0x30:
 };
 static_assert(sizeof(struct OBJECT_ID) == 0x40);
@@ -855,32 +868,35 @@ struct NTFS_DE_O {
 struct NTFS_DE de;
 struct GUID ObjId; // 0x10: Unique Id assigned to file.
 struct MFT_REF ref; // 0x20: MFT record number with this file.
- struct GUID BirthVolumeId; // 0x28: Birth Volume Id is the Object Id of the Volume on
- // which the Object Id was allocated. It never changes.
- struct GUID BirthObjectId; // 0x38: Birth Object Id is the first Object Id that was
- // ever assigned to this MFT Record. I.e. If the Object Id
- // is changed for some reason, this field will reflect the
- // original value of the Object Id.
- // This field is valid if data_size == 0x48.
- struct GUID BirthDomainId; // 0x48: Domain Id is currently unused but it is intended
- // to be used in a network environment where the local
- // machine is part of a Windows 2000 Domain. This may be
- // used in a Windows 2000 Advanced Server managed domain.
+
+ // Birth Volume Id is the Object Id of the Volume on
+ // which the Object Id was allocated. It never changes.
+ struct GUID BirthVolumeId; // 0x28:
+
+ // Birth Object Id is the first Object Id that was
+ // ever assigned to this MFT Record. I.e. If the Object Id
+ // is changed for some reason, this field will reflect the
+ // original value of the Object Id.
+ // This field is valid if data_size == 0x48.
+ struct GUID BirthObjectId; // 0x38:
+
+ // Domain Id is currently unused but it is intended
+ // to be used in a network environment where the local
+ // machine is part of a Windows 2000 Domain. This may be
+ // used in a Windows 2000 Advanced Server managed domain.
+ struct GUID BirthDomainId; // 0x48:
 };
 static_assert(sizeof(struct NTFS_DE_O) == 0x58);
-#define NTFS_OBJECT_ENTRY_DATA_SIZE1 \
- 0x38 // struct NTFS_DE_O.BirthDomainId is not used
-#define NTFS_OBJECT_ENTRY_DATA_SIZE2 \
- 0x48 // struct NTFS_DE_O.BirthDomainId is used
-
 /* Q Directory entry structure ( rule = 0x11 ) */
 struct NTFS_DE_Q {
 struct NTFS_DE de;
 __le32 owner_id; // 0x10: Unique Id assigned to file
+
+ /* Here are 0x30 bytes of user quota. NOTE: 4-byte aligned! */
 __le32 Version; // 0x14: 0x02
- __le32 flags2; // 0x18: Quota flags, see above
+ __le32 Flags; // 0x18: Quota flags, see above
 __le64 BytesUsed; // 0x1C:
 __le64 ChangeTime; // 0x24:
 __le64 WarningLimit; // 0x28:
@@ -888,9 +904,9 @@ struct NTFS_DE_Q {
 __le64 ExceededTime; // 0x3C:
 // SID is placed here
-}; // sizeof() = 0x44
+} __packed; // sizeof() = 0x44
-#define SIZEOF_NTFS_DE_Q 0x44
+static_assert(sizeof(struct NTFS_DE_Q) == 0x44);
 #define SecurityDescriptorsBlockSize 0x40000 // 256K
 #define SecurityDescriptorMaxSize 0x20000 // 128K
@@ -912,7 +928,7 @@ struct SECURITY_HDR {
 */
 } __packed;
-#define SIZEOF_SECURITY_HDR 0x14
+static_assert(sizeof(struct SECURITY_HDR) == 0x14);
 /* SII Directory entry structure */
 struct NTFS_DE_SII {
@@ -921,7 +937,8 @@ struct NTFS_DE_SII {
 struct SECURITY_HDR sec_hdr; // 0x14:
 } __packed;
-#define SIZEOF_SII_DIRENTRY 0x28
+static_assert(offsetof(struct NTFS_DE_SII, sec_hdr) == 0x14);
+static_assert(sizeof(struct NTFS_DE_SII) == 0x28);
 /* SDH Directory entry structure */
 struct NTFS_DE_SDH {
@@ -1155,7 +1172,7 @@ struct REPARSE_DATA_BUFFER {
 #define FILE_NEED_EA 0x80 // See ntifs.h
 /*
- *FILE_NEED_EA, indicates that the file to which the EA belongs cannot be
+ * FILE_NEED_EA indicates that the file to which the EA belongs cannot be
 * interpreted without understanding the associated extended attributes.
 */
 struct EA_INFO {
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index eb01f7e76479..629403ede6e5 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -53,6 +53,8 @@ enum utf16_endian;
 #define E_NTFS_NONRESIDENT 556
 /* NTFS specific error code about punch hole. */
 #define E_NTFS_NOTALIGNED 557
+/* NTFS specific error code when on-disk struct is corrupted. */
+#define E_NTFS_CORRUPT 558
 /* sbi->flags */
@@ -274,7 +276,7 @@ struct ntfs_sb_info {
 __le16 flags; // Cached current VOLUME_INFO::flags, VOLUME_FLAG_DIRTY.
 u8 major_ver;
 u8 minor_ver;
- char label[65];
+ char label[256];
 bool real_dirty; // Real fs state.
 } volume;
@@ -284,7 +286,6 @@ struct ntfs_sb_info {
 struct ntfs_inode *ni;
 u32 next_id;
 u64 next_off;
-
 __le32 def_security_id;
 } security;
@@ -312,6 +313,7 @@ struct ntfs_sb_info {
 struct ntfs_mount_options *options;
 struct ratelimit_state msg_ratelimit;
+ struct proc_dir_entry *procdir;
 };
 /* One MFT record(usually 1024 bytes), consists of attributes.
*/ @@ -465,8 +467,7 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, struct ATTR_LIST_ENTRY **new_le); bool al_remove_le(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le); bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn, - const __le16 *name, size_t name_len, - const struct MFT_REF *ref); + const __le16 *name, u8 name_len, const struct MFT_REF *ref); int al_update(struct ntfs_inode *ni, int sync); static inline size_t al_aligned(size_t size) { @@ -525,7 +526,7 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, int ni_load_all_mi(struct ntfs_inode *ni); bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi); int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, - const __le16 *name, size_t name_len, bool base_only, + const __le16 *name, u8 name_len, bool base_only, const __le16 *id); int ni_create_attr_list(struct ntfs_inode *ni); int ni_expand_list(struct ntfs_inode *ni); @@ -542,7 +543,7 @@ void ni_remove_attr_le(struct ntfs_inode *ni, struct ATTRIB *attr, struct mft_inode *mi, struct ATTR_LIST_ENTRY *le); int ni_delete_all(struct ntfs_inode *ni); struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni, - const struct cpu_str *uni, + const struct le_str *uni, const struct MFT_REF *home, struct mft_inode **mi, struct ATTR_LIST_ENTRY **entry); @@ -629,7 +630,7 @@ int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run); int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, u64 *lbo, u64 *bytes); struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST nRec, - bool dir); + enum RECORD_FLAG flag); extern const u8 s_default_security[0x50]; bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len); int ntfs_security_init(struct ntfs_sb_info *sbi); @@ -647,8 +648,10 @@ int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag, int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag, const struct MFT_REF *ref); void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim); -int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim); +int run_deallocate(struct ntfs_sb_info *sbi, const struct runs_tree *run, + bool trim); bool valid_windows_name(struct ntfs_sb_info *sbi, const struct le_str *name); +int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len); /* Globals from index.c */ int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit); @@ -706,8 +709,8 @@ int ntfs_sync_inode(struct inode *inode); int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); int inode_write_data(struct inode *inode, const void *data, size_t bytes); -struct inode *ntfs_create_inode(struct mnt_idmap *idmap, - struct inode *dir, struct dentry *dentry, +struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, const struct cpu_str *uni, umode_t mode, dev_t dev, const char *symname, u32 size, struct ntfs_fnd *fnd); @@ -736,7 +739,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr); // TODO: id? 
struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr, enum ATTR_TYPE type, const __le16 *name, - size_t name_len, const __le16 *id); + u8 name_len, const __le16 *id); static inline struct ATTRIB *rec_find_attr_le(struct mft_inode *rec, struct ATTR_LIST_ENTRY *le) { @@ -856,12 +859,12 @@ unsigned long ntfs_names_hash(const u16 *name, size_t len, const u16 *upcase, /* globals from xattr.c */ #ifdef CONFIG_NTFS3_FS_POSIX_ACL -struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap, - struct dentry *dentry, int type); +struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, + int type); int ntfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode, - struct inode *dir); + struct inode *dir); #else #define ntfs_get_acl NULL #define ntfs_set_acl NULL diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 2a281cead2bc..c12ebffc94da 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -124,7 +124,7 @@ int mi_read(struct mft_inode *mi, bool is_mft) struct rw_semaphore *rw_lock = NULL; if (is_mounted(sbi)) { - if (!is_mft) { + if (!is_mft && mft_ni) { rw_lock = &mft_ni->file.run_lock; down_read(rw_lock); } @@ -148,7 +148,7 @@ int mi_read(struct mft_inode *mi, bool is_mft) ni_lock(mft_ni); down_write(rw_lock); } - err = attr_load_runs_vcn(mft_ni, ATTR_DATA, NULL, 0, &mft_ni->file.run, + err = attr_load_runs_vcn(mft_ni, ATTR_DATA, NULL, 0, run, vbo >> sbi->cluster_bits); if (rw_lock) { up_write(rw_lock); @@ -180,6 +180,12 @@ ok: return 0; out: + if (err == -E_NTFS_CORRUPT) { + ntfs_err(sbi->sb, "mft corrupted"); + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + err = -EINVAL; + } + return err; } @@ -296,7 +302,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) */ struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr, enum ATTR_TYPE type, const __le16 *name, - size_t name_len, const __le16 *id) + u8 name_len, const __le16 *id) { u32 type_in = le32_to_cpu(type); u32 atype; @@ -382,6 +388,8 @@ int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, rec->seq = cpu_to_le16(seq); rec->flags = RECORD_FLAG_IN_USE | flags; + if (MFTRECORD_FIXUP_OFFSET == MFTRECORD_FIXUP_OFFSET_3) + rec->mft_record = cpu_to_le32(rno); mi->dirty = true; diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c index 47612d16c027..cb8cf0161177 100644 --- a/fs/ntfs3/run.c +++ b/fs/ntfs3/run.c @@ -434,8 +434,8 @@ requires_new_range: if (should_add_tail) { tail_lcn = r->lcn == SPARSE_LCN ? - SPARSE_LCN : - (r->lcn + Tovcn); + SPARSE_LCN : + (r->lcn + Tovcn); tail_vcn = r->vcn + Tovcn; tail_len = r->len - Tovcn; } diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 5158dd31fd97..1a02072b6b0e 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -57,6 +57,7 @@ #include <linux/minmax.h> #include <linux/module.h> #include <linux/nls.h> +#include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/statfs.h> @@ -116,8 +117,8 @@ void ntfs_inode_printk(struct inode *inode, const char *fmt, ...) /* Use static allocated buffer, if possible. */ name = atomic_dec_and_test(&s_name_buf_cnt) ? 
- s_name_buf :
- kmalloc(sizeof(s_name_buf), GFP_NOFS);
+ s_name_buf :
+ kmalloc(sizeof(s_name_buf), GFP_NOFS);
 if (name) {
 struct dentry *de = d_find_alias(inode);
@@ -257,6 +258,7 @@ enum Opt {
 Opt_err,
 };
+// clang-format off
 static const struct fs_parameter_spec ntfs_fs_parameters[] = {
 fsparam_u32("uid", Opt_uid),
 fsparam_u32("gid", Opt_gid),
@@ -277,9 +279,13 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = {
 fsparam_flag_no("nocase", Opt_nocase),
 {}
 };
+// clang-format on
 /*
 * Load nls table or if @nls is utf8 then return NULL.
+ *
+ * It would be a good idea to use "const char *nls" here,
+ * but load_nls() accepts "char *".
 */
 static struct nls_table *ntfs_load_nls(char *nls)
 {
@@ -436,6 +442,103 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
 return 0;
 }
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_info_root;
+
+/*
+ * ntfs3_volinfo:
+ *
+ * The content of /proc/fs/ntfs3/<dev>/volinfo
+ *
+ * ntfs3.1
+ * cluster size
+ * number of clusters
+ */
+static int ntfs3_volinfo(struct seq_file *m, void *o)
+{
+ struct super_block *sb = m->private;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+ seq_printf(m, "ntfs%d.%d\n%u\n%zu\n", sbi->volume.major_ver,
+ sbi->volume.minor_ver, sbi->cluster_size,
+ sbi->used.bitmap.nbits);
+
+ return 0;
+}
+
+static int ntfs3_volinfo_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ntfs3_volinfo, pde_data(inode));
+}
+
+/* read /proc/fs/ntfs3/<dev>/label */
+static int ntfs3_label_show(struct seq_file *m, void *o)
+{
+ struct super_block *sb = m->private;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+ seq_printf(m, "%s\n", sbi->volume.label);
+
+ return 0;
+}
+
+/* write /proc/fs/ntfs3/<dev>/label */
+static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ int err;
+ struct super_block *sb = pde_data(file_inode(file));
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ ssize_t ret = count;
+ u8 *label = kmalloc(count, GFP_NOFS);
+
+ if (!label)
+ return -ENOMEM;
+
+ if (copy_from_user(label, buffer, ret)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ while (ret > 0 && label[ret - 1] == '\n')
+ ret -= 1;
+
+ err = ntfs_set_label(sbi, label, ret);
+
+ if (err < 0) {
+ ntfs_err(sb, "failed (%d) to write label", err);
+ ret = err;
+ goto out;
+ }
+
+ *ppos += count;
+ ret = count;
+out:
+ kfree(label);
+ return ret;
+}
+
+static int ntfs3_label_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ntfs3_label_show, pde_data(inode));
+}
+
+static const struct proc_ops ntfs3_volinfo_fops = {
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+ .proc_open = ntfs3_volinfo_open,
+};
+
+static const struct proc_ops ntfs3_label_fops = {
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+ .proc_open = ntfs3_label_open,
+ .proc_write = ntfs3_label_write,
+};
+
+#endif
+
 static struct kmem_cache *ntfs_inode_cachep;
 static struct inode *ntfs_alloc_inode(struct super_block *sb)
@@ -510,6 +613,16 @@ static void ntfs_put_super(struct super_block *sb)
 {
 struct ntfs_sb_info *sbi = sb->s_fs_info;
+#ifdef CONFIG_PROC_FS
+ // Remove /proc/fs/ntfs3/..
+ if (sbi->procdir) {
+ remove_proc_entry("label", sbi->procdir);
+ remove_proc_entry("volinfo", sbi->procdir);
+ remove_proc_entry(sb->s_id, proc_info_root);
+ sbi->procdir = NULL;
+ }
+#endif
+
 /* Mark rw ntfs as clear, if possible.
 */
 ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
@@ -711,9 +824,16 @@ static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot)
 /*
 * ntfs_init_from_boot - Init internal info from on-disk boot sector.
+ *
+ * NTFS mount begins from the boot sector - a specially formatted 512 bytes.
+ * There are two boot sectors: the first and the last 512 bytes of the volume.
+ * The content of the boot sector does not change during the life of the volume.
+ *
+ * NOTE: ntfs.sys checks only the first (primary) boot sector.
+ * chkdsk checks both boot sectors.
 */
 static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
- u64 dev_size)
+ u64 dev_size, struct NTFS_BOOT **boot2)
 {
 struct ntfs_sb_info *sbi = sb->s_fs_info;
 int err;
@@ -724,6 +844,8 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
 struct MFT_REC *rec;
 u16 fn, ao;
 u8 cluster_bits;
+ u32 boot_off = 0;
+ const char *hint = "Primary boot";
 sbi->volume.blocks = dev_size >> PAGE_SHIFT;
 if (!bh)
 return -EIO;
+check_boot:
 err = -EINVAL;
- boot = (struct NTFS_BOOT *)bh->b_data;
+ boot = (struct NTFS_BOOT *)Add2Ptr(bh->b_data, boot_off);
 if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1)) {
- ntfs_err(sb, "Boot's signature is not NTFS.");
+ ntfs_err(sb, "%s signature is not NTFS.", hint);
 goto out;
 }
@@ -748,14 +871,16 @@
 boot->bytes_per_sector[0];
 if (boot_sector_size < SECTOR_SIZE ||
 !is_power_of_2(boot_sector_size)) {
- ntfs_err(sb, "Invalid bytes per sector %u.", boot_sector_size);
+ ntfs_err(sb, "%s: invalid bytes per sector %u.", hint,
+ boot_sector_size);
 goto out;
 }
 /* cluster size: 512, 1K, 2K, 4K, ... 2M */
 sct_per_clst = true_sectors_per_clst(boot);
 if ((int)sct_per_clst < 0 || !is_power_of_2(sct_per_clst)) {
- ntfs_err(sb, "Invalid sectors per cluster %u.", sct_per_clst);
+ ntfs_err(sb, "%s: invalid sectors per cluster %u.", hint,
+ sct_per_clst);
 goto out;
 }
@@ -771,20 +896,20 @@
 if (mlcn * sct_per_clst >= sectors || mlcn2 * sct_per_clst >= sectors) {
 ntfs_err(
 sb,
- "Start of MFT 0x%llx (0x%llx) is out of volume 0x%llx.",
- mlcn, mlcn2, sectors);
+ "%s: start of MFT 0x%llx (0x%llx) is out of volume 0x%llx.",
+ hint, mlcn, mlcn2, sectors);
 goto out;
 }
 sbi->record_size = record_size = boot->record_size < 0 ?
 1 << (-boot->record_size) :
- (u32)boot->record_size << cluster_bits;
+ (u32)boot->record_size << cluster_bits;
 sbi->record_bits = blksize_bits(record_size);
 sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes
 /* Check MFT record size. */
 if (record_size < SECTOR_SIZE || !is_power_of_2(record_size)) {
- ntfs_err(sb, "Invalid bytes per MFT record %u (%d).",
+ ntfs_err(sb, "%s: invalid bytes per MFT record %u (%d).", hint,
 record_size, boot->record_size);
 goto out;
 }
@@ -796,18 +921,18 @@
 }
 sbi->index_size = boot->index_size < 0 ?
- 1u << (-boot->index_size) :
- (u32)boot->index_size << cluster_bits;
+ 1u << (-boot->index_size) :
+ (u32)boot->index_size << cluster_bits;
 /* Check index record size.
 */
 if (sbi->index_size < SECTOR_SIZE || !is_power_of_2(sbi->index_size)) {
- ntfs_err(sb, "Invalid bytes per index %u(%d).", sbi->index_size,
- boot->index_size);
+ ntfs_err(sb, "%s: invalid bytes per index %u(%d).", hint,
+ sbi->index_size, boot->index_size);
 goto out;
 }
 if (sbi->index_size > MAXIMUM_BYTES_PER_INDEX) {
- ntfs_err(sb, "Unsupported bytes per index %u.",
+ ntfs_err(sb, "%s: unsupported bytes per index %u.", hint,
 sbi->index_size);
 goto out;
 }
@@ -834,7 +959,7 @@
 /* Compare boot's cluster and sector. */
 if (sbi->cluster_size < boot_sector_size) {
- ntfs_err(sb, "Invalid bytes per cluster (%u).",
+ ntfs_err(sb, "%s: invalid bytes per cluster (%u).", hint,
 sbi->cluster_size);
 goto out;
 }
@@ -850,7 +975,7 @@
 }
 sbi->max_bytes_per_attr =
- record_size - ALIGN(MFTRECORD_FIXUP_OFFSET_1, 8) -
+ record_size - ALIGN(MFTRECORD_FIXUP_OFFSET, 8) -
 ALIGN(((record_size >> SECTOR_SHIFT) * sizeof(short)), 8) -
 ALIGN(sizeof(enum ATTR_TYPE), 8);
@@ -892,10 +1017,10 @@
 sbi->new_rec = rec;
 rec->rhdr.sign = NTFS_FILE_SIGNATURE;
- rec->rhdr.fix_off = cpu_to_le16(MFTRECORD_FIXUP_OFFSET_1);
+ rec->rhdr.fix_off = cpu_to_le16(MFTRECORD_FIXUP_OFFSET);
 fn = (sbi->record_size >> SECTOR_SHIFT) + 1;
 rec->rhdr.fix_num = cpu_to_le16(fn);
- ao = ALIGN(MFTRECORD_FIXUP_OFFSET_1 + sizeof(short) * fn, 8);
+ ao = ALIGN(MFTRECORD_FIXUP_OFFSET + sizeof(short) * fn, 8);
 rec->attr_off = cpu_to_le16(ao);
 rec->used = cpu_to_le32(ao + ALIGN(sizeof(enum ATTR_TYPE), 8));
 rec->total = cpu_to_le32(sbi->record_size);
@@ -930,7 +1055,34 @@
 err = 0;

+ if (bh->b_blocknr && !sb_rdonly(sb)) {
+ /*
+ * Alternative boot is ok but primary is not ok.
+ * Do not update the primary boot here because it may be a faked boot.
+ * Let ntfs be mounted and update the boot sector later.
+ */
+ *boot2 = kmemdup(boot, sizeof(*boot), GFP_NOFS | __GFP_NOWARN);
+ }
+
 out:
+ if (err == -EINVAL && !bh->b_blocknr && dev_size > PAGE_SHIFT) {
+ u32 block_size = min_t(u32, sector_size, PAGE_SIZE);
+ u64 lbo = dev_size - sizeof(*boot);
+
+ /*
+ * Try alternative boot (last sector)
+ */
+ brelse(bh);
+
+ sb_set_blocksize(sb, block_size);
+ bh = ntfs_bread(sb, lbo >> blksize_bits(block_size));
+ if (!bh)
+ return -EINVAL;
+
+ boot_off = lbo & (block_size - 1);
+ hint = "Alternative boot";
+ goto check_boot;
+ }
 brelse(bh);
 return err;
@@ -955,6 +1107,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
 struct ATTR_DEF_ENTRY *t;
 u16 *shared;
 struct MFT_REF ref;
+ bool ro = sb_rdonly(sb);
+ struct NTFS_BOOT *boot2 = NULL;
 ref.high = 0;
@@ -985,7 +1139,7 @@
 /* Parse boot. */
 err = ntfs_init_from_boot(sb, bdev_logical_block_size(bdev),
- bdev_nr_bytes(bdev));
+ bdev_nr_bytes(bdev), &boot2);
 if (err)
 goto out;
@@ -1035,6 +1189,10 @@
 sbi->volume.minor_ver = info->minor_ver;
 sbi->volume.flags = info->flags;
 sbi->volume.ni = ni;
+ if (info->flags & VOLUME_FLAG_DIRTY) {
+ sbi->volume.real_dirty = true;
+ ntfs_info(sb, "It is recommended to use chkdsk.");
+ }
 /* Load $MFTMirr to estimate recs_mirr.
*/ ref.low = cpu_to_le32(MFT_REC_MIRR); @@ -1069,21 +1227,16 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) iput(inode); - if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) { - if (!sb_rdonly(sb)) { - ntfs_warn(sb, - "failed to replay log file. Can't mount rw!"); - err = -EINVAL; - goto out; - } - } else if (sbi->volume.flags & VOLUME_FLAG_DIRTY) { - if (!sb_rdonly(sb) && !options->force) { - ntfs_warn( - sb, - "volume is dirty and \"force\" flag is not set!"); - err = -EINVAL; - goto out; - } + if ((sbi->flags & NTFS_FLAGS_NEED_REPLAY) && !ro) { + ntfs_warn(sb, "failed to replay log file. Can't mount rw!"); + err = -EINVAL; + goto out; + } + + if ((sbi->volume.flags & VOLUME_FLAG_DIRTY) && !ro && !options->force) { + ntfs_warn(sb, "volume is dirty and \"force\" flag is not set!"); + err = -EINVAL; + goto out; } /* Load $MFT. */ @@ -1173,7 +1326,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) bad_len += len; bad_frags += 1; - if (sb_rdonly(sb)) + if (ro) continue; if (wnd_set_used_safe(&sbi->used.bitmap, lcn, len, &tt) || tt) { @@ -1368,6 +1521,44 @@ load_root: goto put_inode_out; } + if (boot2) { + /* + * Alternative boot is ok but primary is not ok. + * Volume is recognized as NTFS. Update primary boot. + */ + struct buffer_head *bh0 = sb_getblk(sb, 0); + if (bh0) { + if (buffer_locked(bh0)) + __wait_on_buffer(bh0); + + lock_buffer(bh0); + memcpy(bh0->b_data, boot2, sizeof(*boot2)); + set_buffer_uptodate(bh0); + mark_buffer_dirty(bh0); + unlock_buffer(bh0); + if (!sync_dirty_buffer(bh0)) + ntfs_warn(sb, "primary boot is updated"); + put_bh(bh0); + } + + kfree(boot2); + } + +#ifdef CONFIG_PROC_FS + /* Create /proc/fs/ntfs3/.. */ + if (proc_info_root) { + struct proc_dir_entry *e = proc_mkdir(sb->s_id, proc_info_root); + static_assert((S_IRUGO | S_IWUSR) == 0644); + if (e) { + proc_create_data("volinfo", S_IRUGO, e, + &ntfs3_volinfo_fops, sb); + proc_create_data("label", S_IRUGO | S_IWUSR, e, + &ntfs3_label_fops, sb); + sbi->procdir = e; + } + } +#endif + return 0; put_inode_out: @@ -1380,6 +1571,7 @@ out: put_mount_options(sbi->options); put_ntfs(sbi); sb->s_fs_info = NULL; + kfree(boot2); return err; } @@ -1473,12 +1665,14 @@ static void ntfs_fs_free(struct fs_context *fc) put_mount_options(opts); } +// clang-format off static const struct fs_context_operations ntfs_context_ops = { .parse_param = ntfs_fs_parse_param, .get_tree = ntfs_fs_get_tree, .reconfigure = ntfs_fs_reconfigure, .free = ntfs_fs_free, }; +// clang-format on /* * ntfs_init_fs_context - Initialize sbi and opts @@ -1559,6 +1753,12 @@ static int __init init_ntfs_fs(void) if (IS_ENABLED(CONFIG_NTFS3_LZX_XPRESS)) pr_info("ntfs3: Read-only LZX/Xpress compression included\n"); + +#ifdef CONFIG_PROC_FS + /* Create "/proc/fs/ntfs3" */ + proc_info_root = proc_mkdir("fs/ntfs3", NULL); +#endif + err = ntfs3_init_bitmap(); if (err) return err; @@ -1590,6 +1790,12 @@ static void __exit exit_ntfs_fs(void) kmem_cache_destroy(ntfs_inode_cachep); unregister_filesystem(&ntfs_fs_type); ntfs3_exit_bitmap(); + +#ifdef CONFIG_PROC_FS + if (proc_info_root) + remove_proc_entry("fs/ntfs3", NULL); +#endif + } MODULE_LICENSE("GPL"); diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index c3de60a4543f..023f314e8950 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -24,7 +24,7 @@ static inline size_t unpacked_ea_size(const struct EA_FULL *ea) { return ea->size ? 
le32_to_cpu(ea->size) : - ALIGN(struct_size(ea, name, + ALIGN(struct_size(ea, name, 1 + ea->name_len + le16_to_cpu(ea->elength)), 4); @@ -141,6 +141,7 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, memset(Add2Ptr(ea_p, size), 0, add_bytes); + err = -EINVAL; /* Check all attributes for consistency. */ for (off = 0; off < size; off += ea_size) { const struct EA_FULL *ef = Add2Ptr(ea_p, off); @@ -214,6 +215,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, ea = Add2Ptr(ea_all, off); ea_size = unpacked_ea_size(ea); + if (!ea->name_len) + break; + if (buffer) { if (ret + ea->name_len + 1 > bytes_per_buffer) { err = -ERANGE; @@ -524,8 +528,8 @@ out: /* * ntfs_get_acl - inode_operations::get_acl */ -struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap, - struct dentry *dentry, int type) +struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, + int type) { struct inode *inode = d_inode(dentry); struct ntfs_inode *ni = ntfs_i(inode); @@ -592,8 +596,7 @@ static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap, case ACL_TYPE_ACCESS: /* Do not change i_mode if we are in init_acl */ if (acl && !init_acl) { - err = posix_acl_update_mode(idmap, inode, &mode, - &acl); + err = posix_acl_update_mode(idmap, inode, &mode, &acl); if (err) return err; } @@ -816,10 +819,9 @@ out: * ntfs_setxattr - inode_operations::setxattr */ static noinline int ntfs_setxattr(const struct xattr_handler *handler, - struct mnt_idmap *idmap, - struct dentry *de, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) + struct mnt_idmap *idmap, struct dentry *de, + struct inode *inode, const char *name, + const void *value, size_t size, int flags) { int err = -EINVAL; struct ntfs_inode *ni = ntfs_i(inode); diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 8372b0e7fd15..b14d165632e7 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -27,6 +27,8 @@ #define ACPI_BAY_HID "LNXIOBAY" #define ACPI_DOCK_HID "LNXDOCK" #define ACPI_ECDT_HID "LNXEC" +/* SMBUS HID definition as supported by Microsoft Windows */ +#define ACPI_SMBUS_MS_HID "SMB0001" /* Quirk for broken IBM BIOSes */ #define ACPI_SMBUS_IBM_HID "SMBUSIBM" diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 45401f7a3548..0587354ba678 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -688,7 +688,7 @@ /* init and exit section handling */ #define INIT_DATA \ KEEP(*(SORT(___kentry+*))) \ - *(.init.data init.data.*) \ + *(.init.data .init.data.*) \ MEM_DISCARD(init.data*) \ KERNEL_CTORS() \ MCOUNT_REC() \ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 640f1c07c894..641dc4843987 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -707,6 +707,9 @@ extern int acpi_nvs_register(__u64 start, __u64 size); extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data); +const struct acpi_device_id *acpi_match_acpi_device(const struct acpi_device_id *ids, + const struct acpi_device *adev); + const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const struct device *dev); @@ -922,6 +925,12 @@ static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), struct acpi_device_id; +static inline const struct acpi_device_id *acpi_match_acpi_device( + const struct acpi_device_id *ids, const struct acpi_device *adev) +{ + return NULL; +} + static inline const struct acpi_device_id 
*acpi_match_device( const struct acpi_device_id *ids, const struct device *dev) { diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 7d6d73b78147..03644237e1ef 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -302,12 +302,10 @@ void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, #endif /* - * On 64-bit systems bitmaps are represented as u64 arrays internally. On LE32 - * machines the order of hi and lo parts of numbers match the bitmap structure. - * In both cases conversion is not needed when copying data from/to arrays of - * u64. + * On 64-bit systems bitmaps are represented as u64 arrays internally. So, + * the conversion is not needed when copying data from/to arrays of u64. */ -#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN) +#if BITS_PER_LONG == 32 void bitmap_from_arr64(unsigned long *bitmap, const u64 *buf, unsigned int nbits); void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits); #else diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index bb0ee80526b2..8d07116caaf1 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -385,7 +385,7 @@ static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, #if MAX_NUMNODES > 1 #define for_each_node_mask(node, mask) \ for ((node) = first_node(mask); \ - (node >= 0) && (node) < MAX_NUMNODES; \ + (node) < MAX_NUMNODES; \ (node) = next_node((node), (mask))) #else /* MAX_NUMNODES == 1 */ #define for_each_node_mask(node, mask) \ diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 82ec6af71a1d..62d318377379 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -984,7 +984,11 @@ struct usb_ssp_cap_descriptor { #define USB_SSP_MIN_RX_LANE_COUNT (0xf << 8) #define USB_SSP_MIN_TX_LANE_COUNT (0xf << 12) __le16 wReserved; - __le32 bmSublinkSpeedAttr[1]; /* list of sublink speed attrib entries */ + union { + __le32 legacy_padding; + /* list of sublink speed attrib entries */ + __DECLARE_FLEX_ARRAY(__le32, bmSublinkSpeedAttr); + }; #define USB_SSP_SUBLINK_SPEED_SSID (0xf) /* sublink speed ID */ #define USB_SSP_SUBLINK_SPEED_LSE (0x3 << 4) /* Lanespeed exponent */ #define USB_SSP_SUBLINK_SPEED_LSE_BPS 0 diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b04f52e7cd28..4529e264cb86 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8146,7 +8146,7 @@ static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, - .llseek = seq_lseek, + .llseek = tracing_lseek, .release = tracing_err_log_release, }; diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 5fe525f1b8cc..7ccc7a8e155b 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -31,7 +31,7 @@ trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node) /* Common ftrace options */ xbc_node_for_each_array_value(node, "options", anode, p) { - if (strscpy(buf, p, ARRAY_SIZE(buf)) == -E2BIG) { + if (strscpy(buf, p, ARRAY_SIZE(buf)) < 0) { pr_err("String is too long: %s\n", p); continue; } @@ -87,7 +87,7 @@ trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node) const char *p; xbc_node_for_each_array_value(node, "events", anode, p) { - if (strscpy(buf, p, ARRAY_SIZE(buf)) == -E2BIG) { + if (strscpy(buf, p, ARRAY_SIZE(buf)) < 0) { pr_err("String is too long: %s\n", p); continue; } @@ -486,7 +486,7 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node 
*gnode, p = xbc_node_find_value(enode, "filter", NULL); if (p && *p != '\0') { - if (strscpy(buf, p, ARRAY_SIZE(buf)) == -E2BIG) + if (strscpy(buf, p, ARRAY_SIZE(buf)) < 0) pr_err("filter string is too long: %s\n", p); else if (apply_event_filter(file, buf) < 0) pr_err("Failed to apply filter: %s\n", buf); @@ -494,7 +494,7 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode, if (IS_ENABLED(CONFIG_HIST_TRIGGERS)) { xbc_node_for_each_array_value(enode, "actions", anode, p) { - if (strscpy(buf, p, ARRAY_SIZE(buf)) == -E2BIG) + if (strscpy(buf, p, ARRAY_SIZE(buf)) < 0) pr_err("action string is too long: %s\n", p); else if (trigger_process_regex(file, buf) < 0) pr_err("Failed to apply an action: %s\n", p); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 781f061ec0fa..fbc89baf7de6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2739,7 +2739,7 @@ config STACKINIT_KUNIT_TEST config FORTIFY_KUNIT_TEST tristate "Test fortified str*() and mem*() function internals at runtime" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && FORTIFY_SOURCE default KUNIT_ALL_TESTS help Builds unit tests for checking internals of FORTIFY_SOURCE as used diff --git a/lib/bitmap.c b/lib/bitmap.c index 1c81413c51f8..ddb31015e38a 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1495,7 +1495,7 @@ void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits) EXPORT_SYMBOL(bitmap_to_arr32); #endif -#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN) +#if BITS_PER_LONG == 32 /** * bitmap_from_arr64 - copy the contents of u64 array of bits to bitmap * @bitmap: array of unsigned longs, the destination bitmap diff --git a/lib/cpumask.c b/lib/cpumask.c index e7258836b60b..de356f16773a 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -157,7 +157,7 @@ EXPORT_SYMBOL(cpumask_local_spread); static DEFINE_PER_CPU(int, distribute_cpu_mask_prev); /** - * Returns an arbitrary cpu within srcp1 & srcp2. + * cpumask_any_and_distribute - Return an arbitrary cpu within srcp1 & srcp2. * * Iterated calls using the same srcp1 and srcp2 will be distributed within * their intersection. 
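The trace_boot.c hunks above replace `strscpy(buf, p, ARRAY_SIZE(buf)) == -E2BIG` with `strscpy(...) < 0`. In the kernel, strscpy() returns -E2BIG on truncation, so the old test happens to work, but checking for any negative return follows the kernel's error-return convention and stays correct if other failure codes are ever added. Below is a minimal userspace sketch of that idiom, not kernel code: strscpy_demo() is a hypothetical stand-in with strscpy()-like semantics (count copied on success, negative on truncation).

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in modeled on strscpy(): returns the number of
 * characters copied, or a negative value when the source is truncated. */
static long strscpy_demo(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size == 0)
		return -1;
	if (len >= size) {
		memcpy(dst, src, size - 1);
		dst[size - 1] = '\0';
		return -1;	/* truncated: any negative value is failure */
	}
	memcpy(dst, src, len + 1);
	return (long)len;
}

int main(void)
{
	char buf[8];

	/* Checking "< 0" catches every failure mode rather than comparing
	 * against one specific error value such as -E2BIG. */
	if (strscpy_demo(buf, "this string is too long", sizeof(buf)) < 0)
		fprintf(stderr, "String is too long\n");
	else
		printf("copied: %s\n", buf);
	return 0;
}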
diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c index 524132f33cf0..c8c33cbaae9e 100644 --- a/lib/fortify_kunit.c +++ b/lib/fortify_kunit.c @@ -25,11 +25,6 @@ static const char array_of_10[] = "this is 10"; static const char *ptr_of_11 = "this is 11!"; static char array_unknown[] = "compiler thinks I might change"; -/* Handle being built without CONFIG_FORTIFY_SOURCE */ -#ifndef __compiletime_strlen -# define __compiletime_strlen __builtin_strlen -#endif - static void known_sizes_test(struct kunit *test) { KUNIT_EXPECT_EQ(test, __compiletime_strlen("88888888"), 8); @@ -312,14 +307,6 @@ DEFINE_ALLOC_SIZE_TEST_PAIR(kvmalloc) } while (0) DEFINE_ALLOC_SIZE_TEST_PAIR(devm_kmalloc) -static int fortify_test_init(struct kunit *test) -{ - if (!IS_ENABLED(CONFIG_FORTIFY_SOURCE)) - kunit_skip(test, "Not built with CONFIG_FORTIFY_SOURCE=y"); - - return 0; -} - static struct kunit_case fortify_test_cases[] = { KUNIT_CASE(known_sizes_test), KUNIT_CASE(control_flow_split_test), @@ -336,7 +323,6 @@ static struct kunit_case fortify_test_cases[] = { static struct kunit_suite fortify_test_suite = { .name = "fortify", - .init = fortify_test_init, .test_cases = fortify_test_cases, }; diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index a8005ad3bd58..187f5b2db4cf 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -470,6 +470,7 @@ static void __init test_bitmap_parselist(void) if (err != ptest.errno) { pr_err("parselist: %d: input is %s, errno is %d, expected %d\n", i, ptest.in, err, ptest.errno); + failed_tests++; continue; } @@ -478,6 +479,7 @@ static void __init test_bitmap_parselist(void) pr_err("parselist: %d: input is %s, result is 0x%lx, expected 0x%lx\n", i, ptest.in, bmap[0], *ptest.expected); + failed_tests++; continue; } @@ -511,11 +513,13 @@ static void __init test_bitmap_printlist(void) if (ret != slen + 1) { pr_err("bitmap_print_to_pagebuf: result is %d, expected %d\n", ret, slen); + failed_tests++; goto out; } if (strncmp(buf, expected, slen)) { pr_err("bitmap_print_to_pagebuf: result is %s, expected %s\n", buf, expected); + failed_tests++; goto out; } @@ -583,6 +587,7 @@ static void __init test_bitmap_parse(void) if (err != test.errno) { pr_err("parse: %d: input is %s, errno is %d, expected %d\n", i, test.in, err, test.errno); + failed_tests++; continue; } @@ -591,6 +596,7 @@ static void __init test_bitmap_parse(void) pr_err("parse: %d: input is %s, result is 0x%lx, expected 0x%lx\n", i, test.in, bmap[0], *test.expected); + failed_tests++; continue; } @@ -615,10 +621,12 @@ static void __init test_bitmap_arr32(void) next_bit = find_next_bit(bmap2, round_up(nbits, BITS_PER_LONG), nbits); - if (next_bit < round_up(nbits, BITS_PER_LONG)) + if (next_bit < round_up(nbits, BITS_PER_LONG)) { pr_err("bitmap_copy_arr32(nbits == %d:" " tail is not safely cleared: %d\n", nbits, next_bit); + failed_tests++; + } if (nbits < EXP1_IN_BITS - 32) expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)], @@ -641,15 +649,19 @@ static void __init test_bitmap_arr64(void) expect_eq_bitmap(bmap2, exp1, nbits); next_bit = find_next_bit(bmap2, round_up(nbits, BITS_PER_LONG), nbits); - if (next_bit < round_up(nbits, BITS_PER_LONG)) + if (next_bit < round_up(nbits, BITS_PER_LONG)) { pr_err("bitmap_copy_arr64(nbits == %d:" " tail is not safely cleared: %d\n", nbits, next_bit); + failed_tests++; + } if ((nbits % 64) && - (arr[(nbits - 1) / 64] & ~GENMASK_ULL((nbits - 1) % 64, 0))) + (arr[(nbits - 1) / 64] & ~GENMASK_ULL((nbits - 1) % 64, 0))) { pr_err("bitmap_to_arr64(nbits == %d): tail is not safely cleared: 0x%016llx 
(must be 0x%016llx)\n", nbits, arr[(nbits - 1) / 64], GENMASK_ULL((nbits - 1) % 64, 0)); + failed_tests++; + } if (nbits < EXP1_IN_BITS - 64) expect_eq_uint(arr[DIV_ROUND_UP(nbits, 64)], 0xa5a5a5a5); diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 8c392fb75049..d0116c6939dc 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1319,6 +1319,9 @@ sub dump_enum($$) { my $file = shift; my $members; + # ignore members marked private: + $x =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; + $x =~ s/\/\*\s*private:.*}/}/gosi; $x =~ s@/\*.*?\*/@@gos; # strip comments. # strip #define macros inside enums diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c index b498ed302461..6724e2ff6da8 100644 --- a/security/apparmor/crypto.c +++ b/security/apparmor/crypto.c @@ -28,15 +28,15 @@ unsigned int aa_hash_size(void) char *aa_calc_hash(void *data, size_t len) { SHASH_DESC_ON_STACK(desc, apparmor_tfm); - char *hash = NULL; - int error = -ENOMEM; + char *hash; + int error; if (!apparmor_tfm) return NULL; hash = kzalloc(apparmor_hash_size, GFP_KERNEL); if (!hash) - goto fail; + return ERR_PTR(-ENOMEM); desc->tfm = apparmor_tfm; @@ -62,7 +62,7 @@ int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start, size_t len) { SHASH_DESC_ON_STACK(desc, apparmor_tfm); - int error = -ENOMEM; + int error; __le32 le32_version = cpu_to_le32(version); if (!aa_g_hash_policy) @@ -73,7 +73,7 @@ int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start, profile->hash = kzalloc(apparmor_hash_size, GFP_KERNEL); if (!profile->hash) - goto fail; + return -ENOMEM; desc->tfm = apparmor_tfm; diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 9119ddda6217..698b124e649f 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -161,6 +161,7 @@ static int path_name(const char *op, struct aa_label *label, return 0; } +struct aa_perms default_perms = {}; /** * aa_lookup_fperms - convert dfa compressed perms to internal perms * @dfa: dfa to lookup perms for (NOT NULL) @@ -171,7 +172,6 @@ static int path_name(const char *op, struct aa_label *label, * * Returns: a pointer to a file permission set */ -struct aa_perms default_perms = {}; struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, aa_state_t state, struct path_cond *cond) { diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index f431251ffb91..c9463bd0307d 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -46,7 +46,7 @@ int apparmor_initialized; union aa_buffer { struct list_head list; - char buffer[1]; + DECLARE_FLEX_ARRAY(char, buffer); }; #define RESERVE_COUNT 2 @@ -1647,7 +1647,7 @@ retry: list_del(&aa_buf->list); buffer_count--; spin_unlock(&aa_buffers_lock); - return &aa_buf->buffer[0]; + return aa_buf->buffer; } if (in_atomic) { /* @@ -1670,7 +1670,7 @@ retry: pr_warn_once("AppArmor: Failed to allocate a memory buffer.\n"); return NULL; } - return &aa_buf->buffer[0]; + return aa_buf->buffer; } void aa_put_buffer(char *buf) @@ -1747,7 +1747,7 @@ static int __init alloc_buffers(void) destroy_buffers(); return -ENOMEM; } - aa_put_buffer(&aa_buf->buffer[0]); + aa_put_buffer(aa_buf->buffer); } return 0; } diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 51e8184e0fec..b38f7b2a5e1d 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -430,11 +430,9 @@ static struct aa_policy *__lookup_parent(struct aa_ns *ns, * @hname: hierarchical profile name to find parent of (NOT NULL) * @gfp: 
type of allocation. * - * Returns: NULL on error, parent profile on success - * * Requires: ns mutex lock held * - * Returns: unrefcounted parent policy or NULL if error creating + * Return: unrefcounted parent policy on success or %NULL if error creating * place holder profiles. */ static struct aa_policy *__create_missing_ancestors(struct aa_ns *ns, @@ -591,7 +589,15 @@ struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name, profile->label.flags |= FLAG_NULL; rules = list_first_entry(&profile->rules, typeof(*rules), list); rules->file.dfa = aa_get_dfa(nulldfa); + rules->file.perms = kcalloc(2, sizeof(struct aa_perms), GFP_KERNEL); + if (!rules->file.perms) + goto fail; + rules->file.size = 2; rules->policy.dfa = aa_get_dfa(nulldfa); + rules->policy.perms = kcalloc(2, sizeof(struct aa_perms), GFP_KERNEL); + if (!rules->policy.perms) + goto fail; + rules->policy.size = 2; if (parent) { profile->path_flags = parent->path_flags; @@ -602,6 +608,11 @@ struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name, } return profile; + +fail: + aa_free_profile(profile); + + return NULL; } /** @@ -828,7 +839,7 @@ bool aa_current_policy_admin_capable(struct aa_ns *ns) /** * aa_may_manage_policy - can the current task manage policy * @label: label to check if it can manage policy - * @op: the policy manipulation operation being done + * @mask: contains the policy manipulation operation being done * * Returns: 0 if the task is allowed to manipulate policy else error */ @@ -883,7 +894,6 @@ static struct aa_profile *__list_lookup_parent(struct list_head *lh, * __replace_profile - replace @old with @new on a list * @old: profile to be replaced (NOT NULL) * @new: profile to replace @old with (NOT NULL) - * @share_proxy: transfer @old->proxy to @new * * Will duplicate and refcount elements that @new inherits from @old * and will inherit @old children. 
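The aa_alloc_null() hunk above adds two kcalloc() allocations for the file and policy permission tables, with a shared fail: label that unwinds via aa_free_profile(). The sketch below illustrates that allocate-or-unwind pattern in isolation; it is not the AppArmor code. The struct and function names are made up for the example, and libc calloc()/free() stand in for kcalloc()/kfree().

#include <stdlib.h>

struct perms_tables {
	void *file_perms;
	void *policy_perms;
};

/* Allocate every table, or unwind everything and return NULL. */
static struct perms_tables *tables_alloc(size_t nperms, size_t perm_size)
{
	struct perms_tables *t = calloc(1, sizeof(*t));

	if (!t)
		return NULL;

	t->file_perms = calloc(nperms, perm_size);
	if (!t->file_perms)
		goto fail;

	t->policy_perms = calloc(nperms, perm_size);
	if (!t->policy_perms)
		goto fail;

	return t;

fail:
	/* free(NULL) is a no-op, so a partially built object unwinds
	 * safely through the single exit path. */
	free(t->file_perms);
	free(t);
	return NULL;
}

int main(void)
{
	struct perms_tables *t = tables_alloc(2, 16);

	if (t) {
		free(t->file_perms);
		free(t->policy_perms);
		free(t);
	}
	return 0;
}

The single fail: exit mirrors the kernel change: each new allocation only needs one extra goto, and the cleanup logic lives in one place instead of being duplicated after every allocation site.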
diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c index cc89d1e88fb7..0cb02da8a319 100644 --- a/security/apparmor/policy_compat.c +++ b/security/apparmor/policy_compat.c @@ -146,7 +146,8 @@ static struct aa_perms compute_fperms_other(struct aa_dfa *dfa, * * Returns: remapped perm table */ -static struct aa_perms *compute_fperms(struct aa_dfa *dfa) +static struct aa_perms *compute_fperms(struct aa_dfa *dfa, + u32 *size) { aa_state_t state; unsigned int state_count; @@ -159,6 +160,7 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa) table = kvcalloc(state_count * 2, sizeof(struct aa_perms), GFP_KERNEL); if (!table) return NULL; + *size = state_count * 2; for (state = 0; state < state_count; state++) { table[state * 2] = compute_fperms_user(dfa, state); @@ -168,7 +170,8 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa) return table; } -static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch) +static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch, + u32 *size) { struct aa_perms *perms; int state; @@ -179,6 +182,9 @@ static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch) state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen; /* DFAs are restricted from having a state_count of less than 2 */ perms = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL); + if (!perms) + return NULL; + *size = state_count; /* zero init so skip the trap state (state == 0) */ for (state = 1; state < state_count; state++) @@ -239,7 +245,8 @@ static struct aa_perms compute_perms_entry(struct aa_dfa *dfa, return perms; } -static struct aa_perms *compute_perms(struct aa_dfa *dfa, u32 version) +static struct aa_perms *compute_perms(struct aa_dfa *dfa, u32 version, + u32 *size) { unsigned int state; unsigned int state_count; @@ -252,6 +259,7 @@ static struct aa_perms *compute_perms(struct aa_dfa *dfa, u32 version) table = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL); if (!table) return NULL; + *size = state_count; /* zero init so skip the trap state (state == 0) */ for (state = 1; state < state_count; state++) @@ -286,7 +294,7 @@ static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor) /* TODO: merge different dfa mappings into single map_policy fn */ int aa_compat_map_xmatch(struct aa_policydb *policy) { - policy->perms = compute_xmatch_perms(policy->dfa); + policy->perms = compute_xmatch_perms(policy->dfa, &policy->size); if (!policy->perms) return -ENOMEM; @@ -297,7 +305,7 @@ int aa_compat_map_xmatch(struct aa_policydb *policy) int aa_compat_map_policy(struct aa_policydb *policy, u32 version) { - policy->perms = compute_perms(policy->dfa, version); + policy->perms = compute_perms(policy->dfa, version, &policy->size); if (!policy->perms) return -ENOMEM; @@ -308,7 +316,7 @@ int aa_compat_map_policy(struct aa_policydb *policy, u32 version) int aa_compat_map_file(struct aa_policydb *policy) { - policy->perms = compute_fperms(policy->dfa); + policy->perms = compute_fperms(policy->dfa, &policy->size); if (!policy->perms) return -ENOMEM; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index cf2ceec40b28..694fb7a09962 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -448,7 +448,7 @@ static struct aa_dfa *unpack_dfa(struct aa_ext *e, int flags) /** * unpack_trans_table - unpack a profile transition table * @e: serialized data extent information (NOT NULL) - * @table: str table to unpack to (NOT NULL) + * @strs: str table to unpack 
to (NOT NULL) * * Returns: true if table successfully unpacked or not present */ @@ -860,10 +860,12 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } profile->attach.xmatch_len = tmp; profile->attach.xmatch.start[AA_CLASS_XMATCH] = DFA_START; - error = aa_compat_map_xmatch(&profile->attach.xmatch); - if (error) { - info = "failed to convert xmatch permission table"; - goto fail; + if (!profile->attach.xmatch.perms) { + error = aa_compat_map_xmatch(&profile->attach.xmatch); + if (error) { + info = "failed to convert xmatch permission table"; + goto fail; + } } } @@ -983,31 +985,54 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) AA_CLASS_FILE); if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; - error = aa_compat_map_policy(&rules->policy, e->version); - if (error) { - info = "failed to remap policydb permission table"; - goto fail; + if (!rules->policy.perms) { + error = aa_compat_map_policy(&rules->policy, + e->version); + if (error) { + info = "failed to remap policydb permission table"; + goto fail; + } } - } else + } else { rules->policy.dfa = aa_get_dfa(nulldfa); - + rules->policy.perms = kcalloc(2, sizeof(struct aa_perms), + GFP_KERNEL); + if (!rules->policy.perms) + goto fail; + rules->policy.size = 2; + } /* get file rules */ error = unpack_pdb(e, &rules->file, false, true, &info); if (error) { goto fail; } else if (rules->file.dfa) { - error = aa_compat_map_file(&rules->file); - if (error) { - info = "failed to remap file permission table"; - goto fail; + if (!rules->file.perms) { + error = aa_compat_map_file(&rules->file); + if (error) { + info = "failed to remap file permission table"; + goto fail; + } } } else if (rules->policy.dfa && rules->policy.start[AA_CLASS_FILE]) { rules->file.dfa = aa_get_dfa(rules->policy.dfa); rules->file.start[AA_CLASS_FILE] = rules->policy.start[AA_CLASS_FILE]; - } else + rules->file.perms = kcalloc(rules->policy.size, + sizeof(struct aa_perms), + GFP_KERNEL); + if (!rules->file.perms) + goto fail; + memcpy(rules->file.perms, rules->policy.perms, + rules->policy.size * sizeof(struct aa_perms)); + rules->file.size = rules->policy.size; + } else { rules->file.dfa = aa_get_dfa(nulldfa); - + rules->file.perms = kcalloc(2, sizeof(struct aa_perms), + GFP_KERNEL); + if (!rules->file.perms) + goto fail; + rules->file.size = 2; + } error = -EPROTO; if (aa_unpack_nameX(e, AA_STRUCT, "data")) { info = "out of memory"; @@ -1046,8 +1071,13 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } - rhashtable_insert_fast(profile->data, &data->head, - profile->data->p); + if (rhashtable_insert_fast(profile->data, &data->head, + profile->data->p)) { + kfree_sensitive(data->key); + kfree_sensitive(data); + info = "failed to insert data to table"; + goto fail; + } } if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL)) { @@ -1134,22 +1164,16 @@ static int verify_header(struct aa_ext *e, int required, const char **ns) return 0; } -static bool verify_xindex(int xindex, int table_size) -{ - int index, xtype; - xtype = xindex & AA_X_TYPE_MASK; - index = xindex & AA_X_INDEX_MASK; - if (xtype == AA_X_TABLE && index >= table_size) - return false; - return true; -} - -/* verify dfa xindexes are in range of transition tables */ -static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size) +/** + * verify_dfa_accept_index - verify accept indexes are in range of perms table + * @dfa: the dfa to check accept indexes are in range + * table_size: the permission table size the indexes 
should be within + */ +static bool verify_dfa_accept_index(struct aa_dfa *dfa, int table_size) { int i; for (i = 0; i < dfa->tables[YYTD_ID_ACCEPT]->td_lolen; i++) { - if (!verify_xindex(ACCEPT_TABLE(dfa)[i], table_size)) + if (ACCEPT_TABLE(dfa)[i] >= table_size) return false; } return true; @@ -1186,11 +1210,13 @@ static bool verify_perms(struct aa_policydb *pdb) if (!verify_perm(&pdb->perms[i])) return false; /* verify indexes into str table */ - if (pdb->perms[i].xindex >= pdb->trans.size) + if ((pdb->perms[i].xindex & AA_X_TYPE_MASK) == AA_X_TABLE && + (pdb->perms[i].xindex & AA_X_INDEX_MASK) >= pdb->trans.size) return false; - if (pdb->perms[i].tag >= pdb->trans.size) + if (pdb->perms[i].tag && pdb->perms[i].tag >= pdb->trans.size) return false; - if (pdb->perms[i].label >= pdb->trans.size) + if (pdb->perms[i].label && + pdb->perms[i].label >= pdb->trans.size) return false; } @@ -1212,10 +1238,10 @@ static int verify_profile(struct aa_profile *profile) if (!rules) return 0; - if ((rules->file.dfa && !verify_dfa_xindex(rules->file.dfa, - rules->file.trans.size)) || + if ((rules->file.dfa && !verify_dfa_accept_index(rules->file.dfa, + rules->file.size)) || (rules->policy.dfa && - !verify_dfa_xindex(rules->policy.dfa, rules->policy.trans.size))) { + !verify_dfa_accept_index(rules->policy.dfa, rules->policy.size))) { audit_iface(profile, NULL, NULL, "Unpack: Invalid named transition", NULL, -EPROTO); return -EPROTO; diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c index e1bfdab524b7..5c9bde25e56d 100644 --- a/security/apparmor/policy_unpack_test.c +++ b/security/apparmor/policy_unpack_test.c @@ -69,31 +69,30 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf, *buf = AA_NAME; *(buf + 1) = strlen(TEST_STRING_NAME) + 1; - strcpy(buf + 3, TEST_STRING_NAME); + strscpy(buf + 3, TEST_STRING_NAME, e->end - (void *)(buf + 3)); buf = e->start + TEST_STRING_BUF_OFFSET; *buf = AA_STRING; *(buf + 1) = strlen(TEST_STRING_DATA) + 1; - strcpy(buf + 3, TEST_STRING_DATA); - + strscpy(buf + 3, TEST_STRING_DATA, e->end - (void *)(buf + 3)); buf = e->start + TEST_NAMED_U32_BUF_OFFSET; *buf = AA_NAME; *(buf + 1) = strlen(TEST_U32_NAME) + 1; - strcpy(buf + 3, TEST_U32_NAME); + strscpy(buf + 3, TEST_U32_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_U32_NAME) + 1) = AA_U32; *((u32 *)(buf + 3 + strlen(TEST_U32_NAME) + 2)) = TEST_U32_DATA; buf = e->start + TEST_NAMED_U64_BUF_OFFSET; *buf = AA_NAME; *(buf + 1) = strlen(TEST_U64_NAME) + 1; - strcpy(buf + 3, TEST_U64_NAME); + strscpy(buf + 3, TEST_U64_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_U64_NAME) + 1) = AA_U64; *((u64 *)(buf + 3 + strlen(TEST_U64_NAME) + 2)) = TEST_U64_DATA; buf = e->start + TEST_NAMED_BLOB_BUF_OFFSET; *buf = AA_NAME; *(buf + 1) = strlen(TEST_BLOB_NAME) + 1; - strcpy(buf + 3, TEST_BLOB_NAME); + strscpy(buf + 3, TEST_BLOB_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_BLOB_NAME) + 1) = AA_BLOB; *(buf + 3 + strlen(TEST_BLOB_NAME) + 2) = TEST_BLOB_DATA_SIZE; memcpy(buf + 3 + strlen(TEST_BLOB_NAME) + 6, @@ -102,7 +101,7 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf, buf = e->start + TEST_NAMED_ARRAY_BUF_OFFSET; *buf = AA_NAME; *(buf + 1) = strlen(TEST_ARRAY_NAME) + 1; - strcpy(buf + 3, TEST_ARRAY_NAME); + strscpy(buf + 3, TEST_ARRAY_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_ARRAY_NAME) + 1) = AA_ARRAY; *((u16 *)(buf + 3 + strlen(TEST_ARRAY_NAME) + 2)) = TEST_ARRAY_SIZE; diff --git 
a/security/apparmor/secid.c b/security/apparmor/secid.c index 24a0e23f1b2b..83d3d1e6d9dc 100644 --- a/security/apparmor/secid.c +++ b/security/apparmor/secid.c @@ -53,8 +53,7 @@ void aa_secid_update(u32 secid, struct aa_label *label) xa_unlock_irqrestore(&aa_secids, flags); } -/** - * +/* * see label for inverse aa_label_to_secid */ struct aa_label *aa_secid_to_label(u32 secid) diff --git a/sound/core/jack.c b/sound/core/jack.c index 88493cc31914..03d155ed362b 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -654,6 +654,7 @@ void snd_jack_report(struct snd_jack *jack, int status) struct snd_jack_kctl *jack_kctl; unsigned int mask_bits = 0; #ifdef CONFIG_SND_JACK_INPUT_DEV + struct input_dev *idev; int i; #endif @@ -670,17 +671,15 @@ void snd_jack_report(struct snd_jack *jack, int status) status & jack_kctl->mask_bits); #ifdef CONFIG_SND_JACK_INPUT_DEV - mutex_lock(&jack->input_dev_lock); - if (!jack->input_dev) { - mutex_unlock(&jack->input_dev_lock); + idev = input_get_device(jack->input_dev); + if (!idev) return; - } for (i = 0; i < ARRAY_SIZE(jack->key); i++) { int testbit = ((SND_JACK_BTN_0 >> i) & ~mask_bits); if (jack->type & testbit) - input_report_key(jack->input_dev, jack->key[i], + input_report_key(idev, jack->key[i], status & testbit); } @@ -688,13 +687,13 @@ void snd_jack_report(struct snd_jack *jack, int status) int testbit = ((1 << i) & ~mask_bits); if (jack->type & testbit) - input_report_switch(jack->input_dev, + input_report_switch(idev, jack_switch_types[i], status & testbit); } - input_sync(jack->input_dev); - mutex_unlock(&jack->input_dev_lock); + input_sync(idev); + input_put_device(idev); #endif /* CONFIG_SND_JACK_INPUT_DEV */ } EXPORT_SYMBOL(snd_jack_report); diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index 7bde7fb64011..a0b951471699 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -31,15 +31,41 @@ static unsigned long max_alloc_per_card = 32UL * 1024UL * 1024UL; module_param(max_alloc_per_card, ulong, 0644); MODULE_PARM_DESC(max_alloc_per_card, "Max total allocation bytes per card."); +static void __update_allocated_size(struct snd_card *card, ssize_t bytes) +{ + card->total_pcm_alloc_bytes += bytes; +} + +static void update_allocated_size(struct snd_card *card, ssize_t bytes) +{ + mutex_lock(&card->memory_mutex); + __update_allocated_size(card, bytes); + mutex_unlock(&card->memory_mutex); +} + +static void decrease_allocated_size(struct snd_card *card, size_t bytes) +{ + mutex_lock(&card->memory_mutex); + WARN_ON(card->total_pcm_alloc_bytes < bytes); + __update_allocated_size(card, -(ssize_t)bytes); + mutex_unlock(&card->memory_mutex); +} + static int do_alloc_pages(struct snd_card *card, int type, struct device *dev, int str, size_t size, struct snd_dma_buffer *dmab) { enum dma_data_direction dir; int err; + /* check and reserve the requested size */ + mutex_lock(&card->memory_mutex); if (max_alloc_per_card && - card->total_pcm_alloc_bytes + size > max_alloc_per_card) + card->total_pcm_alloc_bytes + size > max_alloc_per_card) { + mutex_unlock(&card->memory_mutex); return -ENOMEM; + } + __update_allocated_size(card, size); + mutex_unlock(&card->memory_mutex); if (str == SNDRV_PCM_STREAM_PLAYBACK) dir = DMA_TO_DEVICE; @@ -47,9 +73,14 @@ static int do_alloc_pages(struct snd_card *card, int type, struct device *dev, dir = DMA_FROM_DEVICE; err = snd_dma_alloc_dir_pages(type, dev, dir, size, dmab); if (!err) { - mutex_lock(&card->memory_mutex); - card->total_pcm_alloc_bytes += dmab->bytes; - 
mutex_unlock(&card->memory_mutex); + /* the actual allocation size might be bigger than requested, + * and we need to correct the account + */ + if (dmab->bytes != size) + update_allocated_size(card, dmab->bytes - size); + } else { + /* take back on allocation failure */ + decrease_allocated_size(card, size); } return err; } @@ -58,10 +89,7 @@ static void do_free_pages(struct snd_card *card, struct snd_dma_buffer *dmab) { if (!dmab->area) return; - mutex_lock(&card->memory_mutex); - WARN_ON(card->total_pcm_alloc_bytes < dmab->bytes); - card->total_pcm_alloc_bytes -= dmab->bytes; - mutex_unlock(&card->memory_mutex); + decrease_allocated_size(card, dmab->bytes); snd_dma_free_pages(dmab); dmab->area = NULL; } diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index eb1d86ff6166..7cc84e137999 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -714,7 +714,7 @@ static int system_2p_ev_to_ump_midi1(const struct snd_seq_event *event, { data->system.status = status; data->system.parm1 = (event->data.control.value >> 7) & 0x7f; - data->system.parm1 = event->data.control.value & 0x7f; + data->system.parm2 = event->data.control.value & 0x7f; return 1; } diff --git a/sound/core/ump_convert.c b/sound/core/ump_convert.c index fb61df424a87..de04799fdb69 100644 --- a/sound/core/ump_convert.c +++ b/sound/core/ump_convert.c @@ -73,7 +73,7 @@ static int cvt_ump_system_to_legacy(u32 data, unsigned char *buf) case UMP_SYSTEM_STATUS_MIDI_TIME_CODE: case UMP_SYSTEM_STATUS_SONG_SELECT: buf[1] = (data >> 8) & 0x7f; - return 1; + return 2; case UMP_SYSTEM_STATUS_SONG_POSITION: buf[1] = (data >> 8) & 0x7f; buf[2] = data & 0x7f; diff --git a/sound/firewire/fireface/ff.c b/sound/firewire/fireface/ff.c index 82241058ea14..6e84e4787259 100644 --- a/sound/firewire/fireface/ff.c +++ b/sound/firewire/fireface/ff.c @@ -16,7 +16,7 @@ MODULE_LICENSE("GPL"); static void name_card(struct snd_ff *ff) { struct fw_device *fw_dev = fw_parent_device(ff->unit); - const char *const names[] = { + static const char *const names[] = { [SND_FF_UNIT_VERSION_FF800] = "Fireface800", [SND_FF_UNIT_VERSION_FF400] = "Fireface400", [SND_FF_UNIT_VERSION_UFX] = "FirefaceUFX", diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index 9523479fa94a..63d40f1a914f 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -44,7 +44,7 @@ struct compat_info { static bool detect_loud_models(struct fw_unit *unit) { - const char *const models[] = { + static const char *const models[] = { "Onyxi", "Onyx-i", "Onyx 1640i", diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c index f258cb3a6895..9b1bcabd8414 100644 --- a/sound/hda/hdac_regmap.c +++ b/sound/hda/hdac_regmap.c @@ -596,10 +596,9 @@ EXPORT_SYMBOL_GPL(snd_hdac_regmap_update_raw_once); */ void snd_hdac_regmap_sync(struct hdac_device *codec) { - if (codec->regmap) { - mutex_lock(&codec->regmap_lock); + mutex_lock(&codec->regmap_lock); + if (codec->regmap) regcache_sync(codec->regmap); - mutex_unlock(&codec->regmap_lock); - } + mutex_unlock(&codec->regmap_lock); } EXPORT_SYMBOL_GPL(snd_hdac_regmap_sync); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index afe8253f9a4f..e2f8b608de82 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5883,7 +5883,7 @@ static void alc_fixup_headset_mode_alc255_no_hp_mic(struct hda_codec *codec, struct alc_spec *spec = codec->spec; spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; 
alc255_set_default_jack_type(codec); - } + } else alc_fixup_headset_mode(codec, fix, action); } @@ -7068,6 +7068,9 @@ enum { ALC285_FIXUP_SPEAKER2_TO_DAC1, ALC285_FIXUP_ASUS_SPEAKER2_TO_DAC1, ALC285_FIXUP_ASUS_HEADSET_MIC, + ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS, + ALC285_FIXUP_ASUS_I2C_SPEAKER2_TO_DAC1, + ALC285_FIXUP_ASUS_I2C_HEADSET_MIC, ALC280_FIXUP_HP_HEADSET_MIC, ALC221_FIXUP_HP_FRONT_MIC, ALC292_FIXUP_TPT460, @@ -8058,6 +8061,31 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC285_FIXUP_ASUS_SPEAKER2_TO_DAC1 }, + [ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x14, 0x90170120 }, + { } + }, + .chained = true, + .chain_id = ALC285_FIXUP_ASUS_HEADSET_MIC + }, + [ALC285_FIXUP_ASUS_I2C_SPEAKER2_TO_DAC1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC287_FIXUP_CS35L41_I2C_2 + }, + [ALC285_FIXUP_ASUS_I2C_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a11050 }, + { 0x1b, 0x03a11c30 }, + { } + }, + .chained = true, + .chain_id = ALC285_FIXUP_ASUS_I2C_SPEAKER2_TO_DAC1 + }, [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -9573,10 +9601,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), + SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650P", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1463, "Asus GA402X", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1473, "ASUS GU604V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1493, "ASUS GV601V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), + SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), @@ -9602,7 +9633,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), - SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), @@ -9731,6 +9763,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL5[03]RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + 
SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11286,6 +11319,7 @@ enum { ALC897_FIXUP_HP_HSMIC_VERB, ALC897_FIXUP_LENOVO_HEADSET_MODE, ALC897_FIXUP_HEADSET_MIC_PIN2, + ALC897_FIXUP_UNIS_H3C_X500S, }; static const struct hda_fixup alc662_fixups[] = { @@ -11725,6 +11759,13 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC897_FIXUP_LENOVO_HEADSET_MODE }, + [ALC897_FIXUP_UNIS_H3C_X500S] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x14, AC_VERB_SET_EAPD_BTLENABLE, 0 }, + {} + }, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -11886,6 +11927,7 @@ static const struct hda_model_fixup alc662_fixup_models[] = { {.id = ALC662_FIXUP_USI_HEADSET_MODE, .name = "usi-headset"}, {.id = ALC662_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"}, {.id = ALC669_FIXUP_ACER_ASPIRE_ETHOS, .name = "aspire-ethos"}, + {.id = ALC897_FIXUP_UNIS_H3C_X500S, .name = "unis-h3c-x500s"}, {} }; diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index e56ae54805a8..1478068ad5dd 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -669,7 +669,7 @@ llvm.*:: "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ "-Wno-unused-value -Wno-pointer-sign " \ "-working-directory $WORKING_DIR " \ - "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" + "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" llvm.clang-opt:: Options passed to clang. diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 6e5ba3cd2b72..30eea576721f 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -36,6 +36,9 @@ COMMON OPTIONS --input=<file>:: Input file name. (default: perf.data unless stdin is a fifo) +--output=<file>:: + Output file name for perf lock contention and report. + -v:: --verbose:: Be more verbose (show symbol address, etc). @@ -200,6 +203,11 @@ CONTENTION OPTIONS Note that it matches the substring so 'rq' would match both 'raw_spin_rq_lock' and 'irq_enter_rcu'. +-x:: +--field-separator=<SEP>:: + Show results using a CSV-style output to make it easy to import directly + into spreadsheets. Columns are separated by the string specified in SEP. + SEE ALSO -------- diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 9c5aa14a44cf..0609c19caabd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -315,6 +315,9 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt +FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl +FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl + CORE_CFLAGS += -fno-omit-frame-pointer CORE_CFLAGS += -ggdb3 CORE_CFLAGS += -funwind-tables @@ -344,8 +347,8 @@ ifneq ($(TCMALLOC),) endif ifeq ($(FEATURES_DUMP),) -# We will display at the end of this Makefile.config, using $(call feature_display_entries), -# as we may retry some feature detection here. 
+# We will display at the end of this Makefile.config, using $(call feature_display_entries) +# As we may retry some feature detection here, see the disassembler-four-args case, for instance FEATURE_DISPLAY_DEFERRED := 1 include $(srctree)/tools/build/Makefile.feature else @@ -680,6 +683,10 @@ ifdef BUILD_BPF_SKEL CFLAGS += -DHAVE_BPF_SKEL endif +ifndef GEN_VMLINUX_H + VMLINUX_H=$(src-perf)/util/bpf_skel/vmlinux/vmlinux.h +endif + dwarf-post-unwind := 1 dwarf-post-unwind-text := BUG @@ -903,9 +910,13 @@ ifdef BUILD_NONDISTRO ifeq ($(feature-libbfd-liberty), 1) EXTLIBS += -lbfd -lopcodes -liberty + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl + FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -ldl else ifeq ($(feature-libbfd-liberty-z), 1) EXTLIBS += -lbfd -lopcodes -liberty -lz + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl + FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -lz -ldl endif endif $(call feature_check,disassembler-four-args) @@ -1329,6 +1340,6 @@ endif # re-generate FEATURE-DUMP as we may have called feature_check, found out # extra libraries to add to LDFLAGS of some other test and then redo those -# tests. +# tests, see the block about libbfd, disassembler-four-args, for instance. $(shell rm -f $(FEATURE_DUMP_FILENAME)) $(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME))) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index b1e62a621f92..097316ef38e6 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -132,6 +132,8 @@ include ../scripts/utilities.mak # Define EXTRA_TESTS to enable building extra tests useful mainly to perf # developers, such as: # x86 instruction decoder - new instructions test +# +# Define GEN_VMLINUX_H to generate vmlinux.h from the BTF. # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -197,6 +199,7 @@ FLEX ?= flex BISON ?= bison STRIP = strip AWK = awk +READELF ?= readelf # include Makefile.config by default and rule out # non-config cases @@ -1061,7 +1064,7 @@ $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_ ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool # Get Clang's default includes on this system, as opposed to those seen by -# '-target bpf'. This fixes "missing" files on some architectures/distros, +# '--target=bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. # # Use '-idirafter': Don't interfere with include mechanics except where the @@ -1084,8 +1087,44 @@ $(BPFTOOL): | $(SKEL_TMP_OUT) $(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \ OUTPUT=$(SKEL_TMP_OUT)/ bootstrap -$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) - $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \ +# Paths to search for a kernel to generate vmlinux.h from. +VMLINUX_BTF_ELF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../vmlinux \ + /boot/vmlinux-$(shell uname -r) + +# Paths to BTF information. +VMLINUX_BTF_BTF_PATHS ?= /sys/kernel/btf/vmlinux + +# Filter out kernels that don't exist or without a BTF section. 
+VMLINUX_BTF_ELF_ABSPATHS ?= $(abspath $(wildcard $(VMLINUX_BTF_ELF_PATHS))) +VMLINUX_BTF_PATHS ?= $(shell for file in $(VMLINUX_BTF_ELF_ABSPATHS); \ + do \ + if [ -f $$file ] && ($(READELF) -S "$$file" | grep -q .BTF); \ + then \ + echo "$$file"; \ + fi; \ + done) \ + $(wildcard $(VMLINUX_BTF_BTF_PATHS)) + +# Select the first as the source of vmlinux.h. +VMLINUX_BTF ?= $(firstword $(VMLINUX_BTF_PATHS)) + +ifeq ($(VMLINUX_H),) + ifeq ($(VMLINUX_BTF),) + $(error Missing bpftool input for generating vmlinux.h) + endif +endif + +$(SKEL_OUT)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) +ifeq ($(VMLINUX_H),) + $(QUIET_GEN)$(BPFTOOL) btf dump file $< format c > $@ +else + $(Q)cp "$(VMLINUX_H)" $@ +endif + +$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) $(SKEL_OUT)/vmlinux.h | $(SKEL_TMP_OUT) + $(QUIET_CLANG)$(CLANG) -g -O2 --target=bpf -Wall -Werror $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \ -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 512c2d885d24..81d22657922a 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -102,3 +102,23 @@ void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr) } } } + +int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size) +{ + if (!x86__is_amd_cpu()) + return 0; + + if (!evsel->core.attr.precise_ip && + !(evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3))) + return 0; + + /* More verbose IBS errors. */ + if (evsel->core.attr.exclude_kernel || evsel->core.attr.exclude_user || + evsel->core.attr.exclude_hv || evsel->core.attr.exclude_idle || + evsel->core.attr.exclude_host || evsel->core.attr.exclude_guest) { + return scnprintf(msg, size, "AMD IBS doesn't support privilege filtering. 
Try " + "again without the privilege modifiers (like 'k') at the end."); + } + + return 0; +} diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index f5674d824a40..83954af36753 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1524,7 +1524,7 @@ int cmd_daemon(int argc, const char **argv) if (argc) { if (!strcmp(argv[0], "start")) ret = __cmd_start(&__daemon, daemon_options, argc, argv); - if (!strcmp(argv[0], "signal")) + else if (!strcmp(argv[0], "signal")) ret = __cmd_signal(&__daemon, daemon_options, argc, argv); else if (!strcmp(argv[0], "stop")) ret = __cmd_stop(&__daemon, daemon_options, argc, argv); diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index 3751df744577..7f75c5b73f26 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -62,7 +62,6 @@ int cmd_kallsyms(int argc, const char **argv) if (argc < 1) usage_with_options(kallsyms_usage, options); - symbol_conf.sort_by_name = true; symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); if (symbol__init(NULL) < 0) return -1; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 8b505e1e5002..c15386cb1033 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -27,6 +27,7 @@ #include "util/map.h" #include "util/util.h" +#include <stdio.h> #include <sys/types.h> #include <sys/prctl.h> #include <semaphore.h> @@ -65,6 +66,8 @@ static int max_stack_depth = CONTENTION_STACK_DEPTH; static int stack_skip = CONTENTION_STACK_SKIP; static int print_nr_entries = INT_MAX / 2; static LIST_HEAD(callstack_filters); +static const char *output_name = NULL; +static FILE *lock_output; struct callstack_filter { struct list_head list; @@ -225,22 +228,28 @@ static void lock_stat_key_print_time(unsigned long long nsec, int len) { 0, NULL }, }; + /* for CSV output */ + if (len == 0) { + fprintf(lock_output, "%llu", nsec); + return; + } + for (int i = 0; table[i].unit; i++) { if (nsec < table[i].base) continue; - pr_info("%*.2f %s", len - 3, nsec / table[i].base, table[i].unit); + fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit); return; } - pr_info("%*llu %s", len - 3, nsec, "ns"); + fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns"); } #define PRINT_KEY(member) \ static void lock_stat_key_print_ ## member(struct lock_key *key, \ struct lock_stat *ls) \ { \ - pr_info("%*llu", key->len, (unsigned long long)ls->member); \ + fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\ } #define PRINT_TIME(member) \ @@ -1329,12 +1338,12 @@ static void print_bad_events(int bad, int total) if (quiet || total == 0 || (broken == 0 && verbose <= 0)) return; - pr_info("\n=== output for debug ===\n\n"); - pr_info("bad: %d, total: %d\n", bad, total); - pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100); - pr_info("histogram of events caused bad sequence\n"); + fprintf(lock_output, "\n=== output for debug ===\n\n"); + fprintf(lock_output, "bad: %d, total: %d\n", bad, total); + fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100); + fprintf(lock_output, "histogram of events caused bad sequence\n"); for (i = 0; i < BROKEN_MAX; i++) - pr_info(" %10s: %d\n", name[i], bad_hist[i]); + fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]); } /* TODO: various way to print, coloring, nano or milli sec */ @@ -1346,10 +1355,10 @@ static void print_result(void) int bad, total, printed; if (!quiet) { - pr_info("%20s ", "Name"); + 
fprintf(lock_output, "%20s ", "Name"); list_for_each_entry(key, &lock_keys, list) - pr_info("%*s ", key->len, key->header); - pr_info("\n\n"); + fprintf(lock_output, "%*s ", key->len, key->header); + fprintf(lock_output, "\n\n"); } bad = total = printed = 0; @@ -1374,7 +1383,7 @@ static void print_result(void) name = thread__comm_str(t); } - pr_info("%20s ", name); + fprintf(lock_output, "%20s ", name); } else { strncpy(cut_name, st->name, 16); cut_name[16] = '.'; @@ -1382,14 +1391,14 @@ static void print_result(void) cut_name[18] = '.'; cut_name[19] = '\0'; /* cut off name for saving output style */ - pr_info("%20s ", cut_name); + fprintf(lock_output, "%20s ", cut_name); } list_for_each_entry(key, &lock_keys, list) { key->print(key, st); - pr_info(" "); + fprintf(lock_output, " "); } - pr_info("\n"); + fprintf(lock_output, "\n"); if (++printed >= print_nr_entries) break; @@ -1406,13 +1415,13 @@ static void dump_threads(void) struct rb_node *node; struct thread *t; - pr_info("%10s: comm\n", "Thread ID"); + fprintf(lock_output, "%10s: comm\n", "Thread ID"); node = rb_first(&thread_stats); while (node) { st = container_of(node, struct thread_stat, rb); t = perf_session__findnew(session, st->tid); - pr_info("%10d: %s\n", st->tid, thread__comm_str(t)); + fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t)); node = rb_next(node); thread__put(t); } @@ -1438,7 +1447,7 @@ static void dump_map(void) unsigned int i; struct lock_stat *st; - pr_info("Address of instance: name of class\n"); + fprintf(lock_output, "Address of instance: name of class\n"); for (i = 0; i < LOCKHASH_SIZE; i++) { hlist_for_each_entry(st, &lockhash_table[i], hash_entry) { insert_to_result(st, compare_maps); @@ -1446,7 +1455,7 @@ static void dump_map(void) } while ((st = pop_from_result())) - pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name); + fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name); } static int dump_info(void) @@ -1626,58 +1635,242 @@ static void sort_contention_result(void) sort_result(); } -static void print_bpf_events(int total, struct lock_contention_fails *fails) +static void print_header_stdio(void) +{ + struct lock_key *key; + + list_for_each_entry(key, &lock_keys, list) + fprintf(lock_output, "%*s ", key->len, key->header); + + switch (aggr_mode) { + case LOCK_AGGR_TASK: + fprintf(lock_output, " %10s %s\n\n", "pid", + show_lock_owner ? "owner" : "comm"); + break; + case LOCK_AGGR_CALLER: + fprintf(lock_output, " %10s %s\n\n", "type", "caller"); + break; + case LOCK_AGGR_ADDR: + fprintf(lock_output, " %16s %s\n\n", "address", "symbol"); + break; + default: + break; + } +} + +static void print_header_csv(const char *sep) +{ + struct lock_key *key; + + fprintf(lock_output, "# output: "); + list_for_each_entry(key, &lock_keys, list) + fprintf(lock_output, "%s%s ", key->header, sep); + + switch (aggr_mode) { + case LOCK_AGGR_TASK: + fprintf(lock_output, "%s%s %s\n", "pid", sep, + show_lock_owner ? 
"owner" : "comm"); + break; + case LOCK_AGGR_CALLER: + fprintf(lock_output, "%s%s %s", "type", sep, "caller"); + if (verbose > 0) + fprintf(lock_output, "%s %s", sep, "stacktrace"); + fprintf(lock_output, "\n"); + break; + case LOCK_AGGR_ADDR: + fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type"); + break; + default: + break; + } +} + +static void print_header(void) +{ + if (!quiet) { + if (symbol_conf.field_sep) + print_header_csv(symbol_conf.field_sep); + else + print_header_stdio(); + } +} + +static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st) +{ + struct lock_key *key; + struct thread *t; + int pid; + + list_for_each_entry(key, &lock_keys, list) { + key->print(key, st); + fprintf(lock_output, " "); + } + + switch (aggr_mode) { + case LOCK_AGGR_CALLER: + fprintf(lock_output, " %10s %s\n", get_type_str(st->flags), st->name); + break; + case LOCK_AGGR_TASK: + pid = st->addr; + t = perf_session__findnew(session, pid); + fprintf(lock_output, " %10d %s\n", + pid, pid == -1 ? "Unknown" : thread__comm_str(t)); + break; + case LOCK_AGGR_ADDR: + fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr, + st->name, get_type_name(st->flags)); + break; + default: + break; + } + + if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { + struct map *kmap; + struct symbol *sym; + char buf[128]; + u64 ip; + + for (int i = 0; i < max_stack_depth; i++) { + if (!st->callstack || !st->callstack[i]) + break; + + ip = st->callstack[i]; + sym = machine__find_kernel_symbol(con->machine, ip, &kmap); + get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); + fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf); + } + } +} + +static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st, + const char *sep) +{ + struct lock_key *key; + struct thread *t; + int pid; + + list_for_each_entry(key, &lock_keys, list) { + key->print(key, st); + fprintf(lock_output, "%s ", sep); + } + + switch (aggr_mode) { + case LOCK_AGGR_CALLER: + fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name); + if (verbose <= 0) + fprintf(lock_output, "\n"); + break; + case LOCK_AGGR_TASK: + pid = st->addr; + t = perf_session__findnew(session, pid); + fprintf(lock_output, "%d%s %s\n", pid, sep, + pid == -1 ? "Unknown" : thread__comm_str(t)); + break; + case LOCK_AGGR_ADDR: + fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep, + st->name, sep, get_type_name(st->flags)); + break; + default: + break; + } + + if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { + struct map *kmap; + struct symbol *sym; + char buf[128]; + u64 ip; + + for (int i = 0; i < max_stack_depth; i++) { + if (!st->callstack || !st->callstack[i]) + break; + + ip = st->callstack[i]; + sym = machine__find_kernel_symbol(con->machine, ip, &kmap); + get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); + fprintf(lock_output, "%s %#lx %s", i ? 
":" : sep, (unsigned long) ip, buf); + } + fprintf(lock_output, "\n"); + } +} + +static void print_lock_stat(struct lock_contention *con, struct lock_stat *st) +{ + if (symbol_conf.field_sep) + print_lock_stat_csv(con, st, symbol_conf.field_sep); + else + print_lock_stat_stdio(con, st); +} + +static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails) { /* Output for debug, this have to be removed */ int broken = fails->task + fails->stack + fails->time + fails->data; + if (!use_bpf) + print_bad_events(bad, total); + if (quiet || total == 0 || (broken == 0 && verbose <= 0)) return; total += broken; - pr_info("\n=== output for debug ===\n\n"); - pr_info("bad: %d, total: %d\n", broken, total); - pr_info("bad rate: %.2f %%\n", (double)broken / (double)total * 100); + fprintf(lock_output, "\n=== output for debug ===\n\n"); + fprintf(lock_output, "bad: %d, total: %d\n", broken, total); + fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total); - pr_info("histogram of failure reasons\n"); - pr_info(" %10s: %d\n", "task", fails->task); - pr_info(" %10s: %d\n", "stack", fails->stack); - pr_info(" %10s: %d\n", "time", fails->time); - pr_info(" %10s: %d\n", "data", fails->data); + fprintf(lock_output, "histogram of failure reasons\n"); + fprintf(lock_output, " %10s: %d\n", "task", fails->task); + fprintf(lock_output, " %10s: %d\n", "stack", fails->stack); + fprintf(lock_output, " %10s: %d\n", "time", fails->time); + fprintf(lock_output, " %10s: %d\n", "data", fails->data); +} + +static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails, + const char *sep) +{ + /* Output for debug, this have to be removed */ + if (use_bpf) + bad = fails->task + fails->stack + fails->time + fails->data; + + if (quiet || total == 0 || (bad == 0 && verbose <= 0)) + return; + + total += bad; + fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad); + + if (use_bpf) { + fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task); + fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack); + fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time); + fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data); + } else { + int i; + const char *name[4] = { "acquire", "acquired", "contended", "release" }; + + for (i = 0; i < BROKEN_MAX; i++) + fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]); + } + fprintf(lock_output, "\n"); +} + +static void print_footer(int total, int bad, struct lock_contention_fails *fails) +{ + if (symbol_conf.field_sep) + print_footer_csv(total, bad, fails, symbol_conf.field_sep); + else + print_footer_stdio(total, bad, fails); } static void print_contention_result(struct lock_contention *con) { struct lock_stat *st; - struct lock_key *key; int bad, total, printed; - if (!quiet) { - list_for_each_entry(key, &lock_keys, list) - pr_info("%*s ", key->len, key->header); - - switch (aggr_mode) { - case LOCK_AGGR_TASK: - pr_info(" %10s %s\n\n", "pid", - show_lock_owner ? "owner" : "comm"); - break; - case LOCK_AGGR_CALLER: - pr_info(" %10s %s\n\n", "type", "caller"); - break; - case LOCK_AGGR_ADDR: - pr_info(" %16s %s\n\n", "address", "symbol"); - break; - default: - break; - } - } + if (!quiet) + print_header(); bad = total = printed = 0; while ((st = pop_from_result())) { - struct thread *t; - int pid; - total += use_bpf ? 
st->nr_contended : 1; if (st->broken) bad++; @@ -1685,45 +1878,7 @@ static void print_contention_result(struct lock_contention *con) if (!st->wait_time_total) continue; - list_for_each_entry(key, &lock_keys, list) { - key->print(key, st); - pr_info(" "); - } - - switch (aggr_mode) { - case LOCK_AGGR_CALLER: - pr_info(" %10s %s\n", get_type_str(st->flags), st->name); - break; - case LOCK_AGGR_TASK: - pid = st->addr; - t = perf_session__findnew(session, pid); - pr_info(" %10d %s\n", - pid, pid == -1 ? "Unknown" : thread__comm_str(t)); - break; - case LOCK_AGGR_ADDR: - pr_info(" %016llx %s (%s)\n", (unsigned long long)st->addr, - st->name, get_type_name(st->flags)); - break; - default: - break; - } - - if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { - struct map *kmap; - struct symbol *sym; - char buf[128]; - u64 ip; - - for (int i = 0; i < max_stack_depth; i++) { - if (!st->callstack || !st->callstack[i]) - break; - - ip = st->callstack[i]; - sym = machine__find_kernel_symbol(con->machine, ip, &kmap); - get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); - pr_info("\t\t\t%#lx %s\n", (unsigned long)ip, buf); - } - } + print_lock_stat(con, st); if (++printed >= print_nr_entries) break; @@ -1740,10 +1895,7 @@ static void print_contention_result(struct lock_contention *con) /* some entries are collected but hidden by the callstack filter */ total += con->nr_filtered; - if (use_bpf) - print_bpf_events(total, &con->fails); - else - print_bad_events(bad, total); + print_footer(total, bad, &con->fails); } static bool force; @@ -1773,8 +1925,6 @@ static int __cmd_report(bool display_info) return PTR_ERR(session); } - /* for lock function check */ - symbol_conf.sort_by_name = true; symbol_conf.allow_aliases = true; symbol__init(&session->header.env); @@ -1849,6 +1999,16 @@ static int check_lock_contention_options(const struct option *options, return -1; } + if (symbol_conf.field_sep) { + if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */ + strstr(symbol_conf.field_sep, "+") || /* part of caller offset */ + strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */ + pr_err("Cannot use the separator that is already used\n"); + parse_options_usage(usage, options, "x", 1); + return -1; + } + } + if (show_lock_owner) show_thread_stats = true; @@ -1903,8 +2063,6 @@ static int __cmd_contention(int argc, const char **argv) if (con.aggr_mode == LOCK_AGGR_CALLER) con.save_callstack = true; - /* for lock function check */ - symbol_conf.sort_by_name = true; symbol_conf.allow_aliases = true; symbol__init(&session->header.env); @@ -1966,6 +2124,15 @@ static int __cmd_contention(int argc, const char **argv) if (select_key(true)) goto out_delete; + if (symbol_conf.field_sep) { + int i; + struct lock_key *keys = contention_keys; + + /* do not align output in CSV format */ + for (i = 0; keys[i].name; i++) + keys[i].len = 0; + } + if (use_bpf) { lock_contention_start(); if (argc) @@ -2264,10 +2431,29 @@ static int parse_call_stack(const struct option *opt __maybe_unused, const char return ret; } +static int parse_output(const struct option *opt __maybe_unused, const char *str, + int unset __maybe_unused) +{ + const char **name = (const char **)opt->value; + + if (str == NULL) + return -1; + + lock_output = fopen(str, "w"); + if (lock_output == NULL) { + pr_err("Cannot open %s\n", str); + return -1; + } + + *name = str; + return 0; +} + int cmd_lock(int argc, const char **argv) { const struct option lock_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), + 
OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -2334,6 +2520,8 @@ int cmd_lock(int argc, const char **argv) OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES", "Filter specific function in the callstack", parse_call_stack), OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"), + OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator", + "print result in CSV format with custom separator"), OPT_PARENT(lock_options) }; @@ -2365,6 +2553,7 @@ int cmd_lock(int argc, const char **argv) for (i = 0; i < LOCKHASH_SIZE; i++) INIT_HLIST_HEAD(lockhash_table + i); + lock_output = stderr; argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands, lock_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a31a23af5547..dcedfe00f04d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1676,7 +1676,6 @@ repeat: * See symbol__browser_index. */ symbol_conf.priv_size += sizeof(u32); - symbol_conf.sort_by_name = true; } annotation_config__init(&report.annotation_opts); } diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json index 85fb975b6f56..daf9458f0b77 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json @@ -92,28 +92,28 @@ }, { "BriefDescription": "Percentage of cycles in aborted transactions.", - "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles", + "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_aborted_cycles", "ScaleUnit": "100%" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@", + "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_elision", "ScaleUnit": "1cycles / elision" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@", + "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_transaction", "ScaleUnit": "1cycles / transaction" }, { "BriefDescription": "Percentage of cycles within a transaction region.", - "MetricExpr": "cpu@cycles\\-t@ / cycles", + "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_transactional_cycles", "ScaleUnit": "100%" diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 0e2e446ced7a..fbb111e40829 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -1830,28 +1830,28 @@ }, { "BriefDescription": "Percentage of cycles in aborted transactions.", - "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles", + "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if 
has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_aborted_cycles", "ScaleUnit": "100%" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@", + "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_elision", "ScaleUnit": "1cycles / elision" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@", + "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_transaction", "ScaleUnit": "1cycles / transaction" }, { "BriefDescription": "Percentage of cycles within a transaction region.", - "MetricExpr": "cpu@cycles\\-t@ / cycles", + "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_transactional_cycles", "ScaleUnit": "100%" diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json index 04f08e4d2402..095904c77001 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json @@ -8,6 +8,14 @@ "UMask": "0x1" }, { + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]", + "EventCode": "0x87", + "EventName": "DECODE.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches", "EventCode": "0xAB", "EventName": "DSB2MITE_SWITCHES.COUNT", @@ -245,27 +253,34 @@ "UMask": "0x2" }, { - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", "EventCode": "0x83", "EventName": "ICACHE_64B.IFTAG_STALL", "SampleAfterValue": "200003", "UMask": "0x4" }, { - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]", + "EventCode": "0x83", + "EventName": "ICACHE_TAG.STALLS", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]", "CounterMask": "4", "EventCode": "0x79", "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]", "SampleAfterValue": "2000003", "UMask": "0x18" }, { - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.DSB_CYCLES_ANY]", "CounterMask": "1", "EventCode": "0x79", "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", - "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_ANY]", "SampleAfterValue": "2000003", "UMask": "0x18" }, @@ -297,6 +312,24 @@ "UMask": "0x8" }, { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_ANY", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]", + "SampleAfterValue": "2000003", + "UMask": "0x18" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_OK", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]", + "SampleAfterValue": "2000003", + "UMask": "0x18" + }, + { "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "EventCode": "0x79", "EventName": "IDQ.DSB_UOPS", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json index 31a1663d57f8..66d686cc933e 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json @@ -361,10 +361,10 @@ "UMask": "0x1" }, { - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]", "EventCode": "0x87", "EventName": "ILD_STALL.LCP", - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. 
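The paired entries being added here and throughout this series (DECODE.LCP vs ILD_STALL.LCP, ICACHE_TAG.STALLS vs ICACHE_64B.IFTAG_STALL, IDQ.DSB_CYCLES_ANY/OK vs IDQ.ALL_DSB_CYCLES_ANY_UOPS/4_UOPS) are pure aliases: each pair carries the same EventCode, UMask and CounterMask, so both names select one hardware counter configuration. Assuming the conventional IA32_PERFEVTSELx layout (event select in bits 0-7, umask in bits 8-15, cmask in bits 24-31), the shared encoding can be made explicit:

/* Sketch: both names in an alias pair reduce to the same raw PERFEVTSEL
 * encoding; bit positions assume the usual Intel layout. */
#include <stdint.h>
#include <stdio.h>

static uint64_t raw_event(uint64_t event, uint64_t umask, uint64_t cmask)
{
	return (cmask << 24) | (umask << 8) | event;
}

int main(void)
{
	/* Both alias names above carry EventCode 0x79, UMask 0x18, CounterMask 4. */
	uint64_t all_dsb_cycles_4_uops = raw_event(0x79, 0x18, 4);
	uint64_t dsb_cycles_ok = raw_event(0x79, 0x18, 4);

	printf("0x%llx == 0x%llx\n",
	       (unsigned long long)all_dsb_cycles_4_uops,
	       (unsigned long long)dsb_cycles_ok);	/* identical encodings */
	return 0;
}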
This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]", "SampleAfterValue": "2000003", "UMask": "0x1" }, @@ -488,11 +488,11 @@ "UMask": "0x1" }, { - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_OK]", "CounterMask": "4", "EventCode": "0xA8", "EventName": "LSD.CYCLES_4_UOPS", - "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_OK]", "SampleAfterValue": "2000003", "UMask": "0x1" }, @@ -506,6 +506,15 @@ "UMask": "0x1" }, { + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_4_UOPS]", + "CounterMask": "4", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_OK", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_4_UOPS]", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { "BriefDescription": "Number of Uops delivered by the LSD.", "EventCode": "0xA8", "EventName": "LSD.UOPS", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json index 725780fb3990..1a342dff1503 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json @@ -6606,7 +6606,7 @@ "EventCode": "0x52", "EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS", "PerPkg": "1", - "PublicDescription": "ad and bl messages were actually slotted into the same flit in paralle", + "PublicDescription": "ad and bl messages were actually slotted into the same flit in parallel", "UMask": "0x8", "Unit": "M3UPI" }, diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json index f761856d738e..d82d2cca6f0a 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json @@ -2735,7 +2735,7 @@ "EventCode": "0x81", "EventName": "UNC_M_WPQ_OCCUPANCY", "PerPkg": "1", - "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. 
By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts?", + "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts.", "Unit": "iMC" }, { diff --git a/tools/perf/pmu-events/arch/x86/icelake/cache.json b/tools/perf/pmu-events/arch/x86/icelake/cache.json index 79b9f02a4b63..d26c4efe35f0 100644 --- a/tools/perf/pmu-events/arch/x86/icelake/cache.json +++ b/tools/perf/pmu-events/arch/x86/icelake/cache.json @@ -155,18 +155,18 @@ "UMask": "0x21" }, { - "BriefDescription": "All requests that miss L2 cache. This event is not supported on ICL and ICX products, only supported on RKL products.", + "BriefDescription": "This event is deprecated.", + "Deprecated": "1", "EventCode": "0x24", "EventName": "L2_RQSTS.MISS", - "PublicDescription": "Counts all requests that miss L2 cache. This event is not supported on ICL and ICX products, only supported on RKL products.", "SampleAfterValue": "200003", "UMask": "0x3f" }, { - "BriefDescription": "All L2 requests. This event is not supported on ICL and ICX products, only supported on RKL products.", + "BriefDescription": "This event is deprecated.", + "Deprecated": "1", "EventCode": "0x24", "EventName": "L2_RQSTS.REFERENCES", - "PublicDescription": "Counts all L2 requests. This event is not supported on ICL and ICX products, only supported on RKL products.", "SampleAfterValue": "200003", "UMask": "0xff" }, diff --git a/tools/perf/pmu-events/arch/x86/icelake/frontend.json b/tools/perf/pmu-events/arch/x86/icelake/frontend.json index 3e3d2b002170..2b539a08d2bf 100644 --- a/tools/perf/pmu-events/arch/x86/icelake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/icelake/frontend.json @@ -8,6 +8,14 @@ "UMask": "0x1" }, { + "BriefDescription": "Stalls caused by changing prefix length of the instruction. 
[This event is alias to ILD_STALL.LCP]", + "EventCode": "0x87", + "EventName": "DECODE.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]", + "SampleAfterValue": "500009", + "UMask": "0x1" + }, + { "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.", "CounterMask": "1", "EdgeDetect": "1", @@ -213,10 +221,10 @@ "UMask": "0x1" }, { - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_DATA.STALLS]", "EventCode": "0x80", "EventName": "ICACHE_16B.IFDATA_STALL", - "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_DATA.STALLS]", "SampleAfterValue": "500009", "UMask": "0x4" }, @@ -237,10 +245,26 @@ "UMask": "0x2" }, { - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", "EventCode": "0x83", "EventName": "ICACHE_64B.IFTAG_STALL", - "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_16B.IFDATA_STALL]", + "EventCode": "0x80", + "EventName": "ICACHE_DATA.STALLS", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_16B.IFDATA_STALL]", + "SampleAfterValue": "500009", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]", + "EventCode": "0x83", + "EventName": "ICACHE_TAG.STALLS", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. 
[This event is alias to ICACHE_64B.IFTAG_STALL]", "SampleAfterValue": "200003", "UMask": "0x4" }, diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json index cc4edf855064..8fcc05c4e0a1 100644 --- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json @@ -1516,28 +1516,28 @@ }, { "BriefDescription": "Percentage of cycles in aborted transactions.", - "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles", + "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_aborted_cycles", "ScaleUnit": "100%" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@", + "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_elision", "ScaleUnit": "1cycles / elision" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@", + "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_transaction", "ScaleUnit": "1cycles / transaction" }, { "BriefDescription": "Percentage of cycles within a transaction region.", - "MetricExpr": "cpu@cycles\\-t@ / cycles", + "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_transactional_cycles", "ScaleUnit": "100%" diff --git a/tools/perf/pmu-events/arch/x86/icelake/pipeline.json b/tools/perf/pmu-events/arch/x86/icelake/pipeline.json index 154fee4b60fb..375b78044f14 100644 --- a/tools/perf/pmu-events/arch/x86/icelake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/icelake/pipeline.json @@ -318,10 +318,10 @@ "UMask": "0x40" }, { - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]", "EventCode": "0x87", "EventName": "ILD_STALL.LCP", - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]", "SampleAfterValue": "500009", "UMask": "0x1" }, @@ -556,7 +556,7 @@ "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions", "EventCode": "0xa4", "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS", - "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. 
This event estimates number of operations that were issued but not retired from the specualtive path as well as the out-of-order engine recovery past a branch misprediction.", + "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.", "SampleAfterValue": "10000003", "UMask": "0x8" }, diff --git a/tools/perf/pmu-events/arch/x86/icelakex/frontend.json b/tools/perf/pmu-events/arch/x86/icelakex/frontend.json index 71498044f1cb..f6edc4222f42 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/frontend.json @@ -8,6 +8,14 @@ "UMask": "0x1" }, { + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]", + "EventCode": "0x87", + "EventName": "DECODE.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]", + "SampleAfterValue": "500009", + "UMask": "0x1" + }, + { "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.", "CounterMask": "1", "EdgeDetect": "1", @@ -213,10 +221,10 @@ "UMask": "0x1" }, { - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_DATA.STALLS]", "EventCode": "0x80", "EventName": "ICACHE_16B.IFDATA_STALL", - "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_DATA.STALLS]", "SampleAfterValue": "500009", "UMask": "0x4" }, @@ -237,10 +245,26 @@ "UMask": "0x2" }, { - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", "EventCode": "0x83", "EventName": "ICACHE_64B.IFTAG_STALL", - "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_16B.IFDATA_STALL]", + "EventCode": "0x80", + "EventName": "ICACHE_DATA.STALLS", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. 
[This event is alias to ICACHE_16B.IFDATA_STALL]", + "SampleAfterValue": "500009", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]", + "EventCode": "0x83", + "EventName": "ICACHE_TAG.STALLS", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]", "SampleAfterValue": "200003", "UMask": "0x4" }, diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index 6f25b5b7aaf6..9bb7e3f20f7f 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -1812,28 +1812,28 @@ }, { "BriefDescription": "Percentage of cycles in aborted transactions.", - "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles", + "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_aborted_cycles", "ScaleUnit": "100%" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@", + "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_elision", "ScaleUnit": "1cycles / elision" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@", + "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_transaction", "ScaleUnit": "1cycles / transaction" }, { "BriefDescription": "Percentage of cycles within a transaction region.", - "MetricExpr": "cpu@cycles\\-t@ / cycles", + "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_transactional_cycles", "ScaleUnit": "100%" diff --git a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json index 442a4c7539dd..176e5ef2a24a 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json @@ -318,10 +318,10 @@ "UMask": "0x40" }, { - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]", "EventCode": "0x87", "EventName": "ILD_STALL.LCP", - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. 
This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
         "SampleAfterValue": "500009",
         "UMask": "0x1"
     },
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
index 8ac5907762e1..f87ea3f66d1b 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
@@ -9311,7 +9311,7 @@
         "EventCode": "0x50",
         "EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS",
         "PerPkg": "1",
-        "PublicDescription": "Message Held : Parallel Success : ad and bl messages were actually slotted into the same flit in paralle",
+        "PublicDescription": "Message Held : Parallel Success : ad and bl messages were actually slotted into the same flit in parallel",
         "UMask": "0x8",
         "Unit": "M3UPI"
     },
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 6543a68d4a17..6650100830c4 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -5,7 +5,7 @@ GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core
 GenuineIntel-6-(3D|47),v28,broadwell,core
 GenuineIntel-6-56,v10,broadwellde,core
 GenuineIntel-6-4F,v21,broadwellx,core
-GenuineIntel-6-55-[56789ABCDEF],v1.18,cascadelakex,core
+GenuineIntel-6-55-[56789ABCDEF],v1.19,cascadelakex,core
 GenuineIntel-6-9[6C],v1.04,elkhartlake,core
 GenuineIntel-6-5[CF],v13,goldmont,core
 GenuineIntel-6-7A,v1.01,goldmontplus,core
@@ -13,23 +13,24 @@ GenuineIntel-6-B6,v1.00,grandridge,core
 GenuineIntel-6-A[DE],v1.01,graniterapids,core
 GenuineIntel-6-(3C|45|46),v33,haswell,core
 GenuineIntel-6-3F,v27,haswellx,core
-GenuineIntel-6-(7D|7E|A7),v1.18,icelake,core
-GenuineIntel-6-6[AC],v1.20,icelakex,core
+GenuineIntel-6-7[DE],v1.19,icelake,core
+GenuineIntel-6-6[AC],v1.21,icelakex,core
 GenuineIntel-6-3A,v24,ivybridge,core
 GenuineIntel-6-3E,v23,ivytown,core
 GenuineIntel-6-2D,v23,jaketown,core
 GenuineIntel-6-(57|85),v10,knightslanding,core
-GenuineIntel-6-A[AC],v1.01,meteorlake,core
+GenuineIntel-6-A[AC],v1.03,meteorlake,core
 GenuineIntel-6-1[AEF],v3,nehalemep,core
 GenuineIntel-6-2E,v3,nehalemex,core
+GenuineIntel-6-A7,v1.01,rocketlake,core
 GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-(8F|CF),v1.13,sapphirerapids,core
+GenuineIntel-6-(8F|CF),v1.14,sapphirerapids,core
 GenuineIntel-6-AF,v1.00,sierraforest,core
 GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
-GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v56,skylake,core
-GenuineIntel-6-55-[01234],v1.30,skylakex,core
+GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core
+GenuineIntel-6-55-[01234],v1.31,skylakex,core
 GenuineIntel-6-86,v1.21,snowridgex,core
-GenuineIntel-6-8[CD],v1.12,tigerlake,core
+GenuineIntel-6-8[CD],v1.13,tigerlake,core
 GenuineIntel-6-2C,v4,westmereep-dp,core
 GenuineIntel-6-25,v3,westmereep-sp,core
 GenuineIntel-6-2F,v3,westmereex,core
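mapfile.csv is how perf binds a running CPU to one of these event tables: the first column is an extended-regex pattern matched against the CPUID model string, the second a table version (bumped here for every regenerated table), the third the directory name under pmu-events/arch/x86. The visible effect of this hunk is that model 0xA7 leaves the icelake pattern and gets its own rocketlake row. A self-contained sketch of that match (the anchoring policy here is an assumption; perf's actual matcher in the pmu-events code may differ in detail):

/* Sketch: matching a CPUID string against a mapfile.csv pattern column. */
#include <regex.h>
#include <stdio.h>

static int cpuid_matches(const char *pattern, const char *cpuid)
{
	regex_t re;
	int ok;

	if (regcomp(&re, pattern, REG_EXTENDED | REG_NOSUB))
		return 0;
	ok = (regexec(&re, cpuid, 0, NULL, 0) == 0);
	regfree(&re);
	return ok;
}

int main(void)
{
	/* Model 0xA7 now matches the new rocketlake row, not icelake's. */
	printf("%d\n", cpuid_matches("^GenuineIntel-6-A7$", "GenuineIntel-6-A7"));    /* 1 */
	printf("%d\n", cpuid_matches("^GenuineIntel-6-7[DE]$", "GenuineIntel-6-A7")); /* 0 */
	return 0;
}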
"0x51", + "EventName": "L1D.REPLACEMENT", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.FB_FULL", + "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS", + "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of L1D misses that are outstanding", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2 cache lines filling L2", + "EventCode": "0x25", + "EventName": "L2_LINES_IN.ALL", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", + "SampleAfterValue": "100003", + "UMask": "0x1f", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Modified cache lines that are evicted by L2 cache when triggered by an L2 cache fill.", + "EventCode": "0x26", + "EventName": "L2_LINES_OUT.NON_SILENT", + "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. 
Modified lines are written back to L3", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Non-modified cache lines that are silently dropped by L2 cache when triggered by an L2 cache fill.", + "EventCode": "0x26", + "EventName": "L2_LINES_OUT.SILENT", + "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", + "SampleAfterValue": "200003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "All accesses to L2 cache [This event is alias to L2_RQSTS.REFERENCES]", + "EventCode": "0x24", + "EventName": "L2_REQUEST.ALL", + "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]", + "SampleAfterValue": "200003", + "UMask": "0xff", + "Unit": "cpu_core" + }, + { + "BriefDescription": "All requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT]", + "EventCode": "0x24", + "EventName": "L2_REQUEST.HIT", + "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT]", + "SampleAfterValue": "200003", + "UMask": "0xdf", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Read requests with true-miss in L2 cache [This event is alias to L2_RQSTS.MISS]", + "EventCode": "0x24", + "EventName": "L2_REQUEST.MISS", + "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]", + "SampleAfterValue": "200003", + "UMask": "0x3f", + "Unit": "cpu_core" + }, + { "BriefDescription": "L2 code requests", "EventCode": "0x24", "EventName": "L2_RQSTS.ALL_CODE_RD", @@ -18,6 +128,139 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Demand requests that miss L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "PublicDescription": "Counts demand requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x27", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand requests to L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "PublicDescription": "Counts demand requests to L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xe7", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2_RQSTS.ALL_HWPF", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_HWPF", + "SampleAfterValue": "200003", + "UMask": "0xf0", + "Unit": "cpu_core" + }, + { + "BriefDescription": "RFO requests to L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_RFO", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. 
L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "SampleAfterValue": "200003", + "UMask": "0xe2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "SampleAfterValue": "200003", + "UMask": "0xc4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2 cache misses when fetching instructions", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "SampleAfterValue": "200003", + "UMask": "0x24", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand Data Read miss L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.", + "SampleAfterValue": "200003", + "UMask": "0x21", + "Unit": "cpu_core" + }, + { + "BriefDescription": "All requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT]", + "EventCode": "0x24", + "EventName": "L2_RQSTS.HIT", + "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT]", + "SampleAfterValue": "200003", + "UMask": "0xdf", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2_RQSTS.HWPF_MISS", + "EventCode": "0x24", + "EventName": "L2_RQSTS.HWPF_MISS", + "SampleAfterValue": "200003", + "UMask": "0x30", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Read requests with true-miss in L2 cache [This event is alias to L2_REQUEST.MISS]", + "EventCode": "0x24", + "EventName": "L2_RQSTS.MISS", + "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]", + "SampleAfterValue": "200003", + "UMask": "0x3f", + "Unit": "cpu_core" + }, + { + "BriefDescription": "All accesses to L2 cache [This event is alias to L2_REQUEST.ALL]", + "EventCode": "0x24", + "EventName": "L2_RQSTS.REFERENCES", + "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. 
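Entries like the L2_RQSTS ones above are normally consumed by name through perf, but the EventCode/UMask pair is enough to program the counter raw. A hedged perf_event_open() sketch counting L2_RQSTS.MISS (EventCode 0x24, UMask 0x3f) on the current thread, using the usual umask<<8|event raw encoding; on a hybrid part such as Meteorlake the specific PMU's type from /sys/bus/event_source/devices/cpu_core/type would typically replace PERF_TYPE_RAW:

/* Sketch: counting a raw x86 core event (L2_RQSTS.MISS: 0x24/0x3f). */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = (0x3fULL << 8) | 0x24;	/* UMask << 8 | EventCode */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement would run here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	read(fd, &count, sizeof(count));
	printf("L2 misses: %lld\n", count);
	close(fd);
	return 0;
}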
[This event is alias to L2_REQUEST.ALL]", + "SampleAfterValue": "200003", + "UMask": "0xff", + "Unit": "cpu_core" + }, + { + "BriefDescription": "RFO requests that hit L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "RFO requests that miss L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x22", + "Unit": "cpu_core" + }, + { + "BriefDescription": "L2 writebacks that access L2 cache", + "EventCode": "0x23", + "EventName": "L2_TRANS.L2_WB", + "PublicDescription": "Counts L2 writebacks that access L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", "EventCode": "0x2e", "EventName": "LONGEST_LAT_CACHE.MISS", @@ -54,6 +297,72 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an instruction cache or TLB miss.", + "EventCode": "0x35", + "EventName": "MEM_BOUND_STALLS_IFETCH.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x6f", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.", + "EventCode": "0x35", + "EventName": "MEM_BOUND_STALLS_IFETCH.L2_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an icache or itlb miss which hit in the LLC.", + "EventCode": "0x35", + "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_HIT", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an icache or itlb miss which missed all the caches.", + "EventCode": "0x35", + "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x68", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an L1 demand load miss.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS_LOAD.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x6f", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS_LOAD.L2_HIT", + "PublicDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 cache.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which hit in the LLC.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS_LOAD.LLC_HIT", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of unhalted cycles when the core is 
stalled due to a demand load miss which missed all the local caches.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS_LOAD.LLC_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x68", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Retired load instructions.", "Data_LA": "1", "EventCode": "0xd0", @@ -76,6 +385,352 @@ "Unit": "cpu_core" }, { + "BriefDescription": "All retired memory instructions.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ANY", + "PEBS": "1", + "PublicDescription": "Counts all retired memory instructions - loads and stores.", + "SampleAfterValue": "1000003", + "UMask": "0x83", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with locked access.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with locked access.", + "SampleAfterValue": "100007", + "UMask": "0x21", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions that split across a cacheline boundary.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "UMask": "0x41", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired store instructions that split across a cacheline boundary.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "PEBS": "1", + "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "UMask": "0x42", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions that hit the STLB.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.STLB_HIT_LOADS", + "PEBS": "1", + "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB).", + "SampleAfterValue": "100003", + "UMask": "0x9", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired store instructions that hit the STLB.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.STLB_HIT_STORES", + "PEBS": "1", + "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB).", + "SampleAfterValue": "100003", + "UMask": "0xa", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions that miss the STLB.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "PEBS": "1", + "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).", + "SampleAfterValue": "100003", + "UMask": "0x11", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired store instructions that miss the STLB.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "PEBS": "1", + "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).", + "SampleAfterValue": "100003", + "UMask": "0x12", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Completed demand load uops that miss the L1 d-cache.", + "EventCode": "0x43", + "EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY", + "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)", + 
"SampleAfterValue": "1000003", + "UMask": "0xfd", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.", + "SampleAfterValue": "100003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", + "Data_LA": "1", + "EventCode": "0xd3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.", + "SampleAfterValue": "100007", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", + "Data_LA": "1", + "EventCode": "0xd4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "PEBS": "1", + "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock).", + "SampleAfterValue": "100007", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data 
not ready.", + "SampleAfterValue": "100007", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions missed L1 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "SampleAfterValue": "200003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions missed L2 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.", + "SampleAfterValue": "100021", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.", + "SampleAfterValue": "100021", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions missed L3 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.", + "SampleAfterValue": "50021", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in DRAM", + "EventCode": "0xd4", + "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LOCAL_DRAM", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of load ops retired that hit the L1 data cache.", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of load ops retired that miss in the L1 data cache.", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of load ops retired that hit in the L2 cache.", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of load ops retired that miss in the L2 cache.", + "EventCode": "0xd1", + 
"EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of load ops retired that hit in the L3 cache.", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x1c", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of loads that hit in a write combining buffer (WCB), excluding the first load that caused the WCB to allocate.", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ALL", + "SampleAfterValue": "20003", + "UMask": "0x7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF", + "SampleAfterValue": "20003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.RSV", + "SampleAfterValue": "20003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF", + "SampleAfterValue": "20003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Counts the number of load ops retired.", "Data_LA": "1", "EventCode": "0xd0", @@ -99,6 +754,18 @@ "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", "Data_LA": "1", "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_1024", + "MSRIndex": "0x3F6", + "MSRValue": "0x400", + "PEBS": "2", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "MSRValue": "0x80", @@ -123,6 +790,18 @@ "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", "Data_LA": "1", "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_2048", + "MSRIndex": "0x3F6", + "MSRValue": "0x800", + "PEBS": "2", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "MSRValue": "0x100", @@ -192,6 +871,46 @@ "Unit": "cpu_atom" }, { + "BriefDescription": "Counts the number of load uops retired that performed one or more locks", + "Data_LA": "1", + "EventCode": "0xd0", + 
"EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x21", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of memory uops retired that were splits.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.SPLIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x43", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retired split load uops.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x41", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retired split store uops.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x42", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Counts the number of stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES", "Data_LA": "1", "EventCode": "0xd0", @@ -200,5 +919,97 @@ "SampleAfterValue": "1000003", "UMask": "0x6", "Unit": "cpu_atom" + }, + { + "BriefDescription": "Retired memory uops for any access", + "EventCode": "0xe5", + "EventName": "MEM_UOP_RETIRED.ANY", + "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches which forwarded the unmodified data to the requesting core.", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x8003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Any memory transaction that reached the SQ.", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", + "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", + "SampleAfterValue": "100003", + "UMask": "0x80", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand and prefetch data reads", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.DATA_RD", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). 
Counting also covers reads due to page walks resulting from any request type.", + "SampleAfterValue": "100003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand Data Read requests sent to uncore", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", + "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.", + "EventCode": "0x2c", + "EventName": "SQ_MISC.BUS_LOCK", + "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.", + "SampleAfterValue": "100003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an icache miss", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_atom" } ] diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json new file mode 100644 index 000000000000..616489f0974a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json @@ -0,0 +1,143 @@ +[ + { + "BriefDescription": "This event counts the cycles the floating point divider is busy.", + "CounterMask": "1", + "EventCode": "0xb0", + "EventName": "ARITH.FPDIV_ACTIVE", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts all microcode FP assists.", + "EventCode": "0xc1", + "EventName": "ASSISTS.FP", + "PublicDescription": "Counts all microcode Floating Point assists.", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "ASSISTS.SSE_AVX_MIX", + "EventCode": "0xc1", + "EventName": "ASSISTS.SSE_AVX_MIX", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.PORT_0", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.PORT_1", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", + "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. 
Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x18", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired; some instructions will count twice as noted below. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 RANGE SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR", + "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. 
The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of any Vector retired FP arithmetic instructions", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.VECTOR", + "PublicDescription": "Number of any Vector retired FP arithmetic instructions. 
The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "1000003", + "UMask": "0xfc", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.FP_ASSIST", + "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.", + "SampleAfterValue": "20003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of floating point divide uops retired (x87 and sse, including x87 sqrt).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.FPDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x8", + "Unit": "cpu_atom" + } +] diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json index 66e5609699ea..0f064518d1c0 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json @@ -1,5 +1,260 @@ [ { + "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "EventCode": "0xe6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction, or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "SampleAfterValue": "200003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Clears due to Unknown Branches.", + "EventCode": "0x60", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called an Unknown Branch, which occurs the first time a branch instruction is fetched or when the branch is no longer tracked by the BPU (Branch Prediction Unit).", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "EventCode": "0x87", + "EventName": "DECODE.LCP", + "PublicDescription": "Counts cycles in which Instruction Length Decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "SampleAfterValue": "500009", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles the Microcode Sequencer is busy.", + "EventCode": "0x87", + "EventName": "DECODE.MS_BUSY", + "SampleAfterValue": "500009", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "DSB-to-MITE switch true penalty cycles.", + "EventCode": "0x61", + "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). 
This event counts fetch penalty cycles when a transition occurs from DSB to MITE.", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired ANT branches", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ANY_ANT", + "MSRIndex": "0x3F7", + "MSRValue": "0x9", + "PEBS": "1", + "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted)", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to an ITLB miss", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Retired instructions that experienced an iTLB true miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x14", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that experienced an iTLB (Instruction TLB) true miss.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that experienced an Instruction L1 Cache true miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.L1I_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x12", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that experienced an Instruction L1 Cache true miss.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", + "MSRIndex": "0x3F7", + "MSRValue": "0x600106", + "PEBS": "1", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", + "MSRIndex": "0x3F7", + "MSRValue": "0x608006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", + "MSRIndex": "0x3F7", + "MSRValue": "0x601006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. 
During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", + "MSRIndex": "0x3F7", + "MSRValue": "0x600206", + "PEBS": "1", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", + "MSRIndex": "0x3F7", + "MSRValue": "0x610006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", + "MSRIndex": "0x3F7", + "MSRValue": "0x100206", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", + "MSRIndex": "0x3F7", + "MSRValue": "0x602006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. 
During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", + "MSRIndex": "0x3F7", + "MSRValue": "0x600406", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", + "MSRIndex": "0x3F7", + "MSRValue": "0x620006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "MSRIndex": "0x3F7", + "MSRValue": "0x604006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", + "MSRIndex": "0x3F7", + "MSRValue": "0x600806", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. 
During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Mispredicted Retired ANT branches", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.MISP_ANT", + "MSRIndex": "0x3F7", + "MSRValue": "0x9", + "PEBS": "1", + "PublicDescription": "ANT retired branches that were mispredicted", + "SampleAfterValue": "100007", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "FRONTEND_RETIRED.MS_FLOWS", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.MS_FLOWS", + "MSRIndex": "0x3F7", + "MSRValue": "0x8", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH", + "MSRIndex": "0x3F7", + "MSRValue": "0x17", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.", "EventCode": "0x80", "EventName": "ICACHE.ACCESSES", @@ -16,6 +271,131 @@ "Unit": "cpu_atom" }, { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", + "EventCode": "0x80", + "EventName": "ICACHE_DATA.STALLS", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.", + "SampleAfterValue": "500009", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "ICACHE_DATA.STALL_PERIODS", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x80", + "EventName": "ICACHE_DATA.STALL_PERIODS", + "SampleAfterValue": "500009", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "EventCode": "0x83", + "EventName": "ICACHE_TAG.HIT", + "PublicDescription": "Counts instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.", + "SampleAfterValue": "200003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "EventCode": "0x83", + "EventName": "ICACHE_TAG.STALLS", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "SampleAfterValue": "200003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_ANY", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", + "SampleAfterValue": "2000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles DSB is delivering optimal number of Uops", + "CounterMask": "6", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_OK", + "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
During these cycles uops are not being delivered from the MITE (legacy decode pipeline).", + "SampleAfterValue": "2000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "EventCode": "0x79", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", + "SampleAfterValue": "2000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles MITE is delivering any Uop", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MITE_CYCLES_ANY", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles MITE is delivering optimal number of Uops", + "CounterMask": "6", + "EventCode": "0x79", + "EventName": "IDQ.MITE_CYCLES_OK", + "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", + "EventCode": "0x79", + "EventName": "IDQ.MITE_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_CYCLES_ANY", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of switches from DSB or MITE to the MS", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_SWITCHES", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "SampleAfterValue": "100003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", + "EventCode": "0x79", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. 
Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { "BriefDescription": "This event counts a subset of the Topdown Slots event that were no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations.", "EventCode": "0x9c", "EventName": "IDQ_BUBBLES.CORE", @@ -23,5 +403,35 @@ "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled", + "EventCode": "0x9c", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "PublicDescription": "Counts the number of uops not delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. This event counts for one SMT thread in a given cycle.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles when no uops are delivered by the IDQ when backend of the machine is not stalled", + "CounterMask": "6", + "EventCode": "0x9c", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. This event counts for one SMT thread in a given cycle.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled", + "CounterMask": "1", + "EventCode": "0x9c", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "Invert": "1", + "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. 
This event counts for one SMT thread in a given cycle.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" } ] diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json index 20c2efe70eeb..67e949b4c789 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json @@ -1,5 +1,101 @@ [ { + "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", + "CounterMask": "2", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", + "CounterMask": "6", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.", + "EventCode": "0x05", + "EventName": "LD_HEAD.ANY_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xff", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.", + "EventCode": "0x05", + "EventName": "LD_HEAD.L1_BOUND_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xf4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DL1 miss.", + "EventCode": "0x05", + "EventName": "LD_HEAD.L1_MISS_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x81", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.", + "EventCode": "0x05", + "EventName": "LD_HEAD.OTHER_AT_RET", + "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.", + "SampleAfterValue": "1000003", + "UMask": "0xc0", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.", + "EventCode": "0x05", + "EventName": "LD_HEAD.PGWALK_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xa0", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.", + "EventCode": "0x05", + "EventName": "LD_HEAD.ST_ADDR_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x84", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "3", + "EventCode": "0x47", + "EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding.", + "CounterMask": "5", + "EventCode": "0x47", + 
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS", + "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding.", + "CounterMask": "9", + "EventCode": "0x47", + "EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS", + "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).", + "SampleAfterValue": "1000003", + "UMask": "0x9", + "Unit": "cpu_core" + }, + { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "Data_LA": "1", "EventCode": "0xcd", @@ -115,43 +211,29 @@ "Unit": "cpu_core" }, { - "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3FBFC00001", - "SampleAfterValue": "100003", - "UMask": "0x1", + "BriefDescription": "Counts misaligned loads that are 4K page splits.", + "EventCode": "0x13", + "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x2", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", - "EventCode": "0x2A,0x2B", - "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3FBFC00001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_core" - }, - { - "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_RFO.L3_MISS", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3FBFC00002", - "SampleAfterValue": "100003", - "UMask": "0x1", + "BriefDescription": "Counts misaligned stores that are 4K page splits.", + "EventCode": "0x13", + "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x4", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", - "EventCode": "0x2A,0x2B", - "EventName": "OCR.DEMAND_RFO.L3_MISS", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3FBFC00002", + "BriefDescription": "Counts demand data read requests that miss the L3 cache.", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", "SampleAfterValue": "100003", - "UMask": "0x1", + "UMask": "0x10", "Unit": "cpu_core" } ] diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/other.json b/tools/perf/pmu-events/arch/x86/meteorlake/other.json index 14e648bf11c5..2ec57f487525 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/other.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/other.json @@ -1,41 +1,50 @@ [ { - "BriefDescription": "Counts demand data reads that have any type of response.", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "BriefDescription": "Counts streaming stores that have any type of response.", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - 
"MSRValue": "0x10001", + "MSRValue": "0x10800", "SampleAfterValue": "100003", "UMask": "0x1", - "Unit": "cpu_atom" + "Unit": "cpu_core" }, { - "BriefDescription": "Counts demand data reads that have any type of response.", - "EventCode": "0x2A,0x2B", - "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10001", - "SampleAfterValue": "100003", - "UMask": "0x1", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.", + "EventCode": "0xa5", + "EventName": "RS.EMPTY", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)", + "SampleAfterValue": "1000003", + "UMask": "0x7", "Unit": "cpu_core" }, { - "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10002", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xa5", + "EventName": "RS.EMPTY_COUNT", + "Invert": "1", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)", "SampleAfterValue": "100003", - "UMask": "0x1", + "UMask": "0x7", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)", + "EventCode": "0x75", + "EventName": "SERIALIZATION.C01_MS_SCB", + "SampleAfterValue": "200003", + "UMask": "0x4", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", - "EventCode": "0x2A,0x2B", - "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10002", - "SampleAfterValue": "100003", + "BriefDescription": "Cycles the uncore cannot take further requests", + "CounterMask": "1", + "EventCode": "0x2d", + "EventName": "XQ.FULL_CYCLES", + "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).", + "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_core" } diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json index 639789478073..eeaa7a97f71c 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json @@ -1,5 +1,33 @@ [ { + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.", + "CounterMask": "1", + "EventCode": "0xb0", + "EventName": "ARITH.DIV_ACTIVE", + "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. 
Accounts for integer and floating-point operations.", + "SampleAfterValue": "1000003", + "UMask": "0x9", + "Unit": "cpu_core" + }, + { + "BriefDescription": "This event counts the cycles the integer divider is busy.", + "CounterMask": "1", + "EventCode": "0xb0", + "EventName": "ARITH.IDIV_ACTIVE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.", + "EventCode": "0xc1", + "EventName": "ASSISTS.ANY", + "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.", + "SampleAfterValue": "100003", + "UMask": "0x1b", + "Unit": "cpu_core" + }, + { "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES", @@ -18,6 +46,104 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Conditional branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND", + "PEBS": "1", + "PublicDescription": "Counts conditional branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x11", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Not taken branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_NTAKEN", + "PEBS": "1", + "PublicDescription": "Counts not taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Taken conditional branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts taken conditional branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xbf", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Far branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "PublicDescription": "Counts far branch instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Indirect near branch instructions retired (excluding returns)", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT", + "PEBS": "1", + "PublicDescription": "Counts near indirect branch instructions retired excluding returns. 
TSX abort is an indirect branch.", + "SampleAfterValue": "100003", + "UMask": "0x80", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of near CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf9", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Direct and indirect near call instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "Counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Return instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "PublicDescription": "Counts return instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Taken branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", @@ -36,6 +162,174 @@ "Unit": "cpu_core" }, { + "BriefDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_COST", + "PEBS": "1", + "SampleAfterValue": "400009", + "UMask": "0x44", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Mispredicted conditional branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND", + "PEBS": "1", + "PublicDescription": "Counts mispredicted conditional branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x11", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_COST", + "PEBS": "1", + "SampleAfterValue": "400009", + "UMask": "0x51", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_NTAKEN", + "PEBS": "1", + "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.", + "SampleAfterValue": "400009", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. 
It fires on the instruction that immediately follows the mispredicted branch.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_NTAKEN_COST", + "PEBS": "1", + "SampleAfterValue": "400009", + "UMask": "0x50", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of branch instructions retired that were mispredicted and taken.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_TAKEN_COST", + "PEBS": "1", + "SampleAfterValue": "400009", + "UMask": "0x41", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Mispredicted near indirect branch instructions retired (excluding returns)", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT", + "PEBS": "1", + "PublicDescription": "Counts mispredicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch.", + "SampleAfterValue": "100003", + "UMask": "0x80", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Mispredicted indirect CALL retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.", + "SampleAfterValue": "400009", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_CALL_COST", + "PEBS": "1", + "SampleAfterValue": "400009", + "UMask": "0x42", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. 
It fires on the instruction that immediately follows the mispredicted branch.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_COST", + "PEBS": "1", + "SampleAfterValue": "100003", + "UMask": "0xc0", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts the number of near branch instructions retired that were mispredicted and taken.", + "SampleAfterValue": "400009", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN_COST", + "PEBS": "1", + "SampleAfterValue": "400009", + "UMask": "0x60", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.RET_COST", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x48", + "Unit": "cpu_core" + }, + { "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles", "EventName": "CPU_CLK_UNHALTED.CORE", "SampleAfterValue": "2000003", @@ -50,6 +344,33 @@ "Unit": "cpu_atom" }, { + "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.", + "EventCode": "0xec", + "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED", + "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "PublicDescription": "Counts Core crystal clock cycles when the current thread is unhalted and the other thread is halted.", + "SampleAfterValue": "25003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED", + "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread.
To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "SampleAfterValue": "2000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { "BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", @@ -65,6 +386,15 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Reference cycles when the core is not in halt state.", "EventCode": "0x3c", "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", @@ -104,6 +434,133 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "CounterMask": "1", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "16", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "1000003", + "UMask": "0xc", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "CounterMask": "5", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Total execution stalls.", + "CounterMask": "4", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles total of 3 uops 
are executed on all ports and Reservation Station was not empty.", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 3 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 4 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "5", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.BOUND_ON_LOADS", + "SampleAfterValue": "2000003", + "UMask": "0x21", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.", + "CounterMask": "2", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", + "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles no uop executed while RS was not empty, the SB was not full and there was no outstanding load.", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", + "PublicDescription": "Counts the number of cycles during which a total of 0 uops were executed on all ports, the Reservation Station (RS) was not empty, the Store Buffer (SB) was not full, and there was no outstanding load.", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Instruction decoders utilized in a cycle", + "EventCode": "0x75", + "EventName": "INST_DECODED.DECODERS", + "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { "BriefDescription": "Fixed Counter: Counts the number of instructions retired", "EventName": "INST_RETIRED.ANY", "PEBS": "1", @@ -138,12 +595,240 @@ "Unit": "cpu_core" }, { - "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", + "BriefDescription": "INST_RETIRED.MACRO_FUSED", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.MACRO_FUSED", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Precise instruction retired with PEBS precise-distribution", + "EventName": "INST_RETIRED.PREC_DIST", + "PEBS": "1", + "PublicDescription": "A version of INST_RETIRED that allows for a precise distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR++) feature to fix bias in how retired instructions get sampled.
Use on Fixed Counter 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles the Backend cluster is recovering after a mis-speculation or a Store Buffer or Load Buffer drain stall.", + "CounterMask": "1", + "EventCode": "0xad", + "EventName": "INT_MISC.ALL_RECOVERY_CYCLES", + "PublicDescription": "Counts cycles the Backend cluster is recovering after a mis-speculation or a Store Buffer or Load Buffer drain stall.", + "SampleAfterValue": "2000003", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.", + "EventCode": "0xad", + "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", + "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.", + "SampleAfterValue": "500009", + "UMask": "0x80", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Core cycles the allocator was stalled due to recovery from an earlier clear event for this thread", + "EventCode": "0xad", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", + "SampleAfterValue": "500009", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES", + "EventCode": "0xad", + "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES", + "MSRIndex": "0x3F7", + "MSRValue": "0x7", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { + "BriefDescription": "TMA slots where uops got dropped", + "EventCode": "0xad", + "EventName": "INT_MISC.UOP_DROPPING", + "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non-front-end reasons.", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "INT_VEC_RETIRED.128BIT", + "EventCode": "0xe7", + "EventName": "INT_VEC_RETIRED.128BIT", + "SampleAfterValue": "1000003", + "UMask": "0x13", + "Unit": "cpu_core" + }, + { + "BriefDescription": "INT_VEC_RETIRED.256BIT", + "EventCode": "0xe7", + "EventName": "INT_VEC_RETIRED.256BIT", + "SampleAfterValue": "1000003", + "UMask": "0xac", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Integer ADD, SUB, SAD 128-bit vector instructions.", + "EventCode": "0xe7", + "EventName": "INT_VEC_RETIRED.ADD_128", + "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Integer ADD, SUB, SAD 256-bit vector instructions.", + "EventCode": "0xe7", + "EventName": "INT_VEC_RETIRED.ADD_256", + "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.", + "SampleAfterValue": "1000003", + "UMask": "0xc", + "Unit": "cpu_core" + }, + { + "BriefDescription": "INT_VEC_RETIRED.MUL_256", + "EventCode": "0xe7", + "EventName": "INT_VEC_RETIRED.MUL_256", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_core" + }, + { + "BriefDescription": "INT_VEC_RETIRED.SHUFFLES", + "EventCode": "0xe7", + "EventName": "INT_VEC_RETIRED.SHUFFLES", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { + "BriefDescription": "INT_VEC_RETIRED.VNNI_128", +
"EventCode": "0xe7", + "EventName": "INT_VEC_RETIRED.VNNI_128", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "INT_VEC_RETIRED.VNNI_256", + "EventCode": "0xe7", + "EventName": "INT_VEC_RETIRED.VNNI_256", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.ADDRESS_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.", "EventCode": "0x03", "EventName": "LD_BLOCKS.STORE_FORWARD", - "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", + "EventCode": "0xa8", + "EventName": "LSD.CYCLES_ACTIVE", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.", + "CounterMask": "6", + "EventCode": "0xa8", + "EventName": "LSD.CYCLES_OK", + "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of Uops delivered by the LSD.", + "EventCode": "0xa8", + "EventName": "LSD.UOPS", + "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.COUNT", + "PublicDescription": "Counts the number of machine clears (nukes) of any type.", "SampleAfterValue": "100003", - "UMask": "0x82", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.DISAMBIGUATION", + "SampleAfterValue": "20003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. 
A page fault occurs when either the page is not present or an access violation occurs.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.PAGE_FAULT", + "SampleAfterValue": "20003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SLOW", + "SampleAfterValue": "20003", + "UMask": "0x6f", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machine clears due to program modifying data (self-modifying code) within 1K of a recently fetched code page.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "20003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.", + "EventCode": "0xa2", + "EventName": "RESOURCE_STALLS.SCOREBOARD", + "SampleAfterValue": "100003", + "UMask": "0x2", "Unit": "cpu_core" }, { @@ -156,6 +841,32 @@ "Unit": "cpu_core" }, { + "BriefDescription": "TMA slots wasted due to incorrect speculations.", + "EventCode": "0xa4", + "EventName": "TOPDOWN.BAD_SPEC_SLOTS", + "PublicDescription": "Number of slots of the TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.", + "SampleAfterValue": "10000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions", + "EventCode": "0xa4", + "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS", + "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates the number of speculative operations that were issued but not retired, as well as the out-of-order engine recovery past a branch misprediction.", + "SampleAfterValue": "10000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "TOPDOWN.MEMORY_BOUND_SLOTS", + "EventCode": "0xa4", + "EventName": "TOPDOWN.MEMORY_BOUND_SLOTS", + "SampleAfterValue": "10000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event", "EventName": "TOPDOWN.SLOTS", "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) that share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method.
This architectural event is counted on a designated fixed counter (Fixed Counter 3).", @@ -181,6 +892,30 @@ "Unit": "cpu_atom" }, { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to Fast Nukes such as Memory Ordering Machine clears and MRN nukes", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind, including memory ordering and memory disambiguation.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls", "EventCode": "0x74", "EventName": "TOPDOWN_BE_BOUND.ALL", @@ -188,6 +923,30 @@ "Unit": "cpu_atom" }, { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a marble stall.
A 'marble' refers to a physical register file entry, also known as the physical destination (PDST).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REGISTER", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IQ/JEU scoreboards or the MS scoreboard", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls", "EventCode": "0x71", "EventName": "TOPDOWN_FE_BOUND.ALL", @@ -195,6 +954,79 @@ "Unit": "cpu_atom" }, { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a BAClear", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a BTClear", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS)", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.CISC", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a decode stall", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions caused by decode, predecode, CISC, and other limitations.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH", + "SampleAfterValue": "1000003", + "UMask": "0x8d", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to latency-related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x72", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. [This event is alias to TOPDOWN_FE_BOUND.ITLB_MISS]", + "Deprecated": "1", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ITLB", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an ITLB miss [This event is alias to TOPDOWN_FE_BOUND.ITLB]", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ITLB_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a wrong predecode", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Counts the number of consumed retirement slots.
Similar to UOPS_RETIRED.ALL", "EventCode": "0x72", "EventName": "TOPDOWN_RETIRING.ALL", @@ -203,6 +1035,269 @@ "Unit": "cpu_atom" }, { + "BriefDescription": "Number of non-dec-by-all uops decoded by decoder 0", + "EventCode": "0x76", + "EventName": "UOPS_DECODED.DEC0_UOPS", + "PublicDescription": "This event counts the number of non-dec-by-all uops decoded by decoder 0.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops executed on port 0", + "EventCode": "0xb2", + "EventName": "UOPS_DISPATCHED.PORT_0", + "PublicDescription": "Number of uops dispatched to execution port 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops executed on port 1", + "EventCode": "0xb2", + "EventName": "UOPS_DISPATCHED.PORT_1", + "PublicDescription": "Number of uops dispatched to execution port 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops executed on ports 2, 3 and 10", + "EventCode": "0xb2", + "EventName": "UOPS_DISPATCHED.PORT_2_3_10", + "PublicDescription": "Number of uops dispatched to execution ports 2, 3 and 10.", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops executed on ports 4 and 9", + "EventCode": "0xb2", + "EventName": "UOPS_DISPATCHED.PORT_4_9", + "PublicDescription": "Number of uops dispatched to execution ports 4 and 9.", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops executed on ports 5 and 11", + "EventCode": "0xb2", + "EventName": "UOPS_DISPATCHED.PORT_5_11", + "PublicDescription": "Number of uops dispatched to execution ports 5 and 11.", + "SampleAfterValue": "2000003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops executed on port 6", + "EventCode": "0xb2", + "EventName": "UOPS_DISPATCHED.PORT_6", + "PublicDescription": "Number of uops dispatched to execution port 6.", + "SampleAfterValue": "2000003", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Uops executed on ports 7 and 8", + "EventCode": "0xb2", + "EventName": "UOPS_DISPATCHED.PORT_7_8", + "PublicDescription": "Number of uops dispatched to execution ports 7 and 8.", + "SampleAfterValue": "2000003", + "UMask": "0x80", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of uops executed on the core.", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Counts the number of uops executed from any thread.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.", + "CounterMask": "2", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.", + "CounterMask":
"3", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "CounterMask": "4", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "CounterMask": "4", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.STALLS", + "Invert": "1", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.THREAD", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of x87 uops dispatched.", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.X87", + "PublicDescription": "Counts the number of x87 uops executed.", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of uops issued by the front end every cycle.", + "EventCode": "0x0e", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops issued by the front end every cycle. When 4-uops are requested and only 2-uops are delivered, the event counts 2. Uops_issued correlates to the number of ROB entries. 
If a uop takes 2 ROB slots, it counts as 2 uops_issued.", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Uops that RAT issues to RS", + "EventCode": "0xae", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "UOPS_ISSUED.CYCLES", + "CounterMask": "1", + "EventCode": "0xae", + "EventName": "UOPS_ISSUED.CYCLES", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles when RAT does not issue Uops to RS for the thread", + "CounterMask": "1", + "EventCode": "0xae", + "EventName": "UOPS_ISSUED.STALLS", + "Invert": "1", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired uops except the last uop of each instruction.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.HEAVY", + "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into fewer than two uops does not contribute to the count.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of integer divide uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.IDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.MS", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "UOPS_RETIRED.MS", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.MS", + "MSRIndex": "0x3F7", + "MSRValue": "0x8", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { "BriefDescription": "This event counts a subset of the Topdown Slots event that is utilized by operations that eventually get retired (committed) by the processor pipeline.
Usually, this event positively correlates with higher performance; for example, as measured by the instructions-per-cycle metric.", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.SLOTS", @@ -210,5 +1305,25 @@ "SampleAfterValue": "2000003", "UMask": "0x2", "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles with fewer than 10 actually retired uops.", + "CounterMask": "10", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts the number of cycles using an always-true condition (uops_ret < 10) applied to the non-PEBS uops retired event.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.X87", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_atom" + } ] diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/uncore-cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-cache.json new file mode 100644 index 000000000000..188843be4caf --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-cache.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "Number of all entries allocated. Also includes retries.", + "EventCode": "0x35", + "EventName": "UNC_HAC_CBO_TOR_ALLOCATION.ALL", + "PerPkg": "1", + "UMask": "0x8", + "Unit": "HAC_CBO" + }, + { + "BriefDescription": "Asserted on coherent DRD + DRdPref allocations into the queue. Cacheable only.", + "EventCode": "0x35", + "EventName": "UNC_HAC_CBO_TOR_ALLOCATION.DRD", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "HAC_CBO" + } +] diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json new file mode 100644 index 000000000000..08b5c7574cfc --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json @@ -0,0 +1,42 @@ +[ + { + "BriefDescription": "Number of all coherent Data Read entries. Doesn't include prefetches.", + "EventCode": "0x81", + "EventName": "UNC_HAC_ARB_REQ_TRK_REQUEST.DRD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "HAC_ARB" + }, + { + "BriefDescription": "Number of all CMI transactions", + "EventCode": "0x8a", + "EventName": "UNC_HAC_ARB_TRANSACTIONS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "HAC_ARB" + }, + { + "BriefDescription": "Number of all CMI reads", + "EventCode": "0x8a", + "EventName": "UNC_HAC_ARB_TRANSACTIONS.READS", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "HAC_ARB" + }, + { + "BriefDescription": "Number of all CMI writes not including Mflush", + "EventCode": "0x8a", + "EventName": "UNC_HAC_ARB_TRANSACTIONS.WRITES", + "PerPkg": "1", + "UMask": "0x4", + "Unit": "HAC_ARB" + }, + { + "BriefDescription": "Total number of all outgoing entries allocated.
Accounts for coherent and non-coherent traffic.", + "EventCode": "0x81", + "EventName": "UNC_HAC_ARB_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "HAC_ARB" + } +] diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/uncore-memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-memory.json new file mode 100644 index 000000000000..c9d248d1042e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-memory.json @@ -0,0 +1,126 @@ +[ + { + "BriefDescription": "Counts every CAS read command sent from the Memory Controller 0 to DRAM (sum of all channels).", + "EventCode": "0xff", + "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every CAS read command sent from the Memory Controller 0 to DRAM (sum of all channels). Each CAS command can be for 32B or 64B of data.", + "UMask": "0x20", + "Unit": "imc_free_running_0" + }, + { + "BriefDescription": "Counts every read and write request entering the Memory Controller 0.", + "EventCode": "0xff", + "EventName": "UNC_MC0_TOTAL_REQCOUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every read and write request entering the Memory Controller 0 (sum of all channels). All requests are counted as one, whether they are 32B or 64B Read/Write or partial/full line writes. Some write requests to the same address may merge to a single write command to DRAM. Therefore, the total request count may be higher than total DRAM BW.", + "UMask": "0x10", + "Unit": "imc_free_running_0" + }, + { + "BriefDescription": "Counts every CAS write command sent from the Memory Controller 0 to DRAM (sum of all channels).", + "EventCode": "0xff", + "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every CAS write command sent from the Memory Controller 0 to DRAM (sum of all channels). Each CAS command can be for 32B or 64B of data.", + "UMask": "0x30", + "Unit": "imc_free_running_0" + }, + { + "BriefDescription": "Counts every CAS read command sent from the Memory Controller 1 to DRAM (sum of all channels).", + "EventCode": "0xff", + "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every CAS read command sent from the Memory Controller 1 to DRAM (sum of all channels). Each CAS command can be for 32B or 64B of data.", + "UMask": "0x20", + "Unit": "imc_free_running_1" + }, + { + "BriefDescription": "Counts every read and write request entering the Memory Controller 1.", + "EventCode": "0xff", + "EventName": "UNC_MC1_TOTAL_REQCOUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every read and write request entering the Memory Controller 1 (sum of all channels). All requests are counted as one, whether they are 32B or 64B Read/Write or partial/full line writes. Some write requests to the same address may merge to a single write command to DRAM. Therefore, the total request count may be higher than total DRAM BW.", + "UMask": "0x10", + "Unit": "imc_free_running_1" + }, + { + "BriefDescription": "Counts every CAS write command sent from the Memory Controller 1 to DRAM (sum of all channels).", + "EventCode": "0xff", + "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every CAS write command sent from the Memory Controller 1 to DRAM (sum of all channels).
Each CAS command can be for 32B or 64B of data.", + "UMask": "0x30", + "Unit": "imc_free_running_1" + }, + { + "BriefDescription": "ACT command for a read request sent to DRAM", + "EventCode": "0x24", + "EventName": "UNC_M_ACT_COUNT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command sent to DRAM", + "EventCode": "0x26", + "EventName": "UNC_M_ACT_COUNT_TOTAL", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command for a write request sent to DRAM", + "EventCode": "0x25", + "EventName": "UNC_M_ACT_COUNT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Read CAS command sent to DRAM", + "EventCode": "0x22", + "EventName": "UNC_M_CAS_COUNT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Write CAS command sent to DRAM", + "EventCode": "0x23", + "EventName": "UNC_M_CAS_COUNT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration", + "EventCode": "0x28", + "EventName": "UNC_M_PRE_COUNT_IDLE", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "PRE command sent to DRAM for a read/write request", + "EventCode": "0x27", + "EventName": "UNC_M_PRE_COUNT_PAGE_MISS", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Number of bytes read from DRAM, in 32B chunks. Counter increments by 1 after receiving a 32B chunk of data.", + "EventCode": "0x3A", + "EventName": "UNC_M_RD_DATA", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Total number of read and write byte transfers to/from DRAM, in 32B chunks. Counter increments by 1 after sending or receiving a 32B chunk of data.", + "EventCode": "0x3C", + "EventName": "UNC_M_TOTAL_DATA", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Number of bytes written to DRAM, in 32B chunks. Counter increments by 1 after sending a 32B chunk of data.", + "EventCode": "0x3B", + "EventName": "UNC_M_WR_DATA", + "PerPkg": "1", + "Unit": "iMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json index 556e4292fcc8..056c2a885a32 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json @@ -1,5 +1,40 @@ [ { + "BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Accounts for all page sizes.
Will result in a DTLB write from STLB.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "SampleAfterValue": "200003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Loads that miss the DTLB and hit the STLB.", + "EventCode": "0x12", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", + "SampleAfterValue": "100003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.", + "CounterMask": "1", + "EventCode": "0x12", + "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.", + "SampleAfterValue": "100003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "200003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", @@ -9,6 +44,95 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Page walks completed due to a demand data load to a 1G page.", + "EventCode": "0x12", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 2M or 4M page.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.", + "EventCode": "0x12", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Page walks completed due to a demand data load to a 4K page.", + "EventCode": "0x12", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. 
The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks outstanding for Loads (demand or SW prefetch) in PMH every cycle.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", + "PublicDescription": "Counts the number of page walks outstanding for Loads (demand or SW prefetch) in PMH every cycle. A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.", + "SampleAfterValue": "200003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.", + "EventCode": "0x12", + "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", + "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.", + "SampleAfterValue": "100003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of first level TLB misses but second level hits due to stores that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Stores that miss the DTLB and hit the STLB.", + "EventCode": "0x13", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", + "SampleAfterValue": "100003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.", + "CounterMask": "1", + "EventCode": "0x13", + "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", + "SampleAfterValue": "100003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "SampleAfterValue": "2000003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", @@ -18,6 +142,86 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Page walks completed due to a demand data store to a 1G page.", + "EventCode": "0x13", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.", + "EventCode": "0x13", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB.
The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Page walks completed due to a demand data store to a 4K page.", + "EventCode": "0x13", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_PENDING", + "PublicDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle. A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.", + "SampleAfterValue": "200003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.", + "EventCode": "0x13", + "EventName": "DTLB_STORE_MISSES.WALK_PENDING", + "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.", + "SampleAfterValue": "100003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks initiated by an instruction fetch that missed the first and second level TLBs.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSED_WALK", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of first level TLB misses but second level hits due to an instruction fetch that did not start a page walk. Accounts for all page sizes.
Will result in an ITLB write from STLB.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", + "EventCode": "0x11", + "EventName": "ITLB_MISSES.STLB_HIT", + "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).", + "SampleAfterValue": "100003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.", + "CounterMask": "1", + "EventCode": "0x11", + "EventName": "ITLB_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.", + "SampleAfterValue": "100003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", "EventCode": "0x85", "EventName": "ITLB_MISSES.WALK_COMPLETED", @@ -34,5 +238,58 @@ "SampleAfterValue": "100003", "UMask": "0xe", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 2M or 4M page.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", + "EventCode": "0x11", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", + "EventCode": "0x11", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks outstanding for the instruction side (iside) in the PMH every cycle.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_PENDING", + "PublicDescription": "Counts the number of page walks outstanding for the instruction side (iside) in the PMH every cycle. A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.
Walks could be counted by edge detecting on this event, but would count restarted suspended walks.", + "SampleAfterValue": "200003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.", + "EventCode": "0x11", + "EventName": "ITLB_MISSES.WALK_PENDING", + "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.", + "SampleAfterValue": "100003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.", + "EventCode": "0x05", + "EventName": "LD_HEAD.DTLB_MISS_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x90", + "Unit": "cpu_atom" + } ] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/cache.json b/tools/perf/pmu-events/arch/x86/rocketlake/cache.json new file mode 100644 index 000000000000..b0f54a6650fe --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/cache.json @@ -0,0 +1,894 @@ +[ + { + "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.", + "EventCode": "0x51", + "EventName": "L1D.REPLACEMENT", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.FB_FULL", + "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS", + "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of cycles a demand request has waited in the L1D due to a lack of L2 resources.", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.L2_STALL", + "PublicDescription": "Counts number of cycles a demand request has waited in the L1D due to a lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Number of L1D misses that are outstanding", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is, each cycle, the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand.
The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "L2 cache lines filling L2", + "EventCode": "0xF1", + "EventName": "L2_LINES_IN.ALL", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", + "SampleAfterValue": "100003", + "UMask": "0x1f" + }, + { + "BriefDescription": "Modified cache lines that are evicted by L2 cache when triggered by an L2 cache fill.", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.NON_SILENT", + "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Non-modified cache lines that are silently dropped by L2 cache when triggered by an L2 cache fill.", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.SILENT", + "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", + "SampleAfterValue": "200003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses", + "EventCode": "0xf2", + "EventName": "L2_LINES_OUT.USELESS_HWPF", + "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "L2 code requests", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "PublicDescription": "Counts the total number of L2 code requests.", + "SampleAfterValue": "200003", + "UMask": "0xe4" + }, + { + "BriefDescription": "Demand Data Read requests", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. 
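L1D_PEND_MISS.PENDING accumulates the number of outstanding misses each cycle, while L1D_PEND_MISS.PENDING_CYCLES counts only the cycles with at least one miss in flight, so their ratio estimates memory-level parallelism. A minimal sketch with placeholder readings:

    pending = 5_000_000         # L1D_PEND_MISS.PENDING: per-cycle sum of outstanding misses
    pending_cycles = 2_000_000  # L1D_PEND_MISS.PENDING_CYCLES: cycles with >= 1 miss in flight

    # Average number of L1D misses overlapped while any miss is outstanding.
    avg_outstanding = pending / pending_cycles
    print(f"{avg_outstanding:.2f} L1D misses outstanding per miss cycle")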
Only non-rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0xe1" + }, + { + "BriefDescription": "Demand requests that miss L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "PublicDescription": "Counts demand requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x27" + }, + { + "BriefDescription": "Demand requests to L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "PublicDescription": "Counts demand requests to L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xe7" + }, + { + "BriefDescription": "RFO requests to L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_RFO", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "SampleAfterValue": "200003", + "UMask": "0xe2" + }, + { + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "SampleAfterValue": "200003", + "UMask": "0xc4" + }, + { + "BriefDescription": "L2 cache misses when fetching instructions", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "SampleAfterValue": "200003", + "UMask": "0x24" + }, + { + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc1" + }, + { + "BriefDescription": "Demand Data Read miss L2, no rejects", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only non-rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0x21" + }, + { + "BriefDescription": "All requests that miss L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.MISS", + "PublicDescription": "Counts all requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x3f" + }, + { + "BriefDescription": "All L2 requests.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.REFERENCES", + "PublicDescription": "Counts all L2 requests.", + "SampleAfterValue": "200003", + "UMask": "0xff" + }, + { + "BriefDescription": "RFO requests that hit L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc2" + }, + { + "BriefDescription": "RFO requests that miss L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x22" + }, + { + "BriefDescription": "SW prefetch requests that hit L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.SWPF_HIT", + "PublicDescription": "Counts Software prefetch requests that hit the L2 cache.
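A pattern worth noting in the L2_RQSTS umasks above: each hit/miss pair ORs together into the corresponding ALL_* umask, which is also how an L2 demand hit rate falls out of the counts. A quick check plus a placeholder computation:

    # UMask composition visible in the table above.
    assert 0xc1 | 0x21 == 0xe1   # DEMAND_DATA_RD_HIT | _MISS == ALL_DEMAND_DATA_RD
    assert 0xc2 | 0x22 == 0xe2   # RFO_HIT | RFO_MISS == ALL_RFO
    assert 0xc4 | 0x24 == 0xe4   # CODE_RD_HIT | CODE_RD_MISS == ALL_CODE_RD

    # Placeholder readings for an L2 demand-load hit rate.
    hits, misses = 900_000, 100_000   # L2_RQSTS.DEMAND_DATA_RD_HIT / _MISS
    print(f"L2 demand data read hit rate: {hits / (hits + misses):.1%}")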
Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.", + "SampleAfterValue": "200003", + "UMask": "0xc8" + }, + { + "BriefDescription": "SW prefetch requests that miss L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.SWPF_MISS", + "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.", + "SampleAfterValue": "200003", + "UMask": "0x28" + }, + { + "BriefDescription": "L2 writebacks that access L2 cache", + "EventCode": "0xF0", + "EventName": "L2_TRANS.L2_WB", + "PublicDescription": "Counts L2 writebacks that access L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x40" + }, + { + "BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x41" + }, + { + "BriefDescription": "Retired load instructions.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "PEBS": "1", + "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions of PREFETCHNTA or PREFETCHT0/1/2 or PREFETCHW.", + "SampleAfterValue": "1000003", + "UMask": "0x81" + }, + { + "BriefDescription": "Retired store instructions.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "PEBS": "1", + "PublicDescription": "Counts all retired store instructions.", + "SampleAfterValue": "1000003", + "UMask": "0x82" + }, + { + "BriefDescription": "All retired memory instructions.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ANY", + "PEBS": "1", + "PublicDescription": "Counts all retired memory instructions - loads and stores.", + "SampleAfterValue": "1000003", + "UMask": "0x83" + }, + { + "BriefDescription": "Retired load instructions with locked access.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with locked access.", + "SampleAfterValue": "100007", + "UMask": "0x21" + }, + { + "BriefDescription": "Retired load instructions that split across a cacheline boundary.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "UMask": "0x41" + }, + { + "BriefDescription": "Retired store instructions that split across a cacheline boundary.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "PEBS": "1", + "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "UMask": "0x42" + }, + { + "BriefDescription": "Retired load instructions that miss the STLB.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "PEBS": "1", + "PublicDescription": "Number of retired load instructions that (start a) miss in the 
2nd-level TLB (STLB).", + "SampleAfterValue": "100003", + "UMask": "0x11" + }, + { + "BriefDescription": "Retired store instructions that miss the STLB.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "PEBS": "1", + "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).", + "SampleAfterValue": "100003", + "UMask": "0x12" + }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x2" + }, + { + "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4" + }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "PEBS": "1", + "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Retired instructions with at least 1 uncacheable load or Bus Lock.", + "Data_LA": "1", + "EventCode": "0xd4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "PEBS": "1", + "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock).", + "SampleAfterValue": "100007", + "UMask": "0x4" + }, + { + "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", + "SampleAfterValue": "100007", + "UMask": "0x40" + }, + { + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. 
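The retired-instruction TLB events above combine naturally with the MEM_INST_RETIRED.ALL_* totals to give STLB miss rates per retired load and store. A minimal sketch with placeholder readings:

    all_loads = 10_000_000      # MEM_INST_RETIRED.ALL_LOADS
    stlb_miss_loads = 25_000    # MEM_INST_RETIRED.STLB_MISS_LOADS
    all_stores = 6_000_000      # MEM_INST_RETIRED.ALL_STORES
    stlb_miss_stores = 9_000    # MEM_INST_RETIRED.STLB_MISS_STORES

    print(f"load STLB miss rate:  {stlb_miss_loads / all_loads:.3%}")
    print(f"store STLB miss rate: {stlb_miss_stores / all_stores:.3%}")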
This event includes all SW prefetches and lock instructions regardless of the data source.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions missed L1 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "SampleAfterValue": "200003", + "UMask": "0x8" + }, + { + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Retired load instructions missed L2 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.", + "SampleAfterValue": "100021", + "UMask": "0x10" + }, + { + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.", + "SampleAfterValue": "100021", + "UMask": "0x4" + }, + { + "BriefDescription": "Retired load instructions missed L3 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.", + "SampleAfterValue": "50021", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was sent or not.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.ANY", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC03C0004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C0004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2003C0004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 
where a snoop was not needed to satisfy the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1003C0004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was sent.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_SENT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1E003C0004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was sent or not.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.ANY", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC03C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was sent.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_SENT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1E003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was sent or not.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.L3_HIT.ANY", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC03C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and 
software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was sent.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_SENT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1E003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that hit a cacheline in the L3 where a snoop was sent or not.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_HIT.ANY", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC03C0400", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2003C0400", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1003C0400", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent or not.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.ANY", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC03C0010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch 
data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C0010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2003C0010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1003C0010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_SENT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1E003C0010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent or not.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.L3_HIT.ANY", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC03C0020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C0020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2003C0020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1003C0020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_SENT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": 
"0x1E003C0020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetches to the L3 only that hit a cacheline in the L3 where a snoop was sent or not.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L3.L3_HIT.ANY", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC03C2380", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C8000", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.OTHER.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x2003C8000", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.OTHER.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1003C8000", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop was sent.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.OTHER.L3_HIT.SNOOP_SENT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1E003C8000", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts streaming stores that hit a cacheline in the L3 where a snoop was sent or not.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.STREAMING_WR.L3_HIT.ANY", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FC03C0800", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Demand and prefetch data reads", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts memory transactions sent to the uncore.", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", + "PublicDescription": "Counts memory transactions sent to the uncore including requests initiated by the core, all L3 prefetches, reads resulting from page walks, and snoop responses.", + "SampleAfterValue": "100003", + "UMask": "0x80" + }, + { + "BriefDescription": "Demand Data Read requests sent to uncore", + "EventCode": "0xb0", + "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. 
Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", + "EventCode": "0xb0", + "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", + "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "For every cycle, increments by the number of outstanding data read requests pending.", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "PublicDescription": "For every cycle, increments by the number of outstanding data read requests pending. Data read requests include cacheable demand reads and L2 prefetches, but do not include RFOs, code reads or prefetches to the L3. Reads due to page walks resulting from any request type will also be counted. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles where at least 1 outstanding data read request is pending.", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "PublicDescription": "Cycles where at least 1 outstanding data read request is pending. Data read requests include cacheable demand reads and L2 prefetches, but do not include RFOs, code reads or prefetches to the L3. Reads due to page walks resulting from any request type will also be counted. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles where at least 1 outstanding Demand RFO request is pending.", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "PublicDescription": "Cycles where at least 1 outstanding Demand RFO request is pending. RFOs are initiated by a core as part of a data store operation. Demand RFO requests include RFOs, locks, and ItoM transactions. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Store Read transactions pending for off-core. Highly correlated.", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. 
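As the description above suggests, pairing the occupancy count with the request count yields the average uncore latency (occupancy divided by arrivals, i.e. Little's law). A sketch with placeholder readings:

    outstanding = 80_000_000   # OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD
    requests = 1_000_000       # OFFCORE_REQUESTS.DEMAND_DATA_RD

    # Average cycles a demand data read spends between its L2 miss and
    # the transaction completion message.
    print(f"~{outstanding / requests:.0f} cycles average uncore latency")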
An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.", + "EventCode": "0xF4", + "EventName": "SQ_MISC.BUS_LOCK", + "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Cycles the queue waiting for offcore responses is full.", + "EventCode": "0xf4", + "EventName": "SQ_MISC.SQ_FULL", + "PublicDescription": "Counts the cycles for which the thread is active and the queue waiting for responses from the uncore cannot take any more entries.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Number of PREFETCHNTA instructions executed.", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.NTA", + "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of PREFETCHW instructions executed.", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "PublicDescription": "Counts the number of PREFETCHW instructions executed.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Number of PREFETCHT0 instructions executed.", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.T0", + "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", + "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "SampleAfterValue": "100003", + "UMask": "0x4" + } +] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/floating-point.json b/tools/perf/pmu-events/arch/x86/rocketlake/floating-point.json new file mode 100644 index 000000000000..85c26c889088 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/floating-point.json @@ -0,0 +1,105 @@ +[ + { + "BriefDescription": "Counts all microcode FP assists.", + "EventCode": "0xc1", + "EventName": "ASSISTS.FP", + "PublicDescription": "Counts all microcode Floating Point assists.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. 
Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", + "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. 
Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x18" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", + "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", + "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x80" + }, + { + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.8_FLOPS", + "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x60" + }, + { + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired; some instructions will count twice as noted below. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR", + "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "1000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. 
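Two observations make the FP_ARITH_INST_RETIRED.* family easy to use. First, the combined umasks are ORs of the single-width ones. Second, each event's count can be weighted by its per-count operation count, as stated in the descriptions, to obtain total FLOPs. A sketch with placeholder readings; note that DPP and FM(N)ADD/SUB already count twice in hardware, so no extra FMA factor is applied:

    # Umask composition visible above: SCALAR (0x3) = SCALAR_DOUBLE | SCALAR_SINGLE,
    # 4_FLOPS (0x18) = 128B_PACKED_SINGLE | 256B_PACKED_DOUBLE,
    # 8_FLOPS (0x60) = 256B_PACKED_SINGLE | 512B_PACKED_DOUBLE.
    assert 0x1 | 0x2 == 0x3 and 0x8 | 0x10 == 0x18 and 0x20 | 0x40 == 0x60

    weights = {
        "SCALAR": 1,               # 1 op per count
        "128B_PACKED_DOUBLE": 2,   # 2 elements per instruction
        "128B_PACKED_SINGLE": 4,
        "256B_PACKED_DOUBLE": 4,
        "256B_PACKED_SINGLE": 8,
        "512B_PACKED_DOUBLE": 8,
        "512B_PACKED_SINGLE": 16,
    }
    counts = {name: 100_000 for name in weights}   # placeholder readings
    flops = sum(counts[n] * w for n, w in weights.items())
    print(f"{flops:,} floating-point operations retired")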
FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of any Vector retired FP arithmetic instructions", + "EventCode": "0xc7", + "EventName": "FP_ARITH_INST_RETIRED.VECTOR", + "SampleAfterValue": "1000003", + "UMask": "0xfc" + } +] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/frontend.json b/tools/perf/pmu-events/arch/x86/rocketlake/frontend.json new file mode 100644 index 000000000000..2b539a08d2bf --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/frontend.json @@ -0,0 +1,377 @@ +[ + { + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "EventCode": "0xe6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]", + "EventCode": "0x87", + "EventName": "DECODE.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. 
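Since DECODE.LCP counts whole stall cycles, its cost is best expressed as a share of total cycles. A minimal sketch, assuming CPU_CLK_UNHALTED.THREAD (this CPU's cycle counter, defined in pipeline.json rather than in this file) for the denominator; readings are placeholders:

    decode_lcp = 4_000_000        # DECODE.LCP: ILD stall cycles from LCPs
    thread_cycles = 400_000_000   # CPU_CLK_UNHALTED.THREAD (assumed, not in this file)
    print(f"LCP stalls cost {decode_lcp / thread_cycles:.2%} of all cycles")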
[This event is alias to ILD_STALL.LCP]", + "SampleAfterValue": "500009", + "UMask": "0x1" + }, + { + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xab", + "EventName": "DSB2MITE_SWITCHES.COUNT", + "PublicDescription": "Counts the number of Decode Stream Buffer (DSB a.k.a. Uop Cache)-to-MITE speculative transitions.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "DSB-to-MITE switch true penalty cycles.", + "EventCode": "0xab", + "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Retired Instructions who experienced DSB miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x1", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DSB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x11", + "PEBS": "1", + "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired Instructions who experienced iTLB true miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x14", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.L1I_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x12", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.L2_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x13", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", + "MSRIndex": "0x3F7", + "MSRValue": "0x500106", + "PEBS": "1", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end 
delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", + "MSRIndex": "0x3F7", + "MSRValue": "0x508006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", + "MSRIndex": "0x3F7", + "MSRValue": "0x501006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", + "MSRIndex": "0x3F7", + "MSRValue": "0x500206", + "PEBS": "1", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", + "MSRIndex": "0x3F7", + "MSRValue": "0x510006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", + "MSRIndex": "0x3F7", + "MSRValue": "0x100206", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", + "MSRIndex": "0x3F7", + "MSRValue": "0x502006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. 
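The MSRValue constants for the FRONTEND_RETIRED.LATENCY_GE_* variants follow a visible pattern: the cycle threshold sits at bit 8 above a fixed base of 0x500006. A sketch that reproduces the values listed above; the base and field split are inferred from this table only, not from an architectural reference:

    def fe_latency_msrvalue(threshold_cycles, base=0x500006):
        # Threshold appears at bit 8 in MSR 0x3F7 (inferred from the
        # FRONTEND_RETIRED.LATENCY_GE_* values in this file).
        return base | (threshold_cycles << 8)

    assert fe_latency_msrvalue(1) == 0x500106     # LATENCY_GE_1
    assert fe_latency_msrvalue(16) == 0x501006    # LATENCY_GE_16
    assert fe_latency_msrvalue(256) == 0x510006   # LATENCY_GE_256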
During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", + "MSRIndex": "0x3F7", + "MSRValue": "0x500406", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", + "MSRIndex": "0x3F7", + "MSRValue": "0x520006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "MSRIndex": "0x3F7", + "MSRValue": "0x504006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", + "MSRIndex": "0x3F7", + "MSRValue": "0x500806", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.STLB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x15", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_DATA.STALLS]", + "EventCode": "0x80", + "EventName": "ICACHE_16B.IFDATA_STALL", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_DATA.STALLS]", + "SampleAfterValue": "500009", + "UMask": "0x4" + }, + { + "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). 
Counts at 64-byte cache-line granularity.", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_HIT", + "PublicDescription": "Counts instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.", + "SampleAfterValue": "200003", + "UMask": "0x1" + }, + { + "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_MISS", + "PublicDescription": "Counts instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_STALL", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_16B.IFDATA_STALL]", + "EventCode": "0x80", + "EventName": "ICACHE_DATA.STALLS", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_16B.IFDATA_STALL]", + "SampleAfterValue": "500009", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]", + "EventCode": "0x83", + "EventName": "ICACHE_TAG.STALLS", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_ANY", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles DSB is delivering optimal number of Uops", + "CounterMask": "5", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_OK", + "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. 
During these cycles uops are not being delivered from the MITE (legacy decode pipeline).", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "EventCode": "0x79", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles MITE is delivering any Uop", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MITE_CYCLES_ANY", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles MITE is delivering optimal number of Uops", + "CounterMask": "5", + "EventCode": "0x79", + "EventName": "IDQ.MITE_CYCLES_OK", + "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", + "EventCode": "0x79", + "EventName": "IDQ.MITE_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_CYCLES_ANY", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "UMask": "0x30" + }, + { + "BriefDescription": "Number of switches from DSB or MITE to the MS", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_SWITCHES", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "SampleAfterValue": "100003", + "UMask": "0x30" + }, + { + "BriefDescription": "Uops delivered to IDQ while MS is busy", + "EventCode": "0x79", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", + "SampleAfterValue": "100003", + "UMask": "0x30" + }, + { + "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled", + "EventCode": "0x9c", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "PublicDescription": "Counts the number of uops not delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. 
This event counts for one SMT thread in a given cycle.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles when no uops are delivered by the IDQ when backend of the machine is not stalled", + "CounterMask": "5", + "EventCode": "0x9c", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. This event counts for one SMT thread in a given cycle.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled", + "CounterMask": "1", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "Invert": "1", + "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. This event counts for one SMT thread in a given cycle.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/memory.json b/tools/perf/pmu-events/arch/x86/rocketlake/memory.json new file mode 100644 index 000000000000..e8d2ec1c029b --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/memory.json @@ -0,0 +1,394 @@ +[ + { + "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", + "CounterMask": "2", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", + "CounterMask": "6", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x6" + }, + { + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", + "EventCode": "0xc8", + "EventName": "HLE_RETIRED.ABORTED", + "PublicDescription": "Counts the number of times HLE abort was triggered.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", + "EventCode": "0xc8", + "EventName": "HLE_RETIRED.ABORTED_EVENTS", + "PublicDescription": "Counts the number of times an HLE execution aborted due to unfriendly events (such as interrupts).", + "SampleAfterValue": "100003", + "UMask": "0x80" + }, + { + "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", + "EventCode": "0xc8", + "EventName": "HLE_RETIRED.ABORTED_MEM", + "PublicDescription": "Counts the number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", + "EventCode": "0xc8", + "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Counts the number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { + "BriefDescription": "Number of times an 
HLE execution successfully committed", + "EventCode": "0xc8", + "EventName": "HLE_RETIRED.COMMIT", + "PublicDescription": "Counts the number of times HLE commit succeeded.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of times an HLE execution started.", + "EventCode": "0xc8", + "EventName": "HLE_RETIRED.START", + "PublicDescription": "Counts the number of times we entered an HLE region. Does not count nested transactions.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of machine clears due to memory ordering conflicts.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "PublicDescription": "Counts the number of Machine Clears detected due to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "1009", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "20011", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "503", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. 
Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "101", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "2003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. 
Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "50021", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that was not supplied by the L3 cache.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFFC00004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that was not supplied by the L3 cache.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFFC00001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that was not supplied by the L3 cache.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFFC00002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that was not supplied by the L3 cache.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFFC00400", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that was not supplied by the L3 cache.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFFC00010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that was not supplied by the L3 cache.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFFC00020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that was not supplied by the L3 cache.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.OTHER.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFFC08000", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts streaming stores that was not supplied by the L3 cache.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.STREAMING_WR.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FFFC00800", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data read requests that miss the L3 cache.", + "EventCode": "0xb0", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Cycles where at least one demand data read request known to have missed the L3 cache is pending.", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "Cycles where at least one demand data read request known to have missed the L3 cache is pending. 
Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known to have missed the L3 cache.", + "SampleAfterValue": "1000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Number of times an RTM execution aborted.", + "EventCode": "0xc9", + "EventName": "RTM_RETIRED.ABORTED", + "PublicDescription": "Counts the number of times RTM abort was triggered.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", + "EventCode": "0xc9", + "EventName": "RTM_RETIRED.ABORTED_EVENTS", + "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).", + "SampleAfterValue": "100003", + "UMask": "0x80" + }, + { + "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)", + "EventCode": "0xc9", + "EventName": "RTM_RETIRED.ABORTED_MEM", + "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type", + "EventCode": "0xc9", + "EventName": "RTM_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.", + "SampleAfterValue": "100003", + "UMask": "0x40" + }, + { + "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", + "EventCode": "0xc9", + "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { + "BriefDescription": "Number of times an RTM execution successfully committed", + "EventCode": "0xc9", + "EventName": "RTM_RETIRED.COMMIT", + "PublicDescription": "Counts the number of times RTM commit succeeded.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of times an RTM execution started.", + "EventCode": "0xc9", + "EventName": "RTM_RETIRED.START", + "PublicDescription": "Counts the number of times we entered an RTM region. 
Does not count nested transactions.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed inside a transactional region", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC2", + "PublicDescription": "Counts Unfriendly TSX abort triggered by a vzeroupper instruction.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of times an instruction execution caused the transactional nest count supported to be exceeded", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC3", + "PublicDescription": "Counts Unfriendly TSX abort triggered by a nest count that is too deep.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional reads", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CAPACITY_READ", + "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads", + "SampleAfterValue": "100003", + "UMask": "0x80" + }, + { + "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional writes.", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CAPACITY_WRITE", + "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CONFLICT", + "PublicDescription": "Counts the number of times a TSX line had a cache conflict.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", + "PublicDescription": "Counts the number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", + "PublicDescription": "Counts the number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", + "PublicDescription": "Counts the number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { + "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", + "PublicDescription": "Counts the 
number of times a TSX Abort was triggered due to a non-release/commit store to lock.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", + "EventCode": "0x54", + "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", + "PublicDescription": "Counts the number of times we could not allocate Lock Buffer.", + "SampleAfterValue": "100003", + "UMask": "0x40" + } +] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/metricgroups.json b/tools/perf/pmu-events/arch/x86/rocketlake/metricgroups.json new file mode 100644 index 000000000000..a151ba9cccb0 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/metricgroups.json @@ -0,0 +1,113 @@ +{ + "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Memory_Lat": "Grouping from 
Top-down Microarchitecture Analysis Metrics spreadsheet", + "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "TopdownL1": "Metrics for top-down breakdown at level 1", + "TopdownL2": "Metrics for top-down breakdown at level 2", + "TopdownL3": "Metrics for top-down breakdown at level 3", + "TopdownL4": "Metrics for top-down breakdown at level 4", + "TopdownL5": "Metrics for top-down breakdown at level 5", + "TopdownL6": "Metrics for top-down breakdown at level 6", + "tma_L1_group": "Metrics for top-down breakdown at level 1", + "tma_L2_group": "Metrics for top-down breakdown at level 2", + "tma_L3_group": "Metrics for top-down breakdown at level 3", + "tma_L4_group": "Metrics for top-down breakdown at level 4", + "tma_L5_group": "Metrics for top-down breakdown at level 5", + "tma_L6_group": "Metrics for top-down breakdown at level 6", + "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category", + "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category", + "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category", + "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category", + "tma_core_bound_group": "Metrics contributing to tma_core_bound category", + "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category", + "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category", + "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category", + "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category", + "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category", + "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category", + "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category", + "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category", + "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category", + "tma_issue2P": "Metrics related by the issue $issue2P", + "tma_issueBC": "Metrics 
related by the issue $issueBC", + "tma_issueBM": "Metrics related by the issue $issueBM", + "tma_issueBW": "Metrics related by the issue $issueBW", + "tma_issueD0": "Metrics related by the issue $issueD0", + "tma_issueFB": "Metrics related by the issue $issueFB", + "tma_issueFL": "Metrics related by the issue $issueFL", + "tma_issueL1": "Metrics related by the issue $issueL1", + "tma_issueLat": "Metrics related by the issue $issueLat", + "tma_issueMC": "Metrics related by the issue $issueMC", + "tma_issueMS": "Metrics related by the issue $issueMS", + "tma_issueMV": "Metrics related by the issue $issueMV", + "tma_issueRFO": "Metrics related by the issue $issueRFO", + "tma_issueSL": "Metrics related by the issue $issueSL", + "tma_issueSO": "Metrics related by the issue $issueSO", + "tma_issueSmSt": "Metrics related by the issue $issueSmSt", + "tma_issueSpSt": "Metrics related by the issue $issueSpSt", + "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn", + "tma_issueTLB": "Metrics related by the issue $issueTLB", + "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category", + "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category", + "tma_light_operations_group": "Metrics contributing to tma_light_operations category", + "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category", + "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category", + "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category", + "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category", + "tma_mite_group": "Metrics contributing to tma_mite category", + "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category", + "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category", + "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category", + "tma_retiring_group": "Metrics contributing to tma_retiring category", + "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category", + "tma_store_bound_group": "Metrics contributing to tma_store_bound category", + "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category" +} diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/other.json b/tools/perf/pmu-events/arch/x86/rocketlake/other.json new file mode 100644 index 000000000000..cfb590632918 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/other.json @@ -0,0 +1,242 @@ +[ + { + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.", + "EventCode": "0x28", + "EventName": "CORE_POWER.LVL0_TURBO_LICENSE", + "PublicDescription": "Counts Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.", + "SampleAfterValue": "200003", + "UMask": "0x7" + }, + { + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.", + "EventCode": "0x28", + "EventName": "CORE_POWER.LVL1_TURBO_LICENSE", + "PublicDescription": "Counts Core cycles where the core was running with power-delivery for license level 1. 
This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.", + "SampleAfterValue": "200003", + "UMask": "0x18" + }, + { + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", + "EventCode": "0x28", + "EventName": "CORE_POWER.LVL2_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). This includes high current AVX 512-bit instructions.", + "SampleAfterValue": "200003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any type of response.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_CODE_RD.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000004", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_RFO.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except 
PREFETCHW) that have any type of response.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L1D_AND_SWPF.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10400", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L1D_AND_SWPF.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000400", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L1D_AND_SWPF.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000400", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that have any type of response.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_DATA_RD.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000010", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that have any type of response.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.HWPF_L2_RFO.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000020", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that have any type of response.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.OTHER.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x18000", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.OTHER.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184008000", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.OTHER.LOCAL_DRAM", + "MSRIndex": 
"0x1a6,0x1a7", + "MSRValue": "0x184008000", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts streaming stores that have any type of response.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10800", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts streaming stores that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.STREAMING_WR.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000800", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts streaming stores that DRAM supplied the request.", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.STREAMING_WR.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x184000800", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/pipeline.json b/tools/perf/pmu-events/arch/x86/rocketlake/pipeline.json new file mode 100644 index 000000000000..375b78044f14 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/pipeline.json @@ -0,0 +1,801 @@ +[ + { + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.", + "CounterMask": "1", + "EventCode": "0x14", + "EventName": "ARITH.DIVIDER_ACTIVE", + "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", + "SampleAfterValue": "1000003", + "UMask": "0x9" + }, + { + "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.", + "EventCode": "0xc1", + "EventName": "ASSISTS.ANY", + "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware Examples include AD (page Access Dirty), FP and AVX related assists.", + "SampleAfterValue": "100003", + "UMask": "0x7" + }, + { + "BriefDescription": "All branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts all branch instructions retired.", + "SampleAfterValue": "400009" + }, + { + "BriefDescription": "Conditional branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND", + "PEBS": "1", + "PublicDescription": "Counts conditional branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x11" + }, + { + "BriefDescription": "Not taken branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_NTAKEN", + "PEBS": "1", + "PublicDescription": "Counts not taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x10" + }, + { + "BriefDescription": "Taken conditional branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts taken conditional branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x1" + }, + { + "BriefDescription": "Far branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "PublicDescription": "Counts far branch instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x40" + }, + { + "BriefDescription": "Indirect near branch instructions retired (excluding returns)", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT", + "PEBS": "1", + "PublicDescription": "Counts near indirect 
branch instructions retired excluding returns. TSX abort is an indirect branch.", + "SampleAfterValue": "100003", + "UMask": "0x80" + }, + { + "BriefDescription": "Direct and indirect near call instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "Counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x2" + }, + { + "BriefDescription": "Return instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "PublicDescription": "Counts return instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x8" + }, + { + "BriefDescription": "Taken branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x20" + }, + { + "BriefDescription": "All mispredicted branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", + "SampleAfterValue": "50021" + }, + { + "BriefDescription": "Mispredicted conditional branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND", + "PEBS": "1", + "PublicDescription": "Counts mispredicted conditional branch instructions retired.", + "SampleAfterValue": "50021", + "UMask": "0x11" + }, + { + "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_NTAKEN", + "PEBS": "1", + "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.", + "SampleAfterValue": "50021", + "UMask": "0x10" + }, + { + "BriefDescription": "Number of branch instructions retired that were mispredicted and taken.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.", + "SampleAfterValue": "50021", + "UMask": "0x1" + }, + { + "BriefDescription": "All mispredicted indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branches).", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT", + "PEBS": "1", + "PublicDescription": "Counts all mispredicted indirect branch instructions retired (excluding RETs. 
TSX aborts are considered indirect branches).", + "SampleAfterValue": "50021", + "UMask": "0x80" + }, + { + "BriefDescription": "Mispredicted indirect CALL instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.", + "SampleAfterValue": "50021", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.", + "SampleAfterValue": "50021", + "UMask": "0x20" + }, + { + "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.RET", + "PEBS": "1", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", + "SampleAfterValue": "50021", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.", + "EventCode": "0xec", + "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED", + "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.", + "SampleAfterValue": "25003", + "UMask": "0x2" + }, + { + "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED", + "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Reference cycles when the core is not in halt state.", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. 
It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods when the processor is 'halted'. The counter update is done at a lower clock rate than the core clock, so the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed, software clears the overflow status bit and resets the counter to less than MAX; because the reset value is not clocked into the counter immediately, the overflow status bit will flip 'high (1)' and generate another PMI (if enabled), after which the reset value gets clocked into the counter. Therefore, software will get the interrupt and read the overflow status bit as '1' (bit 34) while the counter value is less than MAX. Software should ignore this case.", + "SampleAfterValue": "2000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "PublicDescription": "Counts core crystal clock cycles when the thread is unhalted.", + "SampleAfterValue": "25003", + "UMask": "0x1" + }, + { + "BriefDescription": "Core cycles when the thread is not in halt state", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Thread cycles when thread is not in halt state", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. 
For this reason, this event may have a changing ratio with regard to wall clock time.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "CounterMask": "1", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "16", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "SampleAfterValue": "1000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "1000003", + "UMask": "0xc" + }, + { + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "CounterMask": "5", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "20", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "SampleAfterValue": "1000003", + "UMask": "0x14" + }, + { + "BriefDescription": "Total execution stalls.", + "CounterMask": "4", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 3 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", + "EventCode": "0xa6", + "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 4 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.", + "CounterMask": "2", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", + "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.", + 
"SampleAfterValue": "1000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]", + "EventCode": "0x87", + "EventName": "ILD_STALL.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]", + "SampleAfterValue": "500009", + "UMask": "0x1" + }, + { + "BriefDescription": "Instruction decoders utilized in a cycle", + "EventCode": "0x55", + "EventName": "INST_DECODED.DECODERS", + "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event", + "EventName": "INST_RETIRED.ANY", + "PEBS": "1", + "PublicDescription": "Counts the number of instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "PEBS": "1", + "PublicDescription": "Counts the number of instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Number of all retired NOP instructions.", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.NOP", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Precise instruction retired event with a reduced effect of PEBS shadow in IP distribution", + "EventName": "INST_RETIRED.PREC_DIST", + "PEBS": "1", + "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled. 
Use on Fixed Counter 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles without actually retired instructions.", + "CounterMask": "1", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "This event counts cycles without actually retired instructions.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.", + "CounterMask": "1", + "EventCode": "0x0D", + "EventName": "INT_MISC.ALL_RECOVERY_CYCLES", + "PublicDescription": "Counts cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.", + "SampleAfterValue": "2000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Clears speculative count", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x0D", + "EventName": "INT_MISC.CLEARS_COUNT", + "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears", + "SampleAfterValue": "500009", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.", + "EventCode": "0x0d", + "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", + "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.", + "SampleAfterValue": "500009", + "UMask": "0x80" + }, + { + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread", + "EventCode": "0x0D", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", + "SampleAfterValue": "500009", + "UMask": "0x1" + }, + { + "BriefDescription": "TMA slots where uops got dropped", + "EventCode": "0x0d", + "EventName": "INT_MISC.UOP_DROPPING", + "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons", + "SampleAfterValue": "1000003", + "UMask": "0x10" + }, + { + "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.NO_SR", + "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. 
Note: See the table of unsupported store forwards in the Optimization Guide.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "False dependencies due to partial compare on address.", + "EventCode": "0x07", + "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", + "PublicDescription": "Counts the number of times a load got blocked due to false dependencies due to partial compare on address.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", + "EventCode": "0x4c", + "EventName": "LOAD_HIT_PREFETCH.SWPF", + "PublicDescription": "Counts all load dispatches that are not software prefetches and that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_ACTIVE", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.", + "CounterMask": "5", + "EventCode": "0xa8", + "EventName": "LSD.CYCLES_OK", + "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of Uops delivered by the LSD.", + "EventCode": "0xa8", + "EventName": "LSD.UOPS", + "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.COUNT", + "PublicDescription": "Counts the number of machine clears (nukes) of any type.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Self-modifying code (SMC) detected.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Increments whenever there is an update to the LBR array.", + "EventCode": "0xcc", + "EventName": "MISC_RETIRED.LBR_INSERTS", + "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR to be enabled properly.", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { + "BriefDescription": "Number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.", + "EventCode": "0xcc", + "EventName": "MISC_RETIRED.PAUSE_INST", + "PublicDescription": "Counts number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.", + "SampleAfterValue": "100003", + "UMask": "0x40" + }, + { + "BriefDescription": "Cycles stalled due to no store buffers available. 
(not including draining from sync).", + "EventCode": "0xa2", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.", + "EventCode": "0xa2", + "EventName": "RESOURCE_STALLS.SCOREBOARD", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", + "EventCode": "0x5e", + "EventName": "RS_EVENTS.EMPTY_CYCLES", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x5E", + "EventName": "RS_EVENTS.EMPTY_END", + "Invert": "1", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.", + "EventCode": "0xa4", + "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS", + "PublicDescription": "Counts the number of Top-down Microarchitecture Analysis (TMA) method's slots where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.", + "SampleAfterValue": "10000003", + "UMask": "0x2" + }, + { + "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions", + "EventCode": "0xa4", + "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS", + "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.", + "SampleAfterValue": "10000003", + "UMask": "0x8" + }, + { + "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event", + "EventName": "TOPDOWN.SLOTS", + "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).", + "SampleAfterValue": "10000003", + "UMask": "0x4" + }, + { + "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event", + "EventCode": "0xa4", + "EventName": "TOPDOWN.SLOTS_P", + "PublicDescription": "Counts the number of available slots for an unhalted logical processor. 
The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.", + "SampleAfterValue": "10000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of uops decoded out of instructions exclusively fetched by decoder 0", + "EventCode": "0x56", + "EventName": "UOPS_DECODED.DEC0", + "PublicDescription": "Uops exclusively fetched by decoder 0", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of uops executed on port 0", + "EventCode": "0xa1", + "EventName": "UOPS_DISPATCHED.PORT_0", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of uops executed on port 1", + "EventCode": "0xa1", + "EventName": "UOPS_DISPATCHED.PORT_1", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of uops executed on port 2 and 3", + "EventCode": "0xa1", + "EventName": "UOPS_DISPATCHED.PORT_2_3", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 2 and 3.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Number of uops executed on port 4 and 9", + "EventCode": "0xa1", + "EventName": "UOPS_DISPATCHED.PORT_4_9", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 4 and 9.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Number of uops executed on port 5", + "EventCode": "0xa1", + "EventName": "UOPS_DISPATCHED.PORT_5", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", + "SampleAfterValue": "2000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Number of uops executed on port 6", + "EventCode": "0xa1", + "EventName": "UOPS_DISPATCHED.PORT_6", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", + "SampleAfterValue": "2000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Number of uops executed on port 7 and 8", + "EventCode": "0xa1", + "EventName": "UOPS_DISPATCHED.PORT_7_8", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 7 and 8.", + "SampleAfterValue": "2000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Number of uops executed on the core.", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Counts the number of uops executed from any thread.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.", + 
"SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "CounterMask": "2", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "CounterMask": "3", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "CounterMask": "4", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "CounterMask": "4", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "EventCode": "0xb1", + "EventName": "UOPS_EXECUTED.THREAD", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of x87 uops dispatched.", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.X87", + "PublicDescription": "Counts the number of x87 uops executed.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Uops that RAT issues to RS", + "EventCode": "0x0e", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": 
"Cycles when RAT does not issue Uops to RS for the thread", + "CounterMask": "1", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", + "EventCode": "0x0e", + "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to 'Mixing Intel AVX and Intel SSE Code' section of the Optimization Guide.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Retirement slots used.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.SLOTS", + "PublicDescription": "Counts the retirement slots used each cycle.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles without actually retired uops.", + "CounterMask": "1", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles with less than 10 actually retired uops.", + "CounterMask": "10", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts the number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "SampleAfterValue": "1000003", + "UMask": "0x2" + } +] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json new file mode 100644 index 000000000000..1bb9cededa56 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json @@ -0,0 +1,1571 @@ +[ + { + "BriefDescription": "C10 residency percent per package", + "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C10_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "cstate_core@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "cstate_core@c7\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency", + "ScaleUnit": "100%" + }, + 
{ + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C8 residency percent per package", + "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C8_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C9 residency percent per package", + "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C9_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Uncore frequency per die [GHz]", + "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9", + "MetricGroup": "SoC", + "MetricName": "UNCORE_FREQ" + }, + { + "BriefDescription": "Percentage of cycles spent in System Management Interrupts.", + "MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)", + "MetricGroup": "smi", + "MetricName": "smi_cycles", + "MetricThreshold": "smi_cycles > 0.1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Number of SMI interrupts.", + "MetricExpr": "msr@smi@", + "MetricGroup": "smi", + "MetricName": "smi_num", + "ScaleUnit": "1SMI#" + }, + { + "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset", + "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks", + "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", + "MetricName": "tma_4k_aliasing", + "MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset. False match is possible; which incurs a few cycles of load re-issue. However; the short re-issue duration is often hidden by the out-of-order core and HW optimizations; hence a user may safely ignore a high value of this metric unless it manages to propagate up into parent nodes of the hierarchy (e.g. to L1_Bound).", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.", + "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_core_clks)", + "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", + "MetricName": "tma_alu_op_utilization", + "MetricThreshold": "tma_alu_op_utilization > 0.6", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists", + "MetricExpr": "100 * ASSISTS.ANY / tma_info_thread_slots", + "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group", + "MetricName": "tma_assists", + "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)", + "PublicDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists. Assists are long sequences of uops that are required in certain corner-cases for operations that cannot be handled natively by the execution pipeline. 
For example; when working with very small floating point values (so-called Denormals); the FP units are not set up to perform these operations natively. Instead; a sequence of instructions to perform the computation on the Denormals is injected into the pipeline. Since these microcode sequences might be dozens of uops long; Assists can be extremely deleterious to performance and they can be avoided in many cases. Sample with: ASSISTS.ANY", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "DefaultMetricgroupName": "TopdownL1", + "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_thread_slots", + "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group", + "MetricName": "tma_backend_bound", + "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1;Default", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "DefaultMetricgroupName": "TopdownL1", + "MetricExpr": "max(1 - (tma_frontend_bound + tma_backend_bound + tma_retiring), 0)", + "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group", + "MetricName": "tma_bad_speculation", + "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1;Default", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This includes slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches is categorized under the Bad Speculation category. 
Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions.", + "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_thread_slots)", + "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", + "MetricName": "tma_branch_instructions", + "MetricThreshold": "tma_branch_instructions > 0.1 & tma_light_operations > 0.6", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * tma_bad_speculation", + "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", + "MetricName": "tma_branch_mispredicts", + "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", + "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers", + "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches", + "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group", + "MetricName": "tma_branch_resteers", + "MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", + "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers. Branch Resteers estimates the Frontend delay in fetching operations from corrected path; following all sorts of miss-predicted branches. For example; branchy code with lots of miss-predictions might get categorized under Branch Resteers. Note the value of this node may overlap with its siblings. Sample with: BR_MISP_RETIRED.ALL_BRANCHES", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction", + "MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)", + "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group", + "MetricName": "tma_cisc", + "MetricThreshold": "tma_cisc > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)", + "PublicDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction. A CISC instruction has multiple uops that are required to perform the instruction's functionality as in the case of read-modify-write as an example. 
Since these instructions require multiple uops they may or may not imply sub-optimal use of machine resources.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears", + "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks", + "MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC", + "MetricName": "tma_clears_resteers", + "MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))", + "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "(29 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", + "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", + "MetricName": "tma_contested_accesses", + "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses. Contested accesses occur when data written by one Logical Processor are read by another Logical Processor on a different Physical Core. Examples of contested accesses include synchronizations such as locks; true data sharing such as modified locked variables; and false sharing. Sample with: MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM_PS;MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS_PS. Related metrics: tma_data_sharing, tma_false_sharing, tma_machine_clears, tma_remote_cache", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots where Core non-memory issues were the bottleneck", + "MetricExpr": "max(0, tma_backend_bound - tma_memory_bound)", + "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", + "MetricName": "tma_core_bound", + "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", + "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were the bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. 
FP-chained long-latency arithmetic operations).", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", + "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", + "MetricName": "tma_data_sharing", + "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses. Data shared by multiple Logical Processors (even just read shared) may cause increased access latency due to cache coherency. Excessive data sharing can drastically harm multithreaded performance. Sample with: MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT_PS. Related metrics: tma_contested_accesses, tma_false_sharing, tma_machine_clears, tma_remote_cache", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder", + "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2", + "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group", + "MetricName": "tma_decoder0_alone", + "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))", + "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles where the Divider unit was active", + "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_thread_clks", + "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group", + "MetricName": "tma_divider", + "MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)", + "PublicDescription": "This metric represents fraction of cycles where the Divider unit was active. Divide and square root instructions are performed by the Divider unit and can take considerably longer latency than integer or Floating Point addition; subtraction; or multiplication. Sample with: ARITH.DIVIDER_ACTIVE", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound", + "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", + "MetricName": "tma_dram_bound", + "MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", + "PublicDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads. Better caching can improve the latency and increase performance. 
Sample with: MEM_LOAD_RETIRED.L3_MISS_PS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline", + "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2", + "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", + "MetricName": "tma_dsb", + "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)", + "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines", + "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks", + "MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", + "MetricName": "tma_dsb_switches", + "MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", + "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivers higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses", + "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks", + "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group", + "MetricName": "tma_dtlb_load", + "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. 
Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses", + "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_core_clks", + "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group", + "MetricName": "tma_dtlb_store", + "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing", + "MetricExpr": "32.5 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks", + "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group", + "MetricName": "tma_false_sharing", + "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing. False Sharing is a multithreading hiccup; where multiple Logical Processors contend on different data-elements mapped into the same cache line. Sample with: OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM. Related metrics: tma_contested_accesses, tma_data_sharing, tma_machine_clears, tma_remote_cache", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed", + "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks", + "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group", + "MetricName": "tma_fb_full", + "MetricThreshold": "tma_fb_full > 0.3", + "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). 
Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues", + "MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)", + "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", + "MetricName": "tma_fetch_bandwidth", + "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35", + "MetricgroupNoGroup": "TopdownL2", + "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues", + "MetricExpr": "(5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / tma_info_thread_slots", + "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", + "MetricName": "tma_fetch_latency", + "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", + "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots where the CPU was retiring instructions that are decoded into two or up to ([SNB+] four; [ADL+] five) uops", + "MetricExpr": "tma_heavy_operations - tma_microcode_sequencer", + "MetricGroup": "TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueD0", + "MetricName": "tma_few_uops_instructions", + "MetricThreshold": "tma_few_uops_instructions > 0.05 & tma_heavy_operations > 0.1", + "PublicDescription": "This metric represents fraction of slots where the CPU was retiring instructions that are decoded into two or up to ([SNB+] four; [ADL+] five) uops. This highly-correlates with the number of uops in such instructions. 
Related metrics: tma_decoder0_alone", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", + "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", + "MetricName": "tma_fp_arith", + "MetricThreshold": "tma_fp_arith > 0.2 & tma_light_operations > 0.6", + "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired", + "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)", + "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P", + "MetricName": "tma_fp_scalar", + "MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)", + "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths", + "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / (tma_retiring * tma_info_thread_slots)", + "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P", + "MetricName": "tma_fp_vector", + "MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)", + "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors", + "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)", + "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P", + "MetricName": "tma_fp_vector_128b", + "MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))", + "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors. May overcount due to FMA double counting. 
Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors", + "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)", + "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P", + "MetricName": "tma_fp_vector_256b", + "MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))", + "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 512-bit wide vectors", + "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)", + "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P", + "MetricName": "tma_fp_vector_512b", + "MetricThreshold": "tma_fp_vector_512b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))", + "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 512-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "DefaultMetricgroupName": "TopdownL1", + "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_thread_slots", + "MetricGroup": "Default;PGO;TmaL1;TopdownL1;tma_L1_group", + "MetricName": "tma_frontend_bound", + "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1;Default", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. 
Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences", + "MetricExpr": "tma_microcode_sequencer + tma_retiring * (UOPS_DECODED.DEC0 - cpu@UOPS_DECODED.DEC0\\,cmask\\=1@) / IDQ.MITE_UOPS", + "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", + "MetricName": "tma_heavy_operations", + "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", + "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses", + "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_thread_clks", + "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group", + "MetricName": "tma_icache_misses", + "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", + "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;BrMispredicts;tma_issueBM", + "MetricName": "tma_info_bad_spec_branch_misprediction_cost", + "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). 
Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers" + }, + { + "BriefDescription": "Instructions per retired misprediction for conditional non-taken branches (lower number means higher occurrence rate).", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN", + "MetricGroup": "Bad;BrMispredicts", + "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken", + "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200" + }, + { + "BriefDescription": "Instructions per retired misprediction for conditional taken branches (lower number means higher occurrence rate).", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", + "MetricGroup": "Bad;BrMispredicts", + "MetricName": "tma_info_bad_spec_ipmisp_cond_taken", + "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200" + }, + { + "BriefDescription": "Instructions per retired misprediction for indirect CALL or JMP branches (lower number means higher occurrence rate).", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT", + "MetricGroup": "Bad;BrMispredicts", + "MetricName": "tma_info_bad_spec_ipmisp_indirect", + "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3" + }, + { + "BriefDescription": "Instructions per retired misprediction for return branches (lower number means higher occurrence rate).", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET", + "MetricGroup": "Bad;BrMispredicts", + "MetricName": "tma_info_bad_spec_ipmisp_ret", + "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500" + }, + { + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;BadSpec;BrMispredicts", + "MetricName": "tma_info_bad_spec_ipmispredict", + "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200" + }, + { + "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", + "MetricGroup": "Cor;SMT", + "MetricName": "tma_info_botlnk_l0_core_bound_likely", + "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5" + }, + { + "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))", + "MetricGroup": "DSBmiss;Fed;tma_issueFB", + "MetricName": "tma_info_botlnk_l2_dsb_misses", + "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10", + "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. 
Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp" + }, + { + "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", + "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", + "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", + "MetricName": "tma_info_botlnk_l2_ic_misses", + "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5", + "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck." + }, + { + "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", + "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", + "MetricName": "tma_info_bottleneck_big_code", + "MetricThreshold": "tma_info_bottleneck_big_code > 20", + "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead" + }, + { + "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)", + "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)", + "MetricGroup": "Ret;tma_issueBC", + "MetricName": "tma_info_bottleneck_branching_overhead", + "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10", + "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). 
Related metrics: tma_info_bottleneck_big_code" + }, + { + "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", + "MetricGroup": "Fed;FetchBW;Frontend", + "MetricName": "tma_info_bottleneck_instruction_fetch_bw", + "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20" + }, + { + "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", + "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", + "MetricName": "tma_info_bottleneck_memory_bandwidth", + "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20", + "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full" + }, + { + "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", + "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", + "MetricName": "tma_info_bottleneck_memory_data_tlbs", + "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20", + "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). 
Related metrics: tma_dtlb_load, tma_dtlb_store" + }, + { + "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))", + "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", + "MetricName": "tma_info_bottleneck_memory_latency", + "MetricThreshold": "tma_info_bottleneck_memory_latency > 20", + "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency" + }, + { + "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", + "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", + "MetricName": "tma_info_bottleneck_mispredictions", + "MetricThreshold": "tma_info_bottleneck_mispredictions > 20", + "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers" + }, + { + "BriefDescription": "Fraction of branches that are CALL or RET", + "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches", + "MetricName": "tma_info_branches_callret" + }, + { + "BriefDescription": "Fraction of branches that are non-taken conditionals", + "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches;CodeGen;PGO", + "MetricName": "tma_info_branches_cond_nt" + }, + { + "BriefDescription": "Fraction of branches that are taken conditionals", + "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches;CodeGen;PGO", + "MetricName": "tma_info_branches_cond_tk" + }, + { + "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps", + "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches", + "MetricName": "tma_info_branches_jump" + }, + { + "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)", + "MetricExpr": "1 - (tma_info_branches_cond_nt + tma_info_branches_cond_tk + tma_info_branches_callret + tma_info_branches_jump)", + "MetricGroup": "Bad;Branches", + "MetricName": "tma_info_branches_other_branches" + }, + { + "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core", + "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED", + "MetricGroup": "SMT", + "MetricName": "tma_info_core_core_clks" + }, + { + "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", + 
"MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks", + "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group", + "MetricName": "tma_info_core_coreipc" + }, + { + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks", + "MetricGroup": "Flops;Ret", + "MetricName": "tma_info_core_flopc" + }, + { + "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)", + "MetricGroup": "Cor;Flops;HPC", + "MetricName": "tma_info_core_fp_arith_utilization", + "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)." + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core", + "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", + "MetricGroup": "Backend;Cor;Pipeline;PortsUtil", + "MetricName": "tma_info_core_ilp" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY", + "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB", + "MetricName": "tma_info_frontend_dsb_coverage", + "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 5 > 0.35", + "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). 
Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp" + }, + { + "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.", + "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@", + "MetricGroup": "DSBmiss", + "MetricName": "tma_info_frontend_dsb_switch_cost" + }, + { + "BriefDescription": "Average number of Uops issued by front-end when it issued something", + "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@", + "MetricGroup": "Fed;FetchBW", + "MetricName": "tma_info_frontend_fetch_upc" + }, + { + "BriefDescription": "Average Latency for L1 instruction cache misses", + "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@", + "MetricGroup": "Fed;FetchLat;IcMiss", + "MetricName": "tma_info_frontend_icache_miss_latency" + }, + { + "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS", + "MetricGroup": "DSBmiss;Fed", + "MetricName": "tma_info_frontend_ipdsb_miss_ret", + "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50" + }, + { + "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)", + "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY", + "MetricGroup": "Fed", + "MetricName": "tma_info_frontend_ipunknown_branch" + }, + { + "BriefDescription": "L2 cache true code cacheline misses per kilo instruction", + "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY", + "MetricGroup": "IcMiss", + "MetricName": "tma_info_frontend_l2mpki_code" + }, + { + "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction", + "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY", + "MetricGroup": "IcMiss", + "MetricName": "tma_info_frontend_l2mpki_code_all" + }, + { + "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)", + "MetricExpr": "LSD.UOPS / UOPS_ISSUED.ANY", + "MetricGroup": "Fed;LSD", + "MetricName": "tma_info_frontend_lsd_coverage" + }, + { + "BriefDescription": "Branch instructions per taken branch.", + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;Fed;PGO", + "MetricName": "tma_info_inst_mix_bptkbranch" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary;TmaL1;tma_L1_group", + "MetricName": "tma_info_inst_mix_instructions", + "PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST" + }, + { + "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)", + "MetricGroup": "Flops;InsType", + "MetricName": "tma_info_inst_mix_iparith", + "MetricThreshold": "tma_info_inst_mix_iparith < 10", + "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW." 
+ }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)", + "MetricGroup": "Flops;FpVector;InsType", + "MetricName": "tma_info_inst_mix_iparith_avx128", + "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10", + "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)", + "MetricGroup": "Flops;FpVector;InsType", + "MetricName": "tma_info_inst_mix_iparith_avx256", + "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10", + "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)", + "MetricGroup": "Flops;FpVector;InsType", + "MetricName": "tma_info_inst_mix_iparith_avx512", + "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10", + "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "MetricGroup": "Flops;FpScalar;InsType", + "MetricName": "tma_info_inst_mix_iparith_scalar_dp", + "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10", + "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "MetricGroup": "Flops;FpScalar;InsType", + "MetricName": "tma_info_inst_mix_iparith_scalar_sp", + "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10", + "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." 
+ }, + { + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Branches;Fed;InsType", + "MetricName": "tma_info_inst_mix_ipbranch", + "MetricThreshold": "tma_info_inst_mix_ipbranch < 8" + }, + { + "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "MetricGroup": "Branches;Fed;PGO", + "MetricName": "tma_info_inst_mix_ipcall", + "MetricThreshold": "tma_info_inst_mix_ipcall < 200" + }, + { + "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)", + "MetricGroup": "Flops;InsType", + "MetricName": "tma_info_inst_mix_ipflop", + "MetricThreshold": "tma_info_inst_mix_ipflop < 10" + }, + { + "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "MetricGroup": "InsType", + "MetricName": "tma_info_inst_mix_ipload", + "MetricThreshold": "tma_info_inst_mix_ipload < 3" + }, + { + "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "MetricGroup": "InsType", + "MetricName": "tma_info_inst_mix_ipstore", + "MetricThreshold": "tma_info_inst_mix_ipstore < 8" + }, + { + "BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@", + "MetricGroup": "Prefetches", + "MetricName": "tma_info_inst_mix_ipswpf", + "MetricThreshold": "tma_info_inst_mix_ipswpf < 100" + }, + { + "BriefDescription": "Instructions per taken branch", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB", + "MetricName": "tma_info_inst_mix_iptb", + "MetricThreshold": "tma_info_inst_mix_iptb < 11", + "PublicDescription": "Instructions per taken branch. 
Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp" + }, + { + "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]", + "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time", + "MetricGroup": "Mem;MemoryBW", + "MetricName": "tma_info_memory_core_l1d_cache_fill_bw" + }, + { + "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]", + "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time", + "MetricGroup": "Mem;MemoryBW", + "MetricName": "tma_info_memory_core_l2_cache_fill_bw" + }, + { + "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]", + "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time", + "MetricGroup": "Mem;MemoryBW;Offcore", + "MetricName": "tma_info_memory_core_l3_cache_access_bw" + }, + { + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time", + "MetricGroup": "Mem;MemoryBW", + "MetricName": "tma_info_memory_core_l3_cache_fill_bw" + }, + { + "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)", + "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY", + "MetricGroup": "CacheMisses;Mem", + "MetricName": "tma_info_memory_fb_hpki" + }, + { + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "MetricGroup": "CacheMisses;Mem", + "MetricName": "tma_info_memory_l1mpki" + }, + { + "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)", + "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY", + "MetricGroup": "CacheMisses;Mem", + "MetricName": "tma_info_memory_l1mpki_load" + }, + { + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY", + "MetricGroup": "CacheMisses;Mem", + "MetricName": "tma_info_memory_l2hpki_all" + }, + { + "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)", + "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY", + "MetricGroup": "CacheMisses;Mem", + "MetricName": "tma_info_memory_l2hpki_load" + }, + { + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "MetricGroup": "Backend;CacheMisses;Mem", + "MetricName": "tma_info_memory_l2mpki" + }, + { + "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)", + "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "MetricGroup": "CacheMisses;Mem;Offcore", + "MetricName": "tma_info_memory_l2mpki_all" + }, + { + "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)", + "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY", + "MetricGroup": "CacheMisses;Mem", + "MetricName": "tma_info_memory_l2mpki_load" + }, + { + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "MetricGroup": 
"CacheMisses;Mem", + "MetricName": "tma_info_memory_l3mpki" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)", + "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)", + "MetricGroup": "Mem;MemoryBound;MemoryLat", + "MetricName": "tma_info_memory_load_miss_real_latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Mem;MemoryBW;MemoryBound", + "MetricName": "tma_info_memory_mlp", + "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)" + }, + { + "BriefDescription": "Average Parallel L2 cache miss data reads", + "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "MetricGroup": "Memory_BW;Offcore", + "MetricName": "tma_info_memory_oro_data_l2_mlp" + }, + { + "BriefDescription": "Average Latency for L2 cache miss demand Loads", + "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD", + "MetricGroup": "Memory_Lat;Offcore", + "MetricName": "tma_info_memory_oro_load_l2_miss_latency" + }, + { + "BriefDescription": "Average Parallel L2 cache miss demand Loads", + "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@", + "MetricGroup": "Memory_BW;Offcore", + "MetricName": "tma_info_memory_oro_load_l2_mlp" + }, + { + "BriefDescription": "Average Latency for L3 cache miss demand Loads", + "MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "MetricGroup": "Memory_Lat;Offcore", + "MetricName": "tma_info_memory_oro_load_l3_miss_latency" + }, + { + "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]", + "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw", + "MetricGroup": "Mem;MemoryBW", + "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t" + }, + { + "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]", + "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw", + "MetricGroup": "Mem;MemoryBW", + "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t" + }, + { + "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]", + "MetricExpr": "tma_info_memory_core_l3_cache_access_bw", + "MetricGroup": "Mem;MemoryBW;Offcore", + "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t" + }, + { + "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]", + "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw", + "MetricGroup": "Mem;MemoryBW", + "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t" + }, + { + "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)", + "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricGroup": "Fed;MemoryTLB", + "MetricName": "tma_info_memory_tlb_code_stlb_mpki" + }, + { + "BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)", + "MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + 
"MetricGroup": "Mem;MemoryTLB", + "MetricName": "tma_info_memory_tlb_load_stlb_mpki" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * tma_info_core_core_clks)", + "MetricGroup": "Mem;MemoryTLB", + "MetricName": "tma_info_memory_tlb_page_walks_utilization", + "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5" + }, + { + "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)", + "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricGroup": "Mem;MemoryTLB", + "MetricName": "tma_info_memory_tlb_store_stlb_mpki" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread", + "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@", + "MetricGroup": "Cor;Pipeline;PortsUtil;SMT", + "MetricName": "tma_info_pipeline_execute" + }, + { + "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", + "MetricGroup": "Pipeline;Ret", + "MetricName": "tma_info_pipeline_retire" + }, + { + "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]", + "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time", + "MetricGroup": "Power;Summary", + "MetricName": "tma_info_system_average_frequency" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricGroup": "HPC;Summary", + "MetricName": "tma_info_system_cpu_utilization" + }, + { + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3", + "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW", + "MetricName": "tma_info_system_dram_bw_use", + "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time", + "MetricGroup": "Cor;Flops;HPC", + "MetricName": "tma_info_system_gflops", + "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine." 
+ }, + { + "BriefDescription": "Instructions per Far Branch (Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u", + "MetricGroup": "Branches;OS", + "MetricName": "tma_info_system_ipfarbranch", + "MetricThreshold": "tma_info_system_ipfarbranch < 1e6" + }, + { + "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k", + "MetricGroup": "OS", + "MetricName": "tma_info_system_kernel_cpi" + }, + { + "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "OS", + "MetricName": "tma_info_system_kernel_utilization", + "MetricThreshold": "tma_info_system_kernel_utilization > 0.05" + }, + { + "BriefDescription": "Average number of parallel data read requests to external memory", + "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@", + "MetricGroup": "Mem;MemoryBW;SoC", + "MetricName": "tma_info_system_mem_parallel_reads", + "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches" + }, + { + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", + "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD", + "MetricGroup": "Mem;MemoryLat;SoC", + "MetricName": "tma_info_system_mem_read_latency", + "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+] memory-controller only)" + }, + { + "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)", + "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL", + "MetricGroup": "Mem;SoC", + "MetricName": "tma_info_system_mem_request_latency" + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0", + "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_core_clks", + "MetricGroup": "Power", + "MetricName": "tma_info_system_power_license0_utilization", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1", + "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_core_clks", + "MetricGroup": "Power", + "MetricName": "tma_info_system_power_license1_utilization", + "MetricThreshold": "tma_info_system_power_license1_utilization > 0.5", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions." 
+ }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)", + "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_core_clks", + "MetricGroup": "Power", + "MetricName": "tma_info_system_power_license2_utilization", + "MetricThreshold": "tma_info_system_power_license2_utilization > 0.5", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions." + }, + { + "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", + "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0)", + "MetricGroup": "SMT", + "MetricName": "tma_info_system_smt_2t_utilization" + }, + { + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricExpr": "UNC_CLOCK.SOCKET", + "MetricGroup": "SoC", + "MetricName": "tma_info_system_socket_clks" + }, + { + "BriefDescription": "Average Frequency Utilization relative to nominal frequency", + "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "tma_info_system_turbo_utilization" + }, + { + "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Pipeline", + "MetricName": "tma_info_thread_clks" + }, + { + "BriefDescription": "Cycles Per Instruction (per Logical Processor)", + "MetricExpr": "1 / tma_info_thread_ipc", + "MetricGroup": "Mem;Pipeline", + "MetricName": "tma_info_thread_cpi" + }, + { + "BriefDescription": "The ratio of Executed- by Issued-Uops", + "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY", + "MetricGroup": "Cor;Pipeline", + "MetricName": "tma_info_thread_execute_per_issue", + "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggests high rate of \"execute\" at rename stage." 
+ }, + { + "BriefDescription": "Instructions Per Cycle (per Logical Processor)", + "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks", + "MetricGroup": "Ret;Summary", + "MetricName": "tma_info_thread_ipc" + }, + { + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", + "MetricExpr": "TOPDOWN.SLOTS", + "MetricGroup": "TmaL1;tma_L1_group", + "MetricName": "tma_info_thread_slots" + }, + { + "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor", + "MetricExpr": "(tma_info_thread_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)", + "MetricGroup": "SMT;TmaL1;tma_L1_group", + "MetricName": "tma_info_thread_slots_utilization" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "tma_retiring * tma_info_thread_slots / INST_RETIRED.ANY", + "MetricGroup": "Pipeline;Ret;Retire", + "MetricName": "tma_info_thread_uoppi", + "MetricThreshold": "tma_info_thread_uoppi > 1.05" + }, + { + "BriefDescription": "Uops per taken branch", + "MetricExpr": "tma_retiring * tma_info_thread_slots / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;Fed;FetchBW", + "MetricName": "tma_info_thread_uptb", + "MetricThreshold": "tma_info_thread_uptb < 7.5" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses", + "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks", + "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group", + "MetricName": "tma_itlb_misses", + "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", + "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache", + "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)", + "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group", + "MetricName": "tma_l1_bound", + "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", + "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache. The L1 data cache typically has the shortest latency. However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads that miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. 
Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)", + "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", + "MetricName": "tma_l2_bound", + "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", + "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads. Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates how often the CPU was stalled due to load accesses to the L3 cache or contention with a sibling Core", + "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", + "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", + "MetricName": "tma_l3_bound", + "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", + "PublicDescription": "This metric estimates how often the CPU was stalled due to load accesses to the L3 cache or contention with a sibling Core. Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)", + "MetricExpr": "9 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", + "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group", + "MetricName": "tma_l3_hit_latency", + "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)", + "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks", + "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", + "MetricName": "tma_lcp", + "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", + "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). 
Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation)", + "MetricExpr": "max(0, tma_retiring - tma_heavy_operations)", + "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", + "MetricName": "tma_light_operations", + "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", + "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations", + "MetricExpr": "UOPS_DISPATCHED.PORT_2_3 / (2 * tma_info_core_core_clks)", + "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", + "MetricName": "tma_load_op_utilization", + "MetricThreshold": "tma_load_op_utilization > 0.6", + "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations. 
Sample with: UOPS_DISPATCHED.PORT_2_3", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric roughly estimates the fraction of cycles where the (first level) DTLB was missed by load accesses that later hit in the second-level TLB (STLB)", + "MetricExpr": "tma_dtlb_load - tma_load_stlb_miss", + "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group", + "MetricName": "tma_load_stlb_hit", + "MetricThreshold": "tma_load_stlb_hit > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk", + "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks", + "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group", + "MetricName": "tma_load_stlb_miss", + "MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks", + "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", + "MetricName": "tma_lock_latency", + "MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations. Due to the microarchitecture handling of locks; they are classified as L1_Bound regardless of what memory source satisfied them. Sample with: MEM_INST_RETIRED.LOCK_LOADS_PS. Related metrics: tma_store_latency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit", + "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_core_clks / 2", + "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", + "MetricName": "tma_lsd", + "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)", + "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit. LSD typically does well sustaining Uop supply. 
However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears", + "MetricExpr": "max(0, tma_bad_speculation - tma_branch_mispredicts)", + "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", + "MetricName": "tma_machine_clears", + "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", + "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)", + "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks", + "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW", + "MetricName": "tma_mem_bandwidth", + "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)", + "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth", + "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat", + "MetricName": "tma_mem_latency", + "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). 
Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck", + "MetricExpr": "(CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * tma_backend_bound", + "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", + "MetricName": "tma_memory_bound", + "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", + "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincide with execution unit starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY", + "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", + "MetricName": "tma_memory_operations", + "MetricThreshold": "tma_memory_operations > 0.1 & tma_light_operations > 0.6", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit", + "MetricExpr": "tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots", + "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS", + "MetricName": "tma_microcode_sequencer", + "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1", + "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit. The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks", + "MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM", + "MetricName": "tma_mispredicts_resteers", + "MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))", + "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. 
Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)", + "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2", + "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", + "MetricName": "tma_mite", + "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)", + "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles where (only) 4 uops were delivered by the MITE pipeline", + "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_thread_clks", + "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group", + "MetricName": "tma_mite_4wide", + "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued", + "MetricExpr": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH / UOPS_ISSUED.ANY", + "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group", + "MetricName": "tma_mixing_vectors", + "MetricThreshold": "tma_mixing_vectors > 0.05", + "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more on this topic in Appendix B1 of the Optimizations Guide. Related metrics: tma_ms_switches", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)", + "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_thread_clks", + "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO", + "MetricName": "tma_ms_switches", + "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", + "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. 
Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions", + "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)", + "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", + "MetricName": "tma_nop_instructions", + "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6", + "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", + "MetricConstraint": "NO_GROUP_EVENTS", + "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))", + "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", + "MetricName": "tma_other_light_ops", + "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6", + "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)", + "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks", + "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", + "MetricName": "tma_port_0", + "MetricThreshold": "tma_port_0 > 0.6", + "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)", + "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks", + "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", + "MetricName": "tma_port_1", + "MetricThreshold": "tma_port_1 > 0.6", + "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED.PORT_1. 
Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)", + "MetricExpr": "UOPS_DISPATCHED.PORT_5 / tma_info_core_core_clks", + "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", + "MetricName": "tma_port_5", + "MetricThreshold": "tma_port_5 > 0.6", + "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)", + "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks", + "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", + "MetricName": "tma_port_6", + "MetricThreshold": "tma_port_6 > 0.6", + "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)", + "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)", + "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group", + "MetricName": "tma_ports_utilization", + "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)", + "PublicDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related). Two distinct categories can be attributed to this metric: (1) heavy data-dependency among contiguous instructions would manifest in this metric - such cases are often referred to as low Instruction Level Parallelism (ILP). (2) Contention on some hardware execution unit other than Divider. 
For example; when there are too many multiply operations.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks", + "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", + "MetricName": "tma_ports_utilized_0", + "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise). Long-latency instructions like divides may contribute to this metric.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles where the CPU executed a total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks", + "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group", + "MetricName": "tma_ports_utilized_1", + "MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric represents fraction of cycles where the CPU executed a total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). This can be due to heavy data-dependency among software instructions; or oversubscribing a particular hardware resource. In some other cases with high 1_Port_Utilized and L1_Bound; this metric can point to L1 data-cache latency bottleneck that may not necessarily manifest with complete execution starvation (due to the short L1 latency e.g. walking a linked list) - looking at the assembly can be helpful. Sample with: EXE_ACTIVITY.1_PORTS_UTIL. Related metrics: tma_l1_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles CPU executed a total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", + "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", + "MetricName": "tma_ports_utilized_2", + "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric represents fraction of cycles CPU executed a total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Loop Vectorization (most compilers feature auto-Vectorization options today) reduces pressure on the execution ports as multiple elements are calculated with the same uop. Sample with: EXE_ACTIVITY.2_PORTS_UTIL. 
Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles CPU executed a total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", + "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", + "MetricName": "tma_ports_utilized_3m", + "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric represents fraction of cycles CPU executed a total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "DefaultMetricgroupName": "TopdownL1", + "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots", + "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group", + "MetricName": "tma_retiring", + "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1;Default", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessarily mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations", + "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks", + "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group", + "MetricName": "tma_serializing_operation", + "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))", + "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. 
Related metrics: tma_ms_switches", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", + "MetricExpr": "140 * MISC_RETIRED.PAUSE_INST / tma_info_thread_clks", + "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", + "MetricName": "tma_slow_pause", + "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))", + "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: MISC_RETIRED.PAUSE_INST", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - loads that cross the 64-byte cache line boundary", + "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks", + "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", + "MetricName": "tma_split_loads", + "MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric estimates fraction of cycles handling memory load split accesses - loads that cross the 64-byte cache line boundary. Sample with: MEM_INST_RETIRED.SPLIT_LOADS_PS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents rate of split store accesses", + "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", + "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", + "MetricName": "tma_split_stores", + "MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric represents rate of split store accesses. Consider aligning your data to the 64-byte cache line granularity. Sample with: MEM_INST_RETIRED.SPLIT_STORES_PS. Related metrics: tma_port_4", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)", + "MetricExpr": "L1D_PEND_MISS.L2_STALL / tma_info_thread_clks", + "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group", + "MetricName": "tma_sq_full", + "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO stores issue a read-for-ownership request before the write", + "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks", + "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", + "MetricName": "tma_store_bound", + "MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", + "PublicDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO stores issue a read-for-ownership request before the write. 
Even though store accesses do not typically stall out-of-order CPUs; there are a few cases where stores can lead to actual stalls. This metric will be flagged should RFO stores be a bottleneck. Sample with: MEM_INST_RETIRED.ALL_STORES_PS", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", + "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks", + "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", + "MetricName": "tma_store_fwd_blk", + "MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores. To streamline memory operations in the pipeline; a load can avoid waiting for memory if a prior in-flight store is writing the data that the load wants to read (store forwarding process). However; in some cases the load may be blocked for a significant time pending the store forward. For example; when the prior store is writing a smaller region than the load is reading.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses", + "MetricExpr": "(L2_RQSTS.RFO_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks", + "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group", + "MetricName": "tma_store_latency", + "MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses. Store accesses usually have less impact on out-of-order core performance; however; holding resources for a longer time can lead to undesired implications (e.g. contention on L1D fill-buffer entries - see FB_Full). Related metrics: tma_fb_full, tma_lock_latency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations", + "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_core_clks)", + "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", + "MetricName": "tma_store_op_utilization", + "MetricThreshold": "tma_store_op_utilization > 0.6", + "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations. 
Sample with: UOPS_DISPATCHED.PORT_7_8", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric roughly estimates the fraction of cycles where the TLB was missed by store accesses, hitting in the second-level TLB (STLB)", + "MetricExpr": "tma_dtlb_store - tma_store_stlb_miss", + "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group", + "MetricName": "tma_store_stlb_hit", + "MetricThreshold": "tma_store_stlb_hit > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk", + "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks", + "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group", + "MetricName": "tma_store_stlb_miss", + "MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming stores optimize out a read request required by RFO stores", + "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_thread_clks", + "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueSmSt;tma_store_bound_group", + "MetricName": "tma_streaming_stores", + "MetricThreshold": "tma_streaming_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", + "PublicDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming stores optimize out a read request required by RFO stores. Even though store accesses do not typically stall out-of-order CPUs; there are a few cases where stores can lead to actual stalls. This metric will be flagged should Streaming stores be a bottleneck. Sample with: OCR.STREAMING_WR.ANY_RESPONSE. Related metrics: tma_fb_full", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears", + "MetricExpr": "10 * BACLEARS.ANY / tma_info_thread_clks", + "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group", + "MetricName": "tma_unknown_branches", + "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))", + "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting the BTB capacity limit). Sample with: BACLEARS.ANY", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "This metric serves as an approximation of legacy x87 usage", + "MetricExpr": "tma_retiring * UOPS_EXECUTED.X87 / UOPS_EXECUTED.THREAD", + "MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group", + "MetricName": "tma_x87_use", + "MetricThreshold": "tma_x87_use > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)", + "PublicDescription": "This metric serves as an approximation of legacy x87 usage. It accounts for instructions beyond X87 FP arithmetic operations; hence may be used as a thermometer to avoid high X87 usage and preferably upgrade to modern ISA. 
See Tip under Tuning Hint.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of cycles in aborted transactions.", + "MetricExpr": "(max(cycles\-t - cycles\-ct, 0) / cycles if has_event(cycles\-t) else 0)", + "MetricGroup": "transaction", + "MetricName": "tsx_aborted_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.", + "MetricExpr": "(cycles\-t / el\-start if has_event(cycles\-t) else 0)", + "MetricGroup": "transaction", + "MetricName": "tsx_cycles_per_elision", + "ScaleUnit": "1cycles / elision" + }, + { + "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.", + "MetricExpr": "(cycles\-t / tx\-start if has_event(cycles\-t) else 0)", + "MetricGroup": "transaction", + "MetricName": "tsx_cycles_per_transaction", + "ScaleUnit": "1cycles / transaction" + }, + { + "BriefDescription": "Percentage of cycles within a transaction region.", + "MetricExpr": "(cycles\-t / cycles if has_event(cycles\-t) else 0)", + "MetricGroup": "transaction", + "MetricName": "tsx_transactional_cycles", + "ScaleUnit": "100%" + } +] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/rocketlake/uncore-interconnect.json new file mode 100644 index 000000000000..8027590f1776 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/uncore-interconnect.json @@ -0,0 +1,74 @@ +[ + { + "BriefDescription": "Number of entries allocated. Accounts for any type: e.g. Snoop, etc.", + "EventCode": "0x84", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts the number of coherent requests at the memory controller that were issued by any core. This event is not supported on ICL products but is supported on RKL products.", + "EventCode": "0x85", + "EventName": "UNC_ARB_DAT_OCCUPANCY.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts the number of coherent reads pending on data return from the memory controller that were issued by any core. This event is not supported on ICL products but is supported on RKL products.", + "EventCode": "0x85", + "EventName": "UNC_ARB_DAT_OCCUPANCY.RD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts the number of 'valid' coherent Data Read entries. Such an entry is defined as valid from allocation till deallocation. Doesn't include prefetches. This event is not supported on ICL products but is supported on RKL products.", + "EventCode": "0x80", + "EventName": "UNC_ARB_REQ_TRK_OCCUPANCY.DRD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + }, + { + "BriefDescription": "Number of all coherent Data Read entries. Doesn't include prefetches", + "EventCode": "0x81", + "EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts the number of all outgoing valid entries in ReqTrk. Such an entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic. This event is not supported on ICL products but is supported on RKL products.", + "EventCode": "0x80", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts the number of 'valid' coherent Data Read entries. 
Such an entry is defined as valid from allocation till deallocation. Doesn't include prefetches. This event is not supported on ICL products but is supported on RKL products.", + "EventCode": "0x80", + "EventName": "UNC_ARB_TRK_OCCUPANCY.RD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + }, + { + "BriefDescription": "Total number of all outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "EventCode": "0x81", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Number of all coherent Data Read entries. Doesn't include prefetches. This event is not supported on ICL products but is supported on RKL products.", + "EventCode": "0x81", + "EventName": "UNC_ARB_TRK_REQUESTS.RD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + } +] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/uncore-other.json b/tools/perf/pmu-events/arch/x86/rocketlake/uncore-other.json new file mode 100644 index 000000000000..c6596ba09195 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/uncore-other.json @@ -0,0 +1,9 @@ +[ + { + "BriefDescription": "UNC_CLOCK.SOCKET", + "EventCode": "0xff", + "EventName": "UNC_CLOCK.SOCKET", + "PerPkg": "1", + "Unit": "CLOCK" + } +] diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/rocketlake/virtual-memory.json new file mode 100644 index 000000000000..b28f62ce1f39 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/rocketlake/virtual-memory.json @@ -0,0 +1,165 @@ +[ + { + "BriefDescription": "Loads that miss the DTLB and hit the STLB.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.", + "CounterMask": "1", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" + }, + { + "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Page walks completed due to a demand data load to a 4K page.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. 
The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", + "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Stores that miss the DTLB and hit the STLB.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.", + "CounterMask": "1", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (All page sizes)", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" + }, + { + "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Page walks completed due to a demand data store to a 4K page.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. 
The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_PENDING", + "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.STLB_HIT", + "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a code (instruction fetch) request.", + "CounterMask": "1", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. 
The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_PENDING", + "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "DTLB flush attempts of the thread-specific entries", + "EventCode": "0xBD", + "EventName": "TLB_FLUSH.DTLB_THREAD", + "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "STLB flush attempts", + "EventCode": "0xBD", + "EventName": "TLB_FLUSH.STLB_ANY", + "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", + "SampleAfterValue": "100007", + "UMask": "0x20" + } +] diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json index 72e9bdfa9f80..6dcf3b763af4 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json @@ -706,7 +706,7 @@ "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions", "EventCode": "0xa4", "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS", - "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of specualtive operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.", + "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. 
This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.", "SampleAfterValue": "10000003", "UMask": "0x8" }, diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index c732982f70b5..c207c851a9f9 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -1938,28 +1938,28 @@ }, { "BriefDescription": "Percentage of cycles in aborted transactions.", - "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles", + "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_aborted_cycles", "ScaleUnit": "100%" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@", + "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_elision", "ScaleUnit": "1cycles / elision" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@", + "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_transaction", "ScaleUnit": "1cycles / transaction" }, { "BriefDescription": "Percentage of cycles within a transaction region.", - "MetricExpr": "cpu@cycles\\-t@ / cycles", + "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_transactional_cycles", "ScaleUnit": "100%" diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json index b91cebf81f50..3fa660694bc7 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json @@ -3157,6 +3157,23 @@ "Unit": "CHA" }, { + "BriefDescription": "All requests issued from IA cores to CXL accelerator memory regions that hit the LLC.", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c0018101", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_HIT_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c0008101", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts; DRd hits from local IA", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD", @@ -3372,6 +3389,23 @@ "Unit": "CHA" }, { + "BriefDescription": "All requests issued from IA cores to CXL accelerator memory regions that miss the LLC.", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c0018201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c0008201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts for DRd misses from local IA", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD", @@ -3398,6 +3432,23 @@ "Unit": "CHA" }, { + 
"BriefDescription": "DRds issued from an IA core which miss the L3 and target memory in a CXL type 2 memory expander card.", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c8178201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8168201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts for DRds issued by IA Cores targeting DDR Mem that Missed the LLC", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR", @@ -3443,6 +3494,15 @@ "Unit": "CHA" }, { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8268201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts; DRd Opt Pref misses from local IA", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF", @@ -3452,6 +3512,15 @@ "Unit": "CHA" }, { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8a68201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts for DRds issued by iA Cores targeting PMM Mem that Missed the LLC", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM", @@ -3470,6 +3539,23 @@ "Unit": "CHA" }, { + "BriefDescription": "L2 data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c8978201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8968201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts : DRd_Prefs issued by iA Cores targeting DDR Mem that Missed the LLC", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_DDR", @@ -3604,6 +3690,23 @@ "Unit": "CHA" }, { + "BriefDescription": "LLC data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10ccd78201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10ccd68201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts; LLCPrefRFO misses from local IA", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO", @@ -3613,6 +3716,23 @@ "Unit": "CHA" }, { + "BriefDescription": "L2 RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c8878201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_ACC_LOCAL", + 
"EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8868201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LOCAL_WCILF_DDR", @@ -3702,6 +3822,23 @@ "Unit": "CHA" }, { + "BriefDescription": "RFOs issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c8078201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8068201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts RFO misses from local IA", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_LOCAL", @@ -3720,6 +3857,23 @@ "Unit": "CHA" }, { + "BriefDescription": "LLC RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10ccc78201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10ccc68201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Inserts; RFO prefetch misses from local IA", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_LOCAL", @@ -4428,6 +4582,23 @@ "Unit": "CHA" }, { + "BriefDescription": "TOR Occupancy for All requests issued from IA cores to CXL accelerator memory regions that hit the LLC.", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c0018101", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c0008101", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy; DRd hits from local IA", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD", @@ -4645,6 +4816,23 @@ "Unit": "CHA" }, { + "BriefDescription": "TOR Occupancy for All requests issued from IA cores to CXL accelerator memory regions that miss the LLC.", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c0018201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c0008201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy for DRd misses from local IA", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD", @@ -4673,6 +4861,23 @@ "Unit": "CHA" }, { + "BriefDescription": "TOR Occupancy for DRds and equivalent opcodes issued from an IA core which miss the L3 and target memory in a CXL type 2 memory expander card.", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c8178201", + "Unit": 
"CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8168201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy for DRds issued by iA Cores targeting DDR Mem that Missed the LLC", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR", @@ -4718,6 +4923,15 @@ "Unit": "CHA" }, { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8268201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy; DRd Opt Pref misses from local IA", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_PREF", @@ -4727,6 +4941,15 @@ "Unit": "CHA" }, { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_PREF_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_PREF_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8a68201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy for DRds issued by iA Cores targeting PMM Mem that Missed the LLC", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM", @@ -4745,6 +4968,23 @@ "Unit": "CHA" }, { + "BriefDescription": "TOR Occupancy for L2 data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c8978201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8968201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy : DRd_Prefs issued by iA Cores targeting DDR Mem that Missed the LLC", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_DDR", @@ -4879,6 +5119,23 @@ "Unit": "CHA" }, { + "BriefDescription": "TOR Occupancy for LLC data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10ccd78201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10ccd68201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy; LLCPrefRFO misses from local IA", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO", @@ -4888,6 +5145,23 @@ "Unit": "CHA" }, { + "BriefDescription": "TOR Occupancy for L2 RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c8878201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": 
"0x10c8868201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LOCAL_WCILF_DDR", @@ -4977,6 +5251,23 @@ "Unit": "CHA" }, { + "BriefDescription": "TOR Occupancy for RFOs issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10c8078201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10c8068201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy; RFO misses from local IA", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_LOCAL", @@ -4995,6 +5286,23 @@ "Unit": "CHA" }, { + "BriefDescription": "TOR Occupancy for LLC RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_CXL_ACC", + "PerPkg": "1", + "UMask": "0x10ccc78201", + "Unit": "CHA" + }, + { + "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_CXL_ACC_LOCAL", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_CXL_ACC_LOCAL", + "PerPkg": "1", + "PortMask": "0x000", + "UMask": "0x10ccc68201", + "Unit": "CHA" + }, + { "BriefDescription": "TOR Occupancy; RFO prefetch misses from local IA", "EventCode": "0x36", "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_LOCAL", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json index 6800de05c836..09d840c7da4c 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json @@ -3326,7 +3326,7 @@ "EventCode": "0x50", "EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS", "PerPkg": "1", - "PublicDescription": "Message Held : Parallel Success : ad and bl messages were actually slotted into the same flit in paralle", + "PublicDescription": "Message Held : Parallel Success : ad and bl messages were actually slotted into the same flit in parallel", "UMask": "0x8", "Unit": "M3UPI" }, diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json index 04f08e4d2402..095904c77001 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json @@ -8,6 +8,14 @@ "UMask": "0x1" }, { + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]", + "EventCode": "0x87", + "EventName": "DECODE.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. 
[This event is alias to ILD_STALL.LCP]", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches", "EventCode": "0xAB", "EventName": "DSB2MITE_SWITCHES.COUNT", @@ -245,27 +253,34 @@ "UMask": "0x2" }, { - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", "EventCode": "0x83", "EventName": "ICACHE_64B.IFTAG_STALL", "SampleAfterValue": "200003", "UMask": "0x4" }, { - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]", + "EventCode": "0x83", + "EventName": "ICACHE_TAG.STALLS", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]", "CounterMask": "4", "EventCode": "0x79", "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]", "SampleAfterValue": "2000003", "UMask": "0x18" }, { - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.DSB_CYCLES_ANY]", "CounterMask": "1", "EventCode": "0x79", "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", - "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_ANY]", "SampleAfterValue": "2000003", "UMask": "0x18" }, @@ -297,6 +312,24 @@ "UMask": "0x8" }, { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_ANY", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]", + "SampleAfterValue": "2000003", + "UMask": "0x18" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_OK", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. 
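The IDQ.DSB_CYCLES_OK / IDQ.DSB_CYCLES_ANY pair above is the same event (EventCode 0x79, UMask 0x18) counted with CounterMask 4 versus CounterMask 1, so their ratio gives the fraction of DSB-active cycles that sustained the full four-uop bandwidth. A minimal sketch of that derived ratio; the helper name is invented for illustration and is not part of the patch:

    /* Fraction of cycles with any DSB delivery in which all 4 uops were
     * delivered, from IDQ.DSB_CYCLES_OK (CounterMask=4) and
     * IDQ.DSB_CYCLES_ANY (CounterMask=1) counted over the same interval. */
    static double dsb_full_delivery_ratio(double dsb_cycles_ok,
                                          double dsb_cycles_any)
    {
            return dsb_cycles_any > 0.0 ? dsb_cycles_ok / dsb_cycles_any : 0.0;
    }
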
[This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]", + "SampleAfterValue": "2000003", + "UMask": "0x18" + }, + { "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "EventCode": "0x79", "EventName": "IDQ.DSB_UOPS", diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json index cc800fb8180a..cd3e737bf4a1 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json @@ -352,10 +352,10 @@ "UMask": "0x1" }, { - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]", "EventCode": "0x87", "EventName": "ILD_STALL.LCP", - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]", "SampleAfterValue": "2000003", "UMask": "0x1" }, @@ -479,11 +479,11 @@ "UMask": "0x1" }, { - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_OK]", "CounterMask": "4", "EventCode": "0xA8", "EventName": "LSD.CYCLES_4_UOPS", - "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_OK]", "SampleAfterValue": "2000003", "UMask": "0x1" }, @@ -497,6 +497,15 @@ "UMask": "0x1" }, { + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_4_UOPS]", + "CounterMask": "4", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_OK", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). 
[This event is alias to LSD.CYCLES_4_UOPS]", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { "BriefDescription": "Number of Uops delivered by the LSD.", "EventCode": "0xA8", "EventName": "LSD.UOPS", diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index 2ed88842b880..94cb38540b5a 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -1466,28 +1466,28 @@ }, { "BriefDescription": "Percentage of cycles in aborted transactions.", - "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles", + "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_aborted_cycles", "ScaleUnit": "100%" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@", + "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_elision", "ScaleUnit": "1cycles / elision" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@", + "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_transaction", "ScaleUnit": "1cycles / transaction" }, { "BriefDescription": "Percentage of cycles within a transaction region.", - "MetricExpr": "cpu@cycles\\-t@ / cycles", + "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_transactional_cycles", "ScaleUnit": "100%" diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json index 04f08e4d2402..095904c77001 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -8,6 +8,14 @@ "UMask": "0x1" }, { + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]", + "EventCode": "0x87", + "EventName": "DECODE.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches", "EventCode": "0xAB", "EventName": "DSB2MITE_SWITCHES.COUNT", @@ -245,27 +253,34 @@ "UMask": "0x2" }, { - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", "EventCode": "0x83", "EventName": "ICACHE_64B.IFTAG_STALL", "SampleAfterValue": "200003", "UMask": "0x4" }, { - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. 
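The tsx_* MetricExpr changes above wrap each expression in an "if has_event(cycles\-t) else 0" guard, so on systems where the TSX events cannot be parsed the metrics degrade to zero instead of erroring out. Spelled out as plain C for clarity; this is a sketch of the guard's semantics, not perf's expression evaluator, and the parameter names are illustrative:

    /* tsx_aborted_cycles per the guarded expression above:
     * (max(cycles-t - cycles-ct, 0) / cycles if has_event(cycles-t) else 0) */
    static double tsx_aborted_cycles(double cycles_t, double cycles_ct,
                                     double cycles, int have_cycles_t)
    {
            double aborted;

            if (!have_cycles_t)     /* has_event(cycles-t) evaluated to 0 */
                    return 0.0;
            aborted = cycles_t - cycles_ct;
            return (aborted > 0.0 ? aborted : 0.0) / cycles;
    }
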
[This event is alias to ICACHE_64B.IFTAG_STALL]", + "EventCode": "0x83", + "EventName": "ICACHE_TAG.STALLS", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]", "CounterMask": "4", "EventCode": "0x79", "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]", "SampleAfterValue": "2000003", "UMask": "0x18" }, { - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.DSB_CYCLES_ANY]", "CounterMask": "1", "EventCode": "0x79", "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", - "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_ANY]", "SampleAfterValue": "2000003", "UMask": "0x18" }, @@ -297,6 +312,24 @@ "UMask": "0x8" }, { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_ANY", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]", + "SampleAfterValue": "2000003", + "UMask": "0x18" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES_OK", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]", + "SampleAfterValue": "2000003", + "UMask": "0x18" + }, + { "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "EventCode": "0x79", "EventName": "IDQ.DSB_UOPS", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index 31a1663d57f8..66d686cc933e 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -361,10 +361,10 @@ "UMask": "0x1" }, { - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "BriefDescription": "Stalls caused by changing prefix length of the instruction. 
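As a rough illustration of the DECODE.LCP/ILD_STALL.LCP descriptions above (not part of the patch, and assuming an x86-64 GCC/Clang toolchain): a 16-bit immediate store carries the 0x66 operand-size prefix, which changes the instruction's length and forces the pre-decoder to re-derive it, at roughly the three-cycle-per-LCP penalty the event counts:

    /* lcp_demo.c - illustrative only: one way to provoke ILD_STALL.LCP /
     * DECODE.LCP counts. The movw form uses the 0x66 operand-size prefix
     * with a 16-bit immediate, the classic length-changing prefix.
     * Build and measure (x86-64, names assumed available on the CPU):
     *   gcc -O2 lcp_demo.c && perf stat -e ild_stall.lcp ./a.out */
    static unsigned short buf;

    int main(void)
    {
            for (long i = 0; i < 100000000; i++) {
                    /* encodes with prefix 66 and a 16-bit immediate */
                    __asm__ volatile("movw $0x1234, %0" : "=m"(buf));
            }
            return 0;
    }
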
[This event is alias to DECODE.LCP]", "EventCode": "0x87", "EventName": "ILD_STALL.LCP", - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]", "SampleAfterValue": "2000003", "UMask": "0x1" }, @@ -488,11 +488,11 @@ "UMask": "0x1" }, { - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_OK]", "CounterMask": "4", "EventCode": "0xA8", "EventName": "LSD.CYCLES_4_UOPS", - "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_OK]", "SampleAfterValue": "2000003", "UMask": "0x1" }, @@ -506,6 +506,15 @@ "UMask": "0x1" }, { + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_4_UOPS]", + "CounterMask": "4", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_OK", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). 
[This event is alias to LSD.CYCLES_4_UOPS]", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { "BriefDescription": "Number of Uops delivered by the LSD.", "EventCode": "0xA8", "EventName": "LSD.UOPS", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 507d39efacc8..fa4209809c57 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -1774,28 +1774,28 @@ }, { "BriefDescription": "Percentage of cycles in aborted transactions.", - "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles", + "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_aborted_cycles", "ScaleUnit": "100%" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@", + "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_elision", "ScaleUnit": "1cycles / elision" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@", + "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_transaction", "ScaleUnit": "1cycles / transaction" }, { "BriefDescription": "Percentage of cycles within a transaction region.", - "MetricExpr": "cpu@cycles\\-t@ / cycles", + "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_transactional_cycles", "ScaleUnit": "100%" diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json index 26a5a20bf37a..3eece8a728b5 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json @@ -6504,7 +6504,7 @@ "EventCode": "0x52", "EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS", "PerPkg": "1", - "PublicDescription": "ad and bl messages were actually slotted into the same flit in paralle", + "PublicDescription": "ad and bl messages were actually slotted into the same flit in parallel", "UMask": "0x8", "Unit": "M3UPI" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json index 6f8ff2262ce7..7a40aa0f1018 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json @@ -1952,7 +1952,7 @@ "EventCode": "0x81", "EventName": "UNC_M_WPQ_OCCUPANCY", "PerPkg": "1", - "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. 
Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts?", + "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon as they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstructing intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts.", "Unit": "iMC" }, { diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json b/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json index 23b8528590b3..d7b972452c0e 100644 --- a/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json @@ -8,6 +8,14 @@ "UMask": "0x1" }, { + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]", + "EventCode": "0x87", + "EventName": "DECODE.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. 
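The UNC_M_WPQ_OCCUPANCY description above mentions two derived values: the occupancy sum divided by cycles-not-empty gives average queue depth, and divided by allocations gives average residency per write. A sketch of the two derivations, with struct and helper names invented for illustration:

    struct wpq_sample {
            double occupancy_sum;    /* sum of WPQ entries over the interval */
            double cycles_not_empty; /* cycles with at least one WPQ entry */
            double allocations;      /* requests that entered the WPQ */
    };

    /* Average queue depth over the cycles the WPQ held anything. */
    static double wpq_avg_depth(const struct wpq_sample *s)
    {
            return s->cycles_not_empty > 0.0 ?
                    s->occupancy_sum / s->cycles_not_empty : 0.0;
    }

    /* Average cycles a write spends in the WPQ (occupancy / allocations). */
    static double wpq_avg_latency(const struct wpq_sample *s)
    {
            return s->allocations > 0.0 ?
                    s->occupancy_sum / s->allocations : 0.0;
    }
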
[This event is alias to ILD_STALL.LCP]", + "SampleAfterValue": "500009", + "UMask": "0x1" + }, + { "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.", "CounterMask": "1", "EdgeDetect": "1", @@ -213,10 +221,10 @@ "UMask": "0x1" }, { - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_DATA.STALLS]", "EventCode": "0x80", "EventName": "ICACHE_16B.IFDATA_STALL", - "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_DATA.STALLS]", "SampleAfterValue": "500009", "UMask": "0x4" }, @@ -237,10 +245,26 @@ "UMask": "0x2" }, { - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", "EventCode": "0x83", "EventName": "ICACHE_64B.IFTAG_STALL", - "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_16B.IFDATA_STALL]", + "EventCode": "0x80", + "EventName": "ICACHE_DATA.STALLS", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_16B.IFDATA_STALL]", + "SampleAfterValue": "500009", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]", + "EventCode": "0x83", + "EventName": "ICACHE_TAG.STALLS", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]", "SampleAfterValue": "200003", "UMask": "0x4" }, diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json index 020801cbd7e3..541bf1dd1679 100644 --- a/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json @@ -335,10 +335,10 @@ "UMask": "0x80" }, { - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]", "EventCode": "0x87", "EventName": "ILD_STALL.LCP", - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. 
This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]", "SampleAfterValue": "500009", "UMask": "0x1" }, @@ -564,7 +564,7 @@ "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions", "EventCode": "0xa4", "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS", - "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the specualtive path as well as the out-of-order engine recovery past a branch misprediction.", + "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.", "SampleAfterValue": "10000003", "UMask": "0x8" }, diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json index 83346911aa63..c7c2d6ab1a93 100644 --- a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json @@ -1530,28 +1530,28 @@ }, { "BriefDescription": "Percentage of cycles in aborted transactions.", - "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles", + "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_aborted_cycles", "ScaleUnit": "100%" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@", + "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_elision", "ScaleUnit": "1cycles / elision" }, { "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.", - "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@", + "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_cycles_per_transaction", "ScaleUnit": "1cycles / transaction" }, { "BriefDescription": "Percentage of cycles within a transaction region.", - "MetricExpr": "cpu@cycles\\-t@ / cycles", + "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)", "MetricGroup": "transaction", "MetricName": "tsx_transactional_cycles", "ScaleUnit": "100%" diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py index af58b74d1644..85a3545f5b6a 100644 --- a/tools/perf/pmu-events/metric.py +++ b/tools/perf/pmu-events/metric.py @@ -408,6 +408,12 @@ def source_count(event: Event) -> Function: return Function('source_count', event) +def has_event(event: Event) -> Function: + # pylint: disable=redefined-builtin + # pylint: disable=invalid-name + return Function('has_event', event) + + class Metric: """An individual metric that will 
be specifiable on the perf command line.""" groups: Set[str] @@ -539,7 +545,7 @@ def ParsePerfJson(orig: str) -> Expression: r'Event(r"\1")', py) py = re.sub(r'#Event\(r"([^"]*)"\)', r'Literal("#\1")', py) py = re.sub(r'([0-9]+)Event\(r"(e[0-9]+)"\)', r'\1\2', py) - keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count'] + keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count', 'has_event'] for kw in keywords: py = re.sub(rf'Event\(r"{kw}"\)', kw, py) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index aa44fdc84763..1f6557ce3b0a 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -542,7 +542,6 @@ int cmd_test(int argc, const char **argv) return run_workload(workload, argc, argv); symbol_conf.priv_size = sizeof(int); - symbol_conf.sort_by_name = true; symbol_conf.try_vmlinux_path = true; if (symbol__init(NULL) < 0) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 3d01eb5e2512..c1c3fcbc2753 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -254,6 +254,10 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("source count", hashmap__size(ctx->ids) == 1); TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1", &val_ptr)); + /* has_event returns 1 when an event exists. */ + expr__add_id_val(ctx, strdup("cycles"), 2); + ret = test(ctx, "has_event(cycles)", 1); + expr__ctx_free(ctx); return 0; diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 885cd321d67b..58cf96d762d0 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -70,6 +70,8 @@ make_python_perf_so := $(python_perf_so) make_debug := DEBUG=1 make_nondistro := BUILD_NONDISTRO=1 make_extra_tests := EXTRA_TESTS=1 +make_bpf_skel := BUILD_BPF_SKEL=1 +make_gen_vmlinux_h := BUILD_BPF_SKEL=1 GEN_VMLINUX_H=1 make_no_libperl := NO_LIBPERL=1 make_no_libpython := NO_LIBPYTHON=1 make_no_scripts := NO_LIBPYTHON=1 NO_LIBPERL=1 @@ -137,6 +139,8 @@ endif run += make_python_perf_so run += make_debug run += make_nondistro +run += make_bpf_skel +run += make_gen_vmlinux_h run += make_no_libperl run += make_no_libpython run += make_no_scripts diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 133218e51ab4..b2f82847e4c3 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -20,6 +20,20 @@ #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) +static int num_core_entries(void) +{ + /* + * If the kernel supports extended type, expect events to be + * opened once for each core PMU type. 
Otherwise fall back to the legacy + * behavior of opening only one event even though there are multiple + * PMUs + */ + if (perf_pmus__supports_extended_type()) + return perf_pmus__num_core_pmus(); + + return 1; +} + static bool test_config(const struct evsel *evsel, __u64 expected_config) { __u32 type = evsel->core.attr.type; @@ -108,10 +122,21 @@ static int test__checkevent_raw(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); perf_evlist__for_each_evsel(&evlist->core, evsel) { - struct perf_pmu *pmu = NULL; + struct perf_pmu *pmu __maybe_unused = NULL; bool type_matched = false; TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 0x1a)); + TEST_ASSERT_VAL("event not parsed as raw type", + evsel->attr.type == PERF_TYPE_RAW); +#if defined(__aarch64__) + /* + * Arm doesn't have a real raw type PMU in sysfs, so raw events + * would never match any PMU. However, RAW events on Arm will + * always successfully open on the first available core PMU + * so no need to test for a matching type here. + */ + type_matched = raw_type_match = true; +#else while ((pmu = perf_pmus__scan(pmu)) != NULL) { if (pmu->type == evsel->attr.type) { TEST_ASSERT_VAL("PMU type expected once", !type_matched); @@ -120,6 +145,7 @@ static int test__checkevent_raw(struct evlist *evlist) raw_type_match = true; } } +#endif TEST_ASSERT_VAL("No PMU found for type", type_matched); } TEST_ASSERT_VAL("Raw PMU not matched", raw_type_match); @@ -327,7 +353,7 @@ static int test__checkevent_symbolic_name_modifier(struct evlist *evlist) struct perf_evsel *evsel; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == perf_pmus__num_core_pmus()); + evlist->core.nr_entries == num_core_entries()); perf_evlist__for_each_entry(&evlist->core, evsel) { TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); @@ -830,11 +856,11 @@ static int test__group1(struct evlist *evlist) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (perf_pmus__num_core_pmus() * 2)); + evlist->core.nr_entries == (num_core_entries() * 2)); TEST_ASSERT_VAL("wrong number of groups", - evlist__nr_groups(evlist) == perf_pmus__num_core_pmus()); + evlist__nr_groups(evlist) == num_core_entries()); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* instructions:k */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -873,7 +899,7 @@ static int test__group2(struct evlist *evlist) struct evsel *evsel, *leader = NULL; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (2 * perf_pmus__num_core_pmus() + 1)); + evlist->core.nr_entries == (2 * num_core_entries() + 1)); /* * TODO: Currently the software event won't be grouped with the hardware * event except for 1 PMU. @@ -1039,11 +1065,11 @@ static int test__group4(struct evlist *evlist __maybe_unused) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (perf_pmus__num_core_pmus() * 2)); + evlist->core.nr_entries == (num_core_entries() * 2)); TEST_ASSERT_VAL("wrong number of groups", - perf_pmus__num_core_pmus() == evlist__nr_groups(evlist)); + num_core_entries() == evlist__nr_groups(evlist)); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles:u + p */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1084,11 +1110,11 @@ static int test__group5(struct evlist *evlist __maybe_unused) struct evsel *evsel = NULL, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (5 * perf_pmus__num_core_pmus())); + evlist->core.nr_entries == (5 * num_core_entries())); TEST_ASSERT_VAL("wrong number of groups", - evlist__nr_groups(evlist) == (2 * perf_pmus__num_core_pmus())); + evlist__nr_groups(evlist) == (2 * num_core_entries())); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles + G */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1119,7 +1145,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); } - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles:G */ evsel = leader = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1149,7 +1175,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); } - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles */ evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1170,11 +1196,11 @@ static int test__group_gh1(struct evlist *evlist) struct evsel *evsel = NULL, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (2 * perf_pmus__num_core_pmus())); + evlist->core.nr_entries == (2 * num_core_entries())); TEST_ASSERT_VAL("wrong number of groups", - evlist__nr_groups(evlist) == perf_pmus__num_core_pmus()); + evlist__nr_groups(evlist) == num_core_entries()); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles + :H group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1211,11 +1237,11 @@ static int test__group_gh2(struct evlist *evlist) struct evsel *evsel = NULL, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (2 * perf_pmus__num_core_pmus())); + evlist->core.nr_entries == (2 * num_core_entries())); TEST_ASSERT_VAL("wrong number of groups", - evlist__nr_groups(evlist) == perf_pmus__num_core_pmus()); + evlist__nr_groups(evlist) == num_core_entries()); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles + :G group modifier */ evsel = leader = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1252,11 +1278,11 @@ static int test__group_gh3(struct evlist *evlist) struct evsel *evsel = NULL, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (2 * perf_pmus__num_core_pmus())); + evlist->core.nr_entries == (2 * num_core_entries())); TEST_ASSERT_VAL("wrong number of groups", - evlist__nr_groups(evlist) == perf_pmus__num_core_pmus()); + evlist__nr_groups(evlist) == num_core_entries()); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles:G + :u group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1293,11 +1319,11 @@ static int test__group_gh4(struct evlist *evlist) struct evsel *evsel = NULL, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (2 * perf_pmus__num_core_pmus())); + evlist->core.nr_entries == (2 * num_core_entries())); TEST_ASSERT_VAL("wrong number of groups", - evlist__nr_groups(evlist) == perf_pmus__num_core_pmus()); + evlist__nr_groups(evlist) == num_core_entries()); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles:G + :uG group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1334,9 +1360,9 @@ static int test__leader_sample1(struct evlist *evlist) struct evsel *evsel = NULL, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus())); + evlist->core.nr_entries == (3 * num_core_entries())); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles - sampling group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1386,9 +1412,9 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) struct evsel *evsel = NULL, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (2 * perf_pmus__num_core_pmus())); + evlist->core.nr_entries == (2 * num_core_entries())); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* instructions - sampling group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1425,9 +1451,9 @@ static int test__checkevent_pinned_modifier(struct evlist *evlist) struct evsel *evsel = NULL; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == perf_pmus__num_core_pmus()); + evlist->core.nr_entries == num_core_entries()); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { evsel = (i == 0 ? 
evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -1443,9 +1469,9 @@ static int test__pinned_group(struct evlist *evlist) struct evsel *evsel = NULL, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus())); + evlist->core.nr_entries == (3 * num_core_entries())); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1488,9 +1514,9 @@ static int test__exclusive_group(struct evlist *evlist) struct evsel *evsel = NULL, *leader; TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus())); + evlist->core.nr_entries == 3 * num_core_entries()); - for (int i = 0; i < perf_pmus__num_core_pmus(); i++) { + for (int i = 0; i < num_core_entries(); i++) { /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -1562,7 +1588,7 @@ static int test__checkevent_precise_max_modifier(struct evlist *evlist) struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", - evlist->core.nr_entries == (1 + perf_pmus__num_core_pmus())); + evlist->core.nr_entries == 1 + num_core_entries()); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_SW_TASK_CLOCK)); return TEST_OK; diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c index c09a9fae1689..fff58b220c07 100644 --- a/tools/perf/tests/pe-file-parsing.c +++ b/tools/perf/tests/pe-file-parsing.c @@ -34,6 +34,7 @@ static int run_dir(const char *d) struct dso *dso; struct symbol *sym; int ret; + size_t idx; scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d); ret = filename__read_build_id(filename, &bid); @@ -61,7 +62,7 @@ static int run_dir(const char *d) TEST_ASSERT_VAL("Failed to load symbols", ret == 0); dso__sort_by_name(dso); - sym = dso__find_symbol_by_name(dso, "main"); + sym = dso__find_symbol_by_name(dso, "main", &idx); TEST_ASSERT_VAL("Failed to find main", sym); dso__delete(dso); diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index f2cc187b6186..4a194420416e 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -233,6 +233,41 @@ test_aggr_task_stack_filter() fi } +test_csv_output() +{ + echo "Testing perf lock contention CSV output" + perf lock contention -i ${perfdata} -E 1 -x , --output ${result} + # count the number of commas in the header + # it should have 5: contended, total-wait, max-wait, avg-wait, type, caller + header=$(grep "# output:" ${result} | tr -d -c , | wc -c) + if [ "${header}" != "5" ]; then + echo "[Fail] Recorded result does not have enough output columns: ${header} != 5" + err=1 + exit + fi + # count the number of commas in the output + output=$(grep -v "^#" ${result} | tr -d -c , | wc -c) + if [ "${header}" != "${output}" ]; then + echo "[Fail] Recorded result does not match the number of commas: ${header} != ${output}" + err=1 + exit + fi + + if ! 
perf lock con -b true > /dev/null 2>&1 ; then + echo "[Skip] No BPF support" + return + fi + + # the perf lock contention output goes to stderr + perf lock con -a -b -E 1 -x , --output ${result} -- perf bench sched messaging > /dev/null 2>&1 + output=$(grep -v "^#" ${result} | tr -d -c , | wc -c) + if [ "${header}" != "${output}" ]; then + echo "[Fail] BPF result does not match the number of commas: ${header} != ${output}" + err=1 + exit + fi +} + check test_record @@ -244,5 +279,6 @@ test_type_filter test_lock_filter test_stack_filter test_aggr_task_stack_filter +test_csv_output exit ${err} diff --git a/tools/perf/util/bpf_skel/.gitignore b/tools/perf/util/bpf_skel/.gitignore index 7a1c832825de..cd01455e1b53 100644 --- a/tools/perf/util/bpf_skel/.gitignore +++ b/tools/perf/util/bpf_skel/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only .tmp *.skel.h +vmlinux.h diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 1d48226ae75d..8d3cfbb3cc65 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -416,8 +416,6 @@ int contention_end(u64 *ctx) return 0; } -struct rq {}; - extern struct rq runqueues __ksym; struct rq___old { diff --git a/tools/perf/util/bpf_skel/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h index c7ed51b0c1ef..ab84a6e1da5e 100644 --- a/tools/perf/util/bpf_skel/vmlinux.h +++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h @@ -171,4 +171,14 @@ struct bpf_perf_event_data_kern { struct perf_sample_data *data; struct perf_event *event; } __attribute__((preserve_access_index)); + +/* + * If 'struct rq' isn't defined for lock_contention.bpf.c, for the sake of + * rq___old and rq___new, then the type for the 'runqueues' variable ends up + * being a forward declaration (BTF_KIND_FWD) while the kernel has it defined + * (BTF_KIND_STRUCT). The definition appears in vmlinux.h rather than + * lock_contention.bpf.c for consistency with a generated vmlinux.h. 
+ */ +struct rq {}; + #endif // __VMLINUX_H diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 046fbfcfdaab..bdfead36b83a 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1320,7 +1320,9 @@ struct dso *dso__new_id(const char *name, struct dso_id *id) dso->id = *id; dso__set_long_name_id(dso, dso->name, id, false); dso__set_short_name(dso, dso->name, false); - dso->symbols = dso->symbol_names = RB_ROOT_CACHED; + dso->symbols = RB_ROOT_CACHED; + dso->symbol_names = NULL; + dso->symbol_names_len = 0; dso->data.cache = RB_ROOT; dso->inlined_nodes = RB_ROOT_CACHED; dso->srclines = RB_ROOT_CACHED; @@ -1364,7 +1366,8 @@ void dso__delete(struct dso *dso) inlines__tree_delete(&dso->inlined_nodes); srcline__tree_delete(&dso->srclines); symbols__delete(&dso->symbols); - + dso->symbol_names_len = 0; + zfree(&dso->symbol_names); if (dso->short_name_allocated) { zfree((char **)&dso->short_name); dso->short_name_allocated = false; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index b23a157c914d..b41c9782c754 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -150,7 +150,8 @@ struct dso { struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root *root; /* root of rbtree that rb_node is in */ struct rb_root_cached symbols; - struct rb_root_cached symbol_names; + struct symbol **symbol_names; + size_t symbol_names_len; struct rb_root_cached inlined_nodes; struct rb_root_cached srclines; struct { diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 3860b0c74829..4cbb092e0684 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -581,15 +581,14 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, maps__zput(al->maps); map__zput(al->map); thread__zput(al->thread); + al->thread = thread__get(thread); al->addr = addr; al->cpumode = cpumode; al->filtered = 0; - if (machine == NULL) { - al->map = NULL; + if (machine == NULL) return NULL; - } if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; @@ -605,7 +604,6 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, al->level = 'u'; } else { al->level = 'H'; - al->map = NULL; if ((cpumode == PERF_RECORD_MISC_GUEST_USER || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) && @@ -619,7 +617,6 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return NULL; } al->maps = maps__get(maps); - al->thread = thread__get(thread); al->map = map__get(maps__find(maps, al->addr)); if (al->map != NULL) { /* diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f607b5bddc76..762e2b2634a5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2924,25 +2924,19 @@ static bool find_process(const char *name) return ret ? 
false : true; } -static bool is_amd(const char *arch, const char *cpuid) +int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused, + char *msg __maybe_unused, + size_t size __maybe_unused) { - return arch && !strcmp("x86", arch) && cpuid && strstarts(cpuid, "AuthenticAMD"); -} - -static bool is_amd_ibs(struct evsel *evsel) -{ - return evsel->core.attr.precise_ip - || (evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3)); + return 0; } int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size) { - struct perf_env *env = evsel__env(evsel); - const char *arch = perf_env__arch(env); - const char *cpuid = perf_env__cpuid(env); char sbuf[STRERR_BUFSIZE]; int printed = 0, enforced = 0; + int ret; switch (err) { case EPERM: @@ -3044,16 +3038,6 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "Invalid event (%s) in per-thread mode, enable system wide with '-a'.", evsel__name(evsel)); - if (is_amd(arch, cpuid)) { - if (is_amd_ibs(evsel)) { - if (evsel->core.attr.exclude_kernel) - return scnprintf(msg, size, - "AMD IBS can't exclude kernel events. Try running at a higher privilege level."); - if (!evsel->core.system_wide) - return scnprintf(msg, size, - "AMD IBS may only be available in system-wide/per-cpu mode. Try using -a, or -C and workload affinity"); - } - } break; case ENODATA: @@ -3063,6 +3047,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, break; } + ret = arch_evsel__open_strerror(evsel, msg, size); + if (ret) + return ret; + return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" "/bin/dmesg | grep -i perf may provide additional information.\n", diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9f06d6cd5379..848534ec74fa 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -311,6 +311,7 @@ void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); void arch_evsel__set_sample_weight(struct evsel *evsel); void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr); +int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index f4e52919324e..4814262e3805 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -8,6 +8,7 @@ #include "cpumap.h" #include "cputopo.h" #include "debug.h" +#include "evlist.h" #include "expr.h" #include "expr-bison.h" #include "expr-flex.h" @@ -474,3 +475,23 @@ out: pr_debug2("literal: %s = %f\n", literal, result); return result; } + +/* Does the event 'id' parse? Determine via ctx->ids if possible. */ +double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id) +{ + struct evlist *tmp; + double ret; + + if (hashmap__find(ctx->ids, id, /*value=*/NULL)) + return 1.0; + + if (!compute_ids) + return 0.0; + + tmp = evlist__new(); + if (!tmp) + return NAN; + ret = parse_event(tmp, id) ? 
0 : 1; + evlist__delete(tmp); + return ret; +} diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index eaa44b24c555..3c1e49b3e35d 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -54,5 +54,6 @@ int expr__find_ids(const char *expr, const char *one, double expr_id_data__value(const struct expr_id_data *data); double expr_id_data__source_count(const struct expr_id_data *data); double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx); +double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 4fbf353e78e7..dbb117414710 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -113,6 +113,7 @@ min { return MIN; } if { return IF; } else { return ELSE; } source_count { return SOURCE_COUNT; } +has_event { return HAS_EVENT; } {literal} { return literal(yyscanner, sctx); } {number} { return value(yyscanner); } {symbol} { return str(yyscanner, ID, sctx->runtime); } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index f04963eb6be0..dd504afd8f36 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -37,7 +37,7 @@ } ids; } -%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT EXPR_ERROR +%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT HAS_EVENT EXPR_ERROR %left MIN MAX IF %left '|' %left '^' @@ -199,6 +199,12 @@ expr: NUMBER } | ID { $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); } | SOURCE_COUNT '(' ID ')' { $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); } +| HAS_EVENT '(' ID ')' +{ + $$.val = expr__has_event(ctx, compute_ids, $3); + $$.ids = NULL; + free($3); +} | expr '|' expr { if (is_const($1.val) && is_const($3.val)) { diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 4e8e243a6e4b..c6c9c2228578 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -25,7 +25,7 @@ "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ "-Wno-unused-value -Wno-pointer-sign " \ "-working-directory $WORKING_DIR " \ - "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -g -O2 -o - $LLVM_OPTIONS_PIPE" + "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -g -O2 -o - $LLVM_OPTIONS_PIPE" struct llvm_param llvm_param = { .clang_path = "clang", @@ -569,7 +569,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, pr_err("ERROR:\tunable to compile %s\n", path); pr_err("Hint:\tCheck error message shown above.\n"); pr_err("Hint:\tYou can also pre-compile it into .o using:\n"); - pr_err(" \t\tclang -target bpf -O2 -c %s\n", path); + pr_err(" \t\tclang --target=bpf -O2 -c %s\n", path); pr_err(" \twith proper -I and -D options.\n"); goto errout; } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f30d34903aa4..f64b83004421 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -390,7 +390,7 @@ struct symbol *map__find_symbol(struct map *map, u64 addr) return dso__find_symbol(map__dso(map), addr); } -struct symbol *map__find_symbol_by_name(struct map *map, const char *name) +struct symbol *map__find_symbol_by_name_idx(struct map *map, const char *name, size_t *idx) { struct dso *dso; @@ -398,10 +398,16 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name) return NULL; dso = map__dso(map); - if (!dso__sorted_by_name(dso)) - dso__sort_by_name(dso); + dso__sort_by_name(dso); + + return dso__find_symbol_by_name(dso, name, idx); +} + +struct symbol 
*map__find_symbol_by_name(struct map *map, const char *name) +{ + size_t idx; - return dso__find_symbol_by_name(dso, name); + return map__find_symbol_by_name_idx(map, name, &idx); } struct map *map__clone(struct map *from) diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 66a87b3d9965..1b53d53adc86 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -148,16 +148,17 @@ struct thread; * @map: the 'struct map *' in which symbols are iterated * @sym_name: the symbol name * @pos: the 'struct symbol *' to use as a loop cursor + * @idx: the cursor index in the symbol names array */ -#define __map__for_each_symbol_by_name(map, sym_name, pos) \ - for (pos = map__find_symbol_by_name(map, sym_name); \ +#define __map__for_each_symbol_by_name(map, sym_name, pos, idx) \ + for (pos = map__find_symbol_by_name_idx(map, sym_name, &idx); \ pos && \ !symbol__match_symbol_name(pos->name, sym_name, \ SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \ - pos = symbol__next_by_name(pos)) + pos = dso__next_symbol_by_name(map__dso(map), &idx)) -#define map__for_each_symbol_by_name(map, sym_name, pos) \ - __map__for_each_symbol_by_name(map, sym_name, (pos)) +#define map__for_each_symbol_by_name(map, sym_name, pos, idx) \ + __map__for_each_symbol_by_name(map, sym_name, (pos), idx) void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso); @@ -202,6 +203,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, int map__load(struct map *map); struct symbol *map__find_symbol(struct map *map, u64 addr); struct symbol *map__find_symbol_by_name(struct map *map, const char *name); +struct symbol *map__find_symbol_by_name_idx(struct map *map, const char *name, size_t *idx); void map__fixup_start(struct map *map); void map__fixup_end(struct map *map); diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 7e5e7b30510d..2247991451f3 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include <inttypes.h> #include <stdio.h> +#include <stdlib.h> #include <stdbool.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/perf_event.h> #include "util/evsel_fprintf.h" +#include "trace-event.h" struct bit_names { int bit; @@ -71,6 +73,170 @@ static void __p_read_format(char *buf, size_t size, u64 value) __p_bits(buf, size, value, bits); } +#define ENUM_ID_TO_STR_CASE(x) case x: return (#x); +static const char *stringify_perf_type_id(u64 value) +{ + switch (value) { + ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE) + ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE) + ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT) + ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE) + ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW) + ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT) + default: + return NULL; + } +} + +static const char *stringify_perf_hw_id(u64 value) +{ + switch (value) { + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_MISSES) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_BRANCH_INSTRUCTIONS) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_BRANCH_MISSES) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_BUS_CYCLES) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_STALLED_CYCLES_BACKEND) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_REF_CPU_CYCLES) + default: + return NULL; + } +} + +static const char 
*stringify_perf_hw_cache_id(u64 value) +{ + switch (value) { + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_L1D) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_L1I) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_LL) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_DTLB) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_ITLB) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_BPU) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_NODE) + default: + return NULL; + } +} + +static const char *stringify_perf_hw_cache_op_id(u64 value) +{ + switch (value) { + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_OP_READ) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_OP_WRITE) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_OP_PREFETCH) + default: + return NULL; + } +} + +static const char *stringify_perf_hw_cache_op_result_id(u64 value) +{ + switch (value) { + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_RESULT_ACCESS) + ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_RESULT_MISS) + default: + return NULL; + } +} + +static const char *stringify_perf_sw_id(u64 value) +{ + switch (value) { + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CPU_CLOCK) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_TASK_CLOCK) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_PAGE_FAULTS) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CONTEXT_SWITCHES) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CPU_MIGRATIONS) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_PAGE_FAULTS_MIN) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_PAGE_FAULTS_MAJ) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_ALIGNMENT_FAULTS) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_EMULATION_FAULTS) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_DUMMY) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_BPF_OUTPUT) + ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CGROUP_SWITCHES) + default: + return NULL; + } +} +#undef ENUM_ID_TO_STR_CASE + +#define PRINT_ID(_s, _f) \ +do { \ + const char *__s = _s; \ + if (__s == NULL) \ + snprintf(buf, size, _f, value); \ + else \ + snprintf(buf, size, _f" (%s)", value, __s); \ +} while (0) +#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64) +#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64) + +static void __p_type_id(char *buf, size_t size, u64 value) +{ + print_id_unsigned(stringify_perf_type_id(value)); +} + +static void __p_config_hw_id(char *buf, size_t size, u64 value) +{ + print_id_hex(stringify_perf_hw_id(value)); +} + +static void __p_config_sw_id(char *buf, size_t size, u64 value) +{ + print_id_hex(stringify_perf_sw_id(value)); +} + +static void __p_config_hw_cache_id(char *buf, size_t size, u64 value) +{ + const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff); + const char *hw_cache_op_str = + stringify_perf_hw_cache_op_id((value & 0xff00) >> 8); + const char *hw_cache_op_result_str = + stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16); + + if (hw_cache_str == NULL || hw_cache_op_str == NULL || + hw_cache_op_result_str == NULL) { + snprintf(buf, size, "%#"PRIx64, value); + } else { + snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value, + hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + } +} + +#ifdef HAVE_LIBTRACEEVENT +static void __p_config_tracepoint_id(char *buf, size_t size, u64 value) +{ + char *str = tracepoint_id_to_name(value); + + print_id_hex(str); + free(str); +} +#endif + +static void __p_config_id(char *buf, size_t size, u32 type, u64 value) +{ + switch (type) { + case PERF_TYPE_HARDWARE: + return __p_config_hw_id(buf, size, value); + case PERF_TYPE_SOFTWARE: + return __p_config_sw_id(buf, size, value); + case PERF_TYPE_HW_CACHE: + return __p_config_hw_cache_id(buf, size, value); + case PERF_TYPE_TRACEPOINT: +#ifdef HAVE_LIBTRACEEVENT + return __p_config_tracepoint_id(buf, 
size, value); +#endif + case PERF_TYPE_RAW: + case PERF_TYPE_BREAKPOINT: + default: + snprintf(buf, size, "%#"PRIx64, value); + return; + } +} + #define BUF_SIZE 1024 #define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val)) @@ -79,16 +245,18 @@ static void __p_read_format(char *buf, size_t size, u64 value) #define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) #define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) #define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) +#define p_type_id(val) __p_type_id(buf, BUF_SIZE, val) +#define p_config_id(val) __p_config_id(buf, BUF_SIZE, attr->type, val) -#define PRINT_ATTRn(_n, _f, _p) \ +#define PRINT_ATTRn(_n, _f, _p, _a) \ do { \ - if (attr->_f) { \ + if (_a || attr->_f) { \ _p(attr->_f); \ ret += attr__fprintf(fp, _n, buf, priv);\ } \ } while (0) -#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p) +#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p, false) int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv) @@ -96,10 +264,10 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, char buf[BUF_SIZE]; int ret = 0; - PRINT_ATTRf(type, p_unsigned); + PRINT_ATTRn("type", type, p_type_id, true); PRINT_ATTRf(size, p_unsigned); - PRINT_ATTRf(config, p_hex); - PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned); + PRINT_ATTRn("config", config, p_config_id, true); + PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned, false); PRINT_ATTRf(sample_type, p_sample_type); PRINT_ATTRf(read_format, p_read_format); @@ -141,10 +309,10 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(remove_on_exec, p_unsigned); PRINT_ATTRf(sigtrap, p_unsigned); - PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); + PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false); PRINT_ATTRf(bp_type, p_unsigned); - PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); - PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); + PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex, false); + PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex, false); PRINT_ATTRf(branch_sample_type, p_branch_sample_type); PRINT_ATTRf(sample_regs_user, p_hex); PRINT_ATTRf(sample_stack_user, p_unsigned); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 64fa568a5426..7f984a7f16ca 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -928,6 +928,31 @@ err: return NULL; } +/* Creates the PMU when sysfs scanning fails. 
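+ * A minimal "cpu" PMU of type PERF_TYPE_RAW, running on the online CPUs, is + * synthesized and added to the given core_pmus list, so that callers always + * see at least one core PMU.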
*/ +struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus) +{ + struct perf_pmu *pmu = zalloc(sizeof(*pmu)); + + if (!pmu) + return NULL; + + pmu->name = strdup("cpu"); + if (!pmu->name) { + free(pmu); + return NULL; + } + + pmu->is_core = true; + pmu->type = PERF_TYPE_RAW; + pmu->cpus = cpu_map__online(); + + INIT_LIST_HEAD(&pmu->format); + INIT_LIST_HEAD(&pmu->aliases); + INIT_LIST_HEAD(&pmu->caps); + list_add_tail(&pmu->list, core_pmus); + return pmu; +} + void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) { struct perf_pmu_format *format; @@ -1427,7 +1452,7 @@ bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu) bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu) { - return pmu->is_core && perf_pmus__num_core_pmus() == 1; + return !pmu->is_core || perf_pmus__num_core_pmus() == 1; } bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 8807a624e918..203b92860e3c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -286,6 +286,7 @@ int perf_pmu__event_source_devices_fd(void); int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags); struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name); +struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); #endif /* __PMU_H */ diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 0866dee3fc62..3cd9de42139e 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -153,7 +153,12 @@ static void pmu_read_sysfs(bool core_only) closedir(dir); if (core_only) { - read_sysfs_core_pmus = true; + if (!list_empty(&core_pmus)) + read_sysfs_core_pmus = true; + else { + if (perf_pmu__create_placeholder_core_pmu(&core_pmus)) + read_sysfs_core_pmus = true; + } } else { read_sysfs_core_pmus = true; read_sysfs_all_pmus = true; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 6e2110d605fb..16822a8a540f 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -74,7 +74,6 @@ int init_probe_symbol_maps(bool user_only) { int ret; - symbol_conf.sort_by_name = true; symbol_conf.allow_aliases = true; ret = symbol__init(NULL); if (ret < 0) { @@ -382,6 +381,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, struct symbol *sym; u64 address = 0; int ret = -ENOENT; + size_t idx; /* This can work only for function-name based one */ if (!pp->function || pp->file) @@ -392,7 +392,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, return -EINVAL; /* Find the address of given function */ - map__for_each_symbol_by_name(map, pp->function, sym) { + map__for_each_symbol_by_name(map, pp->function, sym, idx) { if (uprobes) { address = sym->start; if (sym->type == STT_GNU_IFUNC) @@ -3738,7 +3738,6 @@ out: int show_available_funcs(const char *target, struct nsinfo *nsi, struct strfilter *_filter, bool user) { - struct rb_node *nd; struct map *map; struct dso *dso; int ret; @@ -3767,17 +3766,16 @@ int show_available_funcs(const char *target, struct nsinfo *nsi, goto end; } dso = map__dso(map); - if (!dso__sorted_by_name(dso)) - dso__sort_by_name(dso); + dso__sort_by_name(dso); /* Show all (filtered) symbols */ setup_pager(); - for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) { - struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); + 
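/* dso->symbol_names[] is a name-sorted array now, so a plain index replaces the rbtree walk */ +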
for (size_t i = 0; i < dso->symbol_names_len; i++) { + struct symbol *pos = dso->symbol_names[i]; - if (strfilter__compare(_filter, pos->sym.name)) - printf("%s\n", pos->sym.name); + if (strfilter__compare(_filter, pos->name)) + printf("%s\n", pos->name); } end: map__put(map); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index a7b2cb05dc86..4eed8ec23994 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -124,6 +124,14 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, } /* + * Add this one here not to drag in util/trace-event-info.c + */ +char *tracepoint_id_to_name(u64 config) +{ + return NULL; +} + +/* * XXX: All these evsel destructors need some better mechanism, like a linked * list of destructors registered when the relevant code indeed is used instead * of having more and more calls in perf_evsel__delete(). -- acme
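The stub above keeps the python binding from dragging in trace-event-info.c; the real lookup is added to trace-event-info.c further down. A hypothetical caller, per the contract that the result is a malloc'ed "subsystem:event_name" string or NULL:

    char *name = tracepoint_id_to_name(attr->config);

    if (name) {
            printf("%s\n", name);   /* e.g. "sched:sched_switch" */
            free(name);
    }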
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index d275d3bef7d5..f849f9ef68e6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -440,38 +440,35 @@ static struct symbol *symbols__next(struct symbol *sym) return NULL; } -static void symbols__insert_by_name(struct rb_root_cached *symbols, struct symbol *sym) +static int symbols__sort_name_cmp(const void *vlhs, const void *vrhs) { - struct rb_node **p = &symbols->rb_root.rb_node; - struct rb_node *parent = NULL; - struct symbol_name_rb_node *symn, *s; - bool leftmost = true; + const struct symbol *lhs = *((const struct symbol **)vlhs); + const struct symbol *rhs = *((const struct symbol **)vrhs); - symn = container_of(sym, struct symbol_name_rb_node, sym); - - while (*p != NULL) { - parent = *p; - s = rb_entry(parent, struct symbol_name_rb_node, rb_node); - if (strcmp(sym->name, s->sym.name) < 0) - p = &(*p)->rb_left; - else { - p = &(*p)->rb_right; - leftmost = false; - } - } - rb_link_node(&symn->rb_node, parent, p); - rb_insert_color_cached(&symn->rb_node, symbols, leftmost); + return strcmp(lhs->name, rhs->name); } -static void symbols__sort_by_name(struct rb_root_cached *symbols, - struct rb_root_cached *source) +static struct symbol **symbols__sort_by_name(struct rb_root_cached *source, size_t *len) { struct rb_node *nd; + struct symbol **result; + size_t i = 0, size = 0; + + for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) + size++; + + result = malloc(sizeof(*result) * size); + if (!result) + return NULL; for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); - symbols__insert_by_name(symbols, pos); + + result[i++] = pos; } + qsort(result, size, sizeof(*result), symbols__sort_name_cmp); + *len = size; + return result; } int symbol__match_symbol_name(const char *name, const char *str, @@ -491,48 +488,53 @@ return arch__compare_symbol_names(name, str); } -static struct symbol *symbols__find_by_name(struct rb_root_cached *symbols, +static struct symbol *symbols__find_by_name(struct symbol *symbols[], + size_t symbols_len, const char *name, - enum symbol_tag_include includes) + enum symbol_tag_include includes, + size_t *found_idx) { - struct rb_node *n; - struct symbol_name_rb_node *s = NULL; + size_t i, lower = 0, upper = symbols_len; + struct symbol *s = NULL; - if (symbols == NULL) - return NULL; + if (found_idx) + *found_idx = SIZE_MAX; - n = symbols->rb_root.rb_node; + if (!symbols_len) + return NULL; - while (n) { + while (lower < upper) { int cmp; - s = rb_entry(n, struct symbol_name_rb_node, rb_node); - cmp = symbol__match_symbol_name(s->sym.name, name, includes); + i = (lower + upper) / 2; + cmp = symbol__match_symbol_name(symbols[i]->name, name, includes); if (cmp > 0) - n = n->rb_left; + upper = i; else if (cmp < 0) - n = n->rb_right; - else + lower = i + 1; + else { + if (found_idx) + *found_idx = i; + s = symbols[i]; break; + } } - - if (n == NULL) - return NULL; - - if (includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY) + if (s && includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY) { /* return first symbol that has same name (if any) */ - for (n = rb_prev(n); n; n = rb_prev(n)) { - struct symbol_name_rb_node *tmp; - - tmp = rb_entry(n, struct symbol_name_rb_node, rb_node); - if (arch__compare_symbol_names(tmp->sym.name, s->sym.name)) + for (; i > 0; i--) { + struct symbol *tmp = symbols[i - 1]; + + if (!arch__compare_symbol_names(tmp->name, s->name)) { + if (found_idx) + *found_idx = i - 1; + s = tmp; + } else break; - - s = tmp; } - - return &s->sym; + } + assert(!found_idx || !s || s == symbols[*found_idx]); + return s; } void dso__reset_find_symbol_cache(struct dso *dso) @@ -590,31 +592,41 @@ struct symbol *dso__next_symbol(struct symbol *sym) return symbols__next(sym); } -struct symbol *symbol__next_by_name(struct symbol *sym) +struct symbol *dso__next_symbol_by_name(struct dso *dso, size_t *idx) { - struct symbol_name_rb_node *s = container_of(sym, struct symbol_name_rb_node, sym); - struct rb_node *n = rb_next(&s->rb_node); + if (*idx + 1 >= dso->symbol_names_len) + return NULL; - return n ? &rb_entry(n, struct symbol_name_rb_node, rb_node)->sym : NULL; + ++*idx; + return dso->symbol_names[*idx]; } /* * Returns first symbol that matched with @name. */ -struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name) +struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name, size_t *idx) { - struct symbol *s = symbols__find_by_name(&dso->symbol_names, name, - SYMBOL_TAG_INCLUDE__NONE); + struct symbol *s = symbols__find_by_name(dso->symbol_names, dso->symbol_names_len, + name, SYMBOL_TAG_INCLUDE__NONE, idx); if (!s) - s = symbols__find_by_name(&dso->symbol_names, name, - SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); + s = symbols__find_by_name(dso->symbol_names, dso->symbol_names_len, + name, SYMBOL_TAG_INCLUDE__DEFAULT_ONLY, idx); return s; } void dso__sort_by_name(struct dso *dso) { - dso__set_sorted_by_name(dso); - return symbols__sort_by_name(&dso->symbol_names, &dso->symbols); + mutex_lock(&dso->lock); + if (!dso__sorted_by_name(dso)) { + size_t len; + + dso->symbol_names = symbols__sort_by_name(&dso->symbols, &len); + if (dso->symbol_names) { + dso->symbol_names_len = len; + dso__set_sorted_by_name(dso); + } + } + mutex_unlock(&dso->lock); } /* @@ -2656,10 +2668,6 @@ int symbol__init(struct perf_env *env) symbol__elf_init(); - if (symbol_conf.sort_by_name) - symbol_conf.priv_size += (sizeof(struct symbol_name_rb_node) - - sizeof(struct symbol)); - if (symbol_conf.try_vmlinux_path && vmlinux_path__init(env) < 0) return -1; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5ca8665dd2c1..af87c46b3f89 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -43,8 +43,7 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, /** * A symtab entry. When allocated this may be preceded by an annotation (see - * symbol__annotation), a browser_index (see symbol__browser_index) and rb_node - * to sort by name (see struct symbol_name_rb_node). 
+ * symbol__annotation) and/or a browser_index (see symbol__browser_index). */ struct symbol { struct rb_node rb_node; @@ -95,11 +94,6 @@ static inline size_t symbol__size(const struct symbol *sym) struct strlist; struct intlist; -struct symbol_name_rb_node { - struct rb_node rb_node; - struct symbol sym; -}; - static inline int __symbol__join_symfs(char *bf, size_t size, const char *path) { return path__join(bf, size, symbol_conf.symfs, path); @@ -136,9 +130,9 @@ void dso__delete_symbol(struct dso *dso, struct symbol *dso__find_symbol(struct dso *dso, u64 addr); struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr); -struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name); -struct symbol *symbol__next_by_name(struct symbol *sym); +struct symbol *dso__next_symbol_by_name(struct dso *dso, size_t *idx); +struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name, size_t *idx); struct symbol *dso__first_symbol(struct dso *dso); struct symbol *dso__last_symbol(struct dso *dso); diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index f26f81eb8252..0b589570d1d0 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -18,7 +18,6 @@ struct symbol_conf { show_kernel_path, use_modules, allow_aliases, - sort_by_name, show_nr_samples, show_total_period, use_callchain, diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index d9e5ad040b6a..088f4abf230f 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -63,13 +63,11 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp) { size_t ret = 0; - struct rb_node *nd; - struct symbol_name_rb_node *pos; - for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) { - pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); - ret += fprintf(fp, "%s\n", pos->sym.name); - } + for (size_t i = 0; i < dso->symbol_names_len; i++) { + struct symbol *pos = dso->symbol_names[i]; + ret += fprintf(fp, "%s\n", pos->name); + } return ret; } diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index c24b3a15e319..319ccf09a435 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -466,6 +466,18 @@ next: return NULL; } +char *tracepoint_id_to_name(u64 config) +{ + struct tracepoint_path *path = tracepoint_id_to_path(config); + char *buf = NULL; + + if (path && asprintf(&buf, "%s:%s", path->system, path->name) < 0) + buf = NULL; + + put_tracepoints_path(path); + return buf; +} + static struct tracepoint_path *tracepoint_name_to_path(const char *name) { struct tracepoint_path *path = zalloc(sizeof(*path)); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index a0cff184b1cd..a69ee29419f3 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -62,6 +62,12 @@ unsigned long long eval_flag(const char *flag); int read_tracing_data(int fd, struct list_head *pattrs); +/* + * Return the tracepoint name in the format "subsystem:event_name", + * callers should free the returned string. 
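+ * Returns NULL if the config value does not resolve to a known tracepoint; + * the perf python binding stubs this out to always return NULL.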
+ */ +char *tracepoint_id_to_name(u64 config); + struct tracing_data { /* size is only valid if temp is 'true' */ ssize_t size; diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 83eea968482e..2a96df4c8d42 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -66,9 +66,13 @@ static int __report_module(struct addr_location *al, u64 ip, mod = 0; } - if (!mod) - mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1, + if (!mod) { + char filename[PATH_MAX]; + + __symbol__join_symfs(filename, sizeof(filename), dso->long_name); + mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, map__start(al->map) - map__pgoff(al->map), false); + } if (!mod) { char filename[PATH_MAX];
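The unwind-libdw change above makes the libdw path honour --symfs the same way the existing fallback already does. A minimal sketch of the helper's effect, using a hypothetical symfs root and library path (not taken from the patch; assumes perf's util/symbol.h and PATH_MAX are available):

    char filename[PATH_MAX];

    /* __symbol__join_symfs() expands to path__join(buf, size, symbol_conf.symfs, path);
     * with symbol_conf.symfs = "/sysroot" this writes "/sysroot/usr/lib/libc.so.6"
     * into filename, which dwfl_report_elf() can then open. */
    __symbol__join_symfs(filename, sizeof(filename), "/usr/lib/libc.so.6");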
"vse8.v v0, (%2)\n\t" + "add %1, %2, %0\n\t" + "vse8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory"); + + ksft_print_msg("vl = %lu\n", vl); + + if (datap[0] != 0x00 && datap[0] != 0xff) { + ksft_test_result_fail("v-regesters are not properly initialized\n"); + dump(datap, vl * 4); + exit(-1); + } + + for (i = 1; i < vl * 4; i++) { + if (datap[i] != datap[0]) { + ksft_test_result_fail("detect stale values on v-regesters\n"); + dump(datap, vl * 4); + exit(-2); + } + } + + free(datap); + ksft_exit_pass(); + return 0; +} |