From 63692df103e9c76b26c382ce8079283b7df9a99a Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 23 Oct 2014 14:22:12 -0400 Subject: [PATCH] PCI: Allow numa_node override via sysfs NUMA systems with ACPI normally describe the physical topology via _PXM methods. But many BIOSes don't implement _PXM, which leaves the kernel with no way to discover the device topology, which reduces performance because we can't put memory and processes close to the device. The NUMA node of a PCI device is already exported in the sysfs "numa_node" file. Make that file writable so users can workaround the lack of _PXM methods in the BIOS. For example: echo 3 > /sys/devices/pci0000:ff/0000:03:1f.3/numa_node sets the node for PCI device 0000:03:1f.3. Writing the file emits a FW_BUG warning to encourage users to request firmware updates. It also taints the kernel with TAINT_FIRMWARE_WORKAROUND because overriding the node incorrectly can cause performance issues. [bhelgaas: changelog, documentation text] Signed-off-by: Prarit Bhargava Signed-off-by: Bjorn Helgaas CC: Myron Stowe CC: Alexander Ducyk CC: Jiang Liu --- Documentation/ABI/testing/sysfs-bus-pci | 13 ++++++++++++ drivers/pci/pci-sysfs.c | 27 ++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index ee6c04036492..b3bc50f650ee 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -281,3 +281,16 @@ Description: opt-out of driver binding using a driver_override name such as "none". Only a single driver may be specified in the override, there is no support for parsing delimiters. + +What: /sys/bus/pci/devices/.../numa_node +Date: Oct 2014 +Contact: Prarit Bhargava +Description: + This file contains the NUMA node to which the PCI device is + attached, or -1 if the node is unknown. The initial value + comes from an ACPI _PXM method or a similar firmware + source. If that is missing or incorrect, this file can be + written to override the node. In that case, please report + a firmware bug to the system vendor. Writing to this file + taints the kernel with TAINT_FIRMWARE_WORKAROUND, which + reduces the supportability of your system. diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 92b6d9ab00e4..91e760f9655b 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -221,12 +221,37 @@ static ssize_t enabled_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(enabled); #ifdef CONFIG_NUMA +static ssize_t numa_node_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int node, ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = kstrtoint(buf, 0, &node); + if (ret) + return ret; + + if (!node_online(node)) + return -EINVAL; + + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + dev_alert(&pdev->dev, FW_BUG "Overriding NUMA node to %d. Contact your vendor for updates.", + node); + + dev->numa_node = node; + return count; +} + static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", dev->numa_node); } -static DEVICE_ATTR_RO(numa_node); +static DEVICE_ATTR_RW(numa_node); #endif static ssize_t dma_mask_bits_show(struct device *dev, -- 2.34.1